mirror of
https://git.savannah.gnu.org/git/emacs.git
synced 2025-01-11 16:08:13 +00:00
Merge FFAP's URI-detection code into thingatpt.el.
* lisp/ffap.el: Require thingatpt. (ffap-url-at-point): Delegate URI detection to thing-at-point. All URI-valid characters are now recognized. (ffap-string-at-point): Use use-region-p. (ffap-url-regexp): Extra character is handled by thing-at-point. (ffap-string-at-point-mode-alist): Allow parentheses. (ffap-newsgroup-regexp, ffap-newsgroup-heads, ffap-newsgroup-p): Convert to aliases; code moved to thingatpt.el. (ffap-gnus-hook): Use setq-local. * lisp/thingatpt.el: Rewrite the URL detection routines, absorbing some code from ffap.el. (thing-at-point-beginning-of-url-regexp): New var. (thing-at-point-uri-schemes): Update list of URI schemes. (thing-at-point-url-regexp): Variable deleted. (thing-at-point-markedup-url-regexp): Disallow newlines. (thing-at-point-newsgroup-regexp) (thing-at-point-newsgroup-heads) (thing-at-point-default-mail-uri-scheme): New variables. (thing-at-point-bounds-of-url-at-point): Rewrite. Use ffap's method to find the possible bounds of the URI at point. New optional argument to find ill-formed URIs. (thing-at-point-url-at-point): Rewrite. New arguments for finding ill-formed URIs. Use thing-at-point-bounds-of-url-at-point, and the scheme-adding heuristics from ffap-url-at-point. (thing-at-point--bounds-of-well-formed-url): New function. Do parens matching to decide whether to include parens in the URI * test/automated/thingatpt.el: New file. Fixes: debbugs:5673
This commit is contained in:
parent
84a06b500f
commit
6e5c1569e9
@ -1,3 +1,34 @@
|
||||
2013-02-04 Chong Yidong <cyd@gnu.org>
|
||||
|
||||
* thingatpt.el: Rewrite the URL detection routines, absorbing some
|
||||
code from ffap.el.
|
||||
(thing-at-point-beginning-of-url-regexp): New var.
|
||||
(thing-at-point-uri-schemes): Update list of URI schemes.
|
||||
(thing-at-point-url-regexp): Variable deleted.
|
||||
(thing-at-point-markedup-url-regexp): Disallow newlines.
|
||||
(thing-at-point-newsgroup-regexp)
|
||||
(thing-at-point-newsgroup-heads)
|
||||
(thing-at-point-default-mail-uri-scheme): New variables.
|
||||
(thing-at-point-bounds-of-url-at-point): Rewrite. Use ffap's
|
||||
method to find the possible bounds of the URI at point. New
|
||||
optional argument to find ill-formed URIs.
|
||||
(thing-at-point-url-at-point): Rewrite. New arguments for finding
|
||||
ill-formed URIs. Use thing-at-point-bounds-of-url-at-point, and
|
||||
the scheme-adding heuristics from ffap-url-at-point.
|
||||
(thing-at-point--bounds-of-well-formed-url): New function. Do
|
||||
parens matching to decide whether to include parens in the URI
|
||||
(Bug#9153).
|
||||
|
||||
* ffap.el: Require thingatpt.
|
||||
(ffap-url-at-point): Delegate URI detection to thing-at-point.
|
||||
All URI-valid characters are now recognized (Bug#5673).
|
||||
(ffap-string-at-point): Use use-region-p.
|
||||
(ffap-url-regexp): Extra character is handled by thing-at-point.
|
||||
(ffap-string-at-point-mode-alist): Allow parentheses.
|
||||
(ffap-newsgroup-regexp, ffap-newsgroup-heads, ffap-newsgroup-p):
|
||||
Convert to aliases; code moved to thingatpt.el.
|
||||
(ffap-gnus-hook): Use setq-local.
|
||||
|
||||
2013-02-04 Glenn Morris <rgm@gnu.org>
|
||||
|
||||
* emacs-lisp/ert.el (ert--explain-format-atom):
|
||||
|
115
lisp/ffap.el
115
lisp/ffap.el
@ -106,6 +106,7 @@
|
||||
;;; Code:
|
||||
|
||||
(require 'url-parse)
|
||||
(require 'thingatpt)
|
||||
|
||||
(define-obsolete-variable-alias 'ffap-version 'emacs-version "23.2")
|
||||
|
||||
@ -178,16 +179,14 @@ Note this name may be omitted if it equals the default
|
||||
:group 'ffap)
|
||||
|
||||
(defvar ffap-url-regexp
|
||||
;; Could just use `url-nonrelative-link' of w3, if loaded.
|
||||
;; This regexp is not exhaustive, it just matches common cases.
|
||||
(concat
|
||||
"\\("
|
||||
"news\\(post\\)?:\\|mailto:\\|file:" ; no host ok
|
||||
"\\|"
|
||||
"\\(ftp\\|https?\\|telnet\\|gopher\\|www\\|wais\\)://" ; needs host
|
||||
"\\)." ; require one more character
|
||||
)
|
||||
"Regexp matching URLs. Use nil to disable URL features in ffap.")
|
||||
"\\)")
|
||||
"Regexp matching the beginning of a URI, for FFAP.
|
||||
If the value is nil, disable URL-matching features in ffap.")
|
||||
|
||||
(defcustom ffap-foo-at-bar-prefix "mailto"
|
||||
"Presumed URL prefix type of strings like \"<foo.9z@bar>\".
|
||||
@ -571,38 +570,9 @@ Looks at `ffap-ftp-default-user', returns \"\" for \"localhost\"."
|
||||
(ffap-ftp-regexp (ffap-host-to-filename mach))
|
||||
))
|
||||
|
||||
(defvar ffap-newsgroup-regexp "^[[:lower:]]+\\.[-+[:lower:]_0-9.]+$"
|
||||
"Strings not matching this fail `ffap-newsgroup-p'.")
|
||||
(defvar ffap-newsgroup-heads ; entirely inadequate
|
||||
'("alt" "comp" "gnu" "misc" "news" "sci" "soc" "talk")
|
||||
"Used by `ffap-newsgroup-p' if gnus is not running.")
|
||||
|
||||
(defun ffap-newsgroup-p (string)
|
||||
"Return STRING if it looks like a newsgroup name, else nil."
|
||||
(and
|
||||
(string-match ffap-newsgroup-regexp string)
|
||||
(let ((htbs '(gnus-active-hashtb gnus-newsrc-hashtb gnus-killed-hashtb))
|
||||
(heads ffap-newsgroup-heads)
|
||||
htb ret)
|
||||
(while htbs
|
||||
(setq htb (car htbs) htbs (cdr htbs))
|
||||
(condition-case nil
|
||||
(progn
|
||||
;; errs: htb symbol may be unbound, or not a hash-table.
|
||||
;; gnus-gethash is just a macro for intern-soft.
|
||||
(and (symbol-value htb)
|
||||
(intern-soft string (symbol-value htb))
|
||||
(setq ret string htbs nil))
|
||||
;; If we made it this far, gnus is running, so ignore "heads":
|
||||
(setq heads nil))
|
||||
(error nil)))
|
||||
(or ret (not heads)
|
||||
(let ((head (string-match "\\`\\([[:lower:]]+\\)\\." string)))
|
||||
(and head (setq head (substring string 0 (match-end 1)))
|
||||
(member head heads)
|
||||
(setq ret string))))
|
||||
;; Is there ever a need to modify string as a newsgroup name?
|
||||
ret)))
|
||||
(defvaralias 'ffap-newsgroup-regexp 'thing-at-point-newsgroup-regexp)
|
||||
(defvaralias 'ffap-newsgroup-heads 'thing-at-point-newsgroup-heads)
|
||||
(defalias 'ffap-newsgroup-p 'thing-at-point-newsgroup-p)
|
||||
|
||||
(defsubst ffap-url-p (string)
|
||||
"If STRING looks like an URL, return it (maybe improved), else nil."
|
||||
@ -1017,7 +987,7 @@ If a given RFC isn't in these then `ffap-rfc-path' is offered."
|
||||
;; * no commas (good for latex)
|
||||
(file "--:\\\\$+<>@-Z_[:alpha:]~*?" "<@" "@>;.,!:")
|
||||
;; An url, or maybe a email/news message-id:
|
||||
(url "--:=&?$+@-Z_[:alpha:]~#,%;*" "^[:alnum:]" ":;.,!?")
|
||||
(url "--:=&?$+@-Z_[:alpha:]~#,%;*()!'" "^[0-9a-zA-Z]" ":;.,!?")
|
||||
;; Find a string that does *not* contain a colon:
|
||||
(nocolon "--9$+<>@-Z_[:alpha:]~" "<@" "@>;.,!?")
|
||||
;; A machine:
|
||||
@ -1031,7 +1001,7 @@ possibly a major-mode name, or one of the symbol
|
||||
Function `ffap-string-at-point' uses the data fields as follows:
|
||||
1. find a maximal string of CHARS around point,
|
||||
2. strip BEG chars before point from the beginning,
|
||||
3. Strip END chars after point from the end.")
|
||||
3. strip END chars after point from the end.")
|
||||
|
||||
(defvar ffap-string-at-point nil
|
||||
;; Added at suggestion of RHOGEE (for ff-paths), 7/24/95.
|
||||
@ -1050,22 +1020,22 @@ Sets the variable `ffap-string-at-point' and the variable
|
||||
(or (assq (or mode major-mode) ffap-string-at-point-mode-alist)
|
||||
(assq 'file ffap-string-at-point-mode-alist))))
|
||||
(pt (point))
|
||||
(str
|
||||
(if (and transient-mark-mode mark-active)
|
||||
(buffer-substring
|
||||
(setcar ffap-string-at-point-region (region-beginning))
|
||||
(setcar (cdr ffap-string-at-point-region) (region-end)))
|
||||
(buffer-substring
|
||||
(save-excursion
|
||||
(skip-chars-backward (car args))
|
||||
(skip-chars-forward (nth 1 args) pt)
|
||||
(setcar ffap-string-at-point-region (point)))
|
||||
(save-excursion
|
||||
(skip-chars-forward (car args))
|
||||
(skip-chars-backward (nth 2 args) pt)
|
||||
(setcar (cdr ffap-string-at-point-region) (point)))))))
|
||||
(set-text-properties 0 (length str) nil str)
|
||||
(setq ffap-string-at-point str)))
|
||||
(beg (if (use-region-p)
|
||||
(region-beginning)
|
||||
(save-excursion
|
||||
(skip-chars-backward (car args))
|
||||
(skip-chars-forward (nth 1 args) pt)
|
||||
(point))))
|
||||
(end (if (use-region-p)
|
||||
(region-end)
|
||||
(save-excursion
|
||||
(skip-chars-forward (car args))
|
||||
(skip-chars-backward (nth 2 args) pt)
|
||||
(point)))))
|
||||
(setq ffap-string-at-point
|
||||
(buffer-substring-no-properties
|
||||
(setcar ffap-string-at-point-region beg)
|
||||
(setcar (cdr ffap-string-at-point-region) end)))))
|
||||
|
||||
(defun ffap-string-around ()
|
||||
;; Sometimes useful to decide how to treat a string.
|
||||
@ -1098,35 +1068,15 @@ Assumes the buffer has not changed."
|
||||
|
||||
(defun ffap-url-at-point ()
|
||||
"Return URL from around point if it exists, or nil."
|
||||
;; Could use w3's url-get-url-at-point instead. Both handle "URL:",
|
||||
;; ignore non-relative links, trim punctuation. The other will
|
||||
;; actually look back if point is in whitespace, but I would rather
|
||||
;; ffap be less aggressive in such situations.
|
||||
(when ffap-url-regexp
|
||||
(or (and (eq major-mode 'w3-mode) ; In a w3 buffer button?
|
||||
(w3-view-this-url t))
|
||||
;; Is there a reason not to strip trailing colon?
|
||||
(let ((name (ffap-string-at-point 'url)))
|
||||
(cond
|
||||
((string-match "^url:" name) (setq name (substring name 4)))
|
||||
((and (string-match "\\`[^:</>@]+@[^:</>@]+[[:alnum:]]\\'" name)
|
||||
;; "foo@bar": could be "mailto" or "news" (a Message-ID).
|
||||
;; Without "<>" it must be "mailto". Otherwise could be
|
||||
;; either, so consult `ffap-foo-at-bar-prefix'.
|
||||
(let ((prefix (if (and (equal (ffap-string-around) "<>")
|
||||
;; Expect some odd characters:
|
||||
(string-match "[$.0-9].*[$.0-9].*@" name))
|
||||
;; Could be news:
|
||||
ffap-foo-at-bar-prefix
|
||||
"mailto")))
|
||||
(and prefix (setq name (concat prefix ":" name))))))
|
||||
((ffap-newsgroup-p name) (setq name (concat "news:" name)))
|
||||
((and (string-match "\\`[[:alnum:]]+\\'" name) ; <mic> <root> <nobody>
|
||||
(equal (ffap-string-around) "<>")
|
||||
;; (ffap-user-p name):
|
||||
(not (string-match "~" (expand-file-name (concat "~" name)))))
|
||||
(setq name (concat "mailto:" name)))
|
||||
((ffap-url-p name)))))))
|
||||
(let ((thing-at-point-beginning-of-url-regexp ffap-url-regexp)
|
||||
(thing-at-point-default-mail-uri-scheme ffap-foo-at-bar-prefix))
|
||||
(thing-at-point-url-at-point t
|
||||
(if (use-region-p)
|
||||
(cons (region-beginning)
|
||||
(region-end))))))))
|
||||
|
||||
(defvar ffap-gopher-regexp
|
||||
"^.*\\<\\(Type\\|Name\\|Path\\|Host\\|Port\\) *= *\\(.*\\) *$"
|
||||
@ -1763,7 +1713,8 @@ Only intended for interactive use."
|
||||
|
||||
(defun ffap-gnus-hook ()
|
||||
"Bind `ffap-gnus-next' and `ffap-gnus-menu' to M-l and M-m, resp."
|
||||
(set (make-local-variable 'ffap-foo-at-bar-prefix) "news") ; message-id's
|
||||
;; message-id's
|
||||
(setq-local thing-at-point-default-mail-uri-scheme "news")
|
||||
;; Note "l", "L", "m", "M" are taken:
|
||||
(local-set-key "\M-l" 'ffap-gnus-next)
|
||||
(local-set-key "\M-m" 'ffap-gnus-menu))
|
||||
|
@ -232,7 +232,7 @@ The bounds of THING are determined by `bounds-of-thing-at-point'."
|
||||
(put 'defun 'end-op 'end-of-defun)
|
||||
(put 'defun 'forward-op 'end-of-defun)
|
||||
|
||||
;; Filenames and URLs www.com/foo%32bar
|
||||
;; Filenames
|
||||
|
||||
(defvar thing-at-point-file-name-chars "-~/[:alnum:]_.${}#%,:"
|
||||
"Characters allowable in filenames.")
|
||||
@ -248,94 +248,224 @@ The bounds of THING are determined by `bounds-of-thing-at-point'."
|
||||
(forward-char)
|
||||
(goto-char (point-min)))))
|
||||
|
||||
;; URIs
|
||||
|
||||
(defvar thing-at-point-beginning-of-url-regexp nil
|
||||
"Regexp matching the beginning of a well-formed URI.
|
||||
If nil, construct the regexp from `thing-at-point-uri-schemes'.")
|
||||
|
||||
(defvar thing-at-point-url-path-regexp
|
||||
"[^]\t\n \"'<>[^`{}]*[^]\t\n \"'<>[^`{}.,;]+"
|
||||
"A regular expression probably matching the host and filename or e-mail part of a URL.")
|
||||
"Regexp matching the host and filename or e-mail part of a URL.")
|
||||
|
||||
(defvar thing-at-point-short-url-regexp
|
||||
(concat "[-A-Za-z0-9]+\\.[-A-Za-z0-9.]+" thing-at-point-url-path-regexp)
|
||||
"A regular expression probably matching a URL without an access scheme.
|
||||
Hostname matching is stricter in this case than for
|
||||
``thing-at-point-url-regexp''.")
|
||||
"Regexp matching a URI without a scheme component.")
|
||||
|
||||
(defvar thing-at-point-uri-schemes
|
||||
;; Officials from http://www.iana.org/assignments/uri-schemes.html
|
||||
'("ftp://" "http://" "gopher://" "mailto:" "news:" "nntp:"
|
||||
"telnet://" "wais://" "file:/" "prospero:" "z39.50s:" "z39.50r:"
|
||||
"cid:" "mid:" "vemmi:" "service:" "imap:" "nfs:" "acap:" "rtsp:"
|
||||
"tip:" "pop:" "data:" "dav:" "opaquelocktoken:" "sip:" "tel:" "fax:"
|
||||
"modem:" "ldap:" "https://" "soap.beep:" "soap.beeps:" "urn:" "go:"
|
||||
"afs:" "tn3270:" "mailserver:"
|
||||
"crid:" "dict:" "dns:" "dtn:" "h323:" "im:" "info:" "ipp:"
|
||||
"iris.beep:" "mtqp:" "mupdate:" "pres:" "sips:" "snmp:" "tag:"
|
||||
"tftp:" "xmlrpc.beep:" "xmlrpc.beeps:" "xmpp:"
|
||||
;; Compatibility
|
||||
"snews:" "irc:" "mms://" "mmsh://")
|
||||
"Uniform Resource Identifier (URI) Schemes.")
|
||||
'("aaa://" "about:" "acap://" "apt:" "bzr://" "bzr+ssh://"
|
||||
"attachment:/" "chrome://" "cid:" "content://" "crid://" "cvs://"
|
||||
"data:" "dav:" "dict://" "doi:" "dns:" "dtn:" "feed:" "file:/"
|
||||
"finger://" "fish://" "ftp://" "geo:" "git://" "go:" "gopher://"
|
||||
"h323:" "http://" "https://" "im:" "imap://" "info:" "ipp:"
|
||||
"irc://" "irc6://" "ircs://" "iris.beep:" "jar:" "ldap://"
|
||||
"ldaps://" "mailto:" "mid:" "mtqp://" "mupdate://" "news:"
|
||||
"nfs://" "nntp://" "opaquelocktoken:" "pop://" "pres:"
|
||||
"resource://" "rmi://" "rsync://" "rtsp://" "rtspu://" "service:"
|
||||
"sftp://" "sip:" "sips:" "smb://" "sms:" "snmp://" "soap.beep://"
|
||||
"soap.beeps://" "ssh://" "svn://" "svn+ssh://" "tag:" "tel:"
|
||||
"telnet://" "tftp://" "tip://" "tn3270://" "udp://" "urn:"
|
||||
"uuid:" "vemmi://" "webcal://" "xri://" "xmlrpc.beep://"
|
||||
"xmlrpc.beeps://" "z39.50r://" "z39.50s://" "xmpp:"
|
||||
;; Compatibility
|
||||
"fax:" "mms://" "mmsh://" "modem:" "prospero:" "snews:"
|
||||
"wais://")
|
||||
"List of URI schemes recognized by `thing-at-point-url-at-point'.
|
||||
Each string in this list should correspond to the start of a
|
||||
URI's scheme component, up to and including the trailing // if
|
||||
the scheme calls for that to be present.")
|
||||
|
||||
(defvar thing-at-point-url-regexp
|
||||
(concat "\\<\\(" (mapconcat 'identity thing-at-point-uri-schemes "\\|") "\\)"
|
||||
thing-at-point-url-path-regexp)
|
||||
"A regular expression probably matching a complete URL.")
|
||||
(defvar thing-at-point-markedup-url-regexp "<URL:\\([^<>\n]+\\)>"
|
||||
"Regexp matching a URL marked up per RFC1738.
|
||||
This kind of markup was formerly recommended as a way to indicate
|
||||
URIs, but as of RFC 3986 it is no longer recommended.
|
||||
Subexpression 1 should contain the delimited URL.")
|
||||
|
||||
(defvar thing-at-point-markedup-url-regexp
|
||||
"<URL:[^>]+>"
|
||||
"A regular expression matching a URL marked up per RFC1738.
|
||||
This may contain whitespace (including newlines) .")
|
||||
(defvar thing-at-point-newsgroup-regexp
|
||||
"\\`[[:lower:]]+\\.[-+[:lower:]_0-9.]+\\'"
|
||||
"Regexp matching a newsgroup name.")
|
||||
|
||||
(defvar thing-at-point-newsgroup-heads
|
||||
'("alt" "comp" "gnu" "misc" "news" "sci" "soc" "talk")
|
||||
"Used by `thing-at-point-newsgroup-p' if gnus is not running.")
|
||||
|
||||
(defvar thing-at-point-default-mail-uri-scheme "mailto"
|
||||
"Default scheme for ill-formed URIs that look like <foo@example.com>.
|
||||
If nil, do not give such URIs a scheme.")
|
||||
|
||||
(put 'url 'bounds-of-thing-at-point 'thing-at-point-bounds-of-url-at-point)
|
||||
(defun thing-at-point-bounds-of-url-at-point ()
|
||||
(let ((strip (thing-at-point-looking-at
|
||||
thing-at-point-markedup-url-regexp))) ;; (url "") short
|
||||
(if (or strip
|
||||
(thing-at-point-looking-at thing-at-point-url-regexp)
|
||||
;; Access scheme omitted?
|
||||
;; (setq short (thing-at-point-looking-at
|
||||
;; thing-at-point-short-url-regexp))
|
||||
)
|
||||
(let ((beginning (match-beginning 0))
|
||||
(end (match-end 0)))
|
||||
(when strip
|
||||
(setq beginning (+ beginning 5))
|
||||
(setq end (- end 1)))
|
||||
(cons beginning end)))))
|
||||
|
||||
(defun thing-at-point-bounds-of-url-at-point (&optional lax)
|
||||
"Return a cons cell containing the start and end of the URI at point.
|
||||
Try to find a URI using `thing-at-point-markedup-url-regexp'.
|
||||
If that fails, try with `thing-at-point-beginning-of-url-regexp'.
|
||||
If that also fails, and optional argument LAX is non-nil, return
|
||||
the bounds of a possible ill-formed URI (one lacking a scheme)."
|
||||
;; Look for the old <URL:foo> markup. If found, use it.
|
||||
(or (thing-at-point--bounds-of-markedup-url)
|
||||
;; Otherwise, find the bounds within which a URI may exist. The
|
||||
;; method is similar to `ffap-string-at-point'. Note that URIs
|
||||
;; may contain parentheses but may not contain spaces (RFC3986).
|
||||
(let* ((allowed-chars "--:=&?$+@-Z_[:alpha:]~#,%;*()!'")
|
||||
(skip-before "^[0-9a-zA-Z]")
|
||||
(skip-after ":;.,!?")
|
||||
(pt (point))
|
||||
(beg (save-excursion
|
||||
(skip-chars-backward allowed-chars)
|
||||
(skip-chars-forward skip-before pt)
|
||||
(point)))
|
||||
(end (save-excursion
|
||||
(skip-chars-forward allowed-chars)
|
||||
(skip-chars-backward skip-after pt)
|
||||
(point))))
|
||||
(or (thing-at-point--bounds-of-well-formed-url beg end pt)
|
||||
(if lax (cons beg end))))))
|
||||
|
||||
(defun thing-at-point--bounds-of-markedup-url ()
|
||||
(when thing-at-point-markedup-url-regexp
|
||||
(let ((case-fold-search t)
|
||||
(pt (point))
|
||||
(beg (line-beginning-position))
|
||||
(end (line-end-position))
|
||||
found)
|
||||
(save-excursion
|
||||
(goto-char beg)
|
||||
(while (and (not found)
|
||||
(<= (point) pt)
|
||||
(< (point) end))
|
||||
(and (re-search-forward thing-at-point-markedup-url-regexp
|
||||
end 1)
|
||||
(> (point) pt)
|
||||
(setq found t))))
|
||||
(if found
|
||||
(cons (match-beginning 1) (match-end 1))))))
|
||||
|
||||
(defun thing-at-point--bounds-of-well-formed-url (beg end pt)
|
||||
(save-excursion
|
||||
(goto-char beg)
|
||||
(let (url-beg paren-end regexp)
|
||||
(save-restriction
|
||||
(narrow-to-region beg end)
|
||||
;; The scheme component must either match at BEG, or have no
|
||||
;; other alphanumerical ASCII characters before it.
|
||||
(setq regexp (concat "\\(?:\\`\\|[^a-zA-Z0-9]\\)\\("
|
||||
(or thing-at-point-beginning-of-url-regexp
|
||||
(regexp-opt thing-at-point-uri-schemes))
|
||||
"\\)"))
|
||||
(and (re-search-forward regexp end t)
|
||||
;; URI must have non-empty contents.
|
||||
(< (point) end)
|
||||
(setq url-beg (match-beginning 1))))
|
||||
(when url-beg
|
||||
;; If there is an open paren before the URI, truncate to the
|
||||
;; matching close paren.
|
||||
(and (> url-beg (point-min))
|
||||
(eq (car-safe (syntax-after (1- url-beg))) 4)
|
||||
(save-restriction
|
||||
(narrow-to-region (1- url-beg) (min end (point-max)))
|
||||
(setq paren-end (ignore-errors
|
||||
(scan-lists (1- url-beg) 1 0))))
|
||||
(not (blink-matching-check-mismatch (1- url-beg) paren-end))
|
||||
(setq end (1- paren-end)))
|
||||
;; Only report the URI if PT actually lies within its bounds.
(and (<= url-beg pt end) (cons url-beg end))))))
|
||||
|
||||
(put 'url 'thing-at-point 'thing-at-point-url-at-point)
|
||||
(defun thing-at-point-url-at-point ()
|
||||
|
||||
(defun thing-at-point-url-at-point (&optional lax bounds)
|
||||
"Return the URL around or before point.
|
||||
If no URL is found, return nil.
|
||||
|
||||
Search backwards for the start of a URL ending at or after point. If
|
||||
no URL found, return nil. The access scheme will be prepended if
|
||||
absent: \"mailto:\" if the string contains \"@\", \"ftp://\" if it
|
||||
starts with \"ftp\" and not \"ftp:/\", or \"http://\" by default."
|
||||
If optional argument LAX is non-nil, look for URLs that are not
|
||||
well-formed, such as foo@bar or <nobody>.
|
||||
|
||||
(let ((url "") short strip)
|
||||
(if (or (setq strip (thing-at-point-looking-at
|
||||
thing-at-point-markedup-url-regexp))
|
||||
(thing-at-point-looking-at thing-at-point-url-regexp)
|
||||
;; Access scheme omitted?
|
||||
(setq short (thing-at-point-looking-at
|
||||
thing-at-point-short-url-regexp)))
|
||||
(progn
|
||||
(setq url (buffer-substring-no-properties (match-beginning 0)
|
||||
(match-end 0)))
|
||||
(and strip (setq url (substring url 5 -1))) ; Drop "<URL:" & ">"
|
||||
;; strip whitespace
|
||||
(while (string-match "[ \t\n\r]+" url)
|
||||
(setq url (replace-match "" t t url)))
|
||||
(and short (setq url (concat (cond ((string-match "^[a-zA-Z]+:" url)
|
||||
;; already has a URL scheme.
|
||||
"")
|
||||
((string-match "@" url)
|
||||
"mailto:")
|
||||
;; e.g. ftp.swiss... or ftp-swiss...
|
||||
((string-match "^ftp" url)
|
||||
"ftp://")
|
||||
(t "http://"))
|
||||
url)))
|
||||
(if (string-equal "" url)
|
||||
nil
|
||||
url)))))
|
||||
If optional argument BOUNDS is non-nil, it should be a cons
|
||||
cell of the form (START . END), containing the beginning and end
|
||||
positions of the URI. Otherwise, these positions are detected
|
||||
automatically from the text around point.
|
||||
|
||||
If the scheme component is absent, either because a URI delimited
|
||||
with <url:...> lacks one, or because an ill-formed URI was found
|
||||
with LAX or BOUNDS, try to add a scheme in the returned URI.
|
||||
The scheme is chosen heuristically: \"mailto:\" if the address
|
||||
looks like an email address, \"ftp://\" if it starts with
|
||||
\"ftp\", etc."
|
||||
(unless bounds
|
||||
(setq bounds (thing-at-point-bounds-of-url-at-point lax)))
|
||||
(when (and bounds (< (car bounds) (cdr bounds)))
|
||||
(let ((str (buffer-substring-no-properties (car bounds) (cdr bounds))))
|
||||
;; If there is no scheme component, try to add one.
|
||||
(unless (string-match "\\`[a-zA-Z][-a-zA-Z0-9+.]*:" str)
|
||||
(or
|
||||
;; If the URI has the form <foo@bar>, treat it according to
|
||||
;; `thing-at-point-default-mail-uri-scheme'. If there are
|
||||
;; no angle brackets, it must be mailto.
|
||||
(when (string-match "\\`[^:</>@]+@[-.0-9=&?$+A-Z_a-z~#,%;*]" str)
|
||||
(let ((scheme (if (and (eq (char-before (car bounds)) ?<)
|
||||
(eq (char-after (cdr bounds)) ?>))
|
||||
thing-at-point-default-mail-uri-scheme
|
||||
"mailto")))
|
||||
(if scheme
|
||||
(setq str (concat scheme ":" str)))))
|
||||
;; If the string is like <FOO>, where FOO is an existing user
|
||||
;; name on the system, treat that as an email address.
|
||||
(and (string-match "\\`[[:alnum:]]+\\'" str)
|
||||
(eq (char-before (car bounds)) ?<)
|
||||
(eq (char-after (cdr bounds)) ?>)
|
||||
(not (string-match "~" (expand-file-name (concat "~" str))))
|
||||
(setq str (concat "mailto:" str)))
|
||||
;; If it looks like news.example.com, treat it as news.
|
||||
(if (thing-at-point-newsgroup-p str)
|
||||
(setq str (concat "news:" str)))
|
||||
;; If it looks like ftp.example.com, treat it as ftp.
|
||||
(if (string-match "\\`ftp\\." str)
|
||||
(setq str (concat "ftp://" str)))
|
||||
;; If it looks like www.example.com, treat it as http.
|
||||
(if (string-match "\\`www\\." str)
|
||||
(setq str (concat "http://" str)))
|
||||
;; Otherwise, it just isn't a URI.
|
||||
(setq str nil)))
|
||||
str)))
|
||||
|
||||
(defun thing-at-point-newsgroup-p (string)
|
||||
"Return STRING if it looks like a newsgroup name, else nil."
|
||||
(and
|
||||
(string-match thing-at-point-newsgroup-regexp string)
|
||||
(let ((htbs '(gnus-active-hashtb gnus-newsrc-hashtb gnus-killed-hashtb))
|
||||
(heads thing-at-point-newsgroup-heads)
|
||||
htb ret)
|
||||
(while htbs
|
||||
(setq htb (car htbs) htbs (cdr htbs))
|
||||
(condition-case nil
|
||||
(progn
|
||||
;; errs: htb symbol may be unbound, or not a hash-table.
|
||||
;; gnus-gethash is just a macro for intern-soft.
|
||||
(and (symbol-value htb)
|
||||
(intern-soft string (symbol-value htb))
|
||||
(setq ret string htbs nil))
|
||||
;; If we made it this far, gnus is running, so ignore "heads":
|
||||
(setq heads nil))
|
||||
(error nil)))
|
||||
(or ret (not heads)
|
||||
(let ((head (string-match "\\`\\([[:lower:]]+\\)\\." string)))
|
||||
(and head (setq head (substring string 0 (match-end 1)))
|
||||
(member head heads)
|
||||
(setq ret string))))
|
||||
ret)))
|
||||
|
||||
(put 'url 'end-op (lambda () (end-of-thing 'url)))
|
||||
|
||||
(put 'url 'beginning-op (lambda () (beginning-of-thing 'url)))
|
||||
|
||||
;; The normal thingatpt mechanism doesn't work for complex regexps.
|
||||
;; This should work for almost any regexp wherever we are in the
|
||||
@ -372,19 +502,6 @@ point."
|
||||
(goto-char match)
|
||||
(looking-at regexp)))))
|
||||
|
||||
(put 'url 'end-op
|
||||
(lambda ()
|
||||
(let ((bounds (thing-at-point-bounds-of-url-at-point)))
|
||||
(if bounds
|
||||
(goto-char (cdr bounds))
|
||||
(error "No URL here")))))
|
||||
(put 'url 'beginning-op
|
||||
(lambda ()
|
||||
(let ((bounds (thing-at-point-bounds-of-url-at-point)))
|
||||
(if bounds
|
||||
(goto-char (car bounds))
|
||||
(error "No URL here")))))
|
||||
|
||||
;; Email addresses
|
||||
(defvar thing-at-point-email-regexp
|
||||
"<?[-+_.~a-zA-Z][-+_.~:a-zA-Z0-9]*@[-.a-zA-Z0-9]+>?"
|
||||
|
@ -1,3 +1,7 @@
|
||||
2013-02-04 Chong Yidong <cyd@gnu.org>
|
||||
|
||||
* automated/thingatpt.el: New file.
|
||||
|
||||
2013-02-03 Chong Yidong <cyd@gnu.org>
|
||||
|
||||
* automated/files.el (file-test--do-local-variables-test): Avoid
|
||||
|
88
test/automated/thingatpt.el
Normal file
88
test/automated/thingatpt.el
Normal file
@ -0,0 +1,88 @@
|
||||
;;; thingatpt.el --- tests for thing-at-point.
|
||||
|
||||
;; Copyright (C) 2013 Free Software Foundation, Inc.
|
||||
|
||||
;; This file is part of GNU Emacs.
|
||||
|
||||
;; GNU Emacs is free software: you can redistribute it and/or modify
|
||||
;; it under the terms of the GNU General Public License as published by
|
||||
;; the Free Software Foundation, either version 3 of the License, or
|
||||
;; (at your option) any later version.
|
||||
|
||||
;; GNU Emacs is distributed in the hope that it will be useful,
|
||||
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
;; GNU General Public License for more details.
|
||||
|
||||
;; You should have received a copy of the GNU General Public License
|
||||
;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
;;; Code:
|
||||
|
||||
(require 'ert)
|
||||
|
||||
(defvar thing-at-point-test-data
|
||||
'(("http://1.gnu.org" 1 url "http://1.gnu.org")
|
||||
("http://2.gnu.org" 6 url "http://2.gnu.org")
|
||||
("http://3.gnu.org" 19 url "http://3.gnu.org")
|
||||
("https://4.gnu.org" 1 url "https://4.gnu.org")
|
||||
("bzr://savannah.gnu.org" 1 url "bzr://savannah.gnu.org")
|
||||
("A geo URI (geo:3.14159,-2.71828)." 12 url "geo:3.14159,-2.71828")
|
||||
("Visit http://5.gnu.org now." 5 url nil)
|
||||
("Visit http://6.gnu.org now." 7 url "http://6.gnu.org")
|
||||
("Visit http://7.gnu.org now." 22 url "http://7.gnu.org")
|
||||
("Visit http://8.gnu.org now." 22 url "http://8.gnu.org")
|
||||
("Visit http://9.gnu.org now." 24 url nil)
|
||||
;; Invalid URIs
|
||||
("<<<<" 2 url nil)
|
||||
("<>" 1 url nil)
|
||||
("<url:>" 1 url nil)
|
||||
("http://" 1 url nil)
|
||||
;; Invalid scheme
|
||||
("foo://www.gnu.org" 1 url nil)
|
||||
("foohttp://www.gnu.org" 1 url nil)
|
||||
;; Non alphanumeric characters can be found in URIs
|
||||
("ftp://example.net/~foo!;#bar=baz&goo=bob" 3 url "ftp://example.net/~foo!;#bar=baz&goo=bob")
|
||||
("bzr+ssh://user@example.net:5/a%20d,5" 34 url "bzr+ssh://user@example.net:5/a%20d,5")
|
||||
;; <url:...> markup
|
||||
("Url: <url:foo://1.example.com>..." 8 url "foo://1.example.com")
|
||||
("Url: <url:foo://2.example.com>..." 30 url "foo://2.example.com")
|
||||
("Url: <url:foo://www.gnu.org/a bc>..." 20 url "foo://www.gnu.org/a bc")
|
||||
;; Hack used by thing-at-point: drop punctuation at end of URI.
|
||||
("Go to http://www.gnu.org, for details" 7 url "http://www.gnu.org")
|
||||
("Go to http://www.gnu.org." 24 url "http://www.gnu.org")
|
||||
;; Standard URI delimiters
|
||||
("Go to \"http://10.gnu.org\"." 8 url "http://10.gnu.org")
|
||||
("Go to \"http://11.gnu.org/\"." 26 url "http://11.gnu.org/")
|
||||
("Go to <http://12.gnu.org> now." 8 url "http://12.gnu.org")
|
||||
("Go to <http://13.gnu.org> now." 24 url "http://13.gnu.org")
|
||||
;; Parenthesis handling (non-standard)
|
||||
("http://example.com/a(b)c" 21 url "http://example.com/a(b)c")
|
||||
("http://example.com/a(b)" 21 url "http://example.com/a(b)")
|
||||
("(http://example.com/abc)" 2 url "http://example.com/abc")
|
||||
("This (http://example.com/a(b))" 7 url "http://example.com/a(b)")
|
||||
("This (http://example.com/a(b))" 30 url "http://example.com/a(b)")
|
||||
("This (http://example.com/a(b))" 5 url nil)
|
||||
("http://example.com/ab)c" 4 url "http://example.com/ab)c")
|
||||
;; URL markup, lacking scheme
|
||||
("<url:foo@example.com>" 1 url "mailto:foo@example.com")
|
||||
("<url:ftp.example.net/abc/>" 1 url "ftp://ftp.example.net/abc/"))
|
||||
"List of thing-at-point tests.
|
||||
Each list element should have the form
|
||||
|
||||
(STRING POS THING RESULT)
|
||||
|
||||
where STRING is a string of buffer contents, POS is the value of
|
||||
point, THING is a symbol argument for `thing-at-point', and
|
||||
RESULT should be the result of calling `thing-at-point' from that
|
||||
position to retrieve THING.")
|
||||
|
||||
(ert-deftest thing-at-point-tests ()
|
||||
"Test the `thing-at-point' implementation."
|
||||
(dolist (test thing-at-point-test-data)
|
||||
(with-temp-buffer
|
||||
(insert (nth 0 test))
|
||||
(goto-char (nth 1 test))
|
||||
(should (equal (thing-at-point (nth 2 test)) (nth 3 test))))))
|
||||
|
||||
;;; thingatpt.el ends here
|
Loading…
Reference in New Issue
Block a user