1
0
mirror of https://git.savannah.gnu.org/git/emacs.git synced 2025-01-11 16:08:13 +00:00

Merge FFAP's URI-detection code into thingatpt.el.

* lisp/ffap.el: Require thingatpt.
(ffap-url-at-point): Delegate URI detection to thing-at-point.
All URI-valid characters are now recognized.
(ffap-string-at-point): Use use-region-p.
(ffap-url-regexp): Extra character is handled by thing-at-point.
(ffap-string-at-point-mode-alist): Allow parentheses.
(ffap-newsgroup-regexp, ffap-newsgroup-heads, ffap-newsgroup-p):
Convert to aliases; code moved to thingatpt.el.
(ffap-gnus-hook): Use setq-local.

* lisp/thingatpt.el: Rewrite the URL detection routines, absorbing some
code from ffap.el.
(thing-at-point-beginning-of-url-regexp): New var.
(thing-at-point-uri-schemes): Update list of URI schemes.
(thing-at-point-url-regexp): Variable deleted.
(thing-at-point-markedup-url-regexp): Disallow newlines.
(thing-at-point-newsgroup-regexp)
(thing-at-point-newsgroup-heads)
(thing-at-point-default-mail-uri-scheme): New variables.
(thing-at-point-bounds-of-url-at-point): Rewrite.  Use ffap's
method to find the possible bounds of the URI at point.  New
optional argument to find ill-formed URIs.
(thing-at-point-url-at-point): Rewrite.  New arguments for finding
ill-formed URIs.  Use thing-at-point-bounds-of-url-at-point, and
the scheme-adding heuristics from ffap-url-at-point.
(thing-at-point--bounds-of-well-formed-url): New function.  Do
parens matching to decide whether to include parens in the URI

* test/automated/thingatpt.el: New file.

Fixes: debbugs:5673
This commit is contained in:
Chong Yidong 2013-02-04 20:02:25 +08:00
parent 84a06b500f
commit 6e5c1569e9
5 changed files with 358 additions and 167 deletions

View File

@ -1,3 +1,34 @@
2013-02-04 Chong Yidong <cyd@gnu.org>
* thingatpt.el: Rewrite the URL detection routines, absorbing some
code from ffap.el.
(thing-at-point-beginning-of-url-regexp): New var.
(thing-at-point-uri-schemes): Update list of URI schemes.
(thing-at-point-url-regexp): Variable deleted.
(thing-at-point-markedup-url-regexp): Disallow newlines.
(thing-at-point-newsgroup-regexp)
(thing-at-point-newsgroup-heads)
(thing-at-point-default-mail-uri-scheme): New variables.
(thing-at-point-bounds-of-url-at-point): Rewrite. Use ffap's
method to find the possible bounds of the URI at point. New
optional argument to find ill-formed URIs.
(thing-at-point-url-at-point): Rewrite. New arguments for finding
ill-formed URIs. Use thing-at-point-bounds-of-url-at-point, and
the scheme-adding heuristics from ffap-url-at-point.
(thing-at-point--bounds-of-well-formed-url): New function. Do
parens matching to decide whether to include parens in the URI
(Bug#9153).
* ffap.el: Require thingatpt.
(ffap-url-at-point): Delegate URI detection to thing-at-point.
All URI-valid characters are now recognized (Bug#5673).
(ffap-string-at-point): Use use-region-p.
(ffap-url-regexp): Extra character is handled by thing-at-point.
(ffap-string-at-point-mode-alist): Allow parentheses.
(ffap-newsgroup-regexp, ffap-newsgroup-heads, ffap-newsgroup-p):
Convert to aliases; code moved to thingatpt.el.
(ffap-gnus-hook): Use setq-local.
2013-02-04 Glenn Morris <rgm@gnu.org>
* emacs-lisp/ert.el (ert--explain-format-atom):

View File

@ -106,6 +106,7 @@
;;; Code:
(require 'url-parse)
(require 'thingatpt)
(define-obsolete-variable-alias 'ffap-version 'emacs-version "23.2")
@ -178,16 +179,14 @@ Note this name may be omitted if it equals the default
:group 'ffap)
(defvar ffap-url-regexp
;; Could just use `url-nonrelative-link' of w3, if loaded.
;; This regexp is not exhaustive, it just matches common cases.
(concat
"\\("
"news\\(post\\)?:\\|mailto:\\|file:" ; no host ok
"\\|"
"\\(ftp\\|https?\\|telnet\\|gopher\\|www\\|wais\\)://" ; needs host
"\\)." ; require one more character
)
"Regexp matching URLs. Use nil to disable URL features in ffap.")
"\\)")
"Regexp matching the beginning of a URI, for FFAP.
If the value is nil, disable URL-matching features in ffap.")
(defcustom ffap-foo-at-bar-prefix "mailto"
"Presumed URL prefix type of strings like \"<foo.9z@bar>\".
@ -571,38 +570,9 @@ Looks at `ffap-ftp-default-user', returns \"\" for \"localhost\"."
(ffap-ftp-regexp (ffap-host-to-filename mach))
))
(defvar ffap-newsgroup-regexp "^[[:lower:]]+\\.[-+[:lower:]_0-9.]+$"
"Strings not matching this fail `ffap-newsgroup-p'.")
(defvar ffap-newsgroup-heads ; entirely inadequate
'("alt" "comp" "gnu" "misc" "news" "sci" "soc" "talk")
"Used by `ffap-newsgroup-p' if gnus is not running.")
(defun ffap-newsgroup-p (string)
"Return STRING if it looks like a newsgroup name, else nil."
(and
(string-match ffap-newsgroup-regexp string)
(let ((htbs '(gnus-active-hashtb gnus-newsrc-hashtb gnus-killed-hashtb))
(heads ffap-newsgroup-heads)
htb ret)
(while htbs
(setq htb (car htbs) htbs (cdr htbs))
(condition-case nil
(progn
;; errs: htb symbol may be unbound, or not a hash-table.
;; gnus-gethash is just a macro for intern-soft.
(and (symbol-value htb)
(intern-soft string (symbol-value htb))
(setq ret string htbs nil))
;; If we made it this far, gnus is running, so ignore "heads":
(setq heads nil))
(error nil)))
(or ret (not heads)
(let ((head (string-match "\\`\\([[:lower:]]+\\)\\." string)))
(and head (setq head (substring string 0 (match-end 1)))
(member head heads)
(setq ret string))))
;; Is there ever a need to modify string as a newsgroup name?
ret)))
(defvaralias 'ffap-newsgroup-regexp 'thing-at-point-newsgroup-regexp)
(defvaralias 'ffap-newsgroup-heads 'thing-at-point-newsgroup-heads)
(defalias 'ffap-newsgroup-p 'thing-at-point-newsgroup-p)
(defsubst ffap-url-p (string)
"If STRING looks like an URL, return it (maybe improved), else nil."
@ -1017,7 +987,7 @@ If a given RFC isn't in these then `ffap-rfc-path' is offered."
;; * no commas (good for latex)
(file "--:\\\\$+<>@-Z_[:alpha:]~*?" "<@" "@>;.,!:")
;; An url, or maybe a email/news message-id:
(url "--:=&?$+@-Z_[:alpha:]~#,%;*" "^[:alnum:]" ":;.,!?")
(url "--:=&?$+@-Z_[:alpha:]~#,%;*()!'" "^[0-9a-zA-Z]" ":;.,!?")
;; Find a string that does *not* contain a colon:
(nocolon "--9$+<>@-Z_[:alpha:]~" "<@" "@>;.,!?")
;; A machine:
@ -1031,7 +1001,7 @@ possibly a major-mode name, or one of the symbol
Function `ffap-string-at-point' uses the data fields as follows:
1. find a maximal string of CHARS around point,
2. strip BEG chars before point from the beginning,
3. Strip END chars after point from the end.")
3. strip END chars after point from the end.")
(defvar ffap-string-at-point nil
;; Added at suggestion of RHOGEE (for ff-paths), 7/24/95.
@ -1050,22 +1020,22 @@ Sets the variable `ffap-string-at-point' and the variable
(or (assq (or mode major-mode) ffap-string-at-point-mode-alist)
(assq 'file ffap-string-at-point-mode-alist))))
(pt (point))
(str
(if (and transient-mark-mode mark-active)
(buffer-substring
(setcar ffap-string-at-point-region (region-beginning))
(setcar (cdr ffap-string-at-point-region) (region-end)))
(buffer-substring
(save-excursion
(skip-chars-backward (car args))
(skip-chars-forward (nth 1 args) pt)
(setcar ffap-string-at-point-region (point)))
(save-excursion
(skip-chars-forward (car args))
(skip-chars-backward (nth 2 args) pt)
(setcar (cdr ffap-string-at-point-region) (point)))))))
(set-text-properties 0 (length str) nil str)
(setq ffap-string-at-point str)))
(beg (if (use-region-p)
(region-beginning)
(save-excursion
(skip-chars-backward (car args))
(skip-chars-forward (nth 1 args) pt)
(point))))
(end (if (use-region-p)
(region-end)
(save-excursion
(skip-chars-forward (car args))
(skip-chars-backward (nth 2 args) pt)
(point)))))
(setq ffap-string-at-point
(buffer-substring-no-properties
(setcar ffap-string-at-point-region beg)
(setcar (cdr ffap-string-at-point-region) end)))))
(defun ffap-string-around ()
;; Sometimes useful to decide how to treat a string.
@ -1098,35 +1068,15 @@ Assumes the buffer has not changed."
(defun ffap-url-at-point ()
"Return URL from around point if it exists, or nil."
;; Could use w3's url-get-url-at-point instead. Both handle "URL:",
;; ignore non-relative links, trim punctuation. The other will
;; actually look back if point is in whitespace, but I would rather
;; ffap be less aggressive in such situations.
(when ffap-url-regexp
(or (and (eq major-mode 'w3-mode) ; In a w3 buffer button?
(w3-view-this-url t))
;; Is there a reason not to strip trailing colon?
(let ((name (ffap-string-at-point 'url)))
(cond
((string-match "^url:" name) (setq name (substring name 4)))
((and (string-match "\\`[^:</>@]+@[^:</>@]+[[:alnum:]]\\'" name)
;; "foo@bar": could be "mailto" or "news" (a Message-ID).
;; Without "<>" it must be "mailto". Otherwise could be
;; either, so consult `ffap-foo-at-bar-prefix'.
(let ((prefix (if (and (equal (ffap-string-around) "<>")
;; Expect some odd characters:
(string-match "[$.0-9].*[$.0-9].*@" name))
;; Could be news:
ffap-foo-at-bar-prefix
"mailto")))
(and prefix (setq name (concat prefix ":" name))))))
((ffap-newsgroup-p name) (setq name (concat "news:" name)))
((and (string-match "\\`[[:alnum:]]+\\'" name) ; <mic> <root> <nobody>
(equal (ffap-string-around) "<>")
;; (ffap-user-p name):
(not (string-match "~" (expand-file-name (concat "~" name)))))
(setq name (concat "mailto:" name)))
((ffap-url-p name)))))))
(let ((thing-at-point-beginning-of-url-regexp ffap-url-regexp)
      ;; Bind the variable that `thing-at-point-url-at-point' actually
      ;; consults for strings like <foo@bar>, so that
      ;; `ffap-foo-at-bar-prefix' takes effect.
      (thing-at-point-default-mail-uri-scheme ffap-foo-at-bar-prefix))
  ;; LAX is t so that ill-formed URIs (lacking a scheme) are also
  ;; considered; an active region, if any, supplies the bounds.
  (thing-at-point-url-at-point t
                               (if (use-region-p)
                                   (cons (region-beginning)
                                         (region-end))))))))
(defvar ffap-gopher-regexp
"^.*\\<\\(Type\\|Name\\|Path\\|Host\\|Port\\) *= *\\(.*\\) *$"
@ -1763,7 +1713,8 @@ Only intended for interactive use."
(defun ffap-gnus-hook ()
"Bind `ffap-gnus-next' and `ffap-gnus-menu' to M-l and M-m, resp."
(set (make-local-variable 'ffap-foo-at-bar-prefix) "news") ; message-id's
;; message-id's
(setq-local thing-at-point-default-mail-uri-scheme "news")
;; Note "l", "L", "m", "M" are taken:
(local-set-key "\M-l" 'ffap-gnus-next)
(local-set-key "\M-m" 'ffap-gnus-menu))

View File

@ -232,7 +232,7 @@ The bounds of THING are determined by `bounds-of-thing-at-point'."
(put 'defun 'end-op 'end-of-defun)
(put 'defun 'forward-op 'end-of-defun)
;; Filenames and URLs www.com/foo%32bar
;; Filenames
(defvar thing-at-point-file-name-chars "-~/[:alnum:]_.${}#%,:"
"Characters allowable in filenames.")
@ -248,94 +248,224 @@ The bounds of THING are determined by `bounds-of-thing-at-point'."
(forward-char)
(goto-char (point-min)))))
;; URIs
(defvar thing-at-point-beginning-of-url-regexp nil
"Regexp matching the beginning of a well-formed URI.
If nil, construct the regexp from `thing-at-point-uri-schemes'.")
(defvar thing-at-point-url-path-regexp
"[^]\t\n \"'<>[^`{}]*[^]\t\n \"'<>[^`{}.,;]+"
"A regular expression probably matching the host and filename or e-mail part of a URL.")
"Regexp matching the host and filename or e-mail part of a URL.")
(defvar thing-at-point-short-url-regexp
(concat "[-A-Za-z0-9]+\\.[-A-Za-z0-9.]+" thing-at-point-url-path-regexp)
"A regular expression probably matching a URL without an access scheme.
Hostname matching is stricter in this case than for
``thing-at-point-url-regexp''.")
"Regexp matching a URI without a scheme component.")
(defvar thing-at-point-uri-schemes
;; Officials from http://www.iana.org/assignments/uri-schemes.html
'("ftp://" "http://" "gopher://" "mailto:" "news:" "nntp:"
"telnet://" "wais://" "file:/" "prospero:" "z39.50s:" "z39.50r:"
"cid:" "mid:" "vemmi:" "service:" "imap:" "nfs:" "acap:" "rtsp:"
"tip:" "pop:" "data:" "dav:" "opaquelocktoken:" "sip:" "tel:" "fax:"
"modem:" "ldap:" "https://" "soap.beep:" "soap.beeps:" "urn:" "go:"
"afs:" "tn3270:" "mailserver:"
"crid:" "dict:" "dns:" "dtn:" "h323:" "im:" "info:" "ipp:"
"iris.beep:" "mtqp:" "mupdate:" "pres:" "sips:" "snmp:" "tag:"
"tftp:" "xmlrpc.beep:" "xmlrpc.beeps:" "xmpp:"
;; Compatibility
"snews:" "irc:" "mms://" "mmsh://")
"Uniform Resource Identifier (URI) Schemes.")
'("aaa://" "about:" "acap://" "apt:" "bzr://" "bzr+ssh://"
"attachment:/" "chrome://" "cid:" "content://" "crid://" "cvs://"
"data:" "dav:" "dict://" "doi:" "dns:" "dtn:" "feed:" "file:/"
"finger://" "fish://" "ftp://" "geo:" "git://" "go:" "gopher://"
"h323:" "http://" "https://" "im:" "imap://" "info:" "ipp:"
"irc://" "irc6://" "ircs://" "iris.beep:" "jar:" "ldap://"
"ldaps://" "mailto:" "mid:" "mtqp://" "mupdate://" "news:"
"nfs://" "nntp://" "opaquelocktoken:" "pop://" "pres:"
"resource://" "rmi://" "rsync://" "rtsp://" "rtspu://" "service:"
"sftp://" "sip:" "sips:" "smb://" "sms:" "snmp://" "soap.beep://"
"soap.beeps://" "ssh://" "svn://" "svn+ssh://" "tag:" "tel:"
"telnet://" "tftp://" "tip://" "tn3270://" "udp://" "urn:"
"uuid:" "vemmi://" "webcal://" "xri://" "xmlrpc.beep://"
"xmlrpc.beeps://" "z39.50r://" "z39.50s://" "xmpp:"
;; Compatibility
"fax:" "mms://" "mmsh://" "modem:" "prospero:" "snews:"
"wais://")
"List of URI schemes recognized by `thing-at-point-url-at-point'.
Each string in this list should correspond to the start of a
URI's scheme component, up to and including the trailing // if
the scheme calls for that to be present.")
(defvar thing-at-point-url-regexp
(concat "\\<\\(" (mapconcat 'identity thing-at-point-uri-schemes "\\|") "\\)"
thing-at-point-url-path-regexp)
"A regular expression probably matching a complete URL.")
(defvar thing-at-point-markedup-url-regexp "<URL:\\([^<>\n]+\\)>"
"Regexp matching a URL marked up per RFC1738.
This kind of markup was formerly recommended as a way to indicate
URIs, but as of RFC 3986 it is no longer recommended.
Subexpression 1 should contain the delimited URL.")
(defvar thing-at-point-markedup-url-regexp
"<URL:[^>]+>"
"A regular expression matching a URL marked up per RFC1738.
This may contain whitespace (including newlines) .")
(defvar thing-at-point-newsgroup-regexp
"\\`[[:lower:]]+\\.[-+[:lower:]_0-9.]+\\'"
"Regexp matching a newsgroup name.")
(defvar thing-at-point-newsgroup-heads
'("alt" "comp" "gnu" "misc" "news" "sci" "soc" "talk")
"Used by `thing-at-point-newsgroup-p' if gnus is not running.")
(defvar thing-at-point-default-mail-uri-scheme "mailto"
"Default scheme for ill-formed URIs that look like <foo@example.com>.
If nil, do not give such URIs a scheme.")
(put 'url 'bounds-of-thing-at-point 'thing-at-point-bounds-of-url-at-point)
(defun thing-at-point-bounds-of-url-at-point ()
(let ((strip (thing-at-point-looking-at
thing-at-point-markedup-url-regexp))) ;; (url "") short
(if (or strip
(thing-at-point-looking-at thing-at-point-url-regexp)
;; Access scheme omitted?
;; (setq short (thing-at-point-looking-at
;; thing-at-point-short-url-regexp))
)
(let ((beginning (match-beginning 0))
(end (match-end 0)))
(when strip
(setq beginning (+ beginning 5))
(setq end (- end 1)))
(cons beginning end)))))
(defun thing-at-point-bounds-of-url-at-point (&optional lax)
"Return a cons cell containing the start and end of the URI at point.
Try to find a URI using `thing-at-point-markedup-url-regexp'.
If that fails, try with `thing-at-point-beginning-of-url-regexp'.
If that also fails, and optional argument LAX is non-nil, return
the bounds of a possible ill-formed URI (one lacking a scheme)."
;; Look for the old <URL:foo> markup. If found, use it.
(or (thing-at-point--bounds-of-markedup-url)
;; Otherwise, find the bounds within which a URI may exist. The
;; method is similar to `ffap-string-at-point'. Note that URIs
;; may contain parentheses but may not contain spaces (RFC3986).
(let* ((allowed-chars "--:=&?$+@-Z_[:alpha:]~#,%;*()!'")
(skip-before "^[0-9a-zA-Z]")
(skip-after ":;.,!?")
(pt (point))
(beg (save-excursion
(skip-chars-backward allowed-chars)
(skip-chars-forward skip-before pt)
(point)))
(end (save-excursion
(skip-chars-forward allowed-chars)
(skip-chars-backward skip-after pt)
(point))))
(or (thing-at-point--bounds-of-well-formed-url beg end pt)
(if lax (cons beg end))))))
(defun thing-at-point--bounds-of-markedup-url ()
  "Return the bounds of the URL inside <URL:...> markup around point.
The markup is matched with `thing-at-point-markedup-url-regexp',
case-insensitively, and only on the current line.  The value is a
cons cell (START . END) delimiting subexpression 1 of that regexp.
Return nil if that variable is nil, or if no markup on the current
line both starts at or before point and ends after point."
  (when thing-at-point-markedup-url-regexp
    (let ((case-fold-search t)
          (pt (point))
          (end (line-end-position))
          bounds)
      (save-excursion
        (goto-char (line-beginning-position))
        ;; Walk over successive matches on this line until one spans
        ;; point, the search has moved past point, or the line ends.
        (while (and (not bounds)
                    (<= (point) pt)
                    (< (point) end))
          ;; With NOERROR set to 1, a failed search moves point to END,
          ;; which terminates the loop.
          (and (re-search-forward thing-at-point-markedup-url-regexp
                                  end 1)
               (> (point) pt)
               ;; The markup must also begin at or before point;
               ;; otherwise it lies entirely after point and is not
               ;; the thing at point.
               (<= (match-beginning 0) pt)
               (setq bounds (cons (match-beginning 1) (match-end 1))))))
      bounds)))
(defun thing-at-point--bounds-of-well-formed-url (beg end pt)
"Return the bounds of a well-formed URI between BEG and END, or nil.
A scheme is looked for using `thing-at-point-beginning-of-url-regexp'
or, when that is nil, a regexp built from `thing-at-point-uri-schemes'.
The scheme must start at BEG or directly after a character that is
not an ASCII letter or digit, and must be followed by at least one
more character before END.
The value is a cons cell (URL-BEG . URL-END).  URL-END is END, unless
the character before the URI is an open parenthesis whose matching
close parenthesis is found no later than END; in that case URL-END is
the position of that close parenthesis.
PT is accepted but not used by this function."
(save-excursion
(goto-char beg)
(let (url-beg paren-end regexp)
(save-restriction
(narrow-to-region beg end)
;; The scheme component must either match at BEG, or have no
;; other alphanumerical ASCII characters before it.
(setq regexp (concat "\\(?:\\`\\|[^a-zA-Z0-9]\\)\\("
(or thing-at-point-beginning-of-url-regexp
(regexp-opt thing-at-point-uri-schemes))
"\\)"))
(and (re-search-forward regexp end t)
;; URI must have non-empty contents.
(< (point) end)
(setq url-beg (match-beginning 1))))
(when url-beg
;; If there is an open paren before the URI, truncate to the
;; matching close paren.
;; (Syntax class 4 is the open-parenthesis class.)
(and (> url-beg (point-min))
(eq (car-safe (syntax-after (1- url-beg))) 4)
(save-restriction
;; Restrict the scan so that only a close paren at or before
;; END can be found; a failed scan leaves PAREN-END nil and
;; stops the `and' here.
(narrow-to-region (1- url-beg) (min end (point-max)))
(setq paren-end (ignore-errors
(scan-lists (1- url-beg) 1 0))))
;; NOTE(review): presumably non-nil when the two delimiters are
;; of different kinds, e.g. "(" closed by "]" -- confirm.
(not (blink-matching-check-mismatch (1- url-beg) paren-end))
(setq end (1- paren-end)))
(cons url-beg end)))))
(put 'url 'thing-at-point 'thing-at-point-url-at-point)
(defun thing-at-point-url-at-point ()
(defun thing-at-point-url-at-point (&optional lax bounds)
"Return the URL around or before point.
If no URL is found, return nil.
Search backwards for the start of a URL ending at or after point. If
no URL found, return nil. The access scheme will be prepended if
absent: \"mailto:\" if the string contains \"@\", \"ftp://\" if it
starts with \"ftp\" and not \"ftp:/\", or \"http://\" by default."
If optional argument LAX is non-nil, look for URLs that are not
well-formed, such as foo@bar or <nobody>.
(let ((url "") short strip)
(if (or (setq strip (thing-at-point-looking-at
thing-at-point-markedup-url-regexp))
(thing-at-point-looking-at thing-at-point-url-regexp)
;; Access scheme omitted?
(setq short (thing-at-point-looking-at
thing-at-point-short-url-regexp)))
(progn
(setq url (buffer-substring-no-properties (match-beginning 0)
(match-end 0)))
(and strip (setq url (substring url 5 -1))) ; Drop "<URL:" & ">"
;; strip whitespace
(while (string-match "[ \t\n\r]+" url)
(setq url (replace-match "" t t url)))
(and short (setq url (concat (cond ((string-match "^[a-zA-Z]+:" url)
;; already has a URL scheme.
"")
((string-match "@" url)
"mailto:")
;; e.g. ftp.swiss... or ftp-swiss...
((string-match "^ftp" url)
"ftp://")
(t "http://"))
url)))
(if (string-equal "" url)
nil
url)))))
If optional argument BOUNDS is non-nil, it should be a cons
cell of the form (START . END), containing the beginning and end
positions of the URI. Otherwise, these positions are detected
automatically from the text around point.
If the scheme component is absent, either because a URI delimited
with <url:...> lacks one, or because an ill-formed URI was found
with LAX or BOUNDS, try to add a scheme in the returned URI.
The scheme is chosen heuristically: \"mailto:\" if the address
looks like an email address, \"ftp://\" if it starts with
\"ftp\", etc."
(unless bounds
(setq bounds (thing-at-point-bounds-of-url-at-point lax)))
(when (and bounds (< (car bounds) (cdr bounds)))
(let ((str (buffer-substring-no-properties (car bounds) (cdr bounds))))
;; If there is no scheme component, try to add one.
(unless (string-match "\\`[a-zA-Z][-a-zA-Z0-9+.]*:" str)
(or
;; If the URI has the form <foo@bar>, treat it according to
;; `thing-at-point-default-mail-uri-scheme'. If there are
;; no angle brackets, it must be mailto.
(when (string-match "\\`[^:</>@]+@[-.0-9=&?$+A-Z_a-z~#,%;*]" str)
(let ((scheme (if (and (eq (char-before (car bounds)) ?<)
(eq (char-after (cdr bounds)) ?>))
thing-at-point-default-mail-uri-scheme
"mailto")))
(if scheme
(setq str (concat scheme ":" str)))))
;; If the string is like <FOO>, where FOO is an existing user
;; name on the system, treat that as an email address.
(and (string-match "\\`[[:alnum:]]+\\'" str)
(eq (char-before (car bounds)) ?<)
(eq (char-after (cdr bounds)) ?>)
(not (string-match "~" (expand-file-name (concat "~" str))))
(setq str (concat "mailto:" str)))
;; If it looks like news.example.com, treat it as news.
(if (thing-at-point-newsgroup-p str)
(setq str (concat "news:" str)))
;; If it looks like ftp.example.com. treat it as ftp.
(if (string-match "\\`ftp\\." str)
(setq str (concat "ftp://" str)))
;; If it looks like www.example.com. treat it as http.
(if (string-match "\\`www\\." str)
(setq str (concat "http://" str)))
;; Otherwise, it just isn't a URI.
(setq str nil)))
str)))
(defun thing-at-point-newsgroup-p (string)
  "Return STRING if it appears to name a newsgroup, otherwise nil.
STRING must match `thing-at-point-newsgroup-regexp'.  It is then
looked up in the Gnus group tables when those are available; when
none of them can be consulted, its first dot-separated component
must be a member of `thing-at-point-newsgroup-heads'."
  (when (string-match thing-at-point-newsgroup-regexp string)
    (let ((tables '(gnus-active-hashtb gnus-newsrc-hashtb gnus-killed-hashtb))
          (prefixes thing-at-point-newsgroup-heads)
          result)
      (while tables
        (let ((table (pop tables)))
          (condition-case nil
              (progn
                ;; Signals an error if TABLE is unbound or its value
                ;; is not something `intern-soft' can search.
                (when (and (symbol-value table)
                           (intern-soft string (symbol-value table)))
                  (setq result string
                        tables nil))
                ;; Reaching this point means a Gnus table could be
                ;; consulted, so the static prefix list is not used.
                (setq prefixes nil))
            (error nil))))
      ;; Fall back to the list of well-known top-level hierarchies.
      (when (and (not result)
                 prefixes
                 (string-match "\\`\\([[:lower:]]+\\)\\." string)
                 (member (substring string 0 (match-end 1)) prefixes))
        (setq result string))
      result)))
;; Movement operations for URLs.  Both helpers locate the URL through
;; `bounds-of-thing-at-point' and signal an error if there is none.
(put 'url 'end-op (lambda () (end-of-thing 'url)))
;; Move to the START of the URL at point (not its end).
(put 'url 'beginning-op (lambda () (beginning-of-thing 'url)))
;; The normal thingatpt mechanism doesn't work for complex regexps.
;; This should work for almost any regexp wherever we are in the
@ -372,19 +502,6 @@ point."
(goto-char match)
(looking-at regexp)))))
(put 'url 'end-op
(lambda ()
(let ((bounds (thing-at-point-bounds-of-url-at-point)))
(if bounds
(goto-char (cdr bounds))
(error "No URL here")))))
(put 'url 'beginning-op
(lambda ()
(let ((bounds (thing-at-point-bounds-of-url-at-point)))
(if bounds
(goto-char (car bounds))
(error "No URL here")))))
;; Email addresses
(defvar thing-at-point-email-regexp
"<?[-+_.~a-zA-Z][-+_.~:a-zA-Z0-9]*@[-.a-zA-Z0-9]+>?"

View File

@ -1,3 +1,7 @@
2013-02-04 Chong Yidong <cyd@gnu.org>
* automated/thingatpt.el: New file.
2013-02-03 Chong Yidong <cyd@gnu.org>
* automated/files.el (file-test--do-local-variables-test): Avoid

View File

@ -0,0 +1,88 @@
;;; thingatpt.el --- tests for thing-at-point.
;; Copyright (C) 2013 Free Software Foundation, Inc.
;; This file is part of GNU Emacs.
;; GNU Emacs is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.
;; GNU Emacs is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
;;; Code:
(require 'ert)
(defvar thing-at-point-test-data
'(("http://1.gnu.org" 1 url "http://1.gnu.org")
("http://2.gnu.org" 6 url "http://2.gnu.org")
("http://3.gnu.org" 19 url "http://3.gnu.org")
("https://4.gnu.org" 1 url "https://4.gnu.org")
("bzr://savannah.gnu.org" 1 url "bzr://savannah.gnu.org")
("A geo URI (geo:3.14159,-2.71828)." 12 url "geo:3.14159,-2.71828")
("Visit http://5.gnu.org now." 5 url nil)
("Visit http://6.gnu.org now." 7 url "http://6.gnu.org")
("Visit http://7.gnu.org now." 22 url "http://7.gnu.org")
("Visit http://8.gnu.org now." 22 url "http://8.gnu.org")
("Visit http://9.gnu.org now." 24 url nil)
;; Invalid URIs
("<<<<" 2 url nil)
("<>" 1 url nil)
("<url:>" 1 url nil)
("http://" 1 url nil)
;; Invalid schema
("foo://www.gnu.org" 1 url nil)
("foohttp://www.gnu.org" 1 url nil)
;; Non alphanumeric characters can be found in URIs
("ftp://example.net/~foo!;#bar=baz&goo=bob" 3 url "ftp://example.net/~foo!;#bar=baz&goo=bob")
("bzr+ssh://user@example.net:5/a%20d,5" 34 url "bzr+ssh://user@example.net:5/a%20d,5")
;; <url:...> markup
("Url: <url:foo://1.example.com>..." 8 url "foo://1.example.com")
("Url: <url:foo://2.example.com>..." 30 url "foo://2.example.com")
("Url: <url:foo://www.gnu.org/a bc>..." 20 url "foo://www.gnu.org/a bc")
;; Hack used by thing-at-point: drop punctuation at end of URI.
("Go to http://www.gnu.org, for details" 7 url "http://www.gnu.org")
("Go to http://www.gnu.org." 24 url "http://www.gnu.org")
;; Standard URI delimiters
("Go to \"http://10.gnu.org\"." 8 url "http://10.gnu.org")
("Go to \"http://11.gnu.org/\"." 26 url "http://11.gnu.org/")
("Go to <http://12.gnu.org> now." 8 url "http://12.gnu.org")
("Go to <http://13.gnu.org> now." 24 url "http://13.gnu.org")
;; Parenthesis handling (non-standard)
("http://example.com/a(b)c" 21 url "http://example.com/a(b)c")
("http://example.com/a(b)" 21 url "http://example.com/a(b)")
("(http://example.com/abc)" 2 url "http://example.com/abc")
("This (http://example.com/a(b))" 7 url "http://example.com/a(b)")
("This (http://example.com/a(b))" 30 url "http://example.com/a(b)")
("This (http://example.com/a(b))" 5 url nil)
("http://example.com/ab)c" 4 url "http://example.com/ab)c")
;; URL markup, lacking schema
("<url:foo@example.com>" 1 url "mailto:foo@example.com")
("<url:ftp.example.net/abc/>" 1 url "ftp://ftp.example.net/abc/"))
"List of thing-at-point tests.
Each list element should have the form
(STRING POS THING RESULT)
where STRING is a string of buffer contents, POS is the value of
point, THING is a symbol argument for `thing-at-point', and
RESULT should be the result of calling `thing-at-point' from that
position to retrieve THING.")
(ert-deftest thing-at-point-tests ()
"Check `thing-at-point' against every entry of `thing-at-point-test-data'.
Each entry is inserted into a fresh temporary buffer, point is moved
to the entry's position, and the result is compared with `equal'."
(dolist (test thing-at-point-test-data)
(with-temp-buffer
(insert (nth 0 test))
(goto-char (nth 1 test))
(should (equal (thing-at-point (nth 2 test)) (nth 3 test))))))
;;; thingatpt.el ends here