mirror of
https://git.savannah.gnu.org/git/emacs.git
synced 2024-12-22 10:26:20 +00:00
451 lines
13 KiB
EmacsLisp
451 lines
13 KiB
EmacsLisp
|
;;; mm-url.el --- a wrapper of url functions/commands for Gnus
|
|||
|
;; Copyright (C) 2001, 2002, 2003 Free Software Foundation, Inc.
|
|||
|
|
|||
|
;; Author: Shenghuo Zhu <zsh@cs.rochester.edu>
|
|||
|
|
|||
|
;; This file is part of GNU Emacs.
|
|||
|
|
|||
|
;; GNU Emacs is free software; you can redistribute it and/or modify
|
|||
|
;; it under the terms of the GNU General Public License as published
|
|||
|
;; by the Free Software Foundation; either version 2, or (at your
|
|||
|
;; option) any later version.
|
|||
|
|
|||
|
;; GNU Emacs is distributed in the hope that it will be useful, but
|
|||
|
;; WITHOUT ANY WARRANTY; without even the implied warranty of
|
|||
|
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|||
|
;; General Public License for more details.
|
|||
|
|
|||
|
;; You should have received a copy of the GNU General Public License
|
|||
|
;; along with GNU Emacs; see the file COPYING. If not, write to the
|
|||
|
;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
|||
|
;; Boston, MA 02111-1307, USA.
|
|||
|
|
|||
|
;;; Commentary:
|
|||
|
|
|||
|
;; Some codes are stolen from w3 and url packages. Some are moved from
|
|||
|
;; nnweb.
|
|||
|
|
|||
|
;; TODO: Support POST, cookie.
|
|||
|
|
|||
|
;;; Code:
|
|||
|
|
|||
|
(eval-when-compile (require 'cl))
|
|||
|
|
|||
|
(require 'mm-util)
|
|||
|
(require 'gnus)
|
|||
|
|
|||
|
(eval-and-compile
|
|||
|
(autoload 'executable-find "executable"))
|
|||
|
|
|||
|
(eval-when-compile
|
|||
|
(require 'timer))
|
|||
|
|
|||
|
(defgroup mm-url nil
|
|||
|
"A wrapper of url package and external url command for Gnus."
|
|||
|
:group 'gnus)
|
|||
|
|
|||
|
(defcustom mm-url-use-external (not
|
|||
|
(condition-case nil
|
|||
|
(require 'url)
|
|||
|
(error nil)))
|
|||
|
"*If non-nil, use external grab program `mm-url-program'."
|
|||
|
:type 'boolean
|
|||
|
:group 'mm-url)
|
|||
|
|
|||
|
(defvar mm-url-predefined-programs
|
|||
|
'((wget "wget" "--user-agent=mm-url" "-q" "-O" "-")
|
|||
|
(w3m "w3m" "-dump_source")
|
|||
|
(lynx "lynx" "-source")
|
|||
|
(curl "curl")))
|
|||
|
|
|||
|
(defcustom mm-url-program
|
|||
|
(cond
|
|||
|
((executable-find "wget") 'wget)
|
|||
|
((executable-find "w3m") 'w3m)
|
|||
|
((executable-find "lynx") 'lynx)
|
|||
|
((executable-find "curl") 'curl)
|
|||
|
(t "GET"))
|
|||
|
"The url grab program.
|
|||
|
Likely values are `wget', `w3m', `lynx' and `curl'."
|
|||
|
:type '(choice
|
|||
|
(symbol :tag "wget" wget)
|
|||
|
(symbol :tag "w3m" w3m)
|
|||
|
(symbol :tag "lynx" lynx)
|
|||
|
(symbol :tag "curl" curl)
|
|||
|
(string :tag "other"))
|
|||
|
:group 'mm-url)
|
|||
|
|
|||
|
(defcustom mm-url-arguments nil
|
|||
|
"The arguments for `mm-url-program'."
|
|||
|
:type '(repeat string)
|
|||
|
:group 'mm-url)
|
|||
|
|
|||
|
|
|||
|
;;; Internal variables
|
|||
|
|
|||
|
(defvar mm-url-package-name
|
|||
|
(gnus-replace-in-string
|
|||
|
(gnus-replace-in-string gnus-version " v.*$" "")
|
|||
|
" " "-"))
|
|||
|
|
|||
|
(defvar mm-url-package-version gnus-version-number)
|
|||
|
|
|||
|
;; Stolen from w3.
|
|||
|
(defvar mm-url-html-entities
|
|||
|
'(
|
|||
|
;;(excl . 33)
|
|||
|
(quot . 34)
|
|||
|
;;(num . 35)
|
|||
|
;;(dollar . 36)
|
|||
|
;;(percent . 37)
|
|||
|
(amp . 38)
|
|||
|
(rsquo . 39) ; should be U+8217
|
|||
|
;;(apos . 39)
|
|||
|
;;(lpar . 40)
|
|||
|
;;(rpar . 41)
|
|||
|
;;(ast . 42)
|
|||
|
;;(plus . 43)
|
|||
|
;;(comma . 44)
|
|||
|
;;(period . 46)
|
|||
|
;;(colon . 58)
|
|||
|
;;(semi . 59)
|
|||
|
(lt . 60)
|
|||
|
;;(equals . 61)
|
|||
|
(gt . 62)
|
|||
|
;;(quest . 63)
|
|||
|
;;(commat . 64)
|
|||
|
;;(lsqb . 91)
|
|||
|
;;(rsqb . 93)
|
|||
|
(uarr . 94) ; should be U+8593
|
|||
|
;;(lowbar . 95)
|
|||
|
(lsquo . 96) ; should be U+8216
|
|||
|
(lcub . 123)
|
|||
|
;;(verbar . 124)
|
|||
|
(rcub . 125)
|
|||
|
(tilde . 126)
|
|||
|
(nbsp . 160)
|
|||
|
(iexcl . 161)
|
|||
|
(cent . 162)
|
|||
|
(pound . 163)
|
|||
|
(curren . 164)
|
|||
|
(yen . 165)
|
|||
|
(brvbar . 166)
|
|||
|
(sect . 167)
|
|||
|
(uml . 168)
|
|||
|
(copy . 169)
|
|||
|
(ordf . 170)
|
|||
|
(laquo . 171)
|
|||
|
(not . 172)
|
|||
|
(shy . 173)
|
|||
|
(reg . 174)
|
|||
|
(macr . 175)
|
|||
|
(deg . 176)
|
|||
|
(plusmn . 177)
|
|||
|
(sup2 . 178)
|
|||
|
(sup3 . 179)
|
|||
|
(acute . 180)
|
|||
|
(micro . 181)
|
|||
|
(para . 182)
|
|||
|
(middot . 183)
|
|||
|
(cedil . 184)
|
|||
|
(sup1 . 185)
|
|||
|
(ordm . 186)
|
|||
|
(raquo . 187)
|
|||
|
(frac14 . 188)
|
|||
|
(frac12 . 189)
|
|||
|
(frac34 . 190)
|
|||
|
(iquest . 191)
|
|||
|
(Agrave . 192)
|
|||
|
(Aacute . 193)
|
|||
|
(Acirc . 194)
|
|||
|
(Atilde . 195)
|
|||
|
(Auml . 196)
|
|||
|
(Aring . 197)
|
|||
|
(AElig . 198)
|
|||
|
(Ccedil . 199)
|
|||
|
(Egrave . 200)
|
|||
|
(Eacute . 201)
|
|||
|
(Ecirc . 202)
|
|||
|
(Euml . 203)
|
|||
|
(Igrave . 204)
|
|||
|
(Iacute . 205)
|
|||
|
(Icirc . 206)
|
|||
|
(Iuml . 207)
|
|||
|
(ETH . 208)
|
|||
|
(Ntilde . 209)
|
|||
|
(Ograve . 210)
|
|||
|
(Oacute . 211)
|
|||
|
(Ocirc . 212)
|
|||
|
(Otilde . 213)
|
|||
|
(Ouml . 214)
|
|||
|
(times . 215)
|
|||
|
(Oslash . 216)
|
|||
|
(Ugrave . 217)
|
|||
|
(Uacute . 218)
|
|||
|
(Ucirc . 219)
|
|||
|
(Uuml . 220)
|
|||
|
(Yacute . 221)
|
|||
|
(THORN . 222)
|
|||
|
(szlig . 223)
|
|||
|
(agrave . 224)
|
|||
|
(aacute . 225)
|
|||
|
(acirc . 226)
|
|||
|
(atilde . 227)
|
|||
|
(auml . 228)
|
|||
|
(aring . 229)
|
|||
|
(aelig . 230)
|
|||
|
(ccedil . 231)
|
|||
|
(egrave . 232)
|
|||
|
(eacute . 233)
|
|||
|
(ecirc . 234)
|
|||
|
(euml . 235)
|
|||
|
(igrave . 236)
|
|||
|
(iacute . 237)
|
|||
|
(icirc . 238)
|
|||
|
(iuml . 239)
|
|||
|
(eth . 240)
|
|||
|
(ntilde . 241)
|
|||
|
(ograve . 242)
|
|||
|
(oacute . 243)
|
|||
|
(ocirc . 244)
|
|||
|
(otilde . 245)
|
|||
|
(ouml . 246)
|
|||
|
(divide . 247)
|
|||
|
(oslash . 248)
|
|||
|
(ugrave . 249)
|
|||
|
(uacute . 250)
|
|||
|
(ucirc . 251)
|
|||
|
(uuml . 252)
|
|||
|
(yacute . 253)
|
|||
|
(thorn . 254)
|
|||
|
(yuml . 255)
|
|||
|
|
|||
|
;; Special handling of these
|
|||
|
(frac56 . "5/6")
|
|||
|
(frac16 . "1/6")
|
|||
|
(frac45 . "4/5")
|
|||
|
(frac35 . "3/5")
|
|||
|
(frac25 . "2/5")
|
|||
|
(frac15 . "1/5")
|
|||
|
(frac23 . "2/3")
|
|||
|
(frac13 . "1/3")
|
|||
|
(frac78 . "7/8")
|
|||
|
(frac58 . "5/8")
|
|||
|
(frac38 . "3/8")
|
|||
|
(frac18 . "1/8")
|
|||
|
|
|||
|
;; The following 5 entities are not mentioned in the HTML 2.0
|
|||
|
;; standard, nor in any other HTML proposed standard of which I
|
|||
|
;; am aware. I am not even sure they are ISO entity names. ***
|
|||
|
;; Hence, some arrangement should be made to give a bad HTML
|
|||
|
;; message when they are seen.
|
|||
|
(ndash . 45)
|
|||
|
(mdash . 45)
|
|||
|
(emsp . 32)
|
|||
|
(ensp . 32)
|
|||
|
(sim . 126)
|
|||
|
(le . "<=")
|
|||
|
(agr . "alpha")
|
|||
|
(rdquo . "''")
|
|||
|
(ldquo . "``")
|
|||
|
(trade . "(TM)")
|
|||
|
;; To be done
|
|||
|
;; (shy . ????) ; soft hyphen
|
|||
|
)
|
|||
|
"*An assoc list of entity names and how to actually display them.")
|
|||
|
|
|||
|
(defconst mm-url-unreserved-chars
|
|||
|
'(
|
|||
|
?a ?b ?c ?d ?e ?f ?g ?h ?i ?j ?k ?l ?m ?n ?o ?p ?q ?r ?s ?t ?u ?v ?w ?x ?y ?z
|
|||
|
?A ?B ?C ?D ?E ?F ?G ?H ?I ?J ?K ?L ?M ?N ?O ?P ?Q ?R ?S ?T ?U ?V ?W ?X ?Y ?Z
|
|||
|
?0 ?1 ?2 ?3 ?4 ?5 ?6 ?7 ?8 ?9
|
|||
|
?- ?_ ?. ?! ?~ ?* ?' ?\( ?\))
|
|||
|
"A list of characters that are _NOT_ reserved in the URL spec.
|
|||
|
This is taken from RFC 2396.")
|
|||
|
|
|||
|
(defun mm-url-load-url ()
|
|||
|
"Load `url-insert-file-contents'."
|
|||
|
(unless (condition-case ()
|
|||
|
(require 'url-handlers)
|
|||
|
(error nil))
|
|||
|
;; w3-4.0pre0.46 or earlier version.
|
|||
|
(require 'w3-vars)
|
|||
|
(require 'url)))
|
|||
|
|
|||
|
;;;###autoload
|
|||
|
(defun mm-url-insert-file-contents (url)
|
|||
|
"Insert file contents of URL.
|
|||
|
If `mm-url-use-external' is non-nil, use `mm-url-program'."
|
|||
|
(if mm-url-use-external
|
|||
|
(progn
|
|||
|
(if (string-match "^file:/+" url)
|
|||
|
(insert-file-contents (substring url (1- (match-end 0))))
|
|||
|
(mm-url-insert-file-contents-external url))
|
|||
|
(goto-char (point-min))
|
|||
|
(if (fboundp 'url-generic-parse-url)
|
|||
|
(setq url-current-object
|
|||
|
(url-generic-parse-url url)))
|
|||
|
(list url (buffer-size)))
|
|||
|
(mm-url-load-url)
|
|||
|
(let ((name buffer-file-name)
|
|||
|
(url-request-extra-headers (list (cons "Connection" "Close")))
|
|||
|
(url-package-name (or mm-url-package-name
|
|||
|
url-package-name))
|
|||
|
(url-package-version (or mm-url-package-version
|
|||
|
url-package-version))
|
|||
|
result)
|
|||
|
(setq result (url-insert-file-contents url))
|
|||
|
(save-excursion
|
|||
|
(goto-char (point-min))
|
|||
|
(while (re-search-forward "\r 1000\r ?" nil t)
|
|||
|
(replace-match "")))
|
|||
|
(setq buffer-file-name name)
|
|||
|
(if (and (fboundp 'url-generic-parse-url)
|
|||
|
(listp result))
|
|||
|
(setq url-current-object (url-generic-parse-url
|
|||
|
(car result))))
|
|||
|
result)))
|
|||
|
|
|||
|
;;;###autoload
|
|||
|
(defun mm-url-insert-file-contents-external (url)
|
|||
|
"Insert file contents of URL using `mm-url-program'."
|
|||
|
(let (program args)
|
|||
|
(if (symbolp mm-url-program)
|
|||
|
(let ((item (cdr (assq mm-url-program mm-url-predefined-programs))))
|
|||
|
(setq program (car item)
|
|||
|
args (append (cdr item) (list url))))
|
|||
|
(setq program mm-url-program
|
|||
|
args (append mm-url-arguments (list url))))
|
|||
|
(unless (eq 0 (apply 'call-process program nil t nil args))
|
|||
|
(error "Couldn't fetch %s" url))))
|
|||
|
|
|||
|
(defvar mm-url-timeout 30
|
|||
|
"The number of seconds before timing out an URL fetch.")
|
|||
|
|
|||
|
(defvar mm-url-retries 10
|
|||
|
"The number of retries after timing out when fetching an URL.")
|
|||
|
|
|||
|
(defun mm-url-insert (url &optional follow-refresh)
|
|||
|
"Insert the contents from an URL in the current buffer.
|
|||
|
If FOLLOW-REFRESH is non-nil, redirect refresh url in META."
|
|||
|
(let ((times mm-url-retries)
|
|||
|
(done nil)
|
|||
|
(first t)
|
|||
|
result)
|
|||
|
(while (and (not (zerop (decf times)))
|
|||
|
(not done))
|
|||
|
(with-timeout (mm-url-timeout)
|
|||
|
(unless first
|
|||
|
(message "Trying again (%s)..." (- mm-url-retries times)))
|
|||
|
(setq first nil)
|
|||
|
(if follow-refresh
|
|||
|
(save-restriction
|
|||
|
(narrow-to-region (point) (point))
|
|||
|
(mm-url-insert-file-contents url)
|
|||
|
(goto-char (point-min))
|
|||
|
(when (re-search-forward
|
|||
|
"<meta[ \t\r\n]*http-equiv=\"Refresh\"[^>]*URL=\\([^\"]+\\)\"" nil t)
|
|||
|
(let ((url (match-string 1)))
|
|||
|
(delete-region (point-min) (point-max))
|
|||
|
(setq result (mm-url-insert url t)))))
|
|||
|
(setq result (mm-url-insert-file-contents url)))
|
|||
|
(setq done t)))
|
|||
|
result))
|
|||
|
|
|||
|
(defun mm-url-decode-entities ()
|
|||
|
"Decode all HTML entities."
|
|||
|
(goto-char (point-min))
|
|||
|
(while (re-search-forward "&\\(#[0-9]+\\|[a-z]+\\);" nil t)
|
|||
|
(let ((elem (if (eq (aref (match-string 1) 0) ?\#)
|
|||
|
(let ((c
|
|||
|
(string-to-number (substring
|
|||
|
(match-string 1) 1))))
|
|||
|
(if (mm-char-or-char-int-p c) c 32))
|
|||
|
(or (cdr (assq (intern (match-string 1))
|
|||
|
mm-url-html-entities))
|
|||
|
?#))))
|
|||
|
(unless (stringp elem)
|
|||
|
(setq elem (char-to-string elem)))
|
|||
|
(replace-match elem t t))))
|
|||
|
|
|||
|
(defun mm-url-decode-entities-nbsp ()
|
|||
|
"Decode all HTML entities and to a space."
|
|||
|
(let ((mm-url-html-entities (cons '(nbsp . 32) mm-url-html-entities)))
|
|||
|
(mm-url-decode-entities)))
|
|||
|
|
|||
|
(defun mm-url-decode-entities-string (string)
|
|||
|
(with-temp-buffer
|
|||
|
(insert string)
|
|||
|
(mm-url-decode-entities)
|
|||
|
(buffer-string)))
|
|||
|
|
|||
|
(defun mm-url-form-encode-xwfu (chunk)
|
|||
|
"Escape characters in a string for application/x-www-form-urlencoded.
|
|||
|
Blasphemous crap because someone didn't think %20 was good enough for encoding
|
|||
|
spaces. Die Die Die."
|
|||
|
;; This will get rid of the 'attributes' specified by the file type,
|
|||
|
;; which are useless for an application/x-www-form-urlencoded form.
|
|||
|
(if (consp chunk)
|
|||
|
(setq chunk (cdr chunk)))
|
|||
|
|
|||
|
(mapconcat
|
|||
|
(lambda (char)
|
|||
|
(cond
|
|||
|
((= char ? ) "+")
|
|||
|
((memq char mm-url-unreserved-chars) (char-to-string char))
|
|||
|
(t (upcase (format "%%%02x" char)))))
|
|||
|
;; Fixme: Should this actually be accepting multibyte? Is there a
|
|||
|
;; better way in XEmacs?
|
|||
|
(if (featurep 'mule)
|
|||
|
(encode-coding-string chunk
|
|||
|
(if (fboundp 'find-coding-systems-string)
|
|||
|
(car (find-coding-systems-string chunk))
|
|||
|
buffer-file-coding-system))
|
|||
|
chunk)
|
|||
|
""))
|
|||
|
|
|||
|
(defun mm-url-encode-www-form-urlencoded (pairs)
|
|||
|
"Return PAIRS encoded for forms."
|
|||
|
(mapconcat
|
|||
|
(lambda (data)
|
|||
|
(concat (mm-url-form-encode-xwfu (car data)) "="
|
|||
|
(mm-url-form-encode-xwfu (cdr data))))
|
|||
|
pairs "&"))
|
|||
|
|
|||
|
(defun mm-url-fetch-form (url pairs)
|
|||
|
"Fetch a form from URL with PAIRS as the data using the POST method."
|
|||
|
(mm-url-load-url)
|
|||
|
(let ((url-request-data (mm-url-encode-www-form-urlencoded pairs))
|
|||
|
(url-request-method "POST")
|
|||
|
(url-request-extra-headers
|
|||
|
'(("Content-type" . "application/x-www-form-urlencoded"))))
|
|||
|
(url-insert-file-contents url)
|
|||
|
(setq buffer-file-name nil))
|
|||
|
t)
|
|||
|
|
|||
|
(defun mm-url-fetch-simple (url content)
|
|||
|
(mm-url-load-url)
|
|||
|
(let ((url-request-data content)
|
|||
|
(url-request-method "POST")
|
|||
|
(url-request-extra-headers
|
|||
|
'(("Content-type" . "application/x-www-form-urlencoded"))))
|
|||
|
(url-insert-file-contents url)
|
|||
|
(setq buffer-file-name nil))
|
|||
|
t)
|
|||
|
|
|||
|
(defun mm-url-remove-markup ()
|
|||
|
"Remove all HTML markup, leaving just plain text."
|
|||
|
(goto-char (point-min))
|
|||
|
(while (search-forward "<!--" nil t)
|
|||
|
(delete-region (match-beginning 0)
|
|||
|
(or (search-forward "-->" nil t)
|
|||
|
(point-max))))
|
|||
|
(goto-char (point-min))
|
|||
|
(while (re-search-forward "<[^>]+>" nil t)
|
|||
|
(replace-match "" t t)))
|
|||
|
|
|||
|
(provide 'mm-url)
|
|||
|
|
|||
|
;;; arch-tag: 0594f9b3-417c-48b0-adc2-5082e1e7917f
|
|||
|
;;; mm-url.el ends here
|