1
0
mirror of https://git.savannah.gnu.org/git/emacs.git synced 2025-01-03 11:33:37 +00:00
emacs/lisp/url/url-queue.el

193 lines
6.4 KiB
EmacsLisp
Raw Normal View History

;;; url-queue.el --- Fetching web pages in parallel -*- lexical-binding: t -*-
;; Copyright (C) 2011-2017 Free Software Foundation, Inc.
;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
;; Keywords: comm
;; This file is part of GNU Emacs.
;; GNU Emacs is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.
;; GNU Emacs is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
;;; Commentary:
;; The point of this package is to allow fetching web pages in
;; parallel -- but control the level of parallelism to avoid DoS-ing
;; web servers and Emacs.
;;; Code:
More CL cleanups and reduction of use of cl.el. * woman.el, winner.el, vc/vc-rcs.el, vc/vc-hooks.el, vc/vc-hg.el: * vc/vc-git.el, vc/vc-dir.el, vc/vc-bzr.el, vc/vc-annotate.el: * textmodes/tex-mode.el, textmodes/sgml-mode.el, tar-mode.el: * strokes.el, ses.el, server.el, progmodes/js.el, progmodes/gdb-mi.el: * progmodes/flymake.el, progmodes/ebrowse.el, progmodes/compile.el: * play/tetris.el, play/snake.el, play/pong.el, play/landmark.el: * play/hanoi.el, play/decipher.el, play/5x5.el, nxml/nxml-mode.el: * net/secrets.el, net/quickurl.el, midnight.el, mail/footnote.el: * image-dired.el, ibuffer.el, ibuf-macs.el, ibuf-ext.el, hexl.el: * eshell/eshell.el, eshell/esh-io.el, eshell/esh-ext.el: * eshell/esh-cmd.el, eshell/em-ls.el, eshell/em-hist.el: * eshell/em-cmpl.el, eshell/em-banner.el: * url/url.el, url/url-queue.el, url/url-parse.el, url/url-http.el: * url/url-future.el, url/url-dav.el, url/url-cookie.el: * calendar/parse-time.el, test/eshell.el: Use cl-lib. * wid-browse.el, wdired.el, vc/vc.el, vc/vc-mtn.el, vc/vc-cvs.el: * vc/vc-arch.el, tree-widget.el, textmodes/texinfo.el: * textmodes/refill.el, textmodes/css-mode.el, term/tvi970.el: * term/ns-win.el, term.el, shell.el, ps-samp.el: * progmodes/perl-mode.el, progmodes/pascal.el, progmodes/gud.el: * progmodes/glasses.el, progmodes/etags.el, progmodes/cwarn.el: * play/gamegrid.el, play/bubbles.el, novice.el, notifications.el: * net/zeroconf.el, net/xesam.el, net/snmp-mode.el, net/mairix.el: * net/ldap.el, net/eudc.el, net/browse-url.el, man.el: * mail/mailheader.el, mail/feedmail.el: * url/url-util.el, url/url-privacy.el, url/url-nfs.el, url/url-misc.el: * url/url-methods.el, url/url-gw.el, url/url-file.el, url/url-expand.el: Dont use CL. * ibuf-ext.el (ibuffer-mark-old-buffers): Use float-time. * eshell/esh-opt.el (eshell-eval-using-options): Quote code with `lambda' rather than with `quote'. (eshell-do-opt): Adjust accordingly. (eshell-process-option): Simplify. * eshell/esh-var.el: * eshell/em-script.el: Require `esh-opt' for eshell-eval-using-options. * emacs-pcase.el (pcase--dontcare-upats, pcase--let*) (pcase--expand, pcase--u1): Rename pcase's internal `dontcare' pattern to `pcase--dontcare'. * emacs-cl.el (labels): Mark obsolete. (cl--letf, letf): Move to cl-lib. (cl--letf*, letf*): Remove. * emacs-cl-lib.el (cl-nth-value): Use defalias. * emacs-cl-macs.el (cl-dolist, cl-dotimes): Add indent rule. (cl-progv): Rewrite. (cl--letf, cl-letf): Move from cl.el. (cl-letf*): New macro. * emacs-cl-extra.el (cl--progv-before, cl--progv-after): Remove.
2012-07-11 23:13:41 +00:00
(eval-when-compile (require 'cl-lib))
(require 'browse-url)
(require 'url-parse)
(defcustom url-queue-parallel-processes 6
"The number of concurrent processes."
:version "24.1"
:type 'integer
:group 'url)
(defcustom url-queue-timeout 5
"How long to let a job live once it's started (in seconds)."
:version "24.1"
:type 'integer
:group 'url)
;;; Internal variables.
(defvar url-queue nil)
(defvar url-queue-progress-timer nil)
More CL cleanups and reduction of use of cl.el. * woman.el, winner.el, vc/vc-rcs.el, vc/vc-hooks.el, vc/vc-hg.el: * vc/vc-git.el, vc/vc-dir.el, vc/vc-bzr.el, vc/vc-annotate.el: * textmodes/tex-mode.el, textmodes/sgml-mode.el, tar-mode.el: * strokes.el, ses.el, server.el, progmodes/js.el, progmodes/gdb-mi.el: * progmodes/flymake.el, progmodes/ebrowse.el, progmodes/compile.el: * play/tetris.el, play/snake.el, play/pong.el, play/landmark.el: * play/hanoi.el, play/decipher.el, play/5x5.el, nxml/nxml-mode.el: * net/secrets.el, net/quickurl.el, midnight.el, mail/footnote.el: * image-dired.el, ibuffer.el, ibuf-macs.el, ibuf-ext.el, hexl.el: * eshell/eshell.el, eshell/esh-io.el, eshell/esh-ext.el: * eshell/esh-cmd.el, eshell/em-ls.el, eshell/em-hist.el: * eshell/em-cmpl.el, eshell/em-banner.el: * url/url.el, url/url-queue.el, url/url-parse.el, url/url-http.el: * url/url-future.el, url/url-dav.el, url/url-cookie.el: * calendar/parse-time.el, test/eshell.el: Use cl-lib. * wid-browse.el, wdired.el, vc/vc.el, vc/vc-mtn.el, vc/vc-cvs.el: * vc/vc-arch.el, tree-widget.el, textmodes/texinfo.el: * textmodes/refill.el, textmodes/css-mode.el, term/tvi970.el: * term/ns-win.el, term.el, shell.el, ps-samp.el: * progmodes/perl-mode.el, progmodes/pascal.el, progmodes/gud.el: * progmodes/glasses.el, progmodes/etags.el, progmodes/cwarn.el: * play/gamegrid.el, play/bubbles.el, novice.el, notifications.el: * net/zeroconf.el, net/xesam.el, net/snmp-mode.el, net/mairix.el: * net/ldap.el, net/eudc.el, net/browse-url.el, man.el: * mail/mailheader.el, mail/feedmail.el: * url/url-util.el, url/url-privacy.el, url/url-nfs.el, url/url-misc.el: * url/url-methods.el, url/url-gw.el, url/url-file.el, url/url-expand.el: Dont use CL. * ibuf-ext.el (ibuffer-mark-old-buffers): Use float-time. * eshell/esh-opt.el (eshell-eval-using-options): Quote code with `lambda' rather than with `quote'. (eshell-do-opt): Adjust accordingly. (eshell-process-option): Simplify. * eshell/esh-var.el: * eshell/em-script.el: Require `esh-opt' for eshell-eval-using-options. * emacs-pcase.el (pcase--dontcare-upats, pcase--let*) (pcase--expand, pcase--u1): Rename pcase's internal `dontcare' pattern to `pcase--dontcare'. * emacs-cl.el (labels): Mark obsolete. (cl--letf, letf): Move to cl-lib. (cl--letf*, letf*): Remove. * emacs-cl-lib.el (cl-nth-value): Use defalias. * emacs-cl-macs.el (cl-dolist, cl-dotimes): Add indent rule. (cl-progv): Rewrite. (cl--letf, cl-letf): Move from cl.el. (cl-letf*): New macro. * emacs-cl-extra.el (cl--progv-before, cl--progv-after): Remove.
2012-07-11 23:13:41 +00:00
(cl-defstruct url-queue
url callback cbargs silentp
buffer start-time pre-triggered
inhibit-cookiesp)
;;;###autoload
(defun url-queue-retrieve (url callback &optional cbargs silent inhibit-cookies)
"Retrieve URL asynchronously and call CALLBACK with CBARGS when finished.
This is like `url-retrieve' (which see for details of the arguments),
but with limits on the degree of parallelism. The variable
`url-queue-parallel-processes' sets the number of concurrent processes.
The variable `url-queue-timeout' sets a timeout."
(setq url-queue
(append url-queue
(list (make-url-queue :url url
:callback callback
:cbargs cbargs
:silentp silent
:inhibit-cookiesp inhibit-cookies))))
(url-queue-setup-runners))
;; To ensure asynch behavior, we start the required number of queue
;; runners from `run-with-idle-timer'. So we're basically going
;; through the queue in two ways: 1) synchronously when a program
;; calls `url-queue-retrieve' (which will then start the required
;; number of queue runners), and 2) at the exit of each job, which
;; will then not start any further threads, but just reuse the
;; previous "slot".
(defun url-queue-setup-runners ()
(let ((running 0)
waiting)
(dolist (entry url-queue)
(cond
((or (url-queue-start-time entry)
(url-queue-pre-triggered entry))
More CL cleanups and reduction of use of cl.el. * woman.el, winner.el, vc/vc-rcs.el, vc/vc-hooks.el, vc/vc-hg.el: * vc/vc-git.el, vc/vc-dir.el, vc/vc-bzr.el, vc/vc-annotate.el: * textmodes/tex-mode.el, textmodes/sgml-mode.el, tar-mode.el: * strokes.el, ses.el, server.el, progmodes/js.el, progmodes/gdb-mi.el: * progmodes/flymake.el, progmodes/ebrowse.el, progmodes/compile.el: * play/tetris.el, play/snake.el, play/pong.el, play/landmark.el: * play/hanoi.el, play/decipher.el, play/5x5.el, nxml/nxml-mode.el: * net/secrets.el, net/quickurl.el, midnight.el, mail/footnote.el: * image-dired.el, ibuffer.el, ibuf-macs.el, ibuf-ext.el, hexl.el: * eshell/eshell.el, eshell/esh-io.el, eshell/esh-ext.el: * eshell/esh-cmd.el, eshell/em-ls.el, eshell/em-hist.el: * eshell/em-cmpl.el, eshell/em-banner.el: * url/url.el, url/url-queue.el, url/url-parse.el, url/url-http.el: * url/url-future.el, url/url-dav.el, url/url-cookie.el: * calendar/parse-time.el, test/eshell.el: Use cl-lib. * wid-browse.el, wdired.el, vc/vc.el, vc/vc-mtn.el, vc/vc-cvs.el: * vc/vc-arch.el, tree-widget.el, textmodes/texinfo.el: * textmodes/refill.el, textmodes/css-mode.el, term/tvi970.el: * term/ns-win.el, term.el, shell.el, ps-samp.el: * progmodes/perl-mode.el, progmodes/pascal.el, progmodes/gud.el: * progmodes/glasses.el, progmodes/etags.el, progmodes/cwarn.el: * play/gamegrid.el, play/bubbles.el, novice.el, notifications.el: * net/zeroconf.el, net/xesam.el, net/snmp-mode.el, net/mairix.el: * net/ldap.el, net/eudc.el, net/browse-url.el, man.el: * mail/mailheader.el, mail/feedmail.el: * url/url-util.el, url/url-privacy.el, url/url-nfs.el, url/url-misc.el: * url/url-methods.el, url/url-gw.el, url/url-file.el, url/url-expand.el: Dont use CL. * ibuf-ext.el (ibuffer-mark-old-buffers): Use float-time. * eshell/esh-opt.el (eshell-eval-using-options): Quote code with `lambda' rather than with `quote'. (eshell-do-opt): Adjust accordingly. (eshell-process-option): Simplify. * eshell/esh-var.el: * eshell/em-script.el: Require `esh-opt' for eshell-eval-using-options. * emacs-pcase.el (pcase--dontcare-upats, pcase--let*) (pcase--expand, pcase--u1): Rename pcase's internal `dontcare' pattern to `pcase--dontcare'. * emacs-cl.el (labels): Mark obsolete. (cl--letf, letf): Move to cl-lib. (cl--letf*, letf*): Remove. * emacs-cl-lib.el (cl-nth-value): Use defalias. * emacs-cl-macs.el (cl-dolist, cl-dotimes): Add indent rule. (cl-progv): Rewrite. (cl--letf, cl-letf): Move from cl.el. (cl-letf*): New macro. * emacs-cl-extra.el (cl--progv-before, cl--progv-after): Remove.
2012-07-11 23:13:41 +00:00
(cl-incf running))
((not waiting)
(setq waiting entry))))
(when (and waiting
(< running url-queue-parallel-processes))
(setf (url-queue-pre-triggered waiting) t)
;; We start fetching from this idle timer...
(run-with-idle-timer 0.01 nil #'url-queue-run-queue)
;; And then we set up a separate timer to ensure progress when a
;; web server is unresponsive.
(unless url-queue-progress-timer
(setq url-queue-progress-timer
(run-with-idle-timer 1 1 #'url-queue-check-progress))))))
(defun url-queue-run-queue ()
(url-queue-prune-old-entries)
(let ((running 0)
waiting)
(dolist (entry url-queue)
(cond
((url-queue-start-time entry)
More CL cleanups and reduction of use of cl.el. * woman.el, winner.el, vc/vc-rcs.el, vc/vc-hooks.el, vc/vc-hg.el: * vc/vc-git.el, vc/vc-dir.el, vc/vc-bzr.el, vc/vc-annotate.el: * textmodes/tex-mode.el, textmodes/sgml-mode.el, tar-mode.el: * strokes.el, ses.el, server.el, progmodes/js.el, progmodes/gdb-mi.el: * progmodes/flymake.el, progmodes/ebrowse.el, progmodes/compile.el: * play/tetris.el, play/snake.el, play/pong.el, play/landmark.el: * play/hanoi.el, play/decipher.el, play/5x5.el, nxml/nxml-mode.el: * net/secrets.el, net/quickurl.el, midnight.el, mail/footnote.el: * image-dired.el, ibuffer.el, ibuf-macs.el, ibuf-ext.el, hexl.el: * eshell/eshell.el, eshell/esh-io.el, eshell/esh-ext.el: * eshell/esh-cmd.el, eshell/em-ls.el, eshell/em-hist.el: * eshell/em-cmpl.el, eshell/em-banner.el: * url/url.el, url/url-queue.el, url/url-parse.el, url/url-http.el: * url/url-future.el, url/url-dav.el, url/url-cookie.el: * calendar/parse-time.el, test/eshell.el: Use cl-lib. * wid-browse.el, wdired.el, vc/vc.el, vc/vc-mtn.el, vc/vc-cvs.el: * vc/vc-arch.el, tree-widget.el, textmodes/texinfo.el: * textmodes/refill.el, textmodes/css-mode.el, term/tvi970.el: * term/ns-win.el, term.el, shell.el, ps-samp.el: * progmodes/perl-mode.el, progmodes/pascal.el, progmodes/gud.el: * progmodes/glasses.el, progmodes/etags.el, progmodes/cwarn.el: * play/gamegrid.el, play/bubbles.el, novice.el, notifications.el: * net/zeroconf.el, net/xesam.el, net/snmp-mode.el, net/mairix.el: * net/ldap.el, net/eudc.el, net/browse-url.el, man.el: * mail/mailheader.el, mail/feedmail.el: * url/url-util.el, url/url-privacy.el, url/url-nfs.el, url/url-misc.el: * url/url-methods.el, url/url-gw.el, url/url-file.el, url/url-expand.el: Dont use CL. * ibuf-ext.el (ibuffer-mark-old-buffers): Use float-time. * eshell/esh-opt.el (eshell-eval-using-options): Quote code with `lambda' rather than with `quote'. (eshell-do-opt): Adjust accordingly. (eshell-process-option): Simplify. * eshell/esh-var.el: * eshell/em-script.el: Require `esh-opt' for eshell-eval-using-options. * emacs-pcase.el (pcase--dontcare-upats, pcase--let*) (pcase--expand, pcase--u1): Rename pcase's internal `dontcare' pattern to `pcase--dontcare'. * emacs-cl.el (labels): Mark obsolete. (cl--letf, letf): Move to cl-lib. (cl--letf*, letf*): Remove. * emacs-cl-lib.el (cl-nth-value): Use defalias. * emacs-cl-macs.el (cl-dolist, cl-dotimes): Add indent rule. (cl-progv): Rewrite. (cl--letf, cl-letf): Move from cl.el. (cl-letf*): New macro. * emacs-cl-extra.el (cl--progv-before, cl--progv-after): Remove.
2012-07-11 23:13:41 +00:00
(cl-incf running))
((not waiting)
(setq waiting entry))))
(when (and waiting
(< running url-queue-parallel-processes))
(setf (url-queue-start-time waiting) (float-time))
(url-queue-start-retrieve waiting))))
(defun url-queue-check-progress ()
(when url-queue-progress-timer
(if url-queue
(url-queue-run-queue)
(cancel-timer url-queue-progress-timer)
(setq url-queue-progress-timer nil))))
(defun url-queue-callback-function (status job)
(setq url-queue (delq job url-queue))
(when (and (eq (car status) :error)
(eq (cadr (cadr status)) 'connection-failed))
;; If we get a connection error, then flush all other jobs from
;; the host from the queue. This particularly makes sense if the
;; error really is a DNS resolver issue, which happens
;; synchronously and totally halts Emacs.
(url-queue-remove-jobs-from-host
(plist-get (nthcdr 3 (cadr status)) :host)))
(url-queue-run-queue)
(apply (url-queue-callback job) (cons status (url-queue-cbargs job))))
(defun url-queue-remove-jobs-from-host (host)
(let ((jobs nil))
(dolist (job url-queue)
(when (equal (url-host (url-generic-parse-url (url-queue-url job)))
host)
(push job jobs)))
(dolist (job jobs)
(url-queue-kill-job job)
(setq url-queue (delq job url-queue)))))
(defun url-queue-start-retrieve (job)
(setf (url-queue-buffer job)
(ignore-errors
(let ((url-request-noninteractive t))
(url-retrieve (url-queue-url job)
#'url-queue-callback-function (list job)
(url-queue-silentp job)
(url-queue-inhibit-cookiesp job))))))
(defun url-queue-prune-old-entries ()
(let (dead-jobs)
(dolist (job url-queue)
;; Kill jobs that have lasted longer than the timeout.
(when (and (url-queue-start-time job)
(> (- (float-time) (url-queue-start-time job))
url-queue-timeout))
(push job dead-jobs)))
(dolist (job dead-jobs)
(url-queue-kill-job job)
(setq url-queue (delq job url-queue)))))
(defun url-queue-kill-job (job)
(when (bufferp (url-queue-buffer job))
(let (process)
(while (setq process (get-buffer-process (url-queue-buffer job)))
(set-process-sentinel process 'ignore)
(ignore-errors
(delete-process process)))))
;; Call the callback with an error message to ensure that the caller
;; is notified that the job has failed.
(with-current-buffer
(if (and (bufferp (url-queue-buffer job))
(buffer-live-p (url-queue-buffer job)))
;; Use the (partially filled) process buffer it it exists.
(url-queue-buffer job)
;; If not, just create a new buffer, which will probably be
;; killed again by the caller.
(generate-new-buffer " *temp*"))
(apply (url-queue-callback job)
(cons (list :error (list 'error 'url-queue-timeout
"Queue timeout exceeded"))
(url-queue-cbargs job)))))
(provide 'url-queue)
;;; url-queue.el ends here