mirror of
https://git.savannah.gnu.org/git/emacs.git
synced 2024-12-03 08:30:09 +00:00
ecf08f0621
dc4e6b1329
; Update copyright years in more files64b3777631
; Run set-copyright from admin.el8e1c56ae46
; Add 2024 to copyright years # Conflicts: # doc/misc/modus-themes.org # doc/misc/texinfo.tex # etc/NEWS # etc/refcards/ru-refcard.tex # etc/themes/modus-operandi-theme.el # etc/themes/modus-themes.el # etc/themes/modus-vivendi-theme.el # lib/alloca.in.h # lib/binary-io.h # lib/c-ctype.h # lib/c-strcasecmp.c # lib/c-strncasecmp.c # lib/careadlinkat.c # lib/cloexec.c # lib/close-stream.c # lib/diffseq.h # lib/dup2.c # lib/filemode.h # lib/fpending.c # lib/fpending.h # lib/fsusage.c # lib/getgroups.c # lib/getloadavg.c # lib/gettext.h # lib/gettime.c # lib/gettimeofday.c # lib/group-member.c # lib/malloc.c # lib/md5-stream.c # lib/md5.c # lib/md5.h # lib/memmem.c # lib/memrchr.c # lib/nanosleep.c # lib/save-cwd.h # lib/sha1.c # lib/sig2str.c # lib/stdlib.in.h # lib/strtoimax.c # lib/strtol.c # lib/strtoll.c # lib/time_r.c # lib/xalloc-oversized.h # lisp/auth-source-pass.el # lisp/emacs-lisp/lisp-mnt.el # lisp/emacs-lisp/timer.el # lisp/info-look.el # lisp/jit-lock.el # lisp/loadhist.el # lisp/mail/rmail.el # lisp/net/ntlm.el # lisp/net/webjump.el # lisp/progmodes/asm-mode.el # lisp/progmodes/project.el # lisp/progmodes/sh-script.el # lisp/textmodes/flyspell.el # lisp/textmodes/reftex-toc.el # lisp/textmodes/reftex.el # lisp/textmodes/tex-mode.el # lisp/url/url-gw.el # m4/alloca.m4 # m4/clock_time.m4 # m4/d-type.m4 # m4/dirent_h.m4 # m4/dup2.m4 # m4/euidaccess.m4 # m4/fchmodat.m4 # m4/filemode.m4 # m4/fsusage.m4 # m4/getgroups.m4 # m4/getloadavg.m4 # m4/getrandom.m4 # m4/gettime.m4 # m4/gettimeofday.m4 # m4/gnulib-common.m4 # m4/group-member.m4 # m4/inttypes.m4 # m4/malloc.m4 # m4/manywarnings.m4 # m4/mempcpy.m4 # m4/memrchr.m4 # m4/mkostemp.m4 # m4/mktime.m4 # m4/nproc.m4 # m4/nstrftime.m4 # m4/pathmax.m4 # m4/pipe2.m4 # m4/pselect.m4 # m4/pthread_sigmask.m4 # m4/readlink.m4 # m4/realloc.m4 # m4/sig2str.m4 # m4/ssize_t.m4 # m4/stat-time.m4 # m4/stddef_h.m4 # m4/stdint.m4 # m4/stdio_h.m4 # m4/stdlib_h.m4 # m4/stpcpy.m4 # m4/strnlen.m4 # m4/strtoimax.m4 # m4/strtoll.m4 # m4/time_h.m4 # m4/timegm.m4 # m4/timer_time.m4 # m4/timespec.m4 # m4/unistd_h.m4 # m4/warnings.m4 # nt/configure.bat # nt/preprep.c # test/lisp/register-tests.el
356 lines
11 KiB
EmacsLisp
356 lines
11 KiB
EmacsLisp
;;; rng-uri.el --- URI parsing and manipulation -*- lexical-binding:t -*-
|
|
|
|
;; Copyright (C) 2003, 2007-2024 Free Software Foundation, Inc.
|
|
|
|
;; Author: James Clark
|
|
;; Keywords: text, hypermedia, languages, XML
|
|
|
|
;; This file is part of GNU Emacs.
|
|
|
|
;; GNU Emacs is free software: you can redistribute it and/or modify
|
|
;; it under the terms of the GNU General Public License as published by
|
|
;; the Free Software Foundation, either version 3 of the License, or
|
|
;; (at your option) any later version.
|
|
|
|
;; GNU Emacs is distributed in the hope that it will be useful,
|
|
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
;; GNU General Public License for more details.
|
|
|
|
;; You should have received a copy of the GNU General Public License
|
|
;; along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>.
|
|
|
|
;;; Commentary:
|
|
|
|
;;; Code:
|
|
|
|
(defun rng-file-name-uri (f)
|
|
"Return a URI for the filename F.
|
|
Multibyte characters are left as is. Use `rng-uri-escape-multibyte' to
|
|
escape them using %HH."
|
|
(setq f (expand-file-name f))
|
|
(let ((url
|
|
;; FIXME. Explain why the pattern doesn't also have "!$&'()*+,/:@=".
|
|
;; See Internet RFC 3986 section 2.2.
|
|
(replace-regexp-in-string "[]\0-\s\"#%;<>?[\\^`{|}\177]"
|
|
'rng-percent-encode f)))
|
|
(concat "file:"
|
|
(if (and (> (length url) 0)
|
|
(= (aref url 0) ?/))
|
|
"//"
|
|
"///")
|
|
url)))
|
|
|
|
(defun rng-uri-escape-multibyte (uri)
|
|
"Escape multibyte characters in URI."
|
|
(replace-regexp-in-string "[[:nonascii:]]"
|
|
'rng-percent-encode
|
|
(encode-coding-string uri 'utf-8)))
|
|
|
|
(defun rng-percent-encode (str)
|
|
(apply 'concat
|
|
(mapcar (lambda (ch)
|
|
(format "%%%x%x" (/ ch 16) (% ch 16)))
|
|
(string-to-list str))))
|
|
|
|
|
|
(defun rng-uri-file-name (uri)
|
|
"Return the filename represented by a URI.
|
|
Signal an error if URI is not a valid file URL."
|
|
(rng-uri-file-name-1 uri nil))
|
|
|
|
(defun rng-uri-pattern-file-name-regexp (pattern)
|
|
"Return a regexp for filenames represented by URIs that match PATTERN."
|
|
(rng-uri-file-name-1 pattern 'match))
|
|
|
|
(defun rng-uri-pattern-file-name-replace-match (pattern)
|
|
(rng-uri-file-name-1 pattern 'replace))
|
|
|
|
;; pattern is either nil or match or replace
|
|
(defun rng-uri-file-name-1 (uri pattern)
|
|
(unless (string-match "\\`\\(?:[^%]\\|%[[:xdigit:]]\\{2\\}\\)*\\'" uri)
|
|
(rng-uri-error "Bad escapes in URI `%s'" uri))
|
|
(setq uri (rng-uri-unescape-multibyte uri))
|
|
(let* ((components
|
|
(or (rng-uri-split uri)
|
|
(rng-uri-error "Cannot split URI `%s' into its components" uri)))
|
|
(scheme (nth 0 components))
|
|
(authority (nth 1 components))
|
|
(path (nth 2 components))
|
|
(absolutep (string-match "\\`/" path))
|
|
(query (nth 3 components))
|
|
(fragment-id (nth 4 components)))
|
|
(cond ((not scheme)
|
|
(unless pattern
|
|
(rng-uri-error "URI `%s' does not have a scheme" uri)))
|
|
((not (string= (downcase scheme) "file"))
|
|
(rng-uri-error "URI `%s' does not use the `file:' scheme" uri)))
|
|
(when (not (member authority
|
|
(cons (system-name) '(nil "" "localhost"))))
|
|
(rng-uri-error "URI `%s' does not start with `file:///' or `file://localhost/'"
|
|
uri))
|
|
(when query
|
|
(rng-uri-error "`?' not escaped in file URI `%s'" uri))
|
|
(when fragment-id
|
|
(rng-uri-error "URI `%s' has a fragment identifier" uri))
|
|
(when (string-search ";" path)
|
|
(rng-uri-error "`;' not escaped in URI `%s'" uri))
|
|
(when (string-match "%2[fF]" path) ;; 2f is hex code of slash
|
|
(rng-uri-error "Escaped slash in URI `%s'" uri))
|
|
(when (and (eq system-type 'windows-nt)
|
|
absolutep
|
|
(file-name-absolute-p (substring path 1)))
|
|
(setq path (substring path 1)))
|
|
(when (and pattern (string-match "\\`\\./" path))
|
|
(setq path (substring path 2)))
|
|
(setq path
|
|
(cond ((eq pattern 'match)
|
|
(rng-uri-unescape-unibyte-match path))
|
|
((eq pattern 'replace)
|
|
(rng-uri-unescape-unibyte-replace path 2))
|
|
(t
|
|
(rng-uri-unescape-unibyte path))))
|
|
(when (string-search "\000" path)
|
|
(rng-uri-error "URI `%s' has NUL character in path" uri))
|
|
(when (eq pattern 'match)
|
|
(setq path
|
|
(concat (if absolutep
|
|
"\\(\\)"
|
|
"\\(\\(?:[^/]*/\\)*\\)")
|
|
path)))
|
|
(cond ((eq pattern 'match)
|
|
(concat "\\`" path "\\'"))
|
|
((and (eq pattern 'replace)
|
|
(not absolutep))
|
|
(concat "\\1" path))
|
|
(t path))))
|
|
|
|
(defun rng-uri-error (&rest args)
|
|
(signal 'rng-uri-error (list (apply #'format-message args))))
|
|
|
|
(define-error 'rng-uri-error "Invalid URI")
|
|
|
|
(defun rng-uri-split (str)
|
|
(and (string-match "\\`\\(?:\\([^:/?#]+\\):\\)?\
|
|
\\(?://\\([^/?#]*\\)\\)?\
|
|
\\([^?#]*\\)\
|
|
\\(?:\\?\\([^#]*\\)\\)?\
|
|
\\(?:#\\(\\(?:.\\|\n\\)*\\)\\)?\\'"
|
|
str)
|
|
(list (match-string 1 str)
|
|
(match-string 2 str)
|
|
(match-string 3 str)
|
|
(match-string 4 str)
|
|
(match-string 5 str))))
|
|
|
|
(defun rng-uri-join (scheme authority path &optional query fragment-id)
|
|
(when path
|
|
(let (parts)
|
|
(when fragment-id
|
|
(setq parts (list "#" fragment-id)))
|
|
(when query
|
|
(setq parts
|
|
(cons "?"
|
|
(cons query parts))))
|
|
(setq parts (cons path parts))
|
|
(when authority
|
|
(setq parts
|
|
(cons "//"
|
|
(cons authority parts))))
|
|
(when scheme
|
|
(setq parts
|
|
(cons scheme
|
|
(cons ":" parts))))
|
|
(apply 'concat parts))))
|
|
|
|
(defun rng-uri-resolve (uri-ref base-uri)
|
|
"Resolve a possibly relative URI reference into absolute form.
|
|
URI-REF is the URI reference to be resolved.
|
|
BASE-URI is the base URI to use for resolving it.
|
|
The algorithm is specified by RFC 2396.
|
|
If there is some problem with URI-REF or BASE-URI, then
|
|
URI-REF will be returned."
|
|
(let* ((components (rng-uri-split uri-ref))
|
|
(scheme (nth 0 components))
|
|
(authority (nth 1 components))
|
|
(path (nth 2 components))
|
|
(query (nth 3 components))
|
|
(fragment-id (nth 4 components))
|
|
(base-components (rng-uri-split base-uri)))
|
|
(if (or (not components)
|
|
scheme
|
|
(not base-components)
|
|
(not (nth 0 base-components)))
|
|
uri-ref
|
|
(setq scheme (nth 0 base-components))
|
|
(when (not authority)
|
|
(setq authority (nth 1 base-components))
|
|
(if (and (equal path "") (not query))
|
|
;; Handle same document reference by returning
|
|
;; same URI (RFC 2396bis does this too).
|
|
(setq path (nth 2 base-components)
|
|
query (nth 3 base-components))
|
|
(setq path (rng-resolve-path path (nth 2 base-components)))))
|
|
(rng-uri-join scheme
|
|
authority
|
|
path
|
|
query
|
|
fragment-id))))
|
|
|
|
;; See RFC 2396 5.2, steps 5 and 6
|
|
(defun rng-resolve-path (path base-path)
|
|
;; Step 5
|
|
(if (or (string-match "\\`/" path)
|
|
(not (string-match "\\`/" base-path)))
|
|
path
|
|
;; Step 6
|
|
;; (a), (b)
|
|
(let ((segments (rng-split-path path))
|
|
(base-segments (rng-split-path base-path)))
|
|
(if (> (length base-segments) 1)
|
|
(setq segments (nconc (nbutlast base-segments)
|
|
segments))
|
|
(setcar segments
|
|
(concat (car base-segments) (car segments))))
|
|
;; (d)
|
|
(let ((last-segment (last segments)))
|
|
(when (equal (car last-segment) ".")
|
|
(setcar last-segment "")))
|
|
;; (c)
|
|
(setq segments (delete "." segments))
|
|
;; (e)
|
|
(let (iter matched)
|
|
(while (progn
|
|
(setq matched nil)
|
|
(setq iter (cdr segments))
|
|
(while (and iter (not matched))
|
|
(if (or (not (equal (cadr iter) ".."))
|
|
(equal (car iter) ".."))
|
|
(setq iter (cdr iter))
|
|
(setcar iter nil)
|
|
(setcar (cdr iter)
|
|
;; (f)
|
|
(if (cddr iter) nil ""))
|
|
(setq matched t)
|
|
(setq segments (delq nil segments))))
|
|
matched)))
|
|
(rng-join-path segments))))
|
|
|
|
(defun rng-relative-uri (full base)
|
|
"Return a URI that relative to BASE is equivalent to FULL.
|
|
The returned URI will be relative if possible.
|
|
Both FULL and BASE must be absolute URIs."
|
|
(let* ((components (rng-uri-split full))
|
|
(scheme (nth 0 components))
|
|
(authority (nth 1 components))
|
|
(path (nth 2 components))
|
|
(query (nth 3 components))
|
|
(fragment-id (nth 4 components))
|
|
(base-components (rng-uri-split base)))
|
|
(if (and components
|
|
base-components
|
|
scheme
|
|
(equal scheme
|
|
(nth 0 base-components)))
|
|
(progn
|
|
(setq scheme nil)
|
|
(when (and authority
|
|
(equal authority
|
|
(nth 1 base-components)))
|
|
(setq authority nil)
|
|
(setq path (rng-relative-path path (nth 2 base-components))))
|
|
(rng-uri-join scheme authority path query fragment-id))
|
|
full)))
|
|
|
|
(defun rng-relative-path (path base-path)
|
|
(let ((segments (rng-split-path path))
|
|
(base-segments (rng-split-path base-path)))
|
|
(when (> (length base-segments) 1)
|
|
(setq base-segments (nbutlast base-segments)))
|
|
(if (or (member "." segments)
|
|
(member ".." segments)
|
|
(member "." base-segments)
|
|
(member ".." base-segments))
|
|
path
|
|
(while (and segments
|
|
base-segments
|
|
(string= (car segments)
|
|
(car base-segments)))
|
|
(setq segments (cdr segments))
|
|
(setq base-segments (cdr base-segments)))
|
|
(while base-segments
|
|
(setq base-segments (cdr base-segments))
|
|
(setq segments (cons ".." segments)))
|
|
(when (equal (car segments) "")
|
|
(setq segments (cons "." segments)))
|
|
(rng-join-path segments))))
|
|
|
|
(defun rng-split-path (path)
|
|
(let ((start 0)
|
|
segments)
|
|
(while (string-match "/" path start)
|
|
(setq segments (cons (substring path start (match-beginning 0))
|
|
segments))
|
|
(setq start (match-end 0)))
|
|
(nreverse (cons (substring path start) segments))))
|
|
|
|
(defun rng-join-path (segments)
|
|
(and segments
|
|
(mapconcat 'identity segments "/")))
|
|
|
|
(defun rng-uri-unescape-multibyte (str)
|
|
(replace-regexp-in-string "\\(?:%[89a-fA-F][[:xdigit:]]\\)+"
|
|
'rng-multibyte-percent-decode
|
|
str))
|
|
|
|
(defun rng-multibyte-percent-decode (str)
|
|
(decode-coding-string (apply 'string
|
|
(mapcar (lambda (h) (string-to-number h 16))
|
|
(split-string str "%")))
|
|
'utf-8))
|
|
|
|
(defun rng-uri-unescape-unibyte (str)
|
|
(replace-regexp-in-string "%[0-7][[:xdigit:]]"
|
|
(lambda (h)
|
|
(string (string-to-number (substring h 1) 16)))
|
|
str
|
|
t
|
|
t))
|
|
|
|
(defun rng-uri-unescape-unibyte-match (str)
|
|
(replace-regexp-in-string "%[0-7][[:xdigit:]]\\|[^%]"
|
|
(lambda (match)
|
|
(if (string= match "*")
|
|
"\\([^/]*\\)"
|
|
(regexp-quote
|
|
(if (= (length match) 1)
|
|
match
|
|
(string (string-to-number (substring match 1)
|
|
16))))))
|
|
str
|
|
t
|
|
t))
|
|
|
|
(defun rng-uri-unescape-unibyte-replace (str next-match-index)
|
|
(replace-regexp-in-string
|
|
"%[0-7][[:xdigit:]]\\|[^%]"
|
|
(lambda (match)
|
|
(if (string= match "*")
|
|
(let ((n next-match-index))
|
|
(setq next-match-index (1+ n))
|
|
(format "\\%s" n))
|
|
(let ((ch (if (= (length match) 1)
|
|
(aref match 0)
|
|
(string-to-number (substring match 1)
|
|
16))))
|
|
(if (eq ch ?\\)
|
|
(string ?\\ ?\\)
|
|
(string ch)))))
|
|
str
|
|
t
|
|
t))
|
|
|
|
(provide 'rng-uri)
|
|
|
|
;;; rng-uri.el ends here
|