mirror of
https://git.savannah.gnu.org/git/emacs.git
synced 2025-01-05 11:45:45 +00:00
Improve xml parameter entity parsing, and add a new ERT test.
* test/automated/xml-parse-tests.el: New file. * lisp/xml.el (xml--parse-buffer): New function. Move most of xml-parse-region here. (xml-parse-region): Copy region into a temporary buffer, since parameter entity substitution requires changing buffer contents. Use xml--parse-buffer. (xml-parse-file): Use xml--parse-buffer. (xml-parse-dtd): Make parameter entity substitution work right.
This commit is contained in:
parent
b95b72547b
commit
fbf2e7ad3b
@ -1,3 +1,13 @@
|
||||
2012-07-01 Chong Yidong <cyd@gnu.org>
|
||||
|
||||
* xml.el (xml--parse-buffer): New function. Move most of
|
||||
xml-parse-region here.
|
||||
(xml-parse-region): Copy region into a temporary buffer, since
|
||||
parameter entity substitution requires changing buffer contents.
|
||||
Use xml--parse-buffer.
|
||||
(xml-parse-file): Use xml--parse-buffer.
|
||||
(xml-parse-dtd): Make parameter entity substitution work right.
|
||||
|
||||
2012-06-30 Glenn Morris <rgm@gnu.org>
|
||||
|
||||
* comint.el (follow-comint-scroll-to-bottom): Fix declaration.
|
||||
|
180
lisp/xml.el
180
lisp/xml.el
@ -165,23 +165,12 @@ See also `xml-get-attribute-or-nil'."
|
||||
;;;###autoload
|
||||
(defun xml-parse-file (file &optional parse-dtd parse-ns)
|
||||
"Parse the well-formed XML file FILE.
|
||||
If FILE is already visited, use its buffer and don't kill it.
|
||||
Returns the top node with all its children.
|
||||
Return the top node with all its children.
|
||||
If PARSE-DTD is non-nil, the DTD is parsed rather than skipped.
|
||||
If PARSE-NS is non-nil, then QNAMES are expanded."
|
||||
(if (get-file-buffer file)
|
||||
(with-current-buffer (get-file-buffer file)
|
||||
(save-excursion
|
||||
(xml-parse-region (point-min)
|
||||
(point-max)
|
||||
(current-buffer)
|
||||
parse-dtd parse-ns)))
|
||||
(with-temp-buffer
|
||||
(insert-file-contents file)
|
||||
(xml-parse-region (point-min)
|
||||
(point-max)
|
||||
(current-buffer)
|
||||
parse-dtd parse-ns))))
|
||||
(with-temp-buffer
|
||||
(insert-file-contents file)
|
||||
(xml--parse-buffer parse-dtd parse-ns)))
|
||||
|
||||
(eval-and-compile
|
||||
(let* ((start-chars (concat "[:alpha:]:_"))
|
||||
@ -320,42 +309,44 @@ and returned as the first element of the list.
|
||||
If PARSE-NS is non-nil, then QNAMES are expanded."
|
||||
;; Use fixed syntax table to ensure regexp char classes and syntax
|
||||
;; specs DTRT.
|
||||
(unless buffer
|
||||
(setq buffer (current-buffer)))
|
||||
(with-temp-buffer
|
||||
(insert-buffer-substring buffer beg end)
|
||||
(xml--parse-buffer parse-dtd parse-ns)))
|
||||
|
||||
(defun xml--parse-buffer (parse-dtd parse-ns)
|
||||
(with-syntax-table (standard-syntax-table)
|
||||
(let ((case-fold-search nil) ; XML is case-sensitive.
|
||||
;; Prevent entity definitions from changing the defaults
|
||||
(xml-entity-alist xml-entity-alist)
|
||||
(xml-parameter-entity-alist xml-parameter-entity-alist)
|
||||
xml result dtd)
|
||||
(save-excursion
|
||||
(if buffer
|
||||
(set-buffer buffer))
|
||||
(save-restriction
|
||||
(narrow-to-region beg end)
|
||||
(goto-char (point-min))
|
||||
(while (not (eobp))
|
||||
(if (search-forward "<" nil t)
|
||||
(progn
|
||||
(forward-char -1)
|
||||
(setq result (xml-parse-tag parse-dtd parse-ns))
|
||||
(cond
|
||||
((null result)
|
||||
;; Not looking at an xml start tag.
|
||||
(unless (eobp)
|
||||
(forward-char 1)))
|
||||
((and xml (not xml-sub-parser))
|
||||
;; Translation of rule [1] of XML specifications
|
||||
(error "XML: (Not Well-Formed) Only one root tag allowed"))
|
||||
((and (listp (car result))
|
||||
parse-dtd)
|
||||
(setq dtd (car result))
|
||||
(if (cdr result) ; possible leading comment
|
||||
(add-to-list 'xml (cdr result))))
|
||||
(t
|
||||
(add-to-list 'xml result))))
|
||||
(goto-char (point-max))))
|
||||
(if parse-dtd
|
||||
(cons dtd (nreverse xml))
|
||||
(nreverse xml)))))))
|
||||
(goto-char (point-min))
|
||||
(while (not (eobp))
|
||||
(if (search-forward "<" nil t)
|
||||
(progn
|
||||
(forward-char -1)
|
||||
(setq result (xml-parse-tag parse-dtd parse-ns))
|
||||
(cond
|
||||
((null result)
|
||||
;; Not looking at an xml start tag.
|
||||
(unless (eobp)
|
||||
(forward-char 1)))
|
||||
((and xml (not xml-sub-parser))
|
||||
;; Translation of rule [1] of XML specifications
|
||||
(error "XML: (Not Well-Formed) Only one root tag allowed"))
|
||||
((and (listp (car result))
|
||||
parse-dtd)
|
||||
(setq dtd (car result))
|
||||
(if (cdr result) ; possible leading comment
|
||||
(add-to-list 'xml (cdr result))))
|
||||
(t
|
||||
(add-to-list 'xml result))))
|
||||
(goto-char (point-max))))
|
||||
(if parse-dtd
|
||||
(cons dtd (nreverse xml))
|
||||
(nreverse xml)))))
|
||||
|
||||
(defun xml-maybe-do-ns (name default xml-ns)
|
||||
"Perform any namespace expansion.
|
||||
@ -600,7 +591,10 @@ This follows the rule [28] in the XML specifications."
|
||||
;; Get the name of the document
|
||||
(looking-at xml-name-regexp)
|
||||
(let ((dtd (list (match-string-no-properties 0) 'dtd))
|
||||
(xml-parameter-entity-alist xml-parameter-entity-alist))
|
||||
(xml-parameter-entity-alist xml-parameter-entity-alist)
|
||||
(parameter-entity-re (eval-when-compile
|
||||
(concat "%\\(" xml-name-re "\\);")))
|
||||
next-parameter-entity)
|
||||
(goto-char (match-end 0))
|
||||
(skip-syntax-forward " ")
|
||||
|
||||
@ -638,13 +632,28 @@ This follows the rule [28] in the XML specifications."
|
||||
(error "XML: Bad DTD"))
|
||||
(forward-char)
|
||||
|
||||
;; [2.8]: "markup declarations may be made up in whole or in
|
||||
;; part of the replacement text of parameter entities."
|
||||
|
||||
;; Since parameter entities are valid only within the DTD, we
|
||||
;; first search for the position of the next possible parameter
|
||||
;; entity. Then, search for the next DTD element; if it ends
|
||||
;; before the next parameter entity, expand the parameter entity
|
||||
;; and try again.
|
||||
(setq next-parameter-entity
|
||||
(save-excursion
|
||||
(if (re-search-forward parameter-entity-re nil t)
|
||||
(match-beginning 0))))
|
||||
|
||||
;; Parse the rest of the DTD
|
||||
;; Fixme: Deal with NOTATION, PIs.
|
||||
(while (not (looking-at "\\s-*\\]"))
|
||||
(skip-syntax-forward " ")
|
||||
(cond
|
||||
;; Element declaration [45]:
|
||||
((looking-at "<!ELEMENT\\s-+\\([[:alnum:].%;]+\\)\\s-+\\([^>]+\\)>")
|
||||
((and (looking-at "<!ELEMENT\\s-+\\([[:alnum:].%;]+\\)\\s-+\\([^>]+\\)>")
|
||||
(or (null next-parameter-entity)
|
||||
(<= (match-end 0) next-parameter-entity)))
|
||||
(let ((element (match-string-no-properties 1))
|
||||
(type (match-string-no-properties 2))
|
||||
(end-pos (match-end 0)))
|
||||
@ -672,19 +681,31 @@ This follows the rule [28] in the XML specifications."
|
||||
(goto-char end-pos)))
|
||||
|
||||
;; Attribute-list declaration [52] (currently unsupported):
|
||||
((looking-at (concat "<!ATTLIST[ \t\n\r]*\\(" xml-name-re
|
||||
"\\)[ \t\n\r]*\\(" xml-att-def-re
|
||||
"\\)*[ \t\n\r]*>"))
|
||||
((and (looking-at (eval-when-compile
|
||||
(concat "<!ATTLIST[ \t\n\r]*\\(" xml-name-re
|
||||
"\\)[ \t\n\r]*\\(" xml-att-def-re
|
||||
"\\)*[ \t\n\r]*>")))
|
||||
(or (null next-parameter-entity)
|
||||
(<= (match-end 0) next-parameter-entity)))
|
||||
(goto-char (match-end 0)))
|
||||
|
||||
;; Comments (skip to end):
|
||||
;; Comments (skip to end, ignoring parameter entity):
|
||||
((looking-at "<!--")
|
||||
(search-forward "-->"))
|
||||
(search-forward "-->")
|
||||
(and next-parameter-entity
|
||||
(> (point) next-parameter-entity)
|
||||
(setq next-parameter-entity
|
||||
(save-excursion
|
||||
(if (re-search-forward parameter-entity-re nil t)
|
||||
(match-beginning 0))))))
|
||||
|
||||
;; Internal entity declarations:
|
||||
((looking-at (concat "<!ENTITY[ \t\n\r]+\\(%[ \t\n\r]+\\)?\\("
|
||||
xml-name-re "\\)[ \t\n\r]*\\("
|
||||
xml-entity-value-re "\\)[ \t\n\r]*>"))
|
||||
((and (looking-at (eval-when-compile
|
||||
(concat "<!ENTITY[ \t\n\r]+\\(%[ \t\n\r]+\\)?\\("
|
||||
xml-name-re "\\)[ \t\n\r]*\\("
|
||||
xml-entity-value-re "\\)[ \t\n\r]*>")))
|
||||
(or (null next-parameter-entity)
|
||||
(<= (match-end 0) next-parameter-entity)))
|
||||
(let* ((name (prog1 (match-string-no-properties 2)
|
||||
(goto-char (match-end 0))))
|
||||
(alist (if (match-string 1)
|
||||
@ -700,26 +721,39 @@ This follows the rule [28] in the XML specifications."
|
||||
(set alist (cons (cons name value) (symbol-value alist))))))
|
||||
|
||||
;; External entity declarations (currently unsupported):
|
||||
((or (looking-at (concat "<!ENTITY[ \t\n\r]+\\(%[ \t\n\r]+\\)?\\("
|
||||
xml-name-re "\\)[ \t\n\r]+SYSTEM[ \t\n\r]+"
|
||||
"\\(\"[^\"]*\"\\|'[^']*'\\)[ \t\n\r]*>"))
|
||||
(looking-at (concat "<!ENTITY[ \t\n\r]+\\(%[ \t\n\r]+\\)?\\("
|
||||
xml-name-re "\\)[ \t\n\r]+PUBLIC[ \t\n\r]+"
|
||||
"\"[- \r\na-zA-Z0-9'()+,./:=?;!*#@$_%]*\""
|
||||
"\\|'[- \r\na-zA-Z0-9()+,./:=?;!*#@$_%]*'"
|
||||
"[ \t\n\r]+\\(\"[^\"]*\"\\|'[^']*'\\)"
|
||||
"[ \t\n\r]*>")))
|
||||
((and (or (looking-at (eval-when-compile
|
||||
(concat "<!ENTITY[ \t\n\r]+\\(%[ \t\n\r]+\\)?\\("
|
||||
xml-name-re "\\)[ \t\n\r]+SYSTEM[ \t\n\r]+"
|
||||
"\\(\"[^\"]*\"\\|'[^']*'\\)[ \t\n\r]*>")))
|
||||
(looking-at (eval-when-compile
|
||||
(concat "<!ENTITY[ \t\n\r]+\\(%[ \t\n\r]+\\)?\\("
|
||||
xml-name-re "\\)[ \t\n\r]+PUBLIC[ \t\n\r]+"
|
||||
"\"[- \r\na-zA-Z0-9'()+,./:=?;!*#@$_%]*\""
|
||||
"\\|'[- \r\na-zA-Z0-9()+,./:=?;!*#@$_%]*'"
|
||||
"[ \t\n\r]+\\(\"[^\"]*\"\\|'[^']*'\\)"
|
||||
"[ \t\n\r]*>"))))
|
||||
(or (null next-parameter-entity)
|
||||
(<= (match-end 0) next-parameter-entity)))
|
||||
(goto-char (match-end 0)))
|
||||
|
||||
;; Parameter entity:
|
||||
((looking-at (concat "%\\(" xml-name-re "\\);"))
|
||||
(goto-char (match-end 0))
|
||||
(let* ((entity (match-string 1))
|
||||
(end (point-marker))
|
||||
(elt (assoc entity xml-parameter-entity-alist)))
|
||||
(when elt
|
||||
(replace-match (cdr elt) t t)
|
||||
(goto-char end))))
|
||||
;; If a parameter entity is in the way, expand it.
|
||||
(next-parameter-entity
|
||||
(save-excursion
|
||||
(goto-char next-parameter-entity)
|
||||
(unless (looking-at parameter-entity-re)
|
||||
(error "XML: Internal error"))
|
||||
(let* ((entity (match-string 1))
|
||||
(beg (point-marker))
|
||||
(elt (assoc entity xml-parameter-entity-alist)))
|
||||
(if elt
|
||||
(progn
|
||||
(replace-match (cdr elt) t t)
|
||||
;; The replacement can itself be a parameter entity.
|
||||
(goto-char next-parameter-entity))
|
||||
(goto-char (match-end 0))))
|
||||
(setq next-parameter-entity
|
||||
(if (re-search-forward parameter-entity-re nil t)
|
||||
(match-beginning 0)))))
|
||||
|
||||
;; Anything else:
|
||||
(xml-validating-parser
|
||||
|
@ -1,3 +1,7 @@
|
||||
2012-07-01 Chong Yidong <cyd@gnu.org>
|
||||
|
||||
* automated/xml-parse-tests.el: New file.
|
||||
|
||||
2012-06-27 Stefan Monnier <monnier@iro.umontreal.ca>
|
||||
|
||||
* automated/ert-x-tests.el (ert-test-run-tests-interactively-2):
|
||||
|
57
test/automated/xml-parse-tests.el
Normal file
57
test/automated/xml-parse-tests.el
Normal file
@ -0,0 +1,57 @@
|
||||
;;; xml-parse-tests.el --- Test suite for XML parsing.
|
||||
|
||||
;; Copyright (C) 2012 Free Software Foundation, Inc.
|
||||
|
||||
;; Author: Chong Yidong <cyd@stupidchicken.com>
|
||||
;; Keywords: internal
|
||||
;; Human-Keywords: internal
|
||||
|
||||
;; This file is part of GNU Emacs.
|
||||
|
||||
;; GNU Emacs is free software: you can redistribute it and/or modify
|
||||
;; it under the terms of the GNU General Public License as published by
|
||||
;; the Free Software Foundation, either version 3 of the License, or
|
||||
;; (at your option) any later version.
|
||||
|
||||
;; GNU Emacs is distributed in the hope that it will be useful,
|
||||
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
;; GNU General Public License for more details.
|
||||
|
||||
;; You should have received a copy of the GNU General Public License
|
||||
;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
;;; Commentary:
|
||||
|
||||
;; Type M-x test-xml-parse RET to generate the test buffer.
|
||||
|
||||
;;; Code:
|
||||
|
||||
(require 'xml)
|
||||
|
||||
(defvar xml-parse-tests--data
|
||||
'(;; General entity substitution
|
||||
("<?xml version=\"1.0\"?><!DOCTYPE foo SYSTEM \"bar.dtd\" [<!ENTITY ent \"AbC\">]><foo a=\"b\"><bar>&ent;;</bar></foo>" .
|
||||
((foo ((a . "b")) (bar nil "AbC;"))))
|
||||
;; Parameter entity substitution
|
||||
("<?xml version=\"1.0\"?><!DOCTYPE foo SYSTEM \"bar.dtd\" [<!ENTITY % pent \"AbC\"><!ENTITY ent \"%pent;\">]><foo a=\"b\"><bar>&ent;;</bar></foo>" .
|
||||
((foo ((a . "b")) (bar nil "AbC;"))))
|
||||
;; Tricky parameter entity substitution (like XML spec Appendix D)
|
||||
("<?xml version='1.0'?><!DOCTYPE foo [ <!ENTITY % xx '%zz;'><!ENTITY % zz '<!ENTITY ent \"b\" >' > %xx; ]><foo>A&ent;C</foo>" .
|
||||
((foo nil "AbC"))))
|
||||
"Alist of XML strings and their expected parse trees.")
|
||||
|
||||
(ert-deftest xml-parse-tests ()
|
||||
"Test XML parsing."
|
||||
(with-temp-buffer
|
||||
(dolist (test xml-parse-tests--data)
|
||||
(erase-buffer)
|
||||
(insert (car test))
|
||||
(should (equal (cdr test)
|
||||
(xml-parse-region (point-min) (point-max)))))))
|
||||
|
||||
;; Local Variables:
|
||||
;; no-byte-compile: t
|
||||
;; End:
|
||||
|
||||
;;; xml-parse-tests.el ends here.
|
Loading…
Reference in New Issue
Block a user