1
0
mirror of https://git.savannah.gnu.org/git/emacs.git synced 2025-01-05 11:45:45 +00:00

Improve xml parameter entity parsing, and add a new ERT test.

* test/automated/xml-parse-tests.el: New file.

* lisp/xml.el (xml--parse-buffer): New function.  Move most of
xml-parse-region here.
(xml-parse-region): Copy region into a temporary buffer, since
parameter entity substitution requires changing buffer contents.
Use xml--parse-buffer.
(xml-parse-file): Use xml--parse-buffer.
(xml-parse-dtd): Make parameter entity substitution work right.
This commit is contained in:
Chong Yidong 2012-07-01 15:17:05 +08:00
parent b95b72547b
commit fbf2e7ad3b
4 changed files with 178 additions and 73 deletions

View File

@ -1,3 +1,13 @@
2012-07-01 Chong Yidong <cyd@gnu.org>
* xml.el (xml--parse-buffer): New function. Move most of
xml-parse-region here.
(xml-parse-region): Copy region into a temporary buffer, since
parameter entity substitution requires changing buffer contents.
Use xml--parse-buffer.
(xml-parse-file): Use xml--parse-buffer.
(xml-parse-dtd): Make parameter entity substitution work right.
2012-06-30 Glenn Morris <rgm@gnu.org>
* comint.el (follow-comint-scroll-to-bottom): Fix declaration.

View File

@ -165,23 +165,12 @@ See also `xml-get-attribute-or-nil'."
;;;###autoload
(defun xml-parse-file (file &optional parse-dtd parse-ns)
"Parse the well-formed XML file FILE.
If FILE is already visited, use its buffer and don't kill it.
Returns the top node with all its children.
Return the top node with all its children.
If PARSE-DTD is non-nil, the DTD is parsed rather than skipped.
If PARSE-NS is non-nil, then QNAMES are expanded."
(if (get-file-buffer file)
(with-current-buffer (get-file-buffer file)
(save-excursion
(xml-parse-region (point-min)
(point-max)
(current-buffer)
parse-dtd parse-ns)))
(with-temp-buffer
(insert-file-contents file)
(xml-parse-region (point-min)
(point-max)
(current-buffer)
parse-dtd parse-ns))))
(with-temp-buffer
(insert-file-contents file)
(xml--parse-buffer parse-dtd parse-ns)))
(eval-and-compile
(let* ((start-chars (concat "[:alpha:]:_"))
@ -320,42 +309,44 @@ and returned as the first element of the list.
If PARSE-NS is non-nil, then QNAMES are expanded."
;; Use fixed syntax table to ensure regexp char classes and syntax
;; specs DTRT.
(unless buffer
(setq buffer (current-buffer)))
(with-temp-buffer
(insert-buffer-substring buffer beg end)
(xml--parse-buffer parse-dtd parse-ns)))
(defun xml--parse-buffer (parse-dtd parse-ns)
(with-syntax-table (standard-syntax-table)
(let ((case-fold-search nil) ; XML is case-sensitive.
;; Prevent entity definitions from changing the defaults
(xml-entity-alist xml-entity-alist)
(xml-parameter-entity-alist xml-parameter-entity-alist)
xml result dtd)
(save-excursion
(if buffer
(set-buffer buffer))
(save-restriction
(narrow-to-region beg end)
(goto-char (point-min))
(while (not (eobp))
(if (search-forward "<" nil t)
(progn
(forward-char -1)
(setq result (xml-parse-tag parse-dtd parse-ns))
(cond
((null result)
;; Not looking at an xml start tag.
(unless (eobp)
(forward-char 1)))
((and xml (not xml-sub-parser))
;; Translation of rule [1] of XML specifications
(error "XML: (Not Well-Formed) Only one root tag allowed"))
((and (listp (car result))
parse-dtd)
(setq dtd (car result))
(if (cdr result) ; possible leading comment
(add-to-list 'xml (cdr result))))
(t
(add-to-list 'xml result))))
(goto-char (point-max))))
(if parse-dtd
(cons dtd (nreverse xml))
(nreverse xml)))))))
(goto-char (point-min))
(while (not (eobp))
(if (search-forward "<" nil t)
(progn
(forward-char -1)
(setq result (xml-parse-tag parse-dtd parse-ns))
(cond
((null result)
;; Not looking at an xml start tag.
(unless (eobp)
(forward-char 1)))
((and xml (not xml-sub-parser))
;; Translation of rule [1] of XML specifications
(error "XML: (Not Well-Formed) Only one root tag allowed"))
((and (listp (car result))
parse-dtd)
(setq dtd (car result))
(if (cdr result) ; possible leading comment
(add-to-list 'xml (cdr result))))
(t
(add-to-list 'xml result))))
(goto-char (point-max))))
(if parse-dtd
(cons dtd (nreverse xml))
(nreverse xml)))))
(defun xml-maybe-do-ns (name default xml-ns)
"Perform any namespace expansion.
@ -600,7 +591,10 @@ This follows the rule [28] in the XML specifications."
;; Get the name of the document
(looking-at xml-name-regexp)
(let ((dtd (list (match-string-no-properties 0) 'dtd))
(xml-parameter-entity-alist xml-parameter-entity-alist))
(xml-parameter-entity-alist xml-parameter-entity-alist)
(parameter-entity-re (eval-when-compile
(concat "%\\(" xml-name-re "\\);")))
next-parameter-entity)
(goto-char (match-end 0))
(skip-syntax-forward " ")
@ -638,13 +632,28 @@ This follows the rule [28] in the XML specifications."
(error "XML: Bad DTD"))
(forward-char)
;; [2.8]: "markup declarations may be made up in whole or in
;; part of the replacement text of parameter entities."
;; Since parameter entities are valid only within the DTD, we
;; first search for the position of the next possible parameter
;; entity. Then, search for the next DTD element; if it ends
;; before the next parameter entity, expand the parameter entity
;; and try again.
(setq next-parameter-entity
(save-excursion
(if (re-search-forward parameter-entity-re nil t)
(match-beginning 0))))
;; Parse the rest of the DTD
;; Fixme: Deal with NOTATION, PIs.
(while (not (looking-at "\\s-*\\]"))
(skip-syntax-forward " ")
(cond
;; Element declaration [45]:
((looking-at "<!ELEMENT\\s-+\\([[:alnum:].%;]+\\)\\s-+\\([^>]+\\)>")
((and (looking-at "<!ELEMENT\\s-+\\([[:alnum:].%;]+\\)\\s-+\\([^>]+\\)>")
(or (null next-parameter-entity)
(<= (match-end 0) next-parameter-entity)))
(let ((element (match-string-no-properties 1))
(type (match-string-no-properties 2))
(end-pos (match-end 0)))
@ -672,19 +681,31 @@ This follows the rule [28] in the XML specifications."
(goto-char end-pos)))
;; Attribute-list declaration [52] (currently unsupported):
((looking-at (concat "<!ATTLIST[ \t\n\r]*\\(" xml-name-re
"\\)[ \t\n\r]*\\(" xml-att-def-re
"\\)*[ \t\n\r]*>"))
((and (looking-at (eval-when-compile
(concat "<!ATTLIST[ \t\n\r]*\\(" xml-name-re
"\\)[ \t\n\r]*\\(" xml-att-def-re
"\\)*[ \t\n\r]*>")))
(or (null next-parameter-entity)
(<= (match-end 0) next-parameter-entity)))
(goto-char (match-end 0)))
;; Comments (skip to end):
;; Comments (skip to end, ignoring parameter entity):
((looking-at "<!--")
(search-forward "-->"))
(search-forward "-->")
(and next-parameter-entity
(> (point) next-parameter-entity)
(setq next-parameter-entity
(save-excursion
(if (re-search-forward parameter-entity-re nil t)
(match-beginning 0))))))
;; Internal entity declarations:
((looking-at (concat "<!ENTITY[ \t\n\r]+\\(%[ \t\n\r]+\\)?\\("
xml-name-re "\\)[ \t\n\r]*\\("
xml-entity-value-re "\\)[ \t\n\r]*>"))
((and (looking-at (eval-when-compile
(concat "<!ENTITY[ \t\n\r]+\\(%[ \t\n\r]+\\)?\\("
xml-name-re "\\)[ \t\n\r]*\\("
xml-entity-value-re "\\)[ \t\n\r]*>")))
(or (null next-parameter-entity)
(<= (match-end 0) next-parameter-entity)))
(let* ((name (prog1 (match-string-no-properties 2)
(goto-char (match-end 0))))
(alist (if (match-string 1)
@ -700,26 +721,39 @@ This follows the rule [28] in the XML specifications."
(set alist (cons (cons name value) (symbol-value alist))))))
;; External entity declarations (currently unsupported):
((or (looking-at (concat "<!ENTITY[ \t\n\r]+\\(%[ \t\n\r]+\\)?\\("
xml-name-re "\\)[ \t\n\r]+SYSTEM[ \t\n\r]+"
"\\(\"[^\"]*\"\\|'[^']*'\\)[ \t\n\r]*>"))
(looking-at (concat "<!ENTITY[ \t\n\r]+\\(%[ \t\n\r]+\\)?\\("
xml-name-re "\\)[ \t\n\r]+PUBLIC[ \t\n\r]+"
"\"[- \r\na-zA-Z0-9'()+,./:=?;!*#@$_%]*\""
"\\|'[- \r\na-zA-Z0-9()+,./:=?;!*#@$_%]*'"
"[ \t\n\r]+\\(\"[^\"]*\"\\|'[^']*'\\)"
"[ \t\n\r]*>")))
((and (or (looking-at (eval-when-compile
(concat "<!ENTITY[ \t\n\r]+\\(%[ \t\n\r]+\\)?\\("
xml-name-re "\\)[ \t\n\r]+SYSTEM[ \t\n\r]+"
"\\(\"[^\"]*\"\\|'[^']*'\\)[ \t\n\r]*>")))
(looking-at (eval-when-compile
(concat "<!ENTITY[ \t\n\r]+\\(%[ \t\n\r]+\\)?\\("
xml-name-re "\\)[ \t\n\r]+PUBLIC[ \t\n\r]+"
"\"[- \r\na-zA-Z0-9'()+,./:=?;!*#@$_%]*\""
"\\|'[- \r\na-zA-Z0-9()+,./:=?;!*#@$_%]*'"
"[ \t\n\r]+\\(\"[^\"]*\"\\|'[^']*'\\)"
"[ \t\n\r]*>"))))
(or (null next-parameter-entity)
(<= (match-end 0) next-parameter-entity)))
(goto-char (match-end 0)))
;; Parameter entity:
((looking-at (concat "%\\(" xml-name-re "\\);"))
(goto-char (match-end 0))
(let* ((entity (match-string 1))
(end (point-marker))
(elt (assoc entity xml-parameter-entity-alist)))
(when elt
(replace-match (cdr elt) t t)
(goto-char end))))
;; If a parameter entity is in the way, expand it.
(next-parameter-entity
(save-excursion
(goto-char next-parameter-entity)
(unless (looking-at parameter-entity-re)
(error "XML: Internal error"))
(let* ((entity (match-string 1))
(beg (point-marker))
(elt (assoc entity xml-parameter-entity-alist)))
(if elt
(progn
(replace-match (cdr elt) t t)
;; The replacement can itself be a parameter entity.
(goto-char next-parameter-entity))
(goto-char (match-end 0))))
(setq next-parameter-entity
(if (re-search-forward parameter-entity-re nil t)
(match-beginning 0)))))
;; Anything else:
(xml-validating-parser

View File

@ -1,3 +1,7 @@
2012-07-01 Chong Yidong <cyd@gnu.org>
* automated/xml-parse-tests.el: New file.
2012-06-27 Stefan Monnier <monnier@iro.umontreal.ca>
* automated/ert-x-tests.el (ert-test-run-tests-interactively-2):

View File

@ -0,0 +1,57 @@
;;; xml-parse-tests.el --- Test suite for XML parsing.
;; Copyright (C) 2012 Free Software Foundation, Inc.
;; Author: Chong Yidong <cyd@stupidchicken.com>
;; Keywords: internal
;; Human-Keywords: internal
;; This file is part of GNU Emacs.
;; GNU Emacs is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.
;; GNU Emacs is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
;;; Commentary:
;; Type M-x test-xml-parse RET to generate the test buffer.
;;; Code:
(require 'xml)
(defvar xml-parse-tests--data
'(;; General entity substitution
("<?xml version=\"1.0\"?><!DOCTYPE foo SYSTEM \"bar.dtd\" [<!ENTITY ent \"AbC\">]><foo a=\"b\"><bar>&ent;;</bar></foo>" .
((foo ((a . "b")) (bar nil "AbC;"))))
;; Parameter entity substitution
("<?xml version=\"1.0\"?><!DOCTYPE foo SYSTEM \"bar.dtd\" [<!ENTITY % pent \"AbC\"><!ENTITY ent \"%pent;\">]><foo a=\"b\"><bar>&ent;;</bar></foo>" .
((foo ((a . "b")) (bar nil "AbC;"))))
;; Tricky parameter entity substitution (like XML spec Appendix D)
("<?xml version='1.0'?><!DOCTYPE foo [ <!ENTITY % xx '&#37;zz;'><!ENTITY % zz '&#60;!ENTITY ent \"b\" >' > %xx; ]><foo>A&ent;C</foo>" .
((foo nil "AbC"))))
"Alist of XML strings and their expected parse trees.")
(ert-deftest xml-parse-tests ()
"Test XML parsing."
(with-temp-buffer
(dolist (test xml-parse-tests--data)
(erase-buffer)
(insert (car test))
(should (equal (cdr test)
(xml-parse-region (point-min) (point-max)))))))
;; Local Variables:
;; no-byte-compile: t
;; End:
;;; xml-parse-tests.el ends here.