1
0
mirror of https://git.savannah.gnu.org/git/emacs.git synced 2024-11-27 07:37:33 +00:00
emacs/lisp/nxml/nxml-rap.el
Juanma Barranquero 10545bd83d Fix typos, and general docstring cleanup.
* nxml/nxml-glyph.el (nxml-glyph-set-hook):
* nxml/nxml-uchnm.el (nxml-unicode-blocks)
(nxml-unicode-block-char-name-set):
* nxml/nxml-util.el (nxml-debug, nxml-make-namespace):
* nxml/rng-nxml.el (rng-set-state-after):
* nxml/rng-uri.el (rng-file-name-uri): Fix typo in docstring.

* nxml/rng-cmpct.el (rng-c-lookup-create, rng-c-parse-primary)
(rng-c-parse-annotation-body):
* nxml/rng-dt.el (rng-dt-namespace-context-getter): Reflow docstrings.

* nxml/nxml-mode.el (nxml, nxml-mode, nxml-after-change1)
(nxml-extend-region, nxml-merge-indent-context-type, nxml-complete)
(nxml-forward-balanced-item, nxml-dynamic-markup-word)
(nxml-define-char-name-set, nxml-toggle-char-ref-extra-display):
Fix typos in docstrings.
(nxml-attribute-indent): Reflow docstring.
(nxml-bind-meta-tab-to-complete-flag, nxml-last-fontify-end)
(nxml-default-buffer-file-coding-system): Doc fixes.

* nxml/nxml-ns.el (nxml-ns-state, nxml-ns-initial-state)
(nxml-ns-set-prefix): Fix typos in docstrings.
(nxml-ns-push-state, nxml-ns-pop-state, nxml-ns-set-default):
Reflow docstring.
(nxml-ns-get-prefix, nxml-ns-get-default): Doc fixes.

* nxml/nxml-outln.el (nxml-hide-all-text-content)
(nxml-show-direct-text-content, nxml-show-direct-subheadings)
(nxml-hide-direct-text-content, nxml-hide-subheadings)
(nxml-hide-text-content, nxml-show-subheadings, nxml-hide-other)
(nxml-outline-display-rest, nxml-outline-set-overlay)
(nxml-section-tag-forward, nxml-section-tag-backward)
(nxml-back-to-section-start): Fix typos in docstrings.

* nxml/nxml-parse.el (nxml-validate-function, nxml-parse-file):
Doc fixes.

* nxml/nxml-rap.el (nxml-scan-end, nxml-move-tag-backwards)
(nxml-scan-element-forward, nxml-scan-element-backward): Doc fixes.
(nxml-scan-after-change): Fix typo in docstring.

* nxml/rng-match.el (rng-being-compiled, rng-normalize-choice-list)
(rng-name-class-possible-names): Doc fixes.
(rng-memo-map-add, rng-intern-group, rng-match-possible-namespace-uris)
(rng-match-possible-start-tag-names, rng-match-possible-value-strings):
Fix typos in docstrings.
(rng-intern-group-shortcut, rng-intern-choice-shortcut):
Reflow docstrings.

* nxml/rng-util.el (rng-uniquify-eq, rng-uniquify-equal): Doc fixes.
(rng-substq, rng-complete-before-point): Fix typos in docstrings.

* nxml/rng-xsd.el (rng-xsd-make-date-time-regexp)
(rng-xsd-convert-date-time): Reflow docstrings.
(rng-xsd-compile): Fix typo in docstring.

* nxml/rng-loc.el (rng-current-schema-file-name)
(rng-locate-schema-file-using, rng-locate-schema-file-from-type-id):
Doc fixes.
(rng-set-schema-file): Fix typo in docstring.

* nxml/rng-valid.el (rng-error-count, rng-validate-mode)
(rng-do-some-validation, rng-process-start-tag, rng-process-text):
Fix typos in docstrings.
(rng-message-overlay, rng-conditional-up-to-date-start)
(rng-conditional-up-to-date-end): Doc fixes.
(rng-next-error, rng-previous-error): Reflow docstrings.

* nxml/xmltok.el (xmltok-attribute-raw-normalized-value): Doc fix.
(xmltok-dtd, xmltok-dependent-regions, xmltok-attribute-refs)
(xmltok-valid-char-p, xmltok-standalone, xmltok-forward-prolog)
(xmltok-merge-attributes): Fix typos in docstrings.
(xmltok-make-attribute, xmltok-forward-special)
(xmltok-get-declared-encoding-position): Reflow docstrings.

* nxml/xsd-regexp.el (xsdre-char-class-to-range-list): Doc fix.
(xsdre-range-list-union, xsdre-check-range-list, xsdre-current-regexp):
Fix typos in docstrings.
2008-07-03 12:25:23 +00:00

487 lines
17 KiB
EmacsLisp

;;; nxml-rap.el --- low-level support for random access parsing for nXML mode
;; Copyright (C) 2003, 2004, 2007, 2008 Free Software Foundation, Inc.
;; Author: James Clark
;; Keywords: XML
;; This file is part of GNU Emacs.
;; GNU Emacs is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.
;; GNU Emacs is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
;;; Commentary:
;; This uses xmltok.el to do XML parsing. The fundamental problem is
;; how to handle changes. We don't want to maintain a complete parse
;; tree. We also don't want to reparse from the start of the document
;; on every keystroke. However, it is not possible in general to
;; parse an XML document correctly starting at a random point in the
;; middle. The main problems are comments, CDATA sections and
;; processing instructions: these can all contain things that are
;; indistinguishable from elements. Literals in the prolog are also a
;; problem. Attribute value literals are not a problem because
;; attribute value literals cannot contain less-than signs.
;;
;; Our strategy is to keep track of just the problematic things.
;; Specifically, we keep track of all comments, CDATA sections and
;; processing instructions in the instance. We do this by marking all
;; except the first character of these with a non-nil nxml-inside text
;; property. The value of the nxml-inside property is comment,
;; cdata-section or processing-instruction. The first character does
;; not have the nxml-inside property so we can find the beginning of
;; the construct by looking for a change in a text property value
;; (Emacs provides primitives for this). We use text properties
;; rather than overlays, since the implementation of overlays doesn't
;; look like it scales to large numbers of overlays in a buffer.
;;
;; We don't in fact track all these constructs, but only track them in
;; some initial part of the instance. The variable `nxml-scan-end'
;; contains the limit of where we have scanned up to for them.
;;
;; Thus to parse some random point in the file we first ensure that we
;; have scanned up to that point. Then we search backwards for a
;; <. Then we check whether the < has an nxml-inside property. If it
;; does we go backwards to first character that does not have an
;; nxml-inside property (this character must be a <). Then we start
;; parsing forward from the < we have found.
;;
;; The prolog has to be parsed specially, so we also keep track of the
;; end of the prolog in `nxml-prolog-end'. The prolog is reparsed on
;; every change to the prolog. This won't work well if people try to
;; edit huge internal subsets. Hopefully that will be rare.
;;
;; We keep track of the changes by adding to the buffer's
;; after-change-functions hook. Scanning is also done as a
;; prerequisite to fontification by adding to fontification-functions
;; (in the same way as jit-lock). This means that scanning for these
;; constructs had better be quick. Fortunately it is. Firstly, the
;; typical proportion of comments, CDATA sections and processing
;; instructions is small relative to other things. Secondly, to scan
;; we just search for the regexp <[!?].
;;
;; One problem is unclosed comments, processing instructions and CDATA
;; sections. Suppose, for example, we encounter a <!-- but there's no
;; matching -->. This is not an unexpected situation if the user is
;; creating a comment. It is not helpful to treat the whole of the
;; file starting from the <!-- onwards as a single unclosed comment
;; token. Instead we treat just the <!-- as a piece of not well-formed
;; markup and continue. The problem is that if at some later stage a
;; --> gets added to the buffer after the unclosed <!--, we will need
;; to reparse the buffer starting from the <!--. We need to keep
;; track of these reparse dependencies; they are called dependent
;; regions in the code.
;;; Code:
(require 'xmltok)
(require 'nxml-util)
(defvar nxml-prolog-end nil
"Integer giving position following end of the prolog.")
(make-variable-buffer-local 'nxml-prolog-end)
(defvar nxml-scan-end nil
"Marker giving position up to which we have scanned.
nxml-scan-end must be >= nxml-prolog-end. Furthermore, nxml-scan-end
must not be an inside position in the following sense. A position is
inside if the following character is a part of, but not the first
character of, a CDATA section, comment or processing instruction.
Furthermore all positions >= nxml-prolog-end and < nxml-scan-end that
are inside positions must have a non-nil `nxml-inside' property whose
value is a symbol specifying what it is inside. Any characters with a
non-nil `fontified' property must have position < nxml-scan-end and
the correct face. Dependent regions must also be established for any
unclosed constructs starting before nxml-scan-end.
There must be no `nxml-inside' properties after nxml-scan-end.")
(make-variable-buffer-local 'nxml-scan-end)
(defsubst nxml-get-inside (pos)
(get-text-property pos 'nxml-inside))
(defsubst nxml-clear-inside (start end)
(nxml-debug-clear-inside start end)
(remove-text-properties start end '(nxml-inside nil)))
(defsubst nxml-set-inside (start end type)
(nxml-debug-set-inside start end)
(put-text-property start end 'nxml-inside type))
(defun nxml-inside-end (pos)
"Return the end of the inside region containing POS.
Return nil if the character at POS is not inside."
(if (nxml-get-inside pos)
(or (next-single-property-change pos 'nxml-inside)
(point-max))
nil))
(defun nxml-inside-start (pos)
"Return the start of the inside region containing POS.
Return nil if the character at POS is not inside."
(if (nxml-get-inside pos)
(or (previous-single-property-change (1+ pos) 'nxml-inside)
(point-min))
nil))
;;; Change management
(defun nxml-scan-after-change (start end)
"Restore `nxml-scan-end' invariants after a change.
The change happened between START and END.
Return position after which lexical state is unchanged.
END must be > `nxml-prolog-end'. START must be outside
any 'inside' regions and at the beginning of a token."
(if (>= start nxml-scan-end)
nxml-scan-end
(let ((inside-remove-start start)
xmltok-errors
xmltok-dependent-regions)
(while (or (when (xmltok-forward-special (min end nxml-scan-end))
(when (memq xmltok-type
'(comment
cdata-section
processing-instruction))
(nxml-clear-inside inside-remove-start
(1+ xmltok-start))
(nxml-set-inside (1+ xmltok-start)
(point)
xmltok-type)
(setq inside-remove-start (point)))
(if (< (point) (min end nxml-scan-end))
t
(setq end (point))
nil))
;; The end of the change was inside but is now outside.
;; Imagine something really weird like
;; <![CDATA[foo <!-- bar ]]> <![CDATA[ stuff --> <!-- ]]> -->
;; and suppose we deleted "<![CDATA[f"
(let ((inside-end (nxml-inside-end end)))
(when inside-end
(setq end inside-end)
t))))
(nxml-clear-inside inside-remove-start end)
(nxml-clear-dependent-regions start end)
(nxml-mark-parse-dependent-regions))
(when (> end nxml-scan-end)
(set-marker nxml-scan-end end))
end))
;; n-s-p only called from nxml-mode.el, where this variable is defined.
(defvar nxml-prolog-regions)
(defun nxml-scan-prolog ()
(goto-char (point-min))
(let (xmltok-dtd
xmltok-errors
xmltok-dependent-regions)
(setq nxml-prolog-regions (xmltok-forward-prolog))
(setq nxml-prolog-end (point))
(nxml-clear-inside (point-min) nxml-prolog-end)
(nxml-clear-dependent-regions (point-min) nxml-prolog-end)
(nxml-mark-parse-dependent-regions))
(when (< nxml-scan-end nxml-prolog-end)
(set-marker nxml-scan-end nxml-prolog-end)))
;;; Dependent regions
(defun nxml-adjust-start-for-dependent-regions (start end pre-change-length)
(let ((overlays (overlays-in (1- start) start))
(adjusted-start start))
(while overlays
(let* ((overlay (car overlays))
(ostart (overlay-start overlay)))
(when (and (eq (overlay-get overlay 'category) 'nxml-dependent)
(< ostart adjusted-start))
(let ((funargs (overlay-get overlay 'nxml-funargs)))
(when (apply (car funargs)
(append (list start
end
pre-change-length
ostart
(overlay-end overlay))
(cdr funargs)))
(setq adjusted-start ostart)))))
(setq overlays (cdr overlays)))
adjusted-start))
(defun nxml-mark-parse-dependent-regions ()
(while xmltok-dependent-regions
(apply 'nxml-mark-parse-dependent-region
(car xmltok-dependent-regions))
(setq xmltok-dependent-regions
(cdr xmltok-dependent-regions))))
(defun nxml-mark-parse-dependent-region (fun start end &rest args)
(let ((overlay (make-overlay start end nil t t)))
(overlay-put overlay 'category 'nxml-dependent)
(overlay-put overlay 'nxml-funargs (cons fun args))))
(put 'nxml-dependent 'evaporate t)
(defun nxml-clear-dependent-regions (start end)
(let ((overlays (overlays-in start end)))
(while overlays
(let* ((overlay (car overlays))
(category (overlay-get overlay 'category)))
(when (and (eq category 'nxml-dependent)
(<= start (overlay-start overlay)))
(delete-overlay overlay)))
(setq overlays (cdr overlays)))))
;;; Random access parsing
(defun nxml-token-after ()
"Return the position after the token containing the char after point.
Sets up the variables `xmltok-type', `xmltok-start',
`xmltok-name-end', `xmltok-name-colon', `xmltok-attributes',
`xmltok-namespace-attributes' in the same was as does
`xmltok-forward'. The prolog will be treated as a single token with
type `prolog'."
(let ((pos (point)))
(if (< pos nxml-prolog-end)
(progn
(setq xmltok-type 'prolog
xmltok-start (point-min))
(min nxml-prolog-end (point-max)))
(nxml-ensure-scan-up-to-date)
(if (nxml-get-inside pos)
(save-excursion
(nxml-move-outside-backwards)
(xmltok-forward)
(point))
(save-excursion
(if (or (eq (char-after) ?<)
(search-backward "<"
(max (point-min) nxml-prolog-end)
t))
(nxml-move-outside-backwards)
(goto-char (if (<= (point-min) nxml-prolog-end)
nxml-prolog-end
(or (nxml-inside-end (point-min))
(point-min)))))
(while (and (nxml-tokenize-forward)
(<= (point) pos)))
(point))))))
(defun nxml-token-before ()
"Return the position after the token containing the char before point.
Sets variables like `nxml-token-after'."
(if (/= (point-min) (point))
(save-excursion
(goto-char (1- (point)))
(nxml-token-after))
(setq xmltok-start (point))
(setq xmltok-type nil)
(point)))
(defun nxml-tokenize-forward ()
(let (xmltok-dependent-regions
xmltok-errors)
(when (and (xmltok-forward)
(> (point) nxml-scan-end))
(cond ((memq xmltok-type '(comment
cdata-section
processing-instruction))
(nxml-with-unmodifying-text-property-changes
(nxml-set-inside (1+ xmltok-start) (point) xmltok-type)))
(xmltok-dependent-regions
(nxml-mark-parse-dependent-regions)))
(set-marker nxml-scan-end (point)))
xmltok-type))
(defun nxml-move-tag-backwards (bound)
"Move point backwards outside any 'inside' regions or tags.
Point will not move past `nxml-prolog-end'.
Point will either be at BOUND or a '<' character starting a tag
outside any 'inside' regions. Ignores dependent regions.
As a precondition, point must be >= BOUND."
(nxml-move-outside-backwards)
(when (not (equal (char-after) ?<))
(if (search-backward "<" bound t)
(progn
(nxml-move-outside-backwards)
(when (not (equal (char-after) ?<))
(search-backward "<" bound t)))
(goto-char bound))))
(defun nxml-move-outside-backwards ()
"Move point to first character of the containing special thing.
Leave point unmoved if it is not inside anything special."
(let ((start (nxml-inside-start (point))))
(when start
(goto-char (1- start))
(when (nxml-get-inside (point))
(error "Char before inside-start at %s had nxml-inside property %s"
(point)
(nxml-get-inside (point)))))))
(defun nxml-ensure-scan-up-to-date ()
(let ((pos (point)))
(when (< nxml-scan-end pos)
(save-excursion
(goto-char nxml-scan-end)
(let (xmltok-errors
xmltok-dependent-regions)
(while (when (xmltok-forward-special pos)
(when (memq xmltok-type
'(comment
processing-instruction
cdata-section))
(nxml-with-unmodifying-text-property-changes
(nxml-set-inside (1+ xmltok-start)
(point)
xmltok-type)))
(if (< (point) pos)
t
(setq pos (point))
nil)))
(nxml-clear-dependent-regions nxml-scan-end pos)
(nxml-mark-parse-dependent-regions)
(set-marker nxml-scan-end pos))))))
;;; Element scanning
(defun nxml-scan-element-forward (from &optional up)
"Scan forward from FROM over a single balanced element.
Point must be between tokens. Return the position of the end of
the tag that ends the element. `xmltok-start' will contain the
position of the start of the tag. If UP is non-nil, then scan
past end-tag of element containing point. If no element is
found, return nil. If a well-formedness error prevents scanning,
signal an `nxml-scan-error'. Point is not moved."
(let ((open-tags (and up t))
found)
(save-excursion
(goto-char from)
(while (cond ((not (nxml-tokenize-forward))
(when (consp open-tags)
(nxml-scan-error (cadr open-tags)
"Start-tag has no end-tag"))
nil)
((eq xmltok-type 'start-tag)
(setq open-tags
(cons (xmltok-start-tag-qname)
(cons xmltok-start
open-tags)))
t)
((eq xmltok-type 'end-tag)
(cond ((not open-tags) nil)
((not (consp open-tags)) (setq found (point)) nil)
((not (string= (car open-tags)
(xmltok-end-tag-qname)))
(nxml-scan-error (+ 2 xmltok-start)
"Mismatched end-tag; \
expected `%s'"
(car open-tags)))
((setq open-tags (cddr open-tags)) t)
(t (setq found (point)) nil)))
((memq xmltok-type '(empty-element
partial-empty-element))
(if open-tags
t
(setq found (point))
nil))
((eq xmltok-type 'partial-end-tag)
(cond ((not open-tags) nil)
((not (consp open-tags)) (setq found (point)) nil)
((setq open-tags (cddr open-tags)) t)
(t (setq found (point)) nil)))
((eq xmltok-type 'partial-start-tag)
(nxml-scan-error xmltok-start
"Missing `>'"))
(t t))))
found))
(defun nxml-scan-element-backward (from &optional up bound)
"Scan backward from FROM over a single balanced element.
Point must be between tokens. Return the position of the end of
the tag that starts the element. `xmltok-start' will contain the
position of the start of the tag. If UP is non-nil, then scan
past start-tag of element containing point. If BOUND is non-nil,
then don't scan back past BOUND. If no element is found, return
nil. If a well-formedness error prevents scanning, signal an
`nxml-scan-error'. Point is not moved."
(let ((open-tags (and up t))
token-end found)
(save-excursion
(goto-char from)
(while (cond ((or (< (point) nxml-prolog-end)
(not (search-backward "<"
(max (or bound 0)
nxml-prolog-end)
t)))
(when (and (consp open-tags) (not bound))
(nxml-scan-error (cadr open-tags)
"End-tag has no start-tag"))
nil)
((progn
(nxml-move-outside-backwards)
(save-excursion
(nxml-tokenize-forward)
(setq token-end (point)))
(eq xmltok-type 'end-tag))
(setq open-tags
(cons (xmltok-end-tag-qname)
(cons xmltok-start open-tags)))
t)
((eq xmltok-type 'start-tag)
(cond ((not open-tags) nil)
((not (consp open-tags))
(setq found token-end)
nil)
((and (car open-tags)
(not (string= (car open-tags)
(xmltok-start-tag-qname))))
(nxml-scan-error (1+ xmltok-start)
"Mismatched start-tag; \
expected `%s'"
(car open-tags)))
((setq open-tags (cddr open-tags)) t)
(t (setq found token-end) nil)))
((memq xmltok-type '(empty-element
partial-empty-element))
(if open-tags
t
(setq found token-end)
nil))
((eq xmltok-type 'partial-end-tag)
(setq open-tags
(cons nil (cons xmltok-start open-tags)))
t)
((eq xmltok-type 'partial-start-tag)
;; if we have only a partial-start-tag
;; then it's unlikely that there's a matching
;; end-tag, so it's probably not helpful
;; to treat it as a complete start-tag
(nxml-scan-error xmltok-start
"Missing `>'"))
(t t))))
found))
(defun nxml-scan-error (&rest args)
(signal 'nxml-scan-error args))
(put 'nxml-scan-error
'error-conditions
'(error nxml-error nxml-scan-error))
(put 'nxml-scan-error
'error-message
"Scan over element that is not well-formed")
(provide 'nxml-rap)
;; arch-tag: cba241ec-4c59-4ef3-aa51-2cf92b3dd24f
;;; nxml-rap.el ends here