1
0
mirror of https://git.savannah.gnu.org/git/emacs.git synced 2025-01-09 15:50:21 +00:00
emacs/lisp/progmodes/cc-awk.el
2005-07-04 17:55:18 +00:00

917 lines
43 KiB
EmacsLisp

;;; cc-awk.el --- AWK specific code within cc-mode.
;; Copyright (C) 1988,94,96,2000,01,02,03 Free Software Foundation, Inc.
;; Author: Alan Mackenzie <acm@muc.de> (originally based on awk-mode.el)
;; Maintainer: FSF
;; Keywords: AWK, cc-mode, unix, languages
;; This file is part of GNU Emacs.
;; GNU Emacs is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2, or (at your option)
;; any later version.
;; GNU Emacs is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs; see the file COPYING. If not, write to the
;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
;; Boston, MA 02110-1301, USA.
;;; Commentary:
;; This file contains (most of) the adaptations to cc-mode required for the
;; integration of AWK Mode.
;; It is organised thusly:
;; 1. The AWK Mode syntax table.
;; 2. Indentation calculation stuff ("c-awk-NL-prop text-property").
;; 3. Syntax-table property/font-locking stuff, but not including the
;; font-lock-keywords setting.
;; 4. The AWK Mode before/after-change-functions.
;; 5. AWK Mode specific versions of commands like beginning-of-defun.
;; The AWK Mode keymap, abbreviation table, and the mode function itself are
;; in cc-mode.el.
;;; Code:
(eval-when-compile
(let ((load-path
(if (and (boundp 'byte-compile-dest-file)
(stringp byte-compile-dest-file))
(cons (file-name-directory byte-compile-dest-file) load-path)
load-path)))
(load "cc-bytecomp" nil t)))
(cc-require 'cc-defs)
;; Silence the byte compiler.
(cc-bytecomp-defvar font-lock-mode) ; Checked with boundp before use.
;; Some functions in cc-engine that are used below. There's a cyclic
;; dependency so it can't be required here. (Perhaps some functions
;; could be moved to cc-engine to avoid it.)
(cc-bytecomp-defun c-backward-token-1)
(cc-bytecomp-defun c-beginning-of-statement-1)
(cc-bytecomp-defun c-backward-sws)
(defvar awk-mode-syntax-table
(let ((st (make-syntax-table)))
(modify-syntax-entry ?\\ "\\" st)
(modify-syntax-entry ?\n "> " st)
(modify-syntax-entry ?\r "> " st)
(modify-syntax-entry ?\f "> " st)
(modify-syntax-entry ?\# "< " st)
;; / can delimit regexes or be a division operator. By default we assume
;; that it is a division sign, and fix the regexp operator cases with
;; `font-lock-syntactic-keywords'.
(modify-syntax-entry ?/ "." st) ; ACM 2002/4/27.
(modify-syntax-entry ?* "." st)
(modify-syntax-entry ?+ "." st)
(modify-syntax-entry ?- "." st)
(modify-syntax-entry ?= "." st)
(modify-syntax-entry ?% "." st)
(modify-syntax-entry ?< "." st)
(modify-syntax-entry ?> "." st)
(modify-syntax-entry ?& "." st)
(modify-syntax-entry ?| "." st)
(modify-syntax-entry ?_ "_" st)
(modify-syntax-entry ?\' "." st)
st)
"Syntax table in use in AWK Mode buffers.")
;; ACM, 2002/5/29:
;;
;; The next section of code is about determining whether or not an AWK
;; statement is complete or not. We use this to indent the following line.
;; The determination is pretty straightforward in C, where a statement ends
;; with either a ; or a }. Only "while" really gives any trouble there, since
;; it might be the end of a do-while. In AWK, on the other hand, semicolons
;; are rarely used, and EOLs _usually_ act as "virtual semicolons". In
;; addition, we have the complexity of escaped EOLs. The core of this
;; analysis is in the middle of the function
;; c-awk-calculate-NL-prop-prev-line, about 130 lines lower down.
;;
;; To avoid continually repeating this expensive analysis, we "cache" its
;; result in a text-property, c-awk-NL-prop, whose value for a line is set on
;; the EOL (if any) which terminates that line. Should the property be
;; required for the very last line (which has no EOL), it is calculated as
;; required but not cached. The c-awk-NL-prop property should be thought of
;; as only really valid immediately after a buffer change, not a permanently
;; set property. (By contrast, the syntax-table text properties (set by an
;; after-change function) must be constantly updated for the mode to work
;; properly).
;;
;; The valid values for c-awk-NL-prop are:
;;
;; nil The property is not currently set for this line.
;; '#' There is NO statement on this line (at most a comment), and no open
;; statement from a previous line which could have been completed on this
;; line.
;; '{' There is an unfinished statement on this (or a previous) line which
;; doesn't require \s to continue onto another line, e.g. the line ends
;; with {, or the && operator, or "if (condition)". Note that even if the
;; newline is redundantly escaped, it remains a '{' line.
;; '\' There is an escaped newline at the end of this line and this '\' is
;; essential to the syntax of the program. (i.e. if it had been a
;; frivolous \, it would have been ignored and the line been given one of
;; the other property values.)
;; ';' A statement is completed as the last thing (aside from ws) on the line -
;; i.e. there is (at least part of) a statement on this line, and the last
;; statement on the line is complete, OR (2002/10/25) the line is
;; content-free but terminates a statement from the preceding (continued)
;; line (which has property \).
;;
;; This set of values has been chosen so that the property's value on a line
;; is completely determined by the contents of the line and the property on
;; the previous line, EXCEPT for where a "while" might be the closing
;; statement of a do-while.
(defun c-awk-after-if-for-while-condition-p (&optional do-lim)
;; Are we just after the ) in "if/for/while (<condition>)"?
;;
;; Note that the end of the ) in a do .... while (<condition>) doesn't
;; count, since the purpose of this routine is essentially to decide
;; whether to indent the next line.
;;
;; DO-LIM sets a limit on how far back we search for the "do" of a possible
;; do-while.
(and
(eq (char-before) ?\))
(save-excursion
(let ((par-pos (c-safe (scan-lists (point) -1 0))))
(when par-pos
(goto-char par-pos) ; back over "(...)"
(c-backward-token-1) ; BOB isn't a problem.
(or (looking-at "\\(if\\|for\\)\\>\\([^_]\\|$\\)")
(and (looking-at "while\\>\\([^_]\\|$\\)") ; Ensure this isn't a do-while.
(not (eq (c-beginning-of-statement-1 do-lim)
'beginning)))))))))
(defun c-awk-after-function-decl-param-list ()
;; Are we just after the ) in "function foo (bar)" ?
(and (eq (char-before) ?\))
(save-excursion
(let ((par-pos (c-safe (scan-lists (point) -1 0))))
(when par-pos
(goto-char par-pos) ; back over "(...)"
(c-backward-token-1) ; BOB isn't a problem
(and (looking-at "[_a-zA-Z][_a-zA-Z0-9]*\\>")
(progn (c-backward-token-1)
(looking-at "func\\(tion\\)?\\>"))))))))
;; 2002/11/8: FIXME! Check c-backward-token-1/2 for success (0 return code).
(defun c-awk-after-continue-token ()
;; Are we just after a token which can be continued onto the next line without
;; a backslash?
(save-excursion
(c-backward-token-1) ; FIXME 2002/10/27. What if this fails?
(if (and (looking-at "[&|]") (not (bobp)))
(backward-char)) ; c-backward-token-1 doesn't do this :-(
(looking-at "[,{?:]\\|&&\\|||\\|do\\>\\|else\\>")))
(defun c-awk-after-rbrace-or-statement-semicolon ()
;; Are we just after a } or a ; which closes a statement?
;; Be careful about ;s in for loop control bits. They don't count!
(or (eq (char-before) ?\})
(and
(eq (char-before) ?\;)
(save-excursion
(let ((par-pos (c-safe (scan-lists (point) -1 1))))
(when par-pos
(goto-char par-pos) ; go back to containing (
(not (and (looking-at "(")
(c-backward-token-1) ; BOB isn't a problem
(looking-at "for\\>")))))))))
(defun c-awk-back-to-contentful-text-or-NL-prop ()
;; Move back to just after the first found of either (i) an EOL which has
;; the c-awk-NL-prop text-property set; or (ii) non-ws text; or (iii) BOB.
;; We return either the value of c-awk-NL-prop (in case (i)) or nil.
;; Calling function can best distinguish cases (ii) and (iii) with (bolp).
;;
;; Note that an escaped eol counts as whitespace here.
;;
;; Kludge: If c-backward-syntactic-ws gets stuck at a BOL, it is likely
;; that the previous line contains an unterminated string (without \). In
;; this case, assume that the previous line's c-awk-NL-prop is a ;.
;;
;; POINT MUST BE AT THE START OF A LINE when calling this function. This
;; is to ensure that the various backward-comment functions will work
;; properly.
(let ((nl-prop nil)
bol-pos bsws-pos) ; starting pos for a backward-syntactic-ws call.
(while ;; We are at a BOL here. Go back one line each iteration.
(and
(not (bobp))
(not (setq nl-prop (c-get-char-property (1- (point)) 'c-awk-NL-prop)))
(progn (setq bol-pos (c-point 'bopl))
(setq bsws-pos (point))
;; N.B. the following function will not go back past an EOL if
;; there is an open string (without \) on the previous line.
(c-backward-syntactic-ws bol-pos)
(or (/= (point) bsws-pos)
(progn (setq nl-prop ?\;)
nil)))
;; If we had a backslash at EOL, c-backward-syntactic-ws will
;; have gone backwards over it. Check the backslash was "real".
(progn
(if (looking-at "[ \t]*\\\\+$")
(if (progn
(end-of-line)
(search-backward-regexp
"\\(^\\|[^\\]\\)\\(\\\\\\\\\\)*\\\\$" ; ODD number of \s at EOL :-)
bol-pos t))
(progn (end-of-line) ; escaped EOL.
(backward-char)
(c-backward-syntactic-ws bol-pos))
(end-of-line))) ; The \ at eol is a fake.
(bolp))))
nl-prop))
(defun c-awk-calculate-NL-prop-prev-line (&optional do-lim)
;; Calculate and set the value of the c-awk-NL-prop on the immediately
;; preceding EOL. This may also involve doing the same for several
;; preceding EOLs.
;;
;; NOTE that if the property was already set, we return it without
;; recalculation. (This is by accident rather than design.)
;;
;; Return the property which got set (or was already set) on the previous
;; line. Return nil if we hit BOB.
;;
;; See c-awk-after-if-for-while-condition-p for a description of DO-LIM.
(save-excursion
(save-match-data
(beginning-of-line)
(let* ((pos (point))
(nl-prop (c-awk-back-to-contentful-text-or-NL-prop)))
;; We are either (1) at a BOL (with nl-prop containing the previous
;; line's c-awk-NL-prop) or (2) after contentful text on a line. At
;; the BOB counts as case (1), so we test next for bolp rather than
;; non-nil nl-prop.
(when (not (bolp))
(setq nl-prop
(cond
;; Incomplete statement which doesn't require escaped EOL?
((or (c-awk-after-if-for-while-condition-p do-lim)
(c-awk-after-function-decl-param-list)
(c-awk-after-continue-token))
?\{)
;; Escaped EOL (where there's also something to continue)?
((and (looking-at "[ \t]*\\\\$")
(not (c-awk-after-rbrace-or-statement-semicolon)))
?\\)
(t ?\;))) ; A statement was completed on this line
(end-of-line)
(c-put-char-property (point) 'c-awk-NL-prop nl-prop)
(forward-line))
;; We are now at a (possibly empty) sequence of content-free lines.
;; Set c-awk-NL-prop on each of these lines's EOL.
(while (< (point) pos) ; one content-free line each iteration.
(cond ; recalculate nl-prop from previous line's value.
((memq nl-prop '(?\; nil)) (setq nl-prop ?\#))
((eq nl-prop ?\\)
(if (not (looking-at "[ \t]*\\\\$")) (setq nl-prop ?\;))) ; was ?\# 2002/10/25
;; ?\# (empty line) and ?\{ (open stmt) don't change.
)
(forward-line)
(c-put-char-property (1- (point)) 'c-awk-NL-prop nl-prop))
nl-prop))))
(defun c-awk-get-NL-prop-prev-line (&optional do-lim)
;; Get the c-awk-NL-prop text-property from the previous line, calculating
;; it if necessary. Return nil iff we're already at BOB.
;; See c-awk-after-if-for-while-condition-p for a description of DO-LIM.
(if (bobp)
nil
(or (c-get-char-property (c-point 'eopl) 'c-awk-NL-prop)
(c-awk-calculate-NL-prop-prev-line do-lim))))
(defun c-awk-get-NL-prop-cur-line (&optional do-lim)
;; Get the c-awk-NL-prop text-property from the current line, calculating it
;; if necessary. (As a special case, the property doesn't get set on an
;; empty line at EOB (there's no position to set the property on), but the
;; function returns the property value an EOL would have got.)
;;
;; See c-awk-after-if-for-while-condition-p for a description of DO-LIM.
(save-excursion
(let ((extra-nl nil))
(end-of-line) ; Necessary for the following test to work.
(when (= (forward-line) 1) ; if we were on the last line....
(insert-char ?\n 1) ; ...artificial eol is needed for comment detection.
(setq extra-nl t))
(prog1 (c-awk-get-NL-prop-prev-line do-lim)
(if extra-nl (delete-backward-char 1))))))
(defun c-awk-prev-line-incomplete-p (&optional do-lim)
;; Is there an incomplete statement at the end of the previous line?
;; See c-awk-after-if-for-while-condition-p for a description of DO-LIM.
(memq (c-awk-get-NL-prop-prev-line do-lim) '(?\\ ?\{)))
(defun c-awk-cur-line-incomplete-p (&optional do-lim)
;; Is there an incomplete statement at the end of the current line?
;; See c-awk-after-if-for-while-condition-p for a description of DO-LIM.
(memq (c-awk-get-NL-prop-cur-line do-lim) '(?\\ ?\{)))
(defun c-awk-completed-stmt-ws-ends-prev-line-p (&optional do-lim)
;; Is there a termination of a statement as the last thing (apart from an
;; optional comment) on the previous line?
;; See c-awk-after-if-for-while-condition-p for a description of DO-LIM.
(eq (c-awk-get-NL-prop-prev-line do-lim) ?\;))
(defun c-awk-completed-stmt-ws-ends-line-p (&optional pos do-lim)
;; Same as previous function, but for the line containing position POS (or
;; the current line if POS is omitted).
;; See c-awk-after-if-for-while-condition-p for a description of DO-LIM.
(save-excursion
(if pos (goto-char pos))
(eq (c-awk-get-NL-prop-cur-line do-lim) ?\;)))
(defun c-awk-after-logical-semicolon (&optional do-lim)
;; Are we at BOL, the preceding EOL being a "logical semicolon"?
;; See c-awk-after-if-for-while-condition-p for a description of DO-LIM.
(and (bolp)
(eq (c-awk-get-NL-prop-prev-line do-lim) ?\;)))
(defun c-awk-backward-syntactic-ws (&optional lim)
;; Skip backwards over awk-syntactic whitespace. This is whitespace
;; characters, comments, and NEWLINES WHICH AREN'T "VIRTUAL SEMICOLONS". For
;; this function, a newline isn't a "virtual semicolon" if that line ends with
;; a real semicolon (or closing brace).
;; However if point starts inside a comment or preprocessor directive, the
;; content of it is not treated as whitespace. LIM (optional) sets a limit on
;; the backward movement.
(let ((lim (or lim (point-min)))
after-real-br)
(c-backward-syntactic-ws (max lim (c-point 'bol)))
(while ; go back one WS line each time round this loop.
(and (bolp)
(> (point) lim)
(/= (c-awk-get-NL-prop-prev-line) ?\;)
(/= (point)
;; The following function requires point at BONL [not EOL] to
;; recognise a preceding comment,.
(progn (c-backward-syntactic-ws (max lim (c-point 'bopl)))
(point)))))
;; Does the previous line end with a real ; or }? If so, go back to it.
(if (and (bolp)
(eq (c-awk-get-NL-prop-prev-line) ?\;)
(save-excursion
(c-backward-syntactic-ws (max lim (c-point 'bopl)))
(setq after-real-br (point))
(c-awk-after-rbrace-or-statement-semicolon)))
(goto-char after-real-br))))
(defun c-awk-NL-prop-not-set ()
;; Is the NL-prop on the current line either nil or unset?
(not (c-get-char-property (c-point 'eol) 'c-awk-NL-prop)))
(defun c-awk-clear-NL-props (beg end)
;; This function is run from before-change-hooks. It clears the
;; c-awk-NL-prop text property from beg to the end of the buffer (The END
;; parameter is ignored). This ensures that the indentation engine will
;; never use stale values for this property.
(save-restriction
(widen)
(c-clear-char-properties beg (point-max) 'c-awk-NL-prop)))
(defun c-awk-unstick-NL-prop ()
;; Ensure that the text property c-awk-NL-prop is "non-sticky". Without
;; this, a new newline inserted after an old newline (e.g. by C-j) would
;; inherit any c-awk-NL-prop from the old newline. This would be a Bad
;; Thing. This function's action is required by c-put-char-property.
(if (and (boundp 'text-property-default-nonsticky) ; doesn't exist in Xemacs
(not (assoc 'c-awk-NL-prop text-property-default-nonsticky)))
(setq text-property-default-nonsticky
(cons '(c-awk-NL-prop . t) text-property-default-nonsticky))))
;; The following is purely a diagnostic command, to be commented out of the
;; final release. ACM, 2002/6/1
;; (defun NL-props ()
;; (interactive)
;; (let (pl-prop cl-prop)
;; (message "Prev-line: %s Cur-line: %s"
;; (if (setq pl-prop (c-get-char-property (c-point 'eopl) 'c-awk-NL-prop))
;; (char-to-string pl-prop)
;; "nil")
;; (if (setq cl-prop (c-get-char-property (c-point 'eol) 'c-awk-NL-prop))
;; (char-to-string cl-prop)
;; "nil"))))
;(define-key awk-mode-map [?\C-c ?\r] 'NL-props) ; commented out, 2002/8/31
;for now. In the byte compiled version, this causes things to crash because
;awk-mode-map isn't yet defined. :-(
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; The following section of the code is to do with font-locking. The biggest
;; problem for font-locking is deciding whether a / is a regular expression
;; delimiter or a division sign - determining precisely where strings and
;; regular expressions start and stop is also troublesome. This is the
;; purpose of the function c-awk-set-syntax-table-properties and the myriad
;; elisp regular expressions it uses.
;;
;; Because AWK is a line oriented language, I felt the normal cc-mode strategy
;; for font-locking unterminated strings (i.e. font-locking the buffer up to
;; the next string delimiter as a string) was inappropriate. Instead,
;; unbalanced string/regexp delimiters are given the warning font, being
;; refonted with the string font as soon as the matching delimiter is entered.
;;
;; This requires the region processed by the current font-lock after-change
;; function to have access to the start of the string/regexp, which may be
;; several lines back. The elisp "advice" feature is used on these functions
;; to allow this.
(defun c-awk-beginning-of-logical-line (&optional pos)
;; Go back to the start of the (apparent) current line (or the start of the
;; line containing POS), returning the buffer position of that point. I.e.,
;; go back to the last line which doesn't have an escaped EOL before it.
;;
;; This is guaranteed to be "safe" for syntactic analysis, i.e. outwith any
;; comment, string or regexp. IT MAY WELL BE that this function should not be
;; executed on a narrowed buffer.
(if pos (goto-char pos))
(forward-line 0)
(while (and (> (point) (point-min))
(eq (char-before (1- (point))) ?\\))
(forward-line -1))
(point))
(defun c-awk-end-of-logical-line (&optional pos)
;; Go forward to the end of the (apparent) current logical line (or the end of
;; the line containing POS), returning the buffer position of that point. I.e.,
;; go to the end of the next line which doesn't have an escaped EOL.
;;
;; This is guaranteed to be "safe" for syntactic analysis, i.e. outwith any
;; comment, string or regexp. IT MAY WELL BE that this function should not be
;; executed on a narrowed buffer.
(if pos (goto-char pos))
(end-of-line)
(while (and (< (point) (point-max))
(eq (char-before) ?\\))
(end-of-line 2))
(point))
;; N.B. In the following regexps, an EOL is either \n OR \r. This is because
;; Emacs has in the past used \r to mark hidden lines in some fashion (and
;; maybe still does).
(defconst c-awk-esc-pair-re "\\\\\\(.\\|\n\\|\r\\|\\'\\)")
;; Matches any escaped (with \) character-pair, including an escaped newline.
(defconst c-awk-comment-without-nl "#.*")
;; Matches an AWK comment, not including the terminating NL (if any). Note
;; that the "enclosing" (elisp) regexp must ensure the # is real.
(defconst c-awk-nl-or-eob "\\(\n\\|\r\\|\\'\\)")
;; Matches a newline, or the end of buffer.
;; "Space" regular expressions.
(defconst c-awk-escaped-nl "\\\\[\n\r]")
;; Matches an escaped newline.
(defconst c-awk-escaped-nls* (concat "\\(" c-awk-escaped-nl "\\)*"))
;; Matches a possibly empty sequence of escaped newlines. Used in
;; awk-font-lock-keywords.
;; (defconst c-awk-escaped-nls*-with-space*
;; (concat "\\(" c-awk-escaped-nls* "\\|" "[ \t]+" "\\)*"))
;; The above RE was very slow. It's runtime was doubling with each additional
;; space :-( Reformulate it as below:
(defconst c-awk-escaped-nls*-with-space*
(concat "\\(" c-awk-escaped-nl "\\|" "[ \t]" "\\)*"))
;; Matches a possibly empty sequence of escaped newlines with optional
;; interspersed spaces and tabs. Used in awk-font-lock-keywords.
;; REGEXPS FOR "HARMLESS" STRINGS/LINES.
(defconst c-awk-harmless-char-re "[^_#/\"\\\\\n\r]")
;; Matches any character but a _, #, /, ", \, or newline. N.B. _" starts a
;; localisation string in gawk 3.1
(defconst c-awk-harmless-_ "_\\([^\"]\\|\\'\\)")
;; Matches an underline NOT followed by ".
(defconst c-awk-harmless-string*-re
(concat "\\(" c-awk-harmless-char-re "\\|" c-awk-esc-pair-re "\\|" c-awk-harmless-_ "\\)*"))
;; Matches a (possibly empty) sequence of chars without unescaped /, ", \,
;; #, or newlines.
(defconst c-awk-harmless-string*-here-re
(concat "\\=" c-awk-harmless-string*-re))
;; Matches the (possibly empty) sequence of chars without unescaped /, ", \,
;; at point.
(defconst c-awk-harmless-line-re
(concat c-awk-harmless-string*-re
"\\(" c-awk-comment-without-nl "\\)?" c-awk-nl-or-eob))
;; Matches (the tail of) an AWK \"logical\" line not containing an unescaped
;; " or /. "logical" means "possibly containing escaped newlines". A comment
;; is matched as part of the line even if it contains a " or a /. The End of
;; buffer is also an end of line.
(defconst c-awk-harmless-lines+-here-re
(concat "\\=\\(" c-awk-harmless-line-re "\\)+"))
;; Matches a sequence of (at least one) \"harmless-line\" at point.
;; REGEXPS FOR AWK STRINGS.
(defconst c-awk-string-ch-re "[^\"\\\n\r]")
;; Matches any character which can appear unescaped in a string.
(defconst c-awk-string-innards-re
(concat "\\(" c-awk-string-ch-re "\\|" c-awk-esc-pair-re "\\)*"))
;; Matches the inside of an AWK string (i.e. without the enclosing quotes).
(defconst c-awk-string-without-end-here-re
(concat "\\=_?\"" c-awk-string-innards-re))
;; Matches an AWK string at point up to, but not including, any terminator.
;; A gawk 3.1+ string may look like _"localisable string".
;; REGEXPS FOR AWK REGEXPS.
(defconst c-awk-regexp-normal-re "[^[/\\\n\r]")
;; Matches any AWK regexp character which doesn't require special analysis.
(defconst c-awk-escaped-newlines*-re "\\(\\\\[\n\r]\\)*")
;; Matches a (possibly empty) sequence of escaped newlines.
;; NOTE: In what follows, "[asdf]" in a regexp will be called a "character
;; list", and "[:alpha:]" inside a character list will be known as a
;; "character class". These terms for these things vary between regexp
;; descriptions .
(defconst c-awk-regexp-char-class-re
"\\[:[a-z]+:\\]")
;; Matches a character class spec (e.g. [:alpha:]).
(defconst c-awk-regexp-char-list-re
(concat "\\[" c-awk-escaped-newlines*-re "^?" c-awk-escaped-newlines*-re "]?"
"\\(" c-awk-esc-pair-re "\\|" c-awk-regexp-char-class-re
"\\|" "[^]\n\r]" "\\)*" "\\(]\\|$\\)"))
;; Matches a regexp char list, up to (but not including) EOL if the ] is
;; missing.
(defconst c-awk-regexp-innards-re
(concat "\\(" c-awk-esc-pair-re "\\|" c-awk-regexp-char-list-re
"\\|" c-awk-regexp-normal-re "\\)*"))
;; Matches the inside of an AWK regexp (i.e. without the enclosing /s)
(defconst c-awk-regexp-without-end-re
(concat "/" c-awk-regexp-innards-re))
;; Matches an AWK regexp up to, but not including, any terminating /.
;; REGEXPS used for scanning an AWK buffer in order to decide IF A '/' IS A
;; REGEXP OPENER OR A DIVISION SIGN. By "state" in the following is meant
;; whether a '/' at the current position would by a regexp opener or a
;; division sign.
(defconst c-awk-neutral-re
; "\\([{}@` \t]\\|\\+\\+\\|--\\|\\\\.\\)+") ; changed, 2003/6/7
"\\([{}@` \t]\\|\\+\\+\\|--\\|\\\\.\\)")
;; A "neutral" char(pair). Doesn't change the "state" of a subsequent /.
;; This is space/tab, braces, an auto-increment/decrement operator or an
;; escaped character. Or one of the (illegal) characters @ or `. But NOT an
;; end of line (even if escaped).
(defconst c-awk-neutrals*-re
(concat "\\(" c-awk-neutral-re "\\)*"))
;; A (possibly empty) string of neutral characters (or character pairs).
(defconst c-awk-var-num-ket-re "[]\)0-9a-zA-Z_$.\x80-\xff]+")
;; Matches a char which is a constituent of a variable or number, or a ket
;; (i.e. closing bracKET), round or square. Assume that all characters \x80 to
;; \xff are "letters".
(defconst c-awk-div-sign-re
(concat c-awk-var-num-ket-re c-awk-neutrals*-re "/"))
;; Will match a piece of AWK buffer ending in / which is a division sign, in
;; a context where an immediate / would be a regexp bracket. It follows a
;; variable or number (with optional intervening "neutral" characters). This
;; will only work when there won't be a preceding " or / before the sought /
;; to foul things up.
(defconst c-awk-non-arith-op-bra-re
"[[\(&=:!><,?;'~|]")
;; Matches an openeing BRAcket ,round or square, or any operator character
;; apart from +,-,/,*,%. For the purpose at hand (detecting a / which is a
;; regexp bracket) these arith ops are unnecessary and a pain, because of "++"
;; and "--".
(defconst c-awk-regexp-sign-re
(concat c-awk-non-arith-op-bra-re c-awk-neutrals*-re "/"))
;; Will match a piece of AWK buffer ending in / which is an opening regexp
;; bracket, in a context where an immediate / would be a division sign. This
;; will only work when there won't be a preceding " or / before the sought /
;; to foul things up.
;; ACM, 2002/02/15: The idea of the next function is to put the "Error font"
;; on strings/regexps which are missing their closing delimiter.
;; 2002/4/28. The default syntax for / has been changed from "string" to
;; "punctuation", to reduce hassle when this character appears within a string
;; or comment.
(defun c-awk-set-string-regexp-syntax-table-properties (beg end)
;; BEG and END bracket a (possibly unterminated) string or regexp. The
;; opening delimiter is after BEG, and the closing delimiter, IF ANY, is AFTER
;; END. Set the appropriate syntax-table properties on the delimiters and
;; contents of this string/regex.
;;
;; "String" here can also mean a gawk 3.1 "localizable" string which starts
;; with _". In this case, we step over the _ and ignore it; It will get it's
;; font from an entry in awk-font-lock-keywords.
;;
;; If the closing delimiter is missing (i.e., there is an EOL there) set the
;; STRING-FENCE property on the opening " or / and closing EOL.
(if (eq (char-after beg) ?_) (setq beg (1+ beg)))
;; First put the properties on the delimiters.
(cond ((eq end (point-max)) ; string/regexp terminated by EOB
(put-text-property beg (1+ beg) 'syntax-table '(15))) ; (15) = "string fence"
((/= (char-after beg) (char-after end)) ; missing end delimiter
(put-text-property beg (1+ beg) 'syntax-table '(15))
(put-text-property end (1+ end) 'syntax-table '(15)))
((eq (char-after beg) ?/) ; Properly bracketed regexp
(put-text-property beg (1+ beg) 'syntax-table '(7)) ; (7) = "string"
(put-text-property end (1+ end) 'syntax-table '(7)))
(t)) ; Properly bracketed string: Nothing to do.
;; Now change the properties of any escaped "s in the string to punctuation.
(save-excursion
(goto-char (1+ beg))
(or (eobp)
(while (search-forward "\"" end t)
(put-text-property (1- (point)) (point) 'syntax-table '(1))))))
(defun c-awk-syntax-tablify-string ()
;; Point is at the opening " or _" of a string. Set the syntax-table
;; properties on this string, leaving point just after the string.
;;
;; The result is nil if a / immediately after the string would be a regexp
;; opener, t if it would be a division sign.
(search-forward-regexp c-awk-string-without-end-here-re nil t) ; a (possibly unterminated) string
(c-awk-set-string-regexp-syntax-table-properties
(match-beginning 0) (match-end 0))
(cond ((looking-at "\"")
(forward-char)
t) ; In AWK, ("15" / 5) gives 3 ;-)
((looking-at "[\n\r]") ; Unterminated string with EOL.
(forward-char)
nil) ; / on next line would start a regexp
(t nil))) ; Unterminated string at EOB
(defun c-awk-syntax-tablify-/ (anchor anchor-state-/div)
;; Point is at a /. Determine whether this is a division sign or a regexp
;; opener, and if the latter, apply syntax-table properties to the entire
;; regexp. Point is left immediately after the division sign or regexp, as
;; the case may be.
;;
;; ANCHOR-STATE-/DIV identifies whether a / at ANCHOR would have been a
;; division sign (value t) or a regexp opener (value nil). The idea is that
;; we analyse the line from ANCHOR up till point to determine what the / at
;; point is.
;;
;; The result is what ANCHOR-STATE-/DIV (see above) is where point is left.
(let ((/point (point)))
(goto-char anchor)
;; Analyse the line to find out what the / is.
(if (if anchor-state-/div
(not (search-forward-regexp c-awk-regexp-sign-re (1+ /point) t))
(search-forward-regexp c-awk-div-sign-re (1+ /point) t))
;; A division sign.
(progn (goto-char (1+ /point)) nil)
;; A regexp opener
;; Jump over the regexp innards, setting the match data.
(goto-char /point)
(search-forward-regexp c-awk-regexp-without-end-re)
(c-awk-set-string-regexp-syntax-table-properties
(match-beginning 0) (match-end 0))
(cond ((looking-at "/") ; Terminating /
(forward-char)
t)
((looking-at "[\n\r]") ; Incomplete regexp terminated by EOL
(forward-char)
nil) ; / on next line would start another regexp
(t nil))))) ; Unterminated regexp at EOB
(defun c-awk-set-syntax-table-properties (lim)
;; Scan the buffer text between point and LIM, setting (and clearing) the
;; syntax-table property where necessary.
;;
;; This function is designed to be called as the FUNCTION in a MATCHER in
;; font-lock-syntactic-keywords, and it always returns NIL (to inhibit
;; repeated calls from font-lock: See elisp info page "Search-based
;; Fontification"). It also gets called, with a bit of glue, from
;; after-change-functions when font-lock isn't active. Point is left
;; "undefined" after this function exits. THE BUFFER SHOULD HAVE BEEN
;; WIDENED, AND ANY PRECIOUS MATCH-DATA SAVED BEFORE CALLING THIS ROUTINE.
;;
;; We need to set/clear the syntax-table property on:
;; (i) / - It is set to "string" on a / which is the opening or closing
;; delimiter of the properly terminated regexp (and left unset on a
;; division sign).
;; (ii) the opener of an unterminated string/regexp, we set the property
;; "generic string delimiter" on both the opening " or / and the end of the
;; line where the closing delimiter is missing.
;; (iii) "s inside strings/regexps (these will all be escaped "s). They are
;; given the property "punctuation". This will later allow other routines
;; to use the regexp "\\S\"*" to skip over the string innards.
;; (iv) Inside a comment, all syntax-table properties are cleared.
(let (anchor
(anchor-state-/div nil)) ; t means a following / would be a div sign.
(c-awk-beginning-of-logical-line) ; ACM 2002/7/21. This is probably redundant.
(put-text-property (point) lim 'syntax-table nil)
(search-forward-regexp c-awk-harmless-lines+-here-re nil t) ; skip harmless lines.
;; Once round the next loop for each string, regexp, or div sign
(while (< (point) lim)
(setq anchor (point))
(search-forward-regexp c-awk-harmless-string*-here-re nil t)
;; We are now looking at either a " or a /.
;; Do our thing on the string, regexp or divsion sign.
(setq anchor-state-/div
(if (looking-at "_?\"")
(c-awk-syntax-tablify-string)
(c-awk-syntax-tablify-/ anchor anchor-state-/div)))
;; Skip any further "harmless" lines before the next tricky one.
(if (search-forward-regexp c-awk-harmless-lines+-here-re nil t)
(setq anchor-state-/div nil)))
nil))
;; ACM, 2002/07/21: Thoughts: We need an AWK Mode after-change function to set
;; the syntax-table properties even when font-lock isn't enabled, for the
;; subsequent use of movement functions, etc. However, it seems that if font
;; lock _is_ enabled, we can always leave it to do the job.
(defvar c-awk-old-EOLL 0)
(make-variable-buffer-local 'c-awk-old-EOLL)
;; End of logical line following the region which is about to be changed. Set
;; in c-awk-before-change and used in c-awk-after-change.
(defun c-awk-before-change (beg end)
;; This function is called exclusively from the before-change-functions hook.
;; It does two things: Finds the end of the (logical) line on which END lies,
;; and clears c-awk-NL-prop text properties from this point onwards.
(save-restriction
(save-excursion
(setq c-awk-old-EOLL (c-awk-end-of-logical-line end))
(c-save-buffer-state nil
(c-awk-clear-NL-props end (point-max))))))
(defun c-awk-end-of-change-region (beg end old-len)
;; Find the end of the region which needs to be font-locked after a change.
;; This is the end of the logical line on which the change happened, either
;; as it was before the change, or as it is now, which ever is later.
;; N.B. point is left undefined.
(max (+ (- c-awk-old-EOLL old-len) (- end beg))
(c-awk-end-of-logical-line end)))
(defun c-awk-after-change (beg end old-len)
;; This function is called exclusively as an after-change function in
;; AWK Mode. It ensures that the syntax-table properties get set in the
;; changed region. However, if font-lock is enabled, this function does
;; nothing, since an enabled font-lock after-change function will always do
;; this.
(unless (and (boundp 'font-lock-mode) font-lock-mode)
(save-restriction
(save-excursion
(setq end (c-awk-end-of-change-region beg end old-len))
(c-awk-beginning-of-logical-line beg)
(c-save-buffer-state nil ; So that read-only status isn't affected.
; (e.g. when first loading the buffer)
(c-awk-set-syntax-table-properties end))))))
;; ACM 2002/5/25. When font-locking is invoked by a buffer change, the region
;; specified by the font-lock after-change function must be expanded to
;; include ALL of any string or regexp within the region. The simplest way to
;; do this in practice is to use the beginning/end-of-logical-line functions.
;; Don't overlook the possibility of the buffer change being the "recapturing"
;; of a previously escaped newline.
(defmacro c-awk-advise-fl-for-awk-region (function)
`(defadvice ,function (before get-awk-region activate)
;; When font-locking an AWK Mode buffer, make sure that any string/regexp is
;; completely font-locked.
(when (eq major-mode 'awk-mode)
(save-excursion
(ad-set-arg 1 (c-awk-end-of-change-region
(ad-get-arg 0) ; beg
(ad-get-arg 1) ; end
(ad-get-arg 2))) ; old-len
(ad-set-arg 0 (c-awk-beginning-of-logical-line (ad-get-arg 0)))))))
(c-awk-advise-fl-for-awk-region font-lock-after-change-function)
(c-awk-advise-fl-for-awk-region jit-lock-after-change)
(c-awk-advise-fl-for-awk-region lazy-lock-defer-rest-after-change)
(c-awk-advise-fl-for-awk-region lazy-lock-defer-line-after-change)
;; ACM 2002/9/29. Functions for C-M-a and C-M-e
(defconst c-awk-terminated-regexp-or-string-here-re "\\=\\s\"\\S\"*\\s\"")
;; Matches a terminated string/regexp (utilising syntax-table properties).
(defconst c-awk-unterminated-regexp-or-string-here-re "\\=\\s|\\S|*$")
;; Matches an unterminated string/regexp, NOT including the eol at the end.
(defconst c-awk-harmless-pattern-characters*
(concat "\\([^{;#/\"\\\\\n\r]\\|" c-awk-esc-pair-re "\\)*"))
;; Matches any "harmless" character in a pattern or an escaped character pair.
(defun c-awk-beginning-of-defun (&optional arg)
"Move backward to the beginning of an AWK \"defun\". With ARG, do it that
many times. Negative arg -N means move forward to Nth following beginning of
defun. Returns t unless search stops due to beginning or end of buffer.
By a \"defun\" is meant either a pattern-action pair or a function. The start
of a defun is recognized as code starting at column zero which is neither a
closing brace nor a comment nor a continuation of the previous line. Unlike
in some other modes, having an opening brace at column 0 is neither necessary
nor helpful."
(interactive "p")
(save-match-data
(c-save-buffer-state ; ensures the buffer is writable.
nil
(let ((found t)) ; Has the most recent regexp search found b-of-defun?
(if (>= arg 0)
;; Go back one defun each time round the following loop. (For +ve arg)
(while (and found (> arg 0) (not (eq (point) (point-min))))
;; Go back one "candidate" each time round the next loop until one
;; is genuinely a beginning-of-defun.
(while (and (setq found (search-backward-regexp
"^[^#} \t\n\r]" (point-min) 'stop-at-limit))
(not (memq (c-awk-get-NL-prop-prev-line) '(?\; ?\#)))))
(setq arg (1- arg)))
;; The same for a -ve arg.
(if (not (eq (point) (point-max))) (forward-char 1))
(while (and found (< arg 0) (not (eq (point) (point-max)))) ; The same for -ve arg.
(while (and (setq found (search-forward-regexp
"^[^#} \t\n\r]" (point-max) 'stop-at-limit))
(not (memq (c-awk-get-NL-prop-prev-line) '(?\; ?\#)))))
(setq arg (1+ arg)))
(if found (goto-char (match-beginning 0))))
(eq arg 0)))))
(defun c-awk-forward-awk-pattern ()
;; Point is at the start of an AWK pattern (which may be null) or function
;; declaration. Move to the pattern's end, and past any trailing space or
;; comment. Typically, we stop at the { which denotes the corresponding AWK
;; action/function body. Otherwise we stop at the EOL (or ;) marking the
;; absence of an explicit action.
(while
(progn
(search-forward-regexp c-awk-harmless-pattern-characters*)
(if (looking-at "#") (end-of-line))
(cond
((eobp) nil)
((looking-at "[{;]") nil) ; We've finished!
((eolp)
(if (c-awk-cur-line-incomplete-p)
(forward-line) ; returns non-nil
nil))
((search-forward-regexp c-awk-terminated-regexp-or-string-here-re nil t))
((search-forward-regexp c-awk-unterminated-regexp-or-string-here-re nil t))
((looking-at "/") (forward-char) t))))) ; division sign.
(defun c-awk-end-of-defun1 ()
;; point is at the start of a "defun". Move to its end. Return end position.
(c-awk-forward-awk-pattern)
(cond
((looking-at "{") (goto-char (scan-sexps (point) 1)))
((looking-at ";") (forward-char))
((eolp))
(t (error "c-awk-end-of-defun1: Failure of c-awk-forward-awk-pattern")))
(point))
(defun c-awk-beginning-of-defun-p ()
;; Are we already at the beginning of a defun? (i.e. at code in column 0
;; which isn't a }, and isn't a continuation line of any sort.
(and (looking-at "^[^#} \t\n\r]")
(not (c-awk-prev-line-incomplete-p))))
(defun c-awk-end-of-defun (&optional arg)
"Move forward to next end of defun. With argument, do it that many times.
Negative argument -N means move back to Nth preceding end of defun.
An end of a defun occurs right after the closing brace that matches the
opening brace at its start, or immediately after the AWK pattern when there is
no explicit action; see function `c-awk-beginning-of-defun'."
(interactive "p")
(or arg (setq arg 1))
(save-match-data
(c-save-buffer-state
nil
(let ((start-point (point)) end-point)
;; Strategy: (For +ve ARG): If we're not already at a beginning-of-defun,
;; move backwards to one.
;; Repeat [(i) move forward to end-of-current-defun (see below);
;; (ii) If this isn't it, move forward to beginning-of-defun].
;; We start counting ARG only when step (i) has passed the original point.
(when (> arg 0)
;; Try to move back to a beginning-of-defun, if not already at one.
(if (not (c-awk-beginning-of-defun-p))
(when (not (c-awk-beginning-of-defun 1)) ; No bo-defun before point.
(goto-char start-point)
(c-awk-beginning-of-defun -1))) ; if this fails, we're at EOB, tough!
;; Now count forward, one defun at a time
(while (and (not (eobp))
(c-awk-end-of-defun1)
(if (> (point) start-point) (setq arg (1- arg)) t)
(> arg 0)
(c-awk-beginning-of-defun -1))))
(when (< arg 0)
(setq end-point start-point)
(while (and (not (bobp))
(c-awk-beginning-of-defun 1)
(if (< (setq end-point (if (bobp) (point)
(save-excursion (c-awk-end-of-defun1))))
start-point)
(setq arg (1+ arg)) t)
(< arg 0)))
(goto-char (min start-point end-point)))))))
(cc-provide 'cc-awk) ; Changed from 'awk-mode, ACM 2002/5/21
;;; arch-tag: c4836289-3aa4-4a59-9934-9ccc2bacccf3
;;; awk-mode.el ends here