1
0
mirror of https://git.savannah.gnu.org/git/emacs.git synced 2024-12-13 09:32:47 +00:00
emacs/lisp/progmodes/ebnf-bnf.el

591 lines
16 KiB
EmacsLisp
Raw Normal View History

;;; ebnf-bnf.el --- parser for EBNF
2000-01-27 14:31:16 +00:00
;; Copyright (C) 1999, 2000, 2001 Free Sofware Foundation, Inc.
2000-01-27 14:31:16 +00:00
;; Author: Vinicius Jose Latorre <vinicius@cpqd.com.br>
2000-01-27 14:31:16 +00:00
;; Maintainer: Vinicius Jose Latorre <vinicius@cpqd.com.br>
;; Keywords: wp, ebnf, PostScript
2003-02-10 09:56:14 +00:00
;; Time-stamp: <2003-02-10 10:29:48 jbarranquero>
;; Version: 1.7
2000-01-27 14:31:16 +00:00
2000-01-31 20:11:54 +00:00
;; This file is part of GNU Emacs.
2000-01-27 14:31:16 +00:00
2000-01-31 20:11:54 +00:00
;; GNU Emacs is free software; you can redistribute it and/or modify
2000-01-27 14:31:16 +00:00
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2, or (at your option)
;; any later version.
2000-01-31 20:11:54 +00:00
;; GNU Emacs is distributed in the hope that it will be useful,
2000-01-27 14:31:16 +00:00
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs; see the file COPYING. If not, write to the
;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
;; Boston, MA 02111-1307, USA.
;;; Commentary:
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;;
;; This is part of ebnf2ps package.
;;
;; This package defines a parser for EBNF.
;;
;; See ebnf2ps.el for documentation.
;;
;;
;; EBNF Syntax
;; -----------
;;
;; The current EBNF that ebnf2ps accepts has the following constructions:
;;
;; ; comment (until end of line)
;; A non-terminal
;; "C" terminal
;; ?C? special
;; $A default non-terminal
;; $"C" default terminal
;; $?C? default special
;; A = B. production (A is the header and B the body)
;; C D sequence (C occurs before D)
;; C | D alternative (C or D occurs)
;; A - B exception (A excluding B, B without any non-terminal)
;; n * A repetition (A repeats n (integer) times)
;; (C) group (expression C is grouped together)
;; [C] optional (C may or not occurs)
;; C+ one or more occurrences of C
;; {C}+ one or more occurrences of C
;; {C}* zero or more occurrences of C
;; {C} zero or more occurrences of C
;; C / D equivalent to: C {D C}*
;; {C || D}+ equivalent to: C {D C}*
;; {C || D}* equivalent to: [C {D C}*]
;; {C || D} equivalent to: [C {D C}*]
;;
;; The EBNF syntax written using the notation above is:
;;
;; EBNF = {production}+.
;;
;; production = non_terminal "=" body ".". ;; production
;;
;; body = {sequence || "|"}*. ;; alternative
;;
;; sequence = {exception}*. ;; sequence
;;
;; exception = repeat [ "-" repeat]. ;; exception
;;
;; repeat = [ integer "*" ] term. ;; repetition
;;
;; term = factor
;; | [factor] "+" ;; one-or-more
;; | [factor] "/" [factor] ;; one-or-more
;; .
;;
;; factor = [ "$" ] "\"" terminal "\"" ;; terminal
;; | [ "$" ] non_terminal ;; non-terminal
;; | [ "$" ] "?" special "?" ;; special
;; | "(" body ")" ;; group
;; | "[" body "]" ;; zero-or-one
;; | "{" body [ "||" body ] "}+" ;; one-or-more
;; | "{" body [ "||" body ] "}*" ;; zero-or-more
;; | "{" body [ "||" body ] "}" ;; zero-or-more
;; .
;;
;; non_terminal = "[!#%&'*-,0-:<>@-Z\\\\^-z~\\240-\\377]+".
2000-01-27 14:31:16 +00:00
;;
;; terminal = "\\([^\"\\]\\|\\\\[ -~\\240-\\377]\\)+".
;;
;; special = "[^?\\n\\000-\\010\\016-\\037\\177-\\237]*".
;;
;; integer = "[0-9]+".
;;
;; comment = ";" "[^\\n\\000-\\010\\016-\\037\\177-\\237]*" "\\n".
;;
;;
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Code:
2000-01-27 14:31:16 +00:00
(require 'ebnf-otz)
(defvar ebnf-bnf-lex nil
"Value returned by `ebnf-bnf-lex' function.")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2003-02-10 09:56:14 +00:00
;; Syntactic analyzer
2000-01-27 14:31:16 +00:00
;;; EBNF = {production}+.
(defun ebnf-bnf-parser (start)
"EBNF parser."
(let ((total (+ (- ebnf-limit start) 1))
(bias (1- start))
(origin (point))
prod-list token rule)
(goto-char start)
(setq token (ebnf-bnf-lex))
(and (eq token 'end-of-input)
(error "Invalid EBNF file format"))
2000-01-27 14:31:16 +00:00
(while (not (eq token 'end-of-input))
(ebnf-message-float
"Parsing...%s%%"
(/ (* (- (point) bias) 100.0) total))
(setq token (ebnf-production token)
rule (cdr token)
token (car token))
(or (ebnf-add-empty-rule-list rule)
(setq prod-list (cons rule prod-list))))
(goto-char origin)
prod-list))
;;; production = non-terminal "=" body ".".
(defun ebnf-production (token)
(let ((header ebnf-bnf-lex)
(action ebnf-action)
body)
(setq ebnf-action nil)
(or (eq token 'non-terminal)
(error "Invalid header production"))
2000-01-27 14:31:16 +00:00
(or (eq (ebnf-bnf-lex) 'equal)
(error "Invalid production: missing `='"))
2000-01-27 14:31:16 +00:00
(setq body (ebnf-body))
(or (eq (car body) 'period)
(error "Invalid production: missing `.'"))
2000-01-27 14:31:16 +00:00
(setq body (cdr body))
(ebnf-eps-add-production header)
(cons (ebnf-bnf-lex)
(ebnf-make-production header body action))))
;;; body = {sequence || "|"}*.
(defun ebnf-body ()
(let (body sequence)
(while (eq (car (setq sequence (ebnf-sequence))) 'alternative)
(setq sequence (cdr sequence)
body (cons sequence body)))
(ebnf-token-alternative body sequence)))
;;; sequence = {exception}*.
(defun ebnf-sequence ()
(let ((token (ebnf-bnf-lex))
seq term)
(while (setq term (ebnf-exception token)
token (car term)
term (cdr term))
(setq seq (cons term seq)))
(cons token
(cond
;; null sequence
((null seq)
(ebnf-make-empty))
;; sequence with only one element
((= (length seq) 1)
(car seq))
;; a real sequence
(t
(ebnf-make-sequence (nreverse seq)))
))))
;;; exception = repeat [ "-" repeat].
(defun ebnf-exception (token)
(let ((term (ebnf-repeat token)))
(if (not (eq (car term) 'except))
;; repeat
term
;; repeat - repeat
(let ((exception (ebnf-repeat (ebnf-bnf-lex))))
(ebnf-no-non-terminal (cdr exception))
(ebnf-token-except (cdr term) exception)))))
(defun ebnf-no-non-terminal (node)
(and (vectorp node)
(let ((kind (ebnf-node-kind node)))
(cond
((eq kind 'ebnf-generate-non-terminal)
(error "Exception sequence should not contain a non-terminal"))
2000-01-27 14:31:16 +00:00
((eq kind 'ebnf-generate-repeat)
(ebnf-no-non-terminal (ebnf-node-separator node)))
((memq kind '(ebnf-generate-optional ebnf-generate-except))
(ebnf-no-non-terminal (ebnf-node-list node)))
((memq kind '(ebnf-generate-one-or-more ebnf-generate-zero-or-more))
(ebnf-no-non-terminal (ebnf-node-list node))
(ebnf-no-non-terminal (ebnf-node-separator node)))
((memq kind '(ebnf-generate-alternative ebnf-generate-sequence))
(let ((seq (ebnf-node-list node)))
(while seq
(ebnf-no-non-terminal (car seq))
(setq seq (cdr seq)))))
))))
;;; repeat = [ integer "*" ] term.
(defun ebnf-repeat (token)
(if (not (eq token 'integer))
(ebnf-term token)
(let ((times ebnf-bnf-lex))
(or (eq (ebnf-bnf-lex) 'repeat)
(error "Missing `*'"))
2000-01-27 14:31:16 +00:00
(ebnf-token-repeat times (ebnf-term (ebnf-bnf-lex))))))
;;; term = factor
;;; | [factor] "+" ;; one-or-more
;;; | [factor] "/" [factor] ;; one-or-more
;;; .
(defun ebnf-term (token)
(let ((factor (ebnf-factor token)))
(and factor
(setq token (ebnf-bnf-lex)))
(cond
;; [factor] +
((eq token 'one-or-more)
(cons (ebnf-bnf-lex)
(and factor
(let ((kind (ebnf-node-kind factor)))
(cond
;; { A }+ + ==> { A }+
;; { A }* + ==> { A }*
((memq kind '(ebnf-generate-zero-or-more
ebnf-generate-one-or-more))
factor)
;; [ A ] + ==> { A }*
((eq kind 'ebnf-generate-optional)
(ebnf-make-zero-or-more (list factor)))
;; A +
(t
(ebnf-make-one-or-more (list factor)))
)))))
;; [factor] / [factor]
((eq token 'list)
(setq token (ebnf-bnf-lex))
(let ((sep (ebnf-factor token)))
(and sep
(setq factor (or factor (ebnf-make-empty))))
(cons (if sep
(ebnf-bnf-lex)
token)
(and factor
(ebnf-make-one-or-more factor sep)))))
;; factor
(t
(cons token factor))
)))
;;; factor = [ "$" ] "\"" terminal "\"" ;; terminal
;;; | [ "$" ] non_terminal ;; non-terminal
;;; | [ "$" ] "?" special "?" ;; special
;;; | "(" body ")" ;; group
;;; | "[" body "]" ;; zero-or-one
;;; | "{" body [ "||" body ] "}+" ;; one-or-more
;;; | "{" body [ "||" body ] "}*" ;; zero-or-more
;;; | "{" body [ "||" body ] "}" ;; zero-or-more
;;; .
(defun ebnf-factor (token)
(cond
;; terminal
((eq token 'terminal)
(ebnf-make-terminal ebnf-bnf-lex))
;; non-terminal
((eq token 'non-terminal)
(ebnf-make-non-terminal ebnf-bnf-lex))
;; special
((eq token 'special)
(ebnf-make-special ebnf-bnf-lex))
;; group
((eq token 'begin-group)
(let ((body (ebnf-body)))
(or (eq (car body) 'end-group)
(error "Missing `)'"))
2000-01-27 14:31:16 +00:00
(cdr body)))
;; optional
((eq token 'begin-optional)
(let ((body (ebnf-body)))
(or (eq (car body) 'end-optional)
(error "Missing `]'"))
2000-01-27 14:31:16 +00:00
(ebnf-token-optional (cdr body))))
;; list
((eq token 'begin-list)
(let* ((body (ebnf-body))
(token (car body))
(list-part (cdr body))
sep-part)
(and (eq token 'list-separator)
;; { A || B }
(setq body (ebnf-body) ; get separator
token (car body)
sep-part (cdr body)))
(cond
;; { A }+
((eq token 'end-one-or-more)
(ebnf-make-one-or-more list-part sep-part))
;; { A }*
((eq token 'end-zero-or-more)
(ebnf-make-zero-or-more list-part sep-part))
(t
(error "Missing `}+', `}*' or `}'"))
2000-01-27 14:31:16 +00:00
)))
;; no term
(t
nil)
))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Lexical analyzer
(defconst ebnf-bnf-token-table (make-vector 256 'error)
"Vector used to map characters to a lexical token.")
(defun ebnf-bnf-initialize ()
"Initialize EBNF token table."
;; control character & control 8-bit character are set to `error'
(let ((char ?\040))
;; printable character:
(while (< char ?\060)
(aset ebnf-bnf-token-table char 'non-terminal)
(setq char (1+ char)))
;; digits:
(while (< char ?\072)
(aset ebnf-bnf-token-table char 'integer)
(setq char (1+ char)))
;; printable character:
(while (< char ?\177)
(aset ebnf-bnf-token-table char 'non-terminal)
(setq char (1+ char)))
;; European 8-bit accentuated characters:
(setq char ?\240)
(while (< char ?\400)
(aset ebnf-bnf-token-table char 'non-terminal)
(setq char (1+ char)))
;; Override space characters:
(aset ebnf-bnf-token-table ?\013 'space) ; [VT] vertical tab
(aset ebnf-bnf-token-table ?\n 'space) ; [NL] linefeed
(aset ebnf-bnf-token-table ?\r 'space) ; [CR] carriage return
(aset ebnf-bnf-token-table ?\t 'space) ; [HT] horizontal tab
(aset ebnf-bnf-token-table ?\ 'space) ; [SP] space
;; Override form feed character:
(aset ebnf-bnf-token-table ?\f 'form-feed) ; [FF] form feed
;; Override other lexical characters:
(aset ebnf-bnf-token-table ?\" 'terminal)
(aset ebnf-bnf-token-table ?\? 'special)
(aset ebnf-bnf-token-table ?\( 'begin-group)
(aset ebnf-bnf-token-table ?\) 'end-group)
(aset ebnf-bnf-token-table ?* 'repeat)
(aset ebnf-bnf-token-table ?- 'except)
(aset ebnf-bnf-token-table ?= 'equal)
(aset ebnf-bnf-token-table ?\[ 'begin-optional)
(aset ebnf-bnf-token-table ?\] 'end-optional)
(aset ebnf-bnf-token-table ?\{ 'begin-list)
(aset ebnf-bnf-token-table ?| 'alternative)
(aset ebnf-bnf-token-table ?\} 'end-list)
(aset ebnf-bnf-token-table ?/ 'list)
(aset ebnf-bnf-token-table ?+ 'one-or-more)
(aset ebnf-bnf-token-table ?$ 'default)
;; Override comment character:
(aset ebnf-bnf-token-table ebnf-lex-comment-char 'comment)
;; Override end of production character:
(aset ebnf-bnf-token-table ebnf-lex-eop-char 'period)))
;; replace the range "\240-\377" (see `ebnf-range-regexp').
(defconst ebnf-bnf-non-terminal-chars
(ebnf-range-regexp "!#%&'*-,0-:<>@-Z\\\\^-z~" ?\240 ?\377))
2000-01-27 14:31:16 +00:00
(defun ebnf-bnf-lex ()
"Lexical analyser for EBNF.
Return a lexical token.
See documentation for variable `ebnf-bnf-lex'."
(if (>= (point) ebnf-limit)
'end-of-input
(let (token)
;; skip spaces and comments
(while (if (> (following-char) 255)
(progn
(setq token 'error)
nil)
(setq token (aref ebnf-bnf-token-table (following-char)))
(cond
((eq token 'space)
(skip-chars-forward " \013\n\r\t" ebnf-limit)
(< (point) ebnf-limit))
((eq token 'comment)
(ebnf-bnf-skip-comment))
((eq token 'form-feed)
(forward-char)
(setq ebnf-action 'form-feed))
(t nil)
)))
(setq ebnf-default-p nil)
(cond
;; end of input
((>= (point) ebnf-limit)
'end-of-input)
;; error
((eq token 'error)
(error "Illegal character"))
2000-01-27 14:31:16 +00:00
;; default
((eq token 'default)
(forward-char)
(if (memq (aref ebnf-bnf-token-table (following-char))
'(terminal non-terminal special))
(prog1
(ebnf-bnf-lex)
(setq ebnf-default-p t))
(error "Illegal `default' element")))
2000-01-27 14:31:16 +00:00
;; integer
((eq token 'integer)
(setq ebnf-bnf-lex (ebnf-buffer-substring "0-9"))
'integer)
;; special: ?special?
((eq token 'special)
(setq ebnf-bnf-lex (concat "?"
(ebnf-string " ->@-~" ?\? "special")
"?"))
'special)
;; terminal: "string"
((eq token 'terminal)
(setq ebnf-bnf-lex (ebnf-unescape-string (ebnf-get-string)))
'terminal)
;; non-terminal or terminal
((eq token 'non-terminal)
(setq ebnf-bnf-lex (ebnf-buffer-substring ebnf-bnf-non-terminal-chars))
2000-01-27 14:31:16 +00:00
(let ((case-fold-search ebnf-case-fold-search)
match)
(if (and ebnf-terminal-regexp
(setq match (string-match ebnf-terminal-regexp
ebnf-bnf-lex))
(zerop match)
(= (match-end 0) (length ebnf-bnf-lex)))
'terminal
'non-terminal)))
;; end of list: }+, }*, }
((eq token 'end-list)
(forward-char)
(cond
((= (following-char) ?+)
(forward-char)
'end-one-or-more)
((= (following-char) ?*)
(forward-char)
'end-zero-or-more)
(t
'end-zero-or-more)
))
;; alternative: |, ||
((eq token 'alternative)
(forward-char)
(if (/= (following-char) ?|)
'alternative
(forward-char)
'list-separator))
;; miscellaneous: {, (, ), [, ], ., =, /, +, -, *
(t
(forward-char)
token)
))))
;; replace the range "\177-\237" (see `ebnf-range-regexp').
(defconst ebnf-bnf-comment-chars
(ebnf-range-regexp "^\n\000-\010\016-\037" ?\177 ?\237))
2000-01-27 14:31:16 +00:00
(defun ebnf-bnf-skip-comment ()
(forward-char)
(cond
;; open EPS file
((and ebnf-eps-executing (= (following-char) ?\[))
(ebnf-eps-add-context (ebnf-bnf-eps-filename)))
;; close EPS file
((and ebnf-eps-executing (= (following-char) ?\]))
(ebnf-eps-remove-context (ebnf-bnf-eps-filename)))
;; any other action in comment
(t
(setq ebnf-action (aref ebnf-comment-table (following-char)))
(skip-chars-forward ebnf-bnf-comment-chars ebnf-limit))
)
;; check for a valid end of comment
(cond ((>= (point) ebnf-limit)
nil)
((= (following-char) ?\n)
(forward-char)
t)
(t
(error "Illegal character"))
2000-01-27 14:31:16 +00:00
))
(defun ebnf-bnf-eps-filename ()
(forward-char)
(ebnf-buffer-substring ebnf-bnf-comment-chars))
(defun ebnf-unescape-string (str)
(let* ((len (length str))
(size (1- len))
(istr 0)
(n-esc 0))
;; count number of escapes
(while (< istr size)
(setq istr (+ istr
(if (= (aref str istr) ?\\)
(progn
(setq n-esc (1+ n-esc))
2)
1))))
(if (zerop n-esc)
;; no escapes
str
;; at least one escape
(let ((new (make-string (- len n-esc) ?\ ))
(inew 0))
;; eliminate all escapes
(setq istr 0)
(while (> n-esc 0)
(and (= (aref str istr) ?\\)
(setq istr (1+ istr)
n-esc (1- n-esc)))
(aset new inew (aref str istr))
(setq inew (1+ inew)
istr (1+ istr)))
;; remaining string has no escape
(while (< istr len)
(aset new inew (aref str istr))
(setq inew (1+ inew)
istr (1+ istr)))
new))))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(provide 'ebnf-bnf)
2003-09-01 15:45:59 +00:00
;;; arch-tag: 3b1834d3-8367-475b-80d5-8e0bbd00ce50
2000-01-27 14:31:16 +00:00
;;; ebnf-bnf.el ends here