1
0
mirror of https://git.savannah.gnu.org/git/emacs.git synced 2024-12-04 08:47:11 +00:00
emacs/lisp/progmodes/ebnf-abn.el

664 lines
18 KiB
EmacsLisp
Raw Normal View History

2004-02-24 22:58:07 +00:00
;;; ebnf-abn.el --- parser for ABNF (Augmented BNF)
;; Copyright (C) 2004 Free Sofware Foundation, Inc.
;; Author: Vinicius Jose Latorre <viniciusjl@ig.com.br>
;; Maintainer: Vinicius Jose Latorre <viniciusjl@ig.com.br>
;; Time-stamp: <2004/02/23 22:38:59 vinicius>
;; Keywords: wp, ebnf, PostScript
;; Version: 1.0
;; This file is part of GNU Emacs.
;; GNU Emacs is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2, or (at your option)
;; any later version.
;; GNU Emacs is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs; see the file COPYING. If not, write to the
;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
;; Boston, MA 02111-1307, USA.
;;; Commentary:
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;;
;; This is part of ebnf2ps package.
;;
;; This package defines a parser for ABNF (Augmented BNF).
;;
;; See ebnf2ps.el for documentation.
;;
;;
;; ABNF Syntax
;; -----------
;;
;; See the URL:
;; `http://www.faqs.org/rfcs/rfc2234.html'
;; or
;; `http://www.rnp.br/ietf/rfc/rfc2234.txt'
;; ("Augmented BNF for Syntax Specifications: ABNF").
;;
;;
;; rulelist = 1*( rule / (*c-wsp c-nl) )
;;
;; rule = rulename defined-as elements c-nl
;; ; continues if next line starts with white space
;;
;; rulename = ALPHA *(ALPHA / DIGIT / "-")
;;
;; defined-as = *c-wsp ("=" / "=/") *c-wsp
;; ; basic rules definition and incremental
;; ; alternatives
;;
;; elements = alternation *c-wsp
;;
;; c-wsp = WSP / (c-nl WSP)
;;
;; c-nl = comment / CRLF
;; ; comment or newline
;;
;; comment = ";" *(WSP / VCHAR) CRLF
;;
;; alternation = concatenation
;; *(*c-wsp "/" *c-wsp concatenation)
;;
;; concatenation = repetition *(1*c-wsp repetition)
;;
;; repetition = [repeat] element
;;
;; repeat = 1*DIGIT / (*DIGIT "*" *DIGIT)
;;
;; element = rulename / group / option /
;; char-val / num-val / prose-val
;;
;; group = "(" *c-wsp alternation *c-wsp ")"
;;
;; option = "[" *c-wsp alternation *c-wsp "]"
;;
;; char-val = DQUOTE *(%x20-21 / %x23-7E) DQUOTE
;; ; quoted string of SP and VCHAR without DQUOTE
;;
;; num-val = "%" (bin-val / dec-val / hex-val)
;;
;; bin-val = "b" 1*BIT
;; [ 1*("." 1*BIT) / ("-" 1*BIT) ]
;; ; series of concatenated bit values
;; ; or single ONEOF range
;;
;; dec-val = "d" 1*DIGIT
;; [ 1*("." 1*DIGIT) / ("-" 1*DIGIT) ]
;;
;; hex-val = "x" 1*HEXDIG
;; [ 1*("." 1*HEXDIG) / ("-" 1*HEXDIG) ]
;;
;; prose-val = "<" *(%x20-3D / %x3F-7E) ">"
;; ; bracketed string of SP and VCHAR without
;; ; angles
;; ; prose description, to be used as last resort
;;
;; ; Core rules -- the coding depends on the system, here is used 7-bit ASCII
;;
;; ALPHA = %x41-5A / %x61-7A
;; ; A-Z / a-z
;;
;; BIT = "0" / "1"
;;
;; CHAR = %x01-7F
;; ; any 7-bit US-ASCII character, excluding NUL
;;
;; CR = %x0D
;; ; carriage return
;;
;; CRLF = CR LF
;; ; Internet standard newline
;;
;; CTL = %x00-1F / %x7F
;; ; controls
;;
;; DIGIT = %x30-39
;; ; 0-9
;;
;; DQUOTE = %x22
;; ; " (Double Quote)
;;
;; HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F"
;;
;; HTAB = %x09
;; ; horizontal tab
;;
;; LF = %x0A
;; ; linefeed
;;
;; LWSP = *(WSP / CRLF WSP)
;; ; linear white space (past newline)
;;
;; OCTET = %x00-FF
;; ; 8 bits of data
;;
;; SP = %x20
;; ; space
;;
;; VCHAR = %x21-7E
;; ; visible (printing) characters
;;
;; WSP = SP / HTAB
;; ; white space
;;
;;
;; NOTES:
;;
;; 1. Rules name and terminal strings are case INSENSITIVE.
;; So, the following rule names are all equals:
;; Rule-name, rule-Name, rule-name, RULE-NAME
;; Also, the following strings are equals:
;; "abc", "ABC", "aBc", "Abc", "aBC", etc.
;;
;; 2. To have a case SENSITIVE string, use the character notation.
;; For example, to specify the lowercase string "abc", use:
;; %d97.98.99
;;
;; 3. There are no implicit spaces between elements, for example, the
;; following rules:
;;
;; foo = %x61 ; a
;;
;; bar = %x62 ; b
;;
;; mumble = foo bar foo
;;
;; Are equivalent to the following rule:
;;
;; mumble = %x61.62.61
;;
;; If spaces are needed, it should be explicit specified, like:
;;
;; spaces = 1*(%x20 / %x09) ; one or more spaces or tabs
;;
;; mumble = foo spaces bar spaces foo
;;
;; 4. Lines starting with space or tab are considered a continuation line.
;; For example, the rule:
;;
;; rule = foo
;; bar
;;
;; Is equivalent to:
;;
;; rule = foo bar
;;
;;
;; Differences Between ABNF And ebnf2ps ABNF
;; -----------------------------------------
;;
;; Besides the characters that ABNF accepts, ebnf2ps ABNF accepts also the
;; underscore (_) for rule name and european 8-bit accentuated characters (from
;; \240 to \377) for rule name, string and comment.
;;
;;
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Code:
(require 'ebnf-otz)
(defvar ebnf-abn-lex nil
"Value returned by `ebnf-abn-lex' function.")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Syntactic analyzer
;;; rulelist = 1*( rule / (*c-wsp c-nl) )
(defun ebnf-abn-parser (start)
"ABNF parser."
(let ((total (+ (- ebnf-limit start) 1))
(bias (1- start))
(origin (point))
rule-list token rule)
(goto-char start)
(setq token (ebnf-abn-lex))
(and (eq token 'end-of-input)
(error "Invalid ABNF file format"))
(while (not (eq token 'end-of-input))
(ebnf-message-float
"Parsing...%s%%"
(/ (* (- (point) bias) 100.0) total))
(setq token (ebnf-abn-rule token)
rule (cdr token)
token (car token))
(or (ebnf-add-empty-rule-list rule)
(setq rule-list (cons rule rule-list))))
(goto-char origin)
rule-list))
;;; rule = rulename defined-as elements c-nl
;;; ; continues if next line starts with white space
;;;
;;; rulename = ALPHA *(ALPHA / DIGIT / "-")
;;;
;;; defined-as = *c-wsp ("=" / "=/") *c-wsp
;;; ; basic rules definition and incremental
;;; ; alternatives
;;;
;;; elements = alternation *c-wsp
;;;
;;; c-wsp = WSP / (c-nl WSP)
;;;
;;; c-nl = comment / CRLF
;;; ; comment or newline
;;;
;;; comment = ";" *(WSP / VCHAR) CRLF
(defun ebnf-abn-rule (token)
(let ((name ebnf-abn-lex)
(action ebnf-action)
elements)
(setq ebnf-action nil)
(or (eq token 'non-terminal)
(error "Invalid rule name"))
(setq token (ebnf-abn-lex))
(or (memq token '(equal incremental-alternative))
(error "Invalid rule: missing `=' or `=/'"))
(and (eq token 'incremental-alternative)
(setq name (concat name " =/")))
(setq elements (ebnf-abn-alternation))
(or (memq (car elements) '(end-of-rule end-of-input))
(error "Invalid rule: there is no end of rule"))
(setq elements (cdr elements))
(ebnf-eps-add-production name)
(cons (ebnf-abn-lex)
(ebnf-make-production name elements action))))
;;; alternation = concatenation
;;; *(*c-wsp "/" *c-wsp concatenation)
(defun ebnf-abn-alternation ()
(let (body concatenation)
(while (eq (car (setq concatenation
(ebnf-abn-concatenation (ebnf-abn-lex))))
'alternative)
(setq body (cons (cdr concatenation) body)))
(ebnf-token-alternative body concatenation)))
;;; concatenation = repetition *(1*c-wsp repetition)
(defun ebnf-abn-concatenation (token)
(let ((term (ebnf-abn-repetition token))
seq)
(or (setq token (car term)
term (cdr term))
(error "Empty element"))
(setq seq (cons term seq))
(while (setq term (ebnf-abn-repetition token)
token (car term)
term (cdr term))
(setq seq (cons term seq)))
(cons token
(if (= (length seq) 1)
;; sequence with only one element
(car seq)
;; a real sequence
(ebnf-make-sequence (nreverse seq))))))
;;; repetition = [repeat] element
;;;
;;; repeat = 1*DIGIT / (*DIGIT "*" *DIGIT)
(defun ebnf-abn-repetition (token)
(let (lower upper)
;; INTEGER [ "*" [ INTEGER ] ]
(when (eq token 'integer)
(setq lower ebnf-abn-lex
token (ebnf-abn-lex))
(or (eq token 'repeat)
(setq upper lower)))
;; "*" [ INTEGER ]
(when (eq token 'repeat)
;; only * ==> lower & upper are empty string
(or lower
(setq lower ""
upper ""))
(when (eq (setq token (ebnf-abn-lex)) 'integer)
(setq upper ebnf-abn-lex
token (ebnf-abn-lex))))
(let ((element (ebnf-abn-element token)))
(cond
;; there is a repetition
(lower
(or element
(error "Missing element repetition"))
(setq token (ebnf-abn-lex))
(cond
;; one or more
((and (string= lower "1") (null upper))
(cons token (ebnf-make-one-or-more element)))
;; zero or more
((or (and (string= lower "0") (null upper))
(and (string= lower "") (string= upper "")))
(cons token (ebnf-make-zero-or-more element)))
;; real repetition
(t
(ebnf-token-repeat lower (cons token element) upper))))
;; there is an element
(element
(cons (ebnf-abn-lex) element))
;; something that caller has to deal
(t
(cons token nil))))))
;;; element = rulename / group / option /
;;; char-val / num-val / prose-val
;;;
;;; group = "(" *c-wsp alternation *c-wsp ")"
;;;
;;; option = "[" *c-wsp alternation *c-wsp "]"
;;;
;;; char-val = DQUOTE *(%x20-21 / %x23-7E) DQUOTE
;;; ; quoted string of SP and VCHAR without DQUOTE
;;;
;;; num-val = "%" (bin-val / dec-val / hex-val)
;;;
;;; bin-val = "b" 1*BIT
;;; [ 1*("." 1*BIT) / ("-" 1*BIT) ]
;;; ; series of concatenated bit values
;;; ; or single ONEOF range
;;;
;;; dec-val = "d" 1*DIGIT
;;; [ 1*("." 1*DIGIT) / ("-" 1*DIGIT) ]
;;;
;;; hex-val = "x" 1*HEXDIG
;;; [ 1*("." 1*HEXDIG) / ("-" 1*HEXDIG) ]
;;;
;;; prose-val = "<" *(%x20-3D / %x3F-7E) ">"
;;; ; bracketed string of SP and VCHAR without
;;; ; angles
;;; ; prose description, to be used as last resort
(defun ebnf-abn-element (token)
(cond
;; terminal
((eq token 'terminal)
(ebnf-make-terminal ebnf-abn-lex))
;; non-terminal
((eq token 'non-terminal)
(ebnf-make-non-terminal ebnf-abn-lex))
;; group
((eq token 'begin-group)
(let ((body (ebnf-abn-alternation)))
(or (eq (car body) 'end-group)
(error "Missing `)'"))
(cdr body)))
;; optional
((eq token 'begin-optional)
(let ((body (ebnf-abn-alternation)))
(or (eq (car body) 'end-optional)
(error "Missing `]'"))
(ebnf-token-optional (cdr body))))
;; no element
(t
nil)
))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Lexical analyzer
(defconst ebnf-abn-token-table (make-vector 256 'error)
"Vector used to map characters to a lexical token.")
(defun ebnf-abn-initialize ()
"Initialize EBNF token table."
;; control character & control 8-bit character are set to `error'
(let ((char ?\060))
;; digits: 0-9
(while (< char ?\072)
(aset ebnf-abn-token-table char 'integer)
(setq char (1+ char)))
;; printable character: A-Z
(setq char ?\101)
(while (< char ?\133)
(aset ebnf-abn-token-table char 'non-terminal)
(setq char (1+ char)))
;; printable character: a-z
(setq char ?\141)
(while (< char ?\173)
(aset ebnf-abn-token-table char 'non-terminal)
(setq char (1+ char)))
;; European 8-bit accentuated characters:
(setq char ?\240)
(while (< char ?\400)
(aset ebnf-abn-token-table char 'non-terminal)
(setq char (1+ char)))
;; Override end of line characters:
(aset ebnf-abn-token-table ?\n 'end-of-rule) ; [NL] linefeed
(aset ebnf-abn-token-table ?\r 'end-of-rule) ; [CR] carriage return
;; Override space characters:
(aset ebnf-abn-token-table ?\013 'space) ; [VT] vertical tab
(aset ebnf-abn-token-table ?\t 'space) ; [HT] horizontal tab
(aset ebnf-abn-token-table ?\ 'space) ; [SP] space
;; Override form feed character:
(aset ebnf-abn-token-table ?\f 'form-feed) ; [FF] form feed
;; Override other lexical characters:
(aset ebnf-abn-token-table ?< 'non-terminal)
(aset ebnf-abn-token-table ?% 'terminal)
(aset ebnf-abn-token-table ?\" 'terminal)
(aset ebnf-abn-token-table ?\( 'begin-group)
(aset ebnf-abn-token-table ?\) 'end-group)
(aset ebnf-abn-token-table ?* 'repeat)
(aset ebnf-abn-token-table ?= 'equal)
(aset ebnf-abn-token-table ?\[ 'begin-optional)
(aset ebnf-abn-token-table ?\] 'end-optional)
(aset ebnf-abn-token-table ?/ 'alternative)
;; Override comment character:
(aset ebnf-abn-token-table ?\; 'comment)))
;; replace the range "\240-\377" (see `ebnf-range-regexp').
(defconst ebnf-abn-non-terminal-chars
(ebnf-range-regexp "-_0-9A-Za-z" ?\240 ?\377))
(defconst ebnf-abn-non-terminal-letter-chars
(ebnf-range-regexp "A-Za-z" ?\240 ?\377))
(defun ebnf-abn-lex ()
"Lexical analyser for ABNF.
Return a lexical token.
See documentation for variable `ebnf-abn-lex'."
(if (>= (point) ebnf-limit)
'end-of-input
(let (token)
;; skip spaces and comments
(while (if (> (following-char) 255)
(progn
(setq token 'error)
nil)
(setq token (aref ebnf-abn-token-table (following-char)))
(cond
((eq token 'space)
(skip-chars-forward " \013\t" ebnf-limit)
(< (point) ebnf-limit))
((eq token 'comment)
(ebnf-abn-skip-comment))
((eq token 'form-feed)
(forward-char)
(setq ebnf-action 'form-feed))
((eq token 'end-of-rule)
(ebnf-abn-skip-end-of-rule))
(t nil)
)))
(cond
;; end of input
((>= (point) ebnf-limit)
'end-of-input)
;; error
((eq token 'error)
(error "Illegal character"))
;; end of rule
((eq token 'end-of-rule)
'end-of-rule)
;; integer
((eq token 'integer)
(setq ebnf-abn-lex (ebnf-buffer-substring "0-9"))
'integer)
;; terminal: "string" or %[bdx]NNN((.NNN)+|-NNN)?
((eq token 'terminal)
(setq ebnf-abn-lex
(if (= (following-char) ?\")
(ebnf-abn-string)
(ebnf-abn-character)))
'terminal)
;; non-terminal: NAME or <NAME>
((eq token 'non-terminal)
(let ((prose-p (= (following-char) ?<)))
(when prose-p
(forward-char)
(or (looking-at ebnf-abn-non-terminal-letter-chars)
(error "Invalid prose value")))
(setq ebnf-abn-lex
(ebnf-buffer-substring ebnf-abn-non-terminal-chars))
(when prose-p
(or (= (following-char) ?>)
(error "Invalid prose value"))
(setq ebnf-abn-lex (concat "<" ebnf-abn-lex ">"))))
'non-terminal)
;; equal: =, =/
((eq token 'equal)
(forward-char)
(if (/= (following-char) ?/)
'equal
(forward-char)
'incremental-alternative))
;; miscellaneous: (, ), [, ], /, *
(t
(forward-char)
token)
))))
(defun ebnf-abn-skip-end-of-rule ()
(let (eor-p)
(while (progn
;; end of rule ==> 2 or more consecutive end of lines
(setq eor-p (or (> (skip-chars-forward "\r\n" ebnf-limit) 1)
eor-p))
;; skip spaces
(skip-chars-forward " \013\t" ebnf-limit)
;; skip comments
(and (= (following-char) ?\;)
(ebnf-abn-skip-comment))))
(not eor-p)))
;; replace the range "\177-\237" (see `ebnf-range-regexp').
(defconst ebnf-abn-comment-chars
(ebnf-range-regexp "^\n\000-\010\016-\037" ?\177 ?\237))
(defun ebnf-abn-skip-comment ()
(forward-char)
(cond
;; open EPS file
((and ebnf-eps-executing (= (following-char) ?\[))
(ebnf-eps-add-context (ebnf-abn-eps-filename)))
;; close EPS file
((and ebnf-eps-executing (= (following-char) ?\]))
(ebnf-eps-remove-context (ebnf-abn-eps-filename)))
;; any other action in comment
(t
(setq ebnf-action (aref ebnf-comment-table (following-char)))
(skip-chars-forward ebnf-abn-comment-chars ebnf-limit))
)
;; check for a valid end of comment
(cond ((>= (point) ebnf-limit)
nil)
((= (following-char) ?\n)
t)
(t
(error "Illegal character"))
))
(defun ebnf-abn-eps-filename ()
(forward-char)
(ebnf-buffer-substring ebnf-abn-comment-chars))
;; replace the range "\240-\377" (see `ebnf-range-regexp').
(defconst ebnf-abn-string-chars
(ebnf-range-regexp " -!#-~" ?\240 ?\377))
(defun ebnf-abn-string ()
(buffer-substring-no-properties
(progn
(forward-char)
(point))
(progn
(skip-chars-forward ebnf-abn-string-chars ebnf-limit)
(or (= (following-char) ?\")
(error "Missing `\"'"))
(prog1
(point)
(forward-char)))))
(defun ebnf-abn-character ()
;; %[bdx]NNN((-NNN)|(.NNN)+)?
(buffer-substring-no-properties
(point)
(progn
(forward-char)
(let* ((char (following-char))
(chars (cond ((or (= char ?B) (= char ?b)) "01")
((or (= char ?D) (= char ?d)) "0-9")
((or (= char ?X) (= char ?x)) "0-9A-Fa-f")
(t (error "Invalid terminal value")))))
(forward-char)
(or (> (skip-chars-forward chars ebnf-limit) 0)
(error "Invalid terminal value"))
(if (= (following-char) ?-)
(progn
(forward-char)
(or (> (skip-chars-forward chars ebnf-limit) 0)
(error "Invalid terminal value range")))
(while (= (following-char) ?.)
(forward-char)
(or (> (skip-chars-forward chars ebnf-limit) 0)
(error "Invalid terminal value")))))
(point))))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(provide 'ebnf-abn)
2004-02-25 01:19:08 +00:00
;;; arch-tag: 8d1b3c4d-4226-4393-b9ae-b7ccf07cf779
2004-02-24 22:58:07 +00:00
;;; ebnf-abn.el ends here