1
0
mirror of https://git.savannah.gnu.org/git/emacs.git synced 2024-12-21 10:24:55 +00:00

Add C interface for Unicode character property table.

This commit is contained in:
Kenichi Handa 2011-07-07 07:43:48 +09:00
parent 5c62d13346
commit c805dec0b5
28 changed files with 878 additions and 389 deletions

View File

@ -1,3 +1,38 @@
2011-07-06 Kenichi Handa <handa@m17n.org>
* unidata/unidata-gen.el (unidata-dir): New variable.
(unidata-setup-list): Expand unidata-text-file in unidata-dir.
(unidata-prop-alist): INDEX element may be a function. New
optional element VAL-LIST (for general-category and bidi-class).
New entry `mirroring'.
(unidata-prop-default, unidata-prop-val-list): New subst.
(unidata-get-character, unidata-put-character): Delete them.
(unidata-gen-table-character): New arg IGNORE. Adjusted for the
above changes.
(unidata-get-symbol, unidata-get-integer, unidata-get-numeric)
(unidata-put-symbol, unidata-put-integer, unidata-put-numeric):
Delete them.
(unidata-encode-val): Assume that the first element of VAL-LIST is
a cons (nil . 0).
(unidata-gen-table): Change argument DEFAULT-VALUE to VAL-LIST.
Always store the encoded value.
(unidata-gen-table-symbol): New args DEFAULT-VALUE and VAL-LIST.
Set the 1st and the 2nd extra slots to index numbers for C
functions.
(unidata-gen-table-integer): Likewise.
(unidata-gen-table-numeric): Likewise.
(unidata-gen-table-name): New arg IGNORE.
(unidata-gen-table-decomposition): Likewise.
(unidata-describe-general-category): Add the case nil to the
description alist.
(unidata-gen-mirroring-list): New funciton.
(unidata-gen-files): New arg DATA-DIR. Adjusted for the change of
unidata-prop-alist. Handle the case of storing multiple
char-tables in a file.
* unidata/Makefile.in (${DSTDIR}/charprop.el): New arg to
unidata-gen-files.
2011-05-21 Glenn Morris <rgm@gnu.org>
* bzrmerge.el (bzrmerge-resolve): Suppress prompts about file-locals.

View File

@ -33,9 +33,10 @@ unidata.txt: UnicodeData.txt
${DSTDIR}/charprop.el: unidata-gen.elc unidata.txt
ELC=`/bin/pwd`/unidata-gen.elc; \
DATA=`/bin/pwd`/unidata.txt; \
DATADIR=`/bin/pwd`; \
DATA=unidata.txt; \
cd ${DSTDIR}; \
${RUNEMACS} -batch --load $${ELC} -f unidata-gen-files $${DATA}
${RUNEMACS} -batch --load $${ELC} -f unidata-gen-files $${DATADIR} $${DATA}
../../src/biditype.h: UnicodeData.txt
gawk -F";" -f biditype.awk $< > $@

View File

@ -33,24 +33,25 @@
;;
;; charprop.el
;; It contains a series of forms of this format:
;; (char-code-property-register PROP FILE)
;; (define-char-code-property PROP FILE)
;; where PROP is a symbol representing a character property
;; (name, generic-category, etc), and FILE is a name of one of
;; (name, general-category, etc), and FILE is a name of one of
;; the following files.
;;
;; uni-name.el, uni-category.el, uni-combining.el, uni-bidi.el,
;; uni-decomposition.el, uni-decimal.el, uni-digit.el, uni-numeric.el,
;; uni-mirrored.el, uni-old-name.el, uni-comment.el, uni-uppercase.el,
;; uni-lowercase.el, uni-titlecase.el
;; They each contain a single form of this format:
;; (char-code-property-register PROP CHAR-TABLE)
;; They contain one or more forms of this format:
;; (define-char-code-property PROP CHAR-TABLE)
;; where PROP is the same as above, and CHAR-TABLE is a
;; char-table containing property values in a compressed format.
;;
;; When they are installed in .../lisp/international/, the file
;; "charprop.el" is preloaded in loadup.el. The other files are
;; automatically loaded when the functions `get-char-code-property'
;; and `put-char-code-property' are called.
;; automatically loaded when the Lisp functions
;; `get-char-code-property' and `put-char-code-property', and C
;; function uniprop_table are called.
;;
;; FORMAT OF A CHAR TABLE
;;
@ -62,17 +63,22 @@
;; data in a char-table as below.
;;
;; If succeeding 128*N characters have the same property value, we
;; store that value for them. Otherwise, compress values for
;; succeeding 128 characters into a single string and store it as a
;; value for those characters. The way of compression depends on a
;; property. See the section "SIMPLE TABLE", "RUN-LENGTH TABLE",
;; and "WORD-LIST TABLE".
;; store that value (or the encoded one) for them. Otherwise,
;; compress values (or the encoded ones) for succeeding 128
;; characters into a single string and store it for those
;; characters. The way of compression depends on a property. See
;; the section "SIMPLE TABLE", "RUN-LENGTH TABLE", and "WORD-LIST
;; TABLE".
;; The char table has four extra slots:
;; The char table has five extra slots:
;; 1st: property symbol
;; 2nd: function to call to get a property value
;; 3nd: function to call to put a property value
;; 4th: function to call to get a description of a property value
;; 2nd: function to call to get a property value,
;; or an index number of C function to decode the value,
;; or nil if the value can be directly got from the table.
;; 3nd: function to call to put a property value,
;; or an index number of C function to encode the value,
;; or nil if the value can be directly stored in the table.
;; 4th: function to call to get a description of a property value, or nil
;; 5th: data referred by the above functions
;; List of elements of this form:
@ -82,6 +88,11 @@
(defvar unidata-list nil)
;; Name of the directory containing files of Unicode Character
;; Database.
(defvar unidata-dir nil)
(defun unidata-setup-list (unidata-text-file)
(let* ((table (list nil))
(tail table)
@ -90,6 +101,7 @@
("^<.*Surrogate" . nil)
("^<.*Private Use" . PRIVATE\ USE)))
val char name)
(setq unidata-text-file (expand-file-name unidata-text-file unidata-dir))
(or (file-readable-p unidata-text-file)
(error "File not readable: %s" unidata-text-file))
(with-temp-buffer
@ -134,12 +146,17 @@
(setq unidata-list (cdr table))))
;; Alist of this form:
;; (PROP INDEX GENERATOR FILENAME)
;; (PROP INDEX GENERATOR FILENAME DOCSTRING DESCRIBER VAL-LIST)
;; PROP: character property
;; INDEX: index to each element of unidata-list for PROP
;; INDEX: index to each element of unidata-list for PROP.
;; It may be a function that generates an alist of character codes
;; vs. the corresponding property values.
;; GENERATOR: function to generate a char-table
;; FILENAME: filename to store the char-table
;; DOCSTRING: docstring for the property
;; DESCRIBER: function to call to get a description string of property value
;; DEFAULT: the default value of the property
;; VAL-LIST: list of specially ordered property values
(defconst unidata-prop-alist
'((name
@ -152,7 +169,12 @@ Property value is a string.")
Property value is one of the following symbols:
Lu, Ll, Lt, Lm, Lo, Mn, Mc, Me, Nd, Nl, No, Pc, Pd, Ps, Pe, Pi, Pf, Po,
Sm, Sc, Sk, So, Zs, Zl, Zp, Cc, Cf, Cs, Co, Cn"
unidata-describe-general-category)
unidata-describe-general-category
nil
;; The order of elements must be in sync with unicode_category_t
;; in src/character.h.
(Lu Ll Lt Lm Lo Mn Mc Me Nd Nl No Pc Pd Ps Pe Pi Pf Po
Sm Sc Sk So Zs Zl Zp Cc Cf Cs Co Cn))
(canonical-combining-class
3 unidata-gen-table-integer "uni-combining.el"
"Unicode canonical combining class.
@ -164,7 +186,11 @@ Property value is an integer."
Property value is one of the following symbols:
L, LRE, LRO, R, AL, RLE, RLO, PDF, EN, ES, ET,
AN, CS, NSM, BN, B, S, WS, ON"
unidata-describe-bidi-class)
unidata-describe-bidi-class
L
;; The order of elements must be in sync with bidi_type_t in
;; src/dispextern.h.
(L R EN AN BN B AL LRE LRO RLE RLO PDF ES ET CS NSM S WS ON))
(decomposition
5 unidata-gen-table-decomposition "uni-decomposition.el"
"Unicode decomposition mapping.
@ -188,7 +214,7 @@ Property value is an integer or a floating point.")
(mirrored
9 unidata-gen-table-symbol "uni-mirrored.el"
"Unicode bidi mirrored flag.
Property value is a symbol `Y' or `N'.")
Property value is a symbol `Y' or `N'. See also the property `mirroring'.")
(old-name
10 unidata-gen-table-name "uni-old-name.el"
"Unicode old names as published in Unicode 1.0.
@ -211,7 +237,12 @@ Property value is a character."
14 unidata-gen-table-character "uni-titlecase.el"
"Unicode simple titlecase mapping.
Property value is a character."
string)))
string)
(mirroring
unidata-gen-mirroring-list unidata-gen-table-character "uni-mirrored.el"
"Unicode bidi-mirroring characters.
Property value is a character that has the corresponding mirroring image,
or nil for non-mirrored character.")))
;; Functions to access the above data.
(defsubst unidata-prop-index (prop) (nth 1 (assq prop unidata-prop-alist)))
@ -219,6 +250,8 @@ Property value is a character."
(defsubst unidata-prop-file (prop) (nth 3 (assq prop unidata-prop-alist)))
(defsubst unidata-prop-docstring (prop) (nth 4 (assq prop unidata-prop-alist)))
(defsubst unidata-prop-describer (prop) (nth 5 (assq prop unidata-prop-alist)))
(defsubst unidata-prop-default (prop) (nth 6 (assq prop unidata-prop-alist)))
(defsubst unidata-prop-val-list (prop) (nth 7 (assq prop unidata-prop-alist)))
;; SIMPLE TABLE
@ -227,52 +260,34 @@ Property value is a character."
;; values of succeeding character codes are usually different, we use
;; a char-table described here to store such values.
;;
;; If succeeding 128 characters has no property, a char-table has the
;; symbol t for them. Otherwise a char-table has a string of the
;; following format for them.
;; A char-table divides character code space (#x0..#x3FFFFF) into
;; #x8000 blocks (each block contains 128 characters).
;; If all characters of a block have no property, a char-table has the
;; symbol nil for that block. Otherwise a char-table has a string of
;; the following format for it.
;;
;; The first character of the string is FIRST-INDEX.
;; The Nth (N > 0) character of the string is a property value of the
;; character (BLOCK-HEAD + FIRST-INDEX + N - 1), where BLOCK-HEAD is
;; the first of the characters in the block.
;; The first character of the string is ?\001.
;; The second character of the string is FIRST-INDEX.
;; The Nth (N > 1) character of the string is a property value of the
;; character (BLOCK-HEAD + FIRST-INDEX + N - 2), where BLOCK-HEAD is
;; the first character of the block.
;;
;; The 4th extra slot of a char-table is nil.
;; This kind of char-table has these extra slots:
;; 1st: the property symbol
;; 2nd: nil
;; 3rd: 0 (corresponding to uniprop_encode_character in chartab.c)
;; 4th to 5th: nil
(defun unidata-get-character (char val table)
(cond
((characterp val)
val)
((stringp val)
(let* ((len (length val))
(block-head (lsh (lsh char -7) 7))
(vec (make-vector 128 nil))
(first-index (aref val 0)))
(dotimes (i (1- len))
(let ((elt (aref val (1+ i))))
(if (> elt 0)
(aset vec (+ first-index i) elt))))
(dotimes (i 128)
(aset table (+ block-head i) (aref vec i)))
(aref vec (- char block-head))))))
(defun unidata-put-character (char val table)
(or (characterp val)
(not val)
(error "Not a character nor nil: %S" val))
(let ((current-val (aref table char)))
(unless (eq current-val val)
(if (stringp current-val)
(funcall (char-table-extra-slot table 1) char current-val table))
(aset table char val))))
(defun unidata-gen-table-character (prop)
(defun unidata-gen-table-character (prop &rest ignore)
(let ((table (make-char-table 'char-code-property-table))
(prop-idx (unidata-prop-index prop))
(vec (make-vector 128 0))
(tail unidata-list)
elt range val idx slot)
(set-char-table-range table (cons 0 (max-char)) t)
(if (functionp prop-idx)
(setq tail (funcall prop-idx)
prop-idx 1))
(while tail
(setq elt (car tail) tail (cdr tail))
(setq range (car elt)
@ -301,7 +316,7 @@ Property value is a character."
(setq first-index last-index)))
(setq tail (cdr tail)))
(when first-index
(let ((str (string first-index))
(let ((str (string 1 first-index))
c)
(while (<= first-index last-index)
(setq str (format "%s%c" str (or (aref vec first-index) 0))
@ -309,184 +324,78 @@ Property value is a character."
(set-char-table-range table (cons start limit) str))))))
(set-char-table-extra-slot table 0 prop)
(byte-compile 'unidata-get-character)
(byte-compile 'unidata-put-character)
(set-char-table-extra-slot table 1 (symbol-function 'unidata-get-character))
(set-char-table-extra-slot table 2 (symbol-function 'unidata-put-character))
(set-char-table-extra-slot table 2 0)
table))
;; RUN-LENGTH TABLE
;;
;; If the type of character property value is symbol, integer,
;; boolean, or character, we use a char-table described here to store
;; the values.
;; If many characters of successive character codes have the same
;; property value, we use a char-table described here to store the
;; values.
;;
;; The 4th extra slot is a vector of property values (VAL-TABLE), and
;; values for succeeding 128 characters are encoded into this
;; character sequence:
;; At first, instead of a value itself, we store an index number to
;; the VAL-TABLE (5th extra slot) in the table. We call that index
;; number as VAL-CODE here after.
;;
;; A char-table divides character code space (#x0..#x3FFFFF) into
;; #x8000 blocks (each block contains 128 characters).
;;
;; If all characters of a block have the same value, a char-table has
;; VAL-CODE for that block. Otherwise a char-table has a string of
;; the following format for that block.
;;
;; The first character of the string is ?\002.
;; The following characters has this form:
;; ( VAL-CODE RUN-LENGTH ? ) +
;; where:
;; VAL-CODE (0..127):
;; (VAL-CODE - 1) is an index into VAL-TABLE.
;; The value 0 means no-value.
;; VAL-CODE (0..127): index into VAL-TABLE.
;; RUN-LENGTH (130..255):
;; (RUN-LENGTH - 128) specifies how many characters have the same
;; value. If omitted, it means 1.
;; Return a symbol-type character property value of CHAR. VAL is the
;; current value of (aref TABLE CHAR).
(defun unidata-get-symbol (char val table)
(let ((val-table (char-table-extra-slot table 4)))
(cond ((symbolp val)
val)
((stringp val)
(let ((first-char (lsh (lsh char -7) 7))
(str val)
(len (length val))
(idx 0)
this-val count)
(set-char-table-range table (cons first-char (+ first-char 127))
nil)
(while (< idx len)
(setq val (aref str idx) idx (1+ idx)
count (if (< idx len) (aref str idx) 1))
(setq val (and (> val 0) (aref val-table (1- val)))
count (if (< count 128)
1
(prog1 (- count 128) (setq idx (1+ idx)))))
(dotimes (i count)
(if val
(aset table first-char val))
(if (= first-char char)
(setq this-val val))
(setq first-char (1+ first-char))))
this-val))
((> val 0)
(aref val-table (1- val))))))
;; Return a integer-type character property value of CHAR. VAL is the
;; current value of (aref TABLE CHAR).
(defun unidata-get-integer (char val table)
(let ((val-table (char-table-extra-slot table 4)))
(cond ((integerp val)
val)
((stringp val)
(let ((first-char (lsh (lsh char -7) 7))
(str val)
(len (length val))
(idx 0)
this-val count)
(while (< idx len)
(setq val (aref str idx) idx (1+ idx)
count (if (< idx len) (aref str idx) 1))
(setq val (and (> val 0) (aref val-table (1- val)))
count (if (< count 128)
1
(prog1 (- count 128) (setq idx (1+ idx)))))
(dotimes (i count)
(aset table first-char val)
(if (= first-char char)
(setq this-val val))
(setq first-char (1+ first-char))))
this-val)))))
;; Return a numeric-type (integer or float) character property value
;; of CHAR. VAL is the current value of (aref TABLE CHAR).
(defun unidata-get-numeric (char val table)
(cond
((numberp val)
val)
((stringp val)
(let ((val-table (char-table-extra-slot table 4))
(first-char (lsh (lsh char -7) 7))
(str val)
(len (length val))
(idx 0)
this-val count)
(while (< idx len)
(setq val (aref str idx) idx (1+ idx)
count (if (< idx len) (aref str idx) 1))
(setq val (and (> val 0) (aref val-table (1- val)))
count (if (< count 128)
1
(prog1 (- count 128) (setq idx (1+ idx)))))
(dotimes (i count)
(aset table first-char val)
(if (= first-char char)
(setq this-val val))
(setq first-char (1+ first-char))))
this-val))))
;; Store VAL (symbol) as a character property value of CHAR in TABLE.
(defun unidata-put-symbol (char val table)
(or (symbolp val)
(error "Not a symbol: %S" val))
(let ((current-val (aref table char)))
(unless (eq current-val val)
(if (stringp current-val)
(funcall (char-table-extra-slot table 1) char current-val table))
(aset table char val))))
;; Store VAL (integer) as a character property value of CHAR in TABLE.
(defun unidata-put-integer (char val table)
(or (integerp val)
(not val)
(error "Not an integer nor nil: %S" val))
(let ((current-val (aref table char)))
(unless (eq current-val val)
(if (stringp current-val)
(funcall (char-table-extra-slot table 1) char current-val table))
(aset table char val))))
;; Store VAL (integer or float) as a character property value of CHAR
;; in TABLE.
(defun unidata-put-numeric (char val table)
(or (numberp val)
(not val)
(error "Not a number nor nil: %S" val))
(let ((current-val (aref table char)))
(unless (equal current-val val)
(if (stringp current-val)
(funcall (char-table-extra-slot table 1) char current-val table))
(aset table char val))))
;;
;; This kind of char-table has these extra slots:
;; 1st: the property symbol
;; 2nd: 0 (corresponding to uniprop_decode_value in chartab.c)
;; 3rd: 1..3 (corresponding to uniprop_encode_xxx in chartab.c)
;; 4th: function or nil
;; 5th: VAL-TABLE
;; Encode the character property value VAL into an integer value by
;; VAL-LIST. By side effect, VAL-LIST is modified.
;; VAL-LIST has this form:
;; (t (VAL1 . VAL-CODE1) (VAL2 . VAL-CODE2) ...)
;; If VAL is one of VALn, just return VAL-CODEn. Otherwise,
;; VAL-LIST is modified to this:
;; (t (VAL . (1+ VAL-CODE1)) (VAL1 . VAL-CODE1) (VAL2 . VAL-CODE2) ...)
;; ((nil . 0) (VAL1 . 1) (VAL2 . 2) ...)
;; If VAL is one of VALn, just return n.
;; Otherwise, VAL-LIST is modified to this:
;; ((nil . 0) (VAL1 . 1) (VAL2 . 2) ... (VAL . n+1))
(defun unidata-encode-val (val-list val)
(let ((slot (assoc val val-list))
val-code)
(if slot
(cdr slot)
(setq val-code (if (cdr val-list) (1+ (cdr (nth 1 val-list))) 1))
(setcdr val-list (cons (cons val val-code) (cdr val-list)))
(setq val-code (length val-list))
(nconc val-list (list (cons val val-code)))
val-code)))
;; Generate a char-table for the character property PROP.
(defun unidata-gen-table (prop val-func default-value)
(defun unidata-gen-table (prop val-func default-value val-list)
(let ((table (make-char-table 'char-code-property-table))
(prop-idx (unidata-prop-index prop))
(val-list (list t))
(vec (make-vector 128 0))
tail elt range val val-code idx slot
prev-range-data)
(set-char-table-range table (cons 0 (max-char)) default-value)
(setq val-list (cons nil (copy-sequence val-list)))
(setq tail val-list val-code 0)
;; Convert (nil A B ...) to ((nil . 0) (A . 1) (B . 2) ...)
(while tail
(setcar tail (cons (car tail) val-code))
(setq tail (cdr tail) val-code (1+ val-code)))
(setq default-value (unidata-encode-val val-list default-value))
(set-char-table-range table t default-value)
(set-char-table-range table nil default-value)
(setq tail unidata-list)
(while tail
(setq elt (car tail) tail (cdr tail))
@ -495,7 +404,7 @@ Property value is a character."
(setq val-code (if val (unidata-encode-val val-list val)))
(if (consp range)
(when val-code
(set-char-table-range table range val)
(set-char-table-range table range val-code)
(let ((from (car range)) (to (cdr range)))
;; If RANGE doesn't end at the char-table boundary (each
;; 128 characters), we may have to carry over the data
@ -534,7 +443,7 @@ Property value is a character."
(if val-code
(aset vec (- range start) val-code))
(setq tail (cdr tail)))
(setq str "" val-code -1 count 0)
(setq str "\002" val-code -1 count 0)
(mapc #'(lambda (x)
(if (= val-code x)
(setq count (1+ count))
@ -549,7 +458,7 @@ Property value is a character."
vec)
(if (= count 128)
(if val
(set-char-table-range table (cons start limit) val))
(set-char-table-range table (cons start limit) val-code))
(if (= val-code 0)
(set-char-table-range table (cons start limit) str)
(if (> count 2)
@ -559,34 +468,29 @@ Property value is a character."
(setq str (concat str (string val-code)))))
(set-char-table-range table (cons start limit) str))))))
(setq val-list (nreverse (cdr val-list)))
(set-char-table-extra-slot table 0 prop)
(set-char-table-extra-slot table 4 (vconcat (mapcar 'car val-list)))
table))
(defun unidata-gen-table-symbol (prop)
(defun unidata-gen-table-symbol (prop default-value val-list)
(let ((table (unidata-gen-table prop
#'(lambda (x) (and (> (length x) 0)
(intern x)))
0)))
(byte-compile 'unidata-get-symbol)
(byte-compile 'unidata-put-symbol)
(set-char-table-extra-slot table 1 (symbol-function 'unidata-get-symbol))
(set-char-table-extra-slot table 2 (symbol-function 'unidata-put-symbol))
default-value val-list)))
(set-char-table-extra-slot table 1 0)
(set-char-table-extra-slot table 2 1)
table))
(defun unidata-gen-table-integer (prop)
(defun unidata-gen-table-integer (prop default-value val-list)
(let ((table (unidata-gen-table prop
#'(lambda (x) (and (> (length x) 0)
(string-to-number x)))
t)))
(byte-compile 'unidata-get-integer)
(byte-compile 'unidata-put-integer)
(set-char-table-extra-slot table 1 (symbol-function 'unidata-get-integer))
(set-char-table-extra-slot table 2 (symbol-function 'unidata-put-integer))
default-value val-list)))
(set-char-table-extra-slot table 1 0)
(set-char-table-extra-slot table 2 1)
table))
(defun unidata-gen-table-numeric (prop)
(defun unidata-gen-table-numeric (prop default-value val-list)
(let ((table (unidata-gen-table prop
#'(lambda (x)
(if (string-match "/" x)
@ -595,11 +499,9 @@ Property value is a character."
(substring x (match-end 0))))
(if (> (length x) 0)
(string-to-number x))))
t)))
(byte-compile 'unidata-get-numeric)
(byte-compile 'unidata-put-numeric)
(set-char-table-extra-slot table 1 (symbol-function 'unidata-get-numeric))
(set-char-table-extra-slot table 2 (symbol-function 'unidata-put-numeric))
default-value val-list)))
(set-char-table-extra-slot table 1 0)
(set-char-table-extra-slot table 2 2)
table))
@ -892,7 +794,6 @@ Property value is a character."
word-table
block-list block-word-table block-end
tail elt range val idx slot)
(set-char-table-range table (cons 0 (max-char)) 0)
(setq tail unidata-list)
(setq block-end -1)
(while tail
@ -1025,7 +926,7 @@ Property value is a character."
idx (1+ i)))))
(nreverse (cons (intern (substring str idx)) l))))))
(defun unidata-gen-table-name (prop)
(defun unidata-gen-table-name (prop &rest ignore)
(let* ((table (unidata-gen-table-word-list prop 'unidata-split-name))
(word-tables (char-table-extra-slot table 4)))
(byte-compile 'unidata-get-name)
@ -1064,7 +965,7 @@ Property value is a character."
(nreverse l)))))
(defun unidata-gen-table-decomposition (prop)
(defun unidata-gen-table-decomposition (prop &rest ignore)
(let* ((table (unidata-gen-table-word-list prop 'unidata-split-decomposition))
(word-tables (char-table-extra-slot table 4)))
(byte-compile 'unidata-get-decomposition)
@ -1080,7 +981,8 @@ Property value is a character."
(defun unidata-describe-general-category (val)
(cdr (assq val
'((Lu . "Letter, Uppercase")
'((nil . "Uknown")
(Lu . "Letter, Uppercase")
(Ll . "Letter, Lowercase")
(Lt . "Letter, Titlecase")
(Lm . "Letter, Modifier")
@ -1171,6 +1073,19 @@ Property value is a character."
(string ?'))))
val " "))
(defun unidata-gen-mirroring-list ()
(let ((head (list nil))
tail)
(with-temp-buffer
(insert-file-contents (expand-file-name "BidiMirroring.txt" unidata-dir))
(goto-char (point-min))
(setq tail head)
(while (re-search-forward "^\\([0-9A-F]+\\);\\s +\\([0-9A-F]+\\)" nil t)
(let ((char (string-to-number (match-string 1) 16))
(mirror (match-string 2)))
(setq tail (setcdr tail (list (list char mirror)))))))
(cdr head)))
;; Verify if we can retrieve correct values from the generated
;; char-tables.
@ -1212,13 +1127,21 @@ Property value is a character."
;; The entry function. It generates files described in the header
;; comment of this file.
(defun unidata-gen-files (&optional unidata-text-file)
(or unidata-text-file
(setq unidata-text-file (car command-line-args-left)
(defun unidata-gen-files (&optional data-dir unidata-text-file)
(or data-dir
(setq data-dir (car command-line-args-left)
command-line-args-left (cdr command-line-args-left)
unidata-text-file (car command-line-args-left)
command-line-args-left (cdr command-line-args-left)))
(unidata-setup-list unidata-text-file)
(let ((coding-system-for-write 'utf-8-unix)
(charprop-file "charprop.el"))
(charprop-file "charprop.el")
(unidata-dir data-dir))
(dolist (elt unidata-prop-alist)
(let* ((prop (car elt))
(file (unidata-prop-file prop)))
(if (file-exists-p file)
(delete-file file))))
(unidata-setup-list unidata-text-file)
(with-temp-file charprop-file
(insert ";; Automatically generated by unidata-gen.el.\n")
(dolist (elt unidata-prop-alist)
@ -1227,31 +1150,41 @@ Property value is a character."
(file (unidata-prop-file prop))
(docstring (unidata-prop-docstring prop))
(describer (unidata-prop-describer prop))
(default-value (unidata-prop-default prop))
(val-list (unidata-prop-val-list prop))
table)
;; Filename in this comment line is extracted by sed in
;; Makefile.
(insert (format ";; FILE: %s\n" file))
(insert (format "(define-char-code-property '%S %S\n %S)\n"
prop file docstring))
(with-temp-file file
(with-temp-buffer
(message "Generating %s..." file)
(setq table (funcall generator prop))
(when (file-exists-p file)
(insert-file-contents file)
(goto-char (point-max))
(search-backward ";; Local Variables:"))
(setq table (funcall generator prop default-value val-list))
(when describer
(unless (subrp (symbol-function describer))
(byte-compile describer)
(setq describer (symbol-function describer)))
(set-char-table-extra-slot table 3 describer))
(insert ";; Copyright (C) 1991-2009 Unicode, Inc.
;; This file was generated from the Unicode data file at
;; http://www.unicode.org/Public/UNIDATA/UnicodeData.txt.
;; See lisp/international/README for the copyright and permission notice.\n"
(format "(define-char-code-property '%S %S %S)\n"
prop table docstring)
";; Local Variables:\n"
";; coding: utf-8\n"
";; no-byte-compile: t\n"
";; End:\n\n"
(format ";; %s ends here\n" file)))))
(if (bobp)
(insert ";; Copyright (C) 1991-2009 Unicode, Inc.
;; This file was generated from the Unicode data files at
;; http://www.unicode.org/Public/UNIDATA/.
;; See lisp/international/README for the copyright and permission notice.\n"))
(insert (format "(define-char-code-property '%S %S %S)\n"
prop table docstring))
(if (eobp)
(insert ";; Local Variables:\n"
";; coding: utf-8\n"
";; no-byte-compile: t\n"
";; End:\n\n"
(format ";; %s ends here\n" file)))
(write-file file)
(message "Generating %s...done" file))))
(message "Writing %s..." charprop-file)
(insert ";; Local Variables:\n"
";; coding: utf-8\n"

View File

@ -1,3 +1,39 @@
2011-07-06 Kenichi Handa <handa@m17n.org>
* international/characters.el (build-unicode-category-table):
Delete it.
(unicode-category-table): Set it by
unicode-prroperty-table-internal.
* international/mule-cmds.el (char-code-property-alist): Moved to
to src/chartab.c.
(get-char-code-property): Call unicode-property-table-internal to
load a file. Call get-unicode-property-internal where necessary.
(put-char-code-property): Call unicode-property-table-internal to
load a file. Call put-unicode-property-internal where necessary.
put-unicode-property-internal where necessary.
(char-code-property-description): Call
unicode-property-table-internal to load a file.
* international/charprop.el:
* international/uni-bidi.el:
* international/uni-category.el:
* international/uni-combining.el:
* international/uni-comment.el:
* international/uni-decimal.el:
* international/uni-decomposition.el:
* international/uni-digit.el:
* international/uni-lowercase.el:
* international/uni-mirrored.el:
* international/uni-name.el:
* international/uni-numeric.el:
* international/uni-old-name.el:
* international/uni-titlecase.el:
* international/uni-uppercase.el: Regenerate.
* loadup.el: Load international/charprop.el before
international/characters.
2011-06-22 Richard Stallman <rms@gnu.org>
* mail/sendmail.el (mail-bury): If Rmail is in use, return nicely

View File

@ -1206,22 +1206,8 @@ Setup char-width-table appropriate for non-CJK language environment."
;;; Setting unicode-category-table.
;; This macro is to build unicode-category-table at compile time so
;; that C code can access the table efficiently.
(defmacro build-unicode-category-table ()
(let ((table (make-char-table 'unicode-category-table nil)))
(dotimes (i #x110000)
(if (or (< i #xD800)
(and (>= i #xF900) (< i #x30000))
(and (>= i #xE0000) (< i #xE0200)))
(aset table i (get-char-code-property i 'general-category))))
(set-char-table-range table '(#xE000 . #xF8FF) 'Co)
(set-char-table-range table '(#xF0000 . #xFFFFD) 'Co)
(set-char-table-range table '(#x100000 . #x10FFFD) 'Co)
(optimize-char-table table 'eq)
table))
(setq unicode-category-table (build-unicode-category-table))
(setq unicode-category-table
(unicode-property-table-internal 'general-category))
(map-char-table #'(lambda (key val)
(if (and val
(or (and (/= (aref (symbol-name val) 0) ?M)

View File

@ -1,8 +1,4 @@
;; Copyright (C) 1991-2010 Unicode, Inc.
;; This file was generated from the Unicode data file at
;; http://www.unicode.org/Public/UNIDATA/UnicodeData.txt.
;; See lisp/international/README for the copyright and permission notice.
;; Automatically generated by unidata-gen.el.
;; FILE: uni-name.el
(define-char-code-property 'name "uni-name.el"
"Unicode character name.
@ -45,7 +41,7 @@ Property value is an integer or a floating point.")
;; FILE: uni-mirrored.el
(define-char-code-property 'mirrored "uni-mirrored.el"
"Unicode bidi mirrored flag.
Property value is a symbol `Y' or `N'.")
Property value is a symbol `Y' or `N'. See also the property `mirroring'.")
;; FILE: uni-old-name.el
(define-char-code-property 'old-name "uni-old-name.el"
"Unicode old names as published in Unicode 1.0.
@ -66,6 +62,11 @@ Property value is a character.")
(define-char-code-property 'titlecase "uni-titlecase.el"
"Unicode simple titlecase mapping.
Property value is a character.")
;; FILE: uni-mirrored.el
(define-char-code-property 'mirroring "uni-mirrored.el"
"Unicode bidi-mirroring characters.
Property value is a character that has the corresponding mirroring image,
or nil for non-mirrored character.")
;; Local Variables:
;; coding: utf-8
;; no-byte-compile: t

View File

@ -2709,16 +2709,6 @@ See also `locale-charset-language-names', `locale-language-names',
;;; Character property
;; Each element has the form (PROP . TABLE).
;; PROP is a symbol representing a character property.
;; TABLE is a char-table containing the property value for each character.
;; TABLE may be a name of file to load to build a char-table.
;; Don't modify this variable directly but use `define-char-code-property'.
(defvar char-code-property-alist nil
"Alist of character property name vs char-table containing property values.
Internal use only.")
(put 'char-code-property-table 'char-table-extra-slots 5)
(defun define-char-code-property (name table &optional docstring)
@ -2770,32 +2760,23 @@ See also the documentation of `get-char-code-property' and
(defun get-char-code-property (char propname)
"Return the value of CHAR's PROPNAME property."
(let ((slot (assq propname char-code-property-alist)))
(if slot
(let (table value func)
(if (stringp (cdr slot))
(load (cdr slot) nil t))
(setq table (cdr slot)
value (aref table char)
func (char-table-extra-slot table 1))
(let ((table (unicode-property-table-internal propname)))
(if table
(let ((func (char-table-extra-slot table 1)))
(if (functionp func)
(setq value (funcall func char value table)))
value)
(funcall func char (aref table char) table)
(get-unicode-property-internal table char)))
(plist-get (aref char-code-property-table char) propname))))
(defun put-char-code-property (char propname value)
"Store CHAR's PROPNAME property with VALUE.
It can be retrieved with `(get-char-code-property CHAR PROPNAME)'."
(let ((slot (assq propname char-code-property-alist)))
(if slot
(let (table func)
(if (stringp (cdr slot))
(load (cdr slot) nil t))
(setq table (cdr slot)
func (char-table-extra-slot table 2))
(let ((table (unicode-property-table-internal propname)))
(if table
(let ((func (char-table-extra-slot table 2)))
(if (functionp func)
(funcall func char value table)
(aset table char value)))
(put-unicode-property-internal table char value)))
(let* ((plist (aref char-code-property-table char))
(x (plist-put plist propname value)))
(or (eq x plist)
@ -2805,13 +2786,9 @@ It can be retrieved with `(get-char-code-property CHAR PROPNAME)'."
(defun char-code-property-description (prop value)
"Return a description string of character property PROP's value VALUE.
If there's no description string for VALUE, return nil."
(let ((slot (assq prop char-code-property-alist)))
(if slot
(let (table func)
(if (stringp (cdr slot))
(load (cdr slot) nil t))
(setq table (cdr slot)
func (char-table-extra-slot table 3))
(let ((table (unicode-property-table-internal prop)))
(if table
(let ((func (char-table-extra-slot table 3)))
(if (functionp func)
(funcall func value))))))

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -123,11 +123,11 @@
;; multilingual text.
(load "international/mule-cmds")
(load "case-table")
(load "international/characters")
(load "composite")
;; This file doesn't exist when building a development version of Emacs
;; from the repository. It is generated just after temacs is built.
(load "international/charprop.el" t)
(load "international/characters")
(load "composite")
;; Load language-specific files.
(load "language/chinese")

View File

@ -1,3 +1,45 @@
2011-07-06 Kenichi Handa <handa@m17n.org>
* character.h (unicode_category_t): New enum type.
* chartab.c (uniprop_decoder_t, uniprop_encoder_t): New types.
(Qchar_code_property_table): New variable.
(UNIPROP_TABLE_P, UNIPROP_GET_DECODER)
(UNIPROP_COMPRESSED_FORM_P): New macros.
(char_table_ascii): Uncompress the compressed values.
(sub_char_table_ref): New arg is_uniprop. Callers changed.
Uncompress the compressed values.
(sub_char_table_ref_and_range): Likewise.
(char_table_ref_and_range): Uncompress the compressed values.
(sub_char_table_set): New arg is_uniprop. Callers changed.
Uncompress the compressed values.
(sub_char_table_set_range): Args changed. Callers changed.
(char_table_set_range): Adjuted for the above change.
(map_sub_char_table): Delete args default_val and parent. Add arg
top. Give decoded values to a Lisp function.
(map_char_table): Adjusted for the above change. Give decoded
values to a Lisp function. Gcpro more variables.
(uniprop_table_uncompress)
(uniprop_decode_value_run_length): New functions.
(uniprop_decoder, uniprop_decoder_count): New variables.
(uniprop_get_decoder, uniprop_encode_value_character)
(uniprop_encode_value_run_length, uniprop_encode_value_numeric):
New functions.
(uniprop_encoder, uniprop_encoder_count): New variables.
(uniprop_get_encoder, uniprop_table)
(Funicode_property_table_internal, Fget_unicode_property_internal)
(Fput_unicode_property_internal): New functions.
(syms_of_chartab): DEFSYM Qchar_code_property_table, defsubr
Sunicode_property_table_internal, Sget_unicode_property_internal,
and Sput_unicode_property_internal. Defvar_lisp
char-code-property-alist.
* composite.c (CHAR_COMPOSABLE_P): Adjusted for the change of
Vunicode_category_table.
* font.c (font_range): Adjusted for the change of
Vunicode_category_table.
2011-06-22 Paul Eggert <eggert@cs.ucla.edu>
Fixes for GLYPH_DEBUG found by GCC 4.6.0 static checking.

View File

@ -597,6 +597,45 @@ along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
: (c) <= 0xDFFF ? 2 \
: 0)
/* Data type for Unicode general category.
The order of members must be in sync with the 8th element of the
member of unidata-prop-alist (in admin/unidata/unidata-getn.el) for
Unicode character property `general-category'. */
typedef enum {
UNICODE_CATEGORY_UNKNOWN = 0,
UNICODE_CATEGORY_Lu,
UNICODE_CATEGORY_Ll,
UNICODE_CATEGORY_Lt,
UNICODE_CATEGORY_Lm,
UNICODE_CATEGORY_Lo,
UNICODE_CATEGORY_Mn,
UNICODE_CATEGORY_Mc,
UNICODE_CATEGORY_Me,
UNICODE_CATEGORY_Nd,
UNICODE_CATEGORY_Nl,
UNICODE_CATEGORY_No,
UNICODE_CATEGORY_Pc,
UNICODE_CATEGORY_Pd,
UNICODE_CATEGORY_Ps,
UNICODE_CATEGORY_Pe,
UNICODE_CATEGORY_Pi,
UNICODE_CATEGORY_Pf,
UNICODE_CATEGORY_Po,
UNICODE_CATEGORY_Sm,
UNICODE_CATEGORY_Sc,
UNICODE_CATEGORY_Sk,
UNICODE_CATEGORY_So,
UNICODE_CATEGORY_Zs,
UNICODE_CATEGORY_Zl,
UNICODE_CATEGORY_Zp,
UNICODE_CATEGORY_Cc,
UNICODE_CATEGORY_Cf,
UNICODE_CATEGORY_Cs,
UNICODE_CATEGORY_Co,
UNICODE_CATEGORY_Cn
} unicode_category_t;
extern int char_resolve_modifier_mask (int);
extern int char_string (unsigned, unsigned char *);

View File

@ -53,7 +53,38 @@ static const int chartab_bits[4] =
#define CHARTAB_IDX(c, depth, min_char) \
(((c) - (min_char)) >> chartab_bits[(depth)])
/* Preamble for uniprop (Unicode character property) tables. See the
comment of "Unicode character property tables". */
/* Purpose of uniprop tables. */
static Lisp_Object Qchar_code_property_table;
/* Types of decoder and encoder functions for uniprop values. */
typedef Lisp_Object (*uniprop_decoder_t) (Lisp_Object, Lisp_Object);
typedef Lisp_Object (*uniprop_encoder_t) (Lisp_Object, Lisp_Object);
static Lisp_Object uniprop_table_uncompress (Lisp_Object, int);
static uniprop_decoder_t uniprop_get_decoder (Lisp_Object);
/* 1 iff TABLE is a uniprop table. */
#define UNIPROP_TABLE_P(TABLE) \
(EQ (XCHAR_TABLE (TABLE)->purpose, Qchar_code_property_table) \
&& CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (TABLE)) == 5)
/* Return a decoder for values in the uniprop table TABLE. */
#define UNIPROP_GET_DECODER(TABLE) \
(UNIPROP_TABLE_P (TABLE) ? uniprop_get_decoder (TABLE) : NULL)
/* Nonzero iff OBJ is a string representing uniprop values of 128
succeeding characters (the bottom level of a char-table) by a
compressed format. We are sure that no property value has a string
starting with '\001' nor '\002'. */
#define UNIPROP_COMPRESSED_FORM_P(OBJ) \
(STRINGP (OBJ) && SCHARS (OBJ) > 0 \
&& ((SREF (OBJ, 0) == 1 || (SREF (OBJ, 0) == 2))))
DEFUN ("make-char-table", Fmake_char_table, Smake_char_table, 1, 2, 0,
doc: /* Return a newly created char-table, with purpose PURPOSE.
Each element is initialized to INIT, which defaults to nil.
@ -107,7 +138,7 @@ make_sub_char_table (int depth, int min_char, Lisp_Object defalt)
static Lisp_Object
char_table_ascii (Lisp_Object table)
{
Lisp_Object sub;
Lisp_Object sub, val;
sub = XCHAR_TABLE (table)->contents[0];
if (! SUB_CHAR_TABLE_P (sub))
@ -115,7 +146,10 @@ char_table_ascii (Lisp_Object table)
sub = XSUB_CHAR_TABLE (sub)->contents[0];
if (! SUB_CHAR_TABLE_P (sub))
return sub;
return XSUB_CHAR_TABLE (sub)->contents[0];
val = XSUB_CHAR_TABLE (sub)->contents[0];
if (UNIPROP_TABLE_P (table) && UNIPROP_COMPRESSED_FORM_P (val))
val = uniprop_table_uncompress (sub, 0);
return val;
}
static Lisp_Object
@ -169,16 +203,19 @@ copy_char_table (Lisp_Object table)
}
static Lisp_Object
sub_char_table_ref (Lisp_Object table, int c)
sub_char_table_ref (Lisp_Object table, int c, int is_uniprop)
{
struct Lisp_Sub_Char_Table *tbl = XSUB_CHAR_TABLE (table);
int depth = XINT (tbl->depth);
int min_char = XINT (tbl->min_char);
Lisp_Object val;
int idx = CHARTAB_IDX (c, depth, min_char);
val = tbl->contents[CHARTAB_IDX (c, depth, min_char)];
val = tbl->contents[idx];
if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (val))
val = uniprop_table_uncompress (table, idx);
if (SUB_CHAR_TABLE_P (val))
val = sub_char_table_ref (val, c);
val = sub_char_table_ref (val, c, is_uniprop);
return val;
}
@ -198,7 +235,7 @@ char_table_ref (Lisp_Object table, int c)
{
val = tbl->contents[CHARTAB_IDX (c, 0, 0)];
if (SUB_CHAR_TABLE_P (val))
val = sub_char_table_ref (val, c);
val = sub_char_table_ref (val, c, UNIPROP_TABLE_P (table));
}
if (NILP (val))
{
@ -210,7 +247,8 @@ char_table_ref (Lisp_Object table, int c)
}
static Lisp_Object
sub_char_table_ref_and_range (Lisp_Object table, int c, int *from, int *to, Lisp_Object defalt)
sub_char_table_ref_and_range (Lisp_Object table, int c, int *from, int *to,
Lisp_Object defalt, int is_uniprop)
{
struct Lisp_Sub_Char_Table *tbl = XSUB_CHAR_TABLE (table);
int depth = XINT (tbl->depth);
@ -219,8 +257,10 @@ sub_char_table_ref_and_range (Lisp_Object table, int c, int *from, int *to, Lisp
Lisp_Object val;
val = tbl->contents[chartab_idx];
if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (val))
val = uniprop_table_uncompress (table, chartab_idx);
if (SUB_CHAR_TABLE_P (val))
val = sub_char_table_ref_and_range (val, c, from, to, defalt);
val = sub_char_table_ref_and_range (val, c, from, to, defalt, is_uniprop);
else if (NILP (val))
val = defalt;
@ -232,8 +272,11 @@ sub_char_table_ref_and_range (Lisp_Object table, int c, int *from, int *to, Lisp
c = min_char + idx * chartab_chars[depth] - 1;
idx--;
this_val = tbl->contents[idx];
if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (this_val))
this_val = uniprop_table_uncompress (table, idx);
if (SUB_CHAR_TABLE_P (this_val))
this_val = sub_char_table_ref_and_range (this_val, c, from, to, defalt);
this_val = sub_char_table_ref_and_range (this_val, c, from, to, defalt,
is_uniprop);
else if (NILP (this_val))
this_val = defalt;
@ -251,8 +294,11 @@ sub_char_table_ref_and_range (Lisp_Object table, int c, int *from, int *to, Lisp
chartab_idx++;
this_val = tbl->contents[chartab_idx];
if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (this_val))
this_val = uniprop_table_uncompress (table, chartab_idx);
if (SUB_CHAR_TABLE_P (this_val))
this_val = sub_char_table_ref_and_range (this_val, c, from, to, defalt);
this_val = sub_char_table_ref_and_range (this_val, c, from, to, defalt,
is_uniprop);
else if (NILP (this_val))
this_val = defalt;
if (! EQ (this_val, val))
@ -277,17 +323,20 @@ char_table_ref_and_range (Lisp_Object table, int c, int *from, int *to)
struct Lisp_Char_Table *tbl = XCHAR_TABLE (table);
int chartab_idx = CHARTAB_IDX (c, 0, 0), idx;
Lisp_Object val;
int is_uniprop = UNIPROP_TABLE_P (table);
val = tbl->contents[chartab_idx];
if (*from < 0)
*from = 0;
if (*to < 0)
*to = MAX_CHAR;
if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (val))
val = uniprop_table_uncompress (table, chartab_idx);
if (SUB_CHAR_TABLE_P (val))
val = sub_char_table_ref_and_range (val, c, from, to, tbl->defalt);
val = sub_char_table_ref_and_range (val, c, from, to, tbl->defalt,
is_uniprop);
else if (NILP (val))
val = tbl->defalt;
idx = chartab_idx;
while (*from < idx * chartab_chars[0])
{
@ -296,9 +345,11 @@ char_table_ref_and_range (Lisp_Object table, int c, int *from, int *to)
c = idx * chartab_chars[0] - 1;
idx--;
this_val = tbl->contents[idx];
if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (this_val))
this_val = uniprop_table_uncompress (table, idx);
if (SUB_CHAR_TABLE_P (this_val))
this_val = sub_char_table_ref_and_range (this_val, c, from, to,
tbl->defalt);
tbl->defalt, is_uniprop);
else if (NILP (this_val))
this_val = tbl->defalt;
@ -315,9 +366,11 @@ char_table_ref_and_range (Lisp_Object table, int c, int *from, int *to)
chartab_idx++;
c = chartab_idx * chartab_chars[0];
this_val = tbl->contents[chartab_idx];
if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (this_val))
this_val = uniprop_table_uncompress (table, chartab_idx);
if (SUB_CHAR_TABLE_P (this_val))
this_val = sub_char_table_ref_and_range (this_val, c, from, to,
tbl->defalt);
tbl->defalt, is_uniprop);
else if (NILP (this_val))
this_val = tbl->defalt;
if (! EQ (this_val, val))
@ -332,7 +385,7 @@ char_table_ref_and_range (Lisp_Object table, int c, int *from, int *to)
static void
sub_char_table_set (Lisp_Object table, int c, Lisp_Object val)
sub_char_table_set (Lisp_Object table, int c, Lisp_Object val, int is_uniprop)
{
struct Lisp_Sub_Char_Table *tbl = XSUB_CHAR_TABLE (table);
int depth = XINT ((tbl)->depth);
@ -347,11 +400,17 @@ sub_char_table_set (Lisp_Object table, int c, Lisp_Object val)
sub = tbl->contents[i];
if (! SUB_CHAR_TABLE_P (sub))
{
sub = make_sub_char_table (depth + 1,
min_char + i * chartab_chars[depth], sub);
tbl->contents[i] = sub;
if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (sub))
sub = uniprop_table_uncompress (table, i);
else
{
sub = make_sub_char_table (depth + 1,
min_char + i * chartab_chars[depth],
sub);
tbl->contents[i] = sub;
}
}
sub_char_table_set (sub, c, val);
sub_char_table_set (sub, c, val, is_uniprop);
}
}
@ -376,7 +435,7 @@ char_table_set (Lisp_Object table, int c, Lisp_Object val)
sub = make_sub_char_table (1, i * chartab_chars[0], sub);
tbl->contents[i] = sub;
}
sub_char_table_set (sub, c, val);
sub_char_table_set (sub, c, val, UNIPROP_TABLE_P (table));
if (ASCII_CHAR_P (c))
tbl->ascii = char_table_ascii (table);
}
@ -384,30 +443,40 @@ char_table_set (Lisp_Object table, int c, Lisp_Object val)
}
static void
sub_char_table_set_range (Lisp_Object *table, int depth, int min_char, int from, int to, Lisp_Object val)
sub_char_table_set_range (Lisp_Object table, int from, int to, Lisp_Object val,
int is_uniprop)
{
int max_char = min_char + chartab_chars[depth] - 1;
struct Lisp_Sub_Char_Table *tbl = XSUB_CHAR_TABLE (table);
int depth = XINT ((tbl)->depth);
int min_char = XINT ((tbl)->min_char);
int chars_in_block = chartab_chars[depth];
int i, c, lim = chartab_size[depth];
if (depth == 3 || (from <= min_char && to >= max_char))
*table = val;
else
if (from < min_char)
from = min_char;
i = CHARTAB_IDX (from, depth, min_char);
c = min_char + chars_in_block * i;
for (; i <= lim; i++, c += chars_in_block)
{
int i;
unsigned j;
depth++;
if (! SUB_CHAR_TABLE_P (*table))
*table = make_sub_char_table (depth, min_char, *table);
if (from < min_char)
from = min_char;
if (to > max_char)
to = max_char;
i = CHARTAB_IDX (from, depth, min_char);
j = CHARTAB_IDX (to, depth, min_char);
min_char += chartab_chars[depth] * i;
for (j++; i < j; i++, min_char += chartab_chars[depth])
sub_char_table_set_range (XSUB_CHAR_TABLE (*table)->contents + i,
depth, min_char, from, to, val);
if (c > to)
break;
if (from <= c && c + chars_in_block - 1 <= to)
tbl->contents[i] = val;
else
{
Lisp_Object sub = tbl->contents[i];
if (! SUB_CHAR_TABLE_P (sub))
{
if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (sub))
sub = uniprop_table_uncompress (table, i);
else
{
sub = make_sub_char_table (depth + 1, c, sub);
tbl->contents[i] = sub;
}
}
sub_char_table_set_range (sub, from, to, val, is_uniprop);
}
}
}
@ -417,16 +486,33 @@ char_table_set_range (Lisp_Object table, int from, int to, Lisp_Object val)
{
struct Lisp_Char_Table *tbl = XCHAR_TABLE (table);
Lisp_Object *contents = tbl->contents;
int i;
if (from == to)
char_table_set (table, from, val);
else
{
unsigned lim = to / chartab_chars[0] + 1;
for (i = CHARTAB_IDX (from, 0, 0); i < lim; i++)
sub_char_table_set_range (contents + i, 0, i * chartab_chars[0],
from, to, val);
int is_uniprop = UNIPROP_TABLE_P (table);
int lim = CHARTAB_IDX (to, 0, 0);
int i, c;
for (i = CHARTAB_IDX (from, 0, 0), c = 0; i <= lim;
i++, c += chartab_chars[0])
{
if (c > to)
break;
if (from <= c && c + chartab_chars[0] - 1 <= to)
tbl->contents[i] = val;
else
{
Lisp_Object sub = tbl->contents[i];
if (! SUB_CHAR_TABLE_P (sub))
{
sub = make_sub_char_table (1, i * chartab_chars[0], sub);
tbl->contents[i] = sub;
}
sub_char_table_set_range (sub, from, to, val, is_uniprop);
}
}
if (ASCII_CHAR_P (from))
tbl->ascii = char_table_ascii (table);
}
@ -504,6 +590,8 @@ DEFUN ("set-char-table-extra-slot", Fset_char_table_extra_slot,
(Lisp_Object char_table, Lisp_Object n, Lisp_Object value)
{
CHECK_CHAR_TABLE (char_table);
if (EQ (XCHAR_TABLE (char_table)->purpose, Qchar_code_property_table))
error ("Can't change extra-slot of char-code-property-table");
CHECK_NUMBER (n);
if (XINT (n) < 0
|| XINT (n) >= CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (char_table)))
@ -532,8 +620,9 @@ a cons of character codes (for characters in the range), or a character code. *
CHECK_CHARACTER_CAR (range);
CHECK_CHARACTER_CDR (range);
val = char_table_ref_and_range (char_table, XFASTINT (XCAR (range)),
&from, &to);
from = XFASTINT (XCAR (range));
to = XFASTINT (XCDR (range));
val = char_table_ref_and_range (char_table, from, &from, &to);
/* Not yet implemented. */
}
else
@ -655,8 +744,7 @@ equivalent and can be merged. It defaults to `equal'. */)
/* Map C_FUNCTION or FUNCTION over TABLE (top or sub char-table),
calling it for each character or group of characters that share a
value. RANGE is a cons (FROM . TO) specifying the range of target
characters, VAL is a value of FROM in TABLE, DEFAULT_VAL is the
default value of the char-table, PARENT is the parent of the
characters, VAL is a value of FROM in TABLE, TOP is the top
char-table.
ARG is passed to C_FUNCTION when that is called.
@ -669,7 +757,7 @@ equivalent and can be merged. It defaults to `equal'. */)
static Lisp_Object
map_sub_char_table (void (*c_function) (Lisp_Object, Lisp_Object, Lisp_Object),
Lisp_Object function, Lisp_Object table, Lisp_Object arg, Lisp_Object val,
Lisp_Object range, Lisp_Object default_val, Lisp_Object parent)
Lisp_Object range, Lisp_Object top)
{
/* Pointer to the elements of TABLE. */
Lisp_Object *contents;
@ -681,6 +769,8 @@ map_sub_char_table (void (*c_function) (Lisp_Object, Lisp_Object, Lisp_Object),
int chars_in_block;
int from = XINT (XCAR (range)), to = XINT (XCDR (range));
int i, c;
int is_uniprop = UNIPROP_TABLE_P (top);
uniprop_decoder_t decoder = UNIPROP_GET_DECODER (top);
if (SUB_CHAR_TABLE_P (table))
{
@ -710,28 +800,33 @@ map_sub_char_table (void (*c_function) (Lisp_Object, Lisp_Object, Lisp_Object),
for (c = min_char + chars_in_block * i; c <= max_char;
i++, c += chars_in_block)
{
Lisp_Object this = contents[i];
Lisp_Object this = (SUB_CHAR_TABLE_P (table)
? XSUB_CHAR_TABLE (table)->contents[i]
: XCHAR_TABLE (table)->contents[i]);
int nextc = c + chars_in_block;
if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (this))
this = uniprop_table_uncompress (table, i);
if (SUB_CHAR_TABLE_P (this))
{
if (to >= nextc)
XSETCDR (range, make_number (nextc - 1));
val = map_sub_char_table (c_function, function, this, arg,
val, range, default_val, parent);
val, range, top);
}
else
{
if (NILP (this))
this = default_val;
this = XCHAR_TABLE (top)->defalt;
if (!EQ (val, this))
{
int different_value = 1;
if (NILP (val))
{
if (! NILP (parent))
if (! NILP (XCHAR_TABLE (top)->parent))
{
Lisp_Object parent = XCHAR_TABLE (top)->parent;
Lisp_Object temp = XCHAR_TABLE (parent)->parent;
/* This is to get a value of FROM in PARENT
@ -742,8 +837,7 @@ map_sub_char_table (void (*c_function) (Lisp_Object, Lisp_Object, Lisp_Object),
XSETCDR (range, make_number (c - 1));
val = map_sub_char_table (c_function, function,
parent, arg, val, range,
XCHAR_TABLE (parent)->defalt,
XCHAR_TABLE (parent)->parent);
parent);
if (EQ (val, this))
different_value = 0;
}
@ -756,14 +850,22 @@ map_sub_char_table (void (*c_function) (Lisp_Object, Lisp_Object, Lisp_Object),
if (c_function)
(*c_function) (arg, XCAR (range), val);
else
call2 (function, XCAR (range), val);
{
if (decoder)
val = decoder (top, val);
call2 (function, XCAR (range), val);
}
}
else
{
if (c_function)
(*c_function) (arg, range, val);
else
call2 (function, range, val);
{
if (decoder)
val = decoder (top, val);
call2 (function, range, val);
}
}
}
val = this;
@ -783,35 +885,39 @@ map_sub_char_table (void (*c_function) (Lisp_Object, Lisp_Object, Lisp_Object),
ARG is passed to C_FUNCTION when that is called. */
void
map_char_table (void (*c_function) (Lisp_Object, Lisp_Object, Lisp_Object), Lisp_Object function, Lisp_Object table, Lisp_Object arg)
map_char_table (void (*c_function) (Lisp_Object, Lisp_Object, Lisp_Object),
Lisp_Object function, Lisp_Object table, Lisp_Object arg)
{
Lisp_Object range, val;
struct gcpro gcpro1, gcpro2, gcpro3;
Lisp_Object range, val, parent;
struct gcpro gcpro1, gcpro2, gcpro3, gcpro4;
uniprop_decoder_t decoder = UNIPROP_GET_DECODER (table);
range = Fcons (make_number (0), make_number (MAX_CHAR));
GCPRO3 (table, arg, range);
parent = XCHAR_TABLE (table)->parent;
GCPRO4 (table, arg, range, parent);
val = XCHAR_TABLE (table)->ascii;
if (SUB_CHAR_TABLE_P (val))
val = XSUB_CHAR_TABLE (val)->contents[0];
val = map_sub_char_table (c_function, function, table, arg, val, range,
XCHAR_TABLE (table)->defalt,
XCHAR_TABLE (table)->parent);
table);
/* If VAL is nil and TABLE has a parent, we must consult the parent
recursively. */
while (NILP (val) && ! NILP (XCHAR_TABLE (table)->parent))
{
Lisp_Object parent = XCHAR_TABLE (table)->parent;
Lisp_Object temp = XCHAR_TABLE (parent)->parent;
Lisp_Object temp;
int from = XINT (XCAR (range));
parent = XCHAR_TABLE (table)->parent;
temp = XCHAR_TABLE (parent)->parent;
/* This is to get a value of FROM in PARENT without checking the
parent of PARENT. */
XCHAR_TABLE (parent)->parent = Qnil;
val = CHAR_TABLE_REF (parent, from);
XCHAR_TABLE (parent)->parent = temp;
val = map_sub_char_table (c_function, function, parent, arg, val, range,
XCHAR_TABLE (parent)->defalt,
XCHAR_TABLE (parent)->parent);
parent);
table = parent;
}
@ -822,14 +928,22 @@ map_char_table (void (*c_function) (Lisp_Object, Lisp_Object, Lisp_Object), Lisp
if (c_function)
(*c_function) (arg, XCAR (range), val);
else
call2 (function, XCAR (range), val);
{
if (decoder)
val = decoder (table, val);
call2 (function, XCAR (range), val);
}
}
else
{
if (c_function)
(*c_function) (arg, range, val);
else
call2 (function, range, val);
{
if (decoder)
val = decoder (table, val);
call2 (function, range, val);
}
}
}
@ -983,10 +1097,316 @@ map_char_table_for_charset (void (*c_function) (Lisp_Object, Lisp_Object),
UNGCPRO;
}
/* Unicode character property tables.
This section provides a convenient and efficient way to get a
Unicode character property from C code (from Lisp, you must use
get-char-code-property).
The typical usage is to get a char-table for a specific property at
a proper initialization time as this:
Lisp_Object bidi_class_table = uniprop_table (intern ("bidi-class"));
and get a property value for character CH as this:
Lisp_Object bidi_class = CHAR_TABLE_REF (CH, bidi_class_table);
In this case, what you actually get is an index number to the
vector of property values (symbols nil, L, R, etc).
A table for Unicode character property has these characteristics:
o The purpose is `char-code-property-table', which implies that the
table has 5 extra slots.
o The second extra slot is a Lisp function, an index (integer) to
the array uniprop_decoder[], or nil. If it is a Lisp function, we
can't use such a table from C (at the moment). If it is nil, it
means that we don't have to decode values.
o The third extra slot is a Lisp function, an index (integer) to
the array uniprop_enncoder[], or nil. If it is a Lisp function, we
can't use such a table from C (at the moment). If it is nil, it
means that we don't have to encode values. */
/* Uncompress the IDXth element of sub-char-table TABLE. */
static Lisp_Object
uniprop_table_uncompress (Lisp_Object table, int idx)
{
Lisp_Object val = XSUB_CHAR_TABLE (table)->contents[idx];
int min_char = (XINT (XSUB_CHAR_TABLE (table)->min_char)
+ chartab_chars[2] * idx);
Lisp_Object sub = make_sub_char_table (3, min_char, Qnil);
struct Lisp_Sub_Char_Table *subtbl = XSUB_CHAR_TABLE (sub);
const unsigned char *p, *pend;
int i;
XSUB_CHAR_TABLE (table)->contents[idx] = sub;
p = SDATA (val), pend = p + SBYTES (val);
if (*p == 1)
{
/* SIMPLE TABLE */
p++;
idx = STRING_CHAR_ADVANCE (p);
while (p < pend && idx < chartab_chars[2])
{
int v = STRING_CHAR_ADVANCE (p);
subtbl->contents[idx++] = v > 0 ? make_number (v) : Qnil;
}
}
else if (*p == 2)
{
/* RUN-LENGTH TABLE */
p++;
for (idx = 0; p < pend; )
{
int v = STRING_CHAR_ADVANCE (p);
int count = 1;
int len;
if (p < pend)
{
count = STRING_CHAR_AND_LENGTH (p, len);
if (count < 128)
count = 1;
else
{
count -= 128;
p += len;
}
}
while (count-- > 0)
subtbl->contents[idx++] = make_number (v);
}
}
/* It seems that we don't need this function because C code won't need
to get a property that is compressed in this form. */
#if 0
else if (*p == 0)
{
/* WORD-LIST TABLE */
}
#endif
return sub;
}
/* Decode VALUE as an elemnet of char-table TABLE. */
static Lisp_Object
uniprop_decode_value_run_length (Lisp_Object table, Lisp_Object value)
{
if (VECTORP (XCHAR_TABLE (table)->extras[4]))
{
Lisp_Object valvec = XCHAR_TABLE (table)->extras[4];
if (XINT (value) >= 0 && XINT (value) < ASIZE (valvec))
value = AREF (valvec, XINT (value));
}
return value;
}
static uniprop_decoder_t uniprop_decoder [] =
{ uniprop_decode_value_run_length };
static int uniprop_decoder_count
= (sizeof uniprop_decoder) / sizeof (uniprop_decoder[0]);
/* Return the decoder of char-table TABLE or nil if none. */
static uniprop_decoder_t
uniprop_get_decoder (Lisp_Object table)
{
int i;
if (! INTEGERP (XCHAR_TABLE (table)->extras[1]))
return NULL;
i = XINT (XCHAR_TABLE (table)->extras[1]);
if (i < 0 || i >= uniprop_decoder_count)
return NULL;
return uniprop_decoder[i];
}
/* Encode VALUE as an element of char-table TABLE which contains
characters as elements. */
static Lisp_Object
uniprop_encode_value_character (Lisp_Object table, Lisp_Object value)
{
if (! NILP (value) && ! CHARACTERP (value))
wrong_type_argument (Qintegerp, value);
return value;
}
/* Encode VALUE as an element of char-table TABLE which adopts RUN-LENGTH
compression. */
static Lisp_Object
uniprop_encode_value_run_length (Lisp_Object table, Lisp_Object value)
{
Lisp_Object *value_table = XVECTOR (XCHAR_TABLE (table)->extras[4])->contents;
int i, size = ASIZE (XCHAR_TABLE (table)->extras[4]);
for (i = 0; i < size; i++)
if (EQ (value, value_table[i]))
break;
if (i == size)
wrong_type_argument (build_string ("Unicode property value"), value);
return make_number (i);
}
/* Encode VALUE as an element of char-table TABLE which adopts RUN-LENGTH
compression and contains numbers as elements . */
static Lisp_Object
uniprop_encode_value_numeric (Lisp_Object table, Lisp_Object value)
{
Lisp_Object *value_table = XVECTOR (XCHAR_TABLE (table)->extras[4])->contents;
int i, size = ASIZE (XCHAR_TABLE (table)->extras[4]);
CHECK_NUMBER (value);
for (i = 0; i < size; i++)
if (EQ (value, value_table[i]))
break;
value = make_number (i);
if (i == size)
{
Lisp_Object args[2];
args[0] = XCHAR_TABLE (table)->extras[4];
args[1] = Fmake_vector (make_number (1), value);
XCHAR_TABLE (table)->extras[4] = Fvconcat (2, args);
}
return make_number (i);
}
static uniprop_encoder_t uniprop_encoder[] =
{ uniprop_encode_value_character,
uniprop_encode_value_run_length,
uniprop_encode_value_numeric };
static int uniprop_encoder_count
= (sizeof uniprop_encoder) / sizeof (uniprop_encoder[0]);
/* Return the encoder of char-table TABLE or nil if none. */
static uniprop_decoder_t
uniprop_get_encoder (Lisp_Object table)
{
int i;
if (! INTEGERP (XCHAR_TABLE (table)->extras[2]))
return NULL;
i = XINT (XCHAR_TABLE (table)->extras[2]);
if (i < 0 || i >= uniprop_encoder_count)
return NULL;
return uniprop_encoder[i];
}
/* Return a char-table for Unicode character property PROP. This
function may load a Lisp file and thus may cause
garbage-collection. */
Lisp_Object
uniprop_table (Lisp_Object prop)
{
Lisp_Object val, table, result;
val = Fassq (prop, Vchar_code_property_alist);
if (! CONSP (val))
return Qnil;
table = XCDR (val);
if (STRINGP (table))
{
struct gcpro gcpro1;
GCPRO1 (val);
result = Fload (concat2 (build_string ("international/"), table),
Qt, Qt, Qt, Qt);
UNGCPRO;
if (NILP (result))
return Qnil;
table = XCDR (val);
}
if (! CHAR_TABLE_P (table)
|| ! UNIPROP_TABLE_P (table))
return Qnil;
val = XCHAR_TABLE (table)->extras[1];
if (INTEGERP (val)
? (XINT (val) < 0 || XINT (val) >= uniprop_decoder_count)
: ! NILP (val))
return Qnil;
/* Prepare ASCII values in advance for CHAR_TABLE_REF. */
XCHAR_TABLE (table)->ascii = char_table_ascii (table);
return table;
}
DEFUN ("unicode-property-table-internal", Funicode_property_table_internal,
Sunicode_property_table_internal, 1, 1, 0,
doc: /* Return a char-table for Unicode character property PROP.
Use `get-unicode-property-internal' and
`put-unicode-property-internal' instead of `aref' and `aset' to get
and put an element value. */)
(Lisp_Object prop)
{
Lisp_Object table = uniprop_table (prop);
if (CHAR_TABLE_P (table))
return table;
return Fcdr (Fassq (prop, Vchar_code_property_alist));
}
DEFUN ("get-unicode-property-internal", Fget_unicode_property_internal,
Sget_unicode_property_internal, 2, 2, 0,
doc: /* Return an element of CHAR-TABLE for character CH.
CHAR-TABLE must be what returned by `unicode-property-table-internal'. */)
(Lisp_Object char_table, Lisp_Object ch)
{
Lisp_Object val;
uniprop_decoder_t decoder;
CHECK_CHAR_TABLE (char_table);
CHECK_CHARACTER (ch);
if (! UNIPROP_TABLE_P (char_table))
error ("Invalid Unicode property table");
val = CHAR_TABLE_REF (char_table, XINT (ch));
decoder = uniprop_get_decoder (char_table);
return (decoder ? decoder (char_table, val) : val);
}
DEFUN ("put-unicode-property-internal", Fput_unicode_property_internal,
Sput_unicode_property_internal, 3, 3, 0,
doc: /* Set an element of CHAR-TABLE for character CH to VALUE.
CHAR-TABLE must be what returned by `unicode-property-table-internal'. */)
(Lisp_Object char_table, Lisp_Object ch, Lisp_Object value)
{
uniprop_encoder_t encoder;
CHECK_CHAR_TABLE (char_table);
CHECK_CHARACTER (ch);
if (! UNIPROP_TABLE_P (char_table))
error ("Invalid Unicode property table");
encoder = uniprop_get_encoder (char_table);
if (encoder)
value = encoder (char_table, value);
CHAR_TABLE_SET (char_table, XINT (ch), value);
return Qnil;
}
void
syms_of_chartab (void)
{
DEFSYM (Qchar_code_property_table, "char-code-property-table");
defsubr (&Smake_char_table);
defsubr (&Schar_table_parent);
defsubr (&Schar_table_subtype);
@ -998,4 +1418,19 @@ syms_of_chartab (void)
defsubr (&Sset_char_table_default);
defsubr (&Soptimize_char_table);
defsubr (&Smap_char_table);
defsubr (&Sunicode_property_table_internal);
defsubr (&Sget_unicode_property_internal);
defsubr (&Sput_unicode_property_internal);
/* Each element has the form (PROP . TABLE).
PROP is a symbol representing a character property.
TABLE is a char-table containing the property value for each character.
TABLE may be a name of file to load to build a char-table.
This variable should be modified only through
`define-char-code-property'. */
DEFVAR_LISP ("char-code-property-alist", Vchar_code_property_alist,
doc: /* Alist of character property name vs char-table containing property values.
Internal use only. */);
Vchar_code_property_alist = Qnil;
}

View File

@ -976,9 +976,8 @@ static int _work_char;
((C) > ' ' \
&& ((C) == 0x200C || (C) == 0x200D \
|| (_work_val = CHAR_TABLE_REF (Vunicode_category_table, (C)), \
(SYMBOLP (_work_val) \
&& (_work_char = SDATA (SYMBOL_NAME (_work_val))[0]) != 'C' \
&& _work_char != 'Z'))))
(INTEGERP (_work_val) \
&& (XINT (_work_val) <= UNICODE_CATEGORY_So)))))
/* Update cmp_it->stop_pos to the next position after CHARPOS (and
BYTEPOS) where character composition may happen. If BYTEPOS is

View File

@ -1773,7 +1773,11 @@ extern int face_change_count;
/* Data type for describing the bidirectional character types. The
first 7 must be at the beginning, because they are the only values
valid in the `bidi_type' member of `struct glyph'; we only reserve
3 bits for it, so we cannot use there values larger than 7. */
3 bits for it, so we cannot use there values larger than 7.
The order of members must be in sync with the 8th element of the
member of unidata-prop-alist (in admin/unidata/unidata-getn.el) for
Unicode character property `bidi-class'. */
typedef enum {
UNKNOWN_BT = 0,
STRONG_L, /* strong left-to-right */

View File

@ -3739,8 +3739,9 @@ font_range (EMACS_INT pos, EMACS_INT *limit, struct window *w, struct face *face
else
FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, string, pos, pos_byte);
category = CHAR_TABLE_REF (Vunicode_category_table, c);
if (EQ (category, QCf)
|| CHAR_VARIATION_SELECTOR_P (c))
if (INTEGERP (category)
&& (XINT (category) == UNICODE_CATEGORY_Cf
|| CHAR_VARIATION_SELECTOR_P (c)))
continue;
if (NILP (font_object))
{