mirror of
https://git.savannah.gnu.org/git/emacs.git
synced 2024-12-28 10:56:36 +00:00
Make ucs-names a hash table (Bug#28302)
* etc/NEWS: Mention the type change. * lisp/descr-text.el (describe-char): Use gethash to access ucs-names. Hardcode BEL's name into the function instead of needlessly mapping over the hash table in the spirit of rassoc. * lisp/international/mule-cmds.el (ucs-names): Fix variable and function docstrings. Initialize a hash table for ucs-names--the number of entries is 42845 here. Switch to hash-table getters/setters. (mule--ucs-names-annotation): Use hash-table getter. (char-from-name): Upcase the string if ignore-case is truthy. * lisp/leim/quail/latin-ltx.el: Use maphash instead of dolist.
This commit is contained in:
parent
e6a2b4c2df
commit
96c2c098ae
3
etc/NEWS
3
etc/NEWS
@ -1154,6 +1154,9 @@ table implementation. This uses a new bytecode op 'switch', which
|
||||
isn't compatible with previous Emacs versions. This functionality can
|
||||
be disabled by setting 'byte-compile-cond-use-jump-table' to nil.
|
||||
|
||||
---
|
||||
** The alist 'ucs-names' is now a hash table.
|
||||
|
||||
---
|
||||
** 'C-up', 'C-down', 'C-left' and 'C-right' are now defined in term
|
||||
mode to send the same escape sequences that xterm does. This makes
|
||||
|
@ -617,16 +617,16 @@ relevant to POS."
|
||||
(list
|
||||
(let* ((names (ucs-names))
|
||||
(name
|
||||
(or (when (= char 7)
|
||||
(or (when (= char ?\a)
|
||||
;; Special case for "BELL" which is
|
||||
;; apparently the only char which
|
||||
;; doesn't have a new name and whose
|
||||
;; old-name is shadowed by a newer char
|
||||
;; with that name (bug#25641).
|
||||
(car (rassoc char names)))
|
||||
"BELL (BEL)")
|
||||
(get-char-code-property char 'name)
|
||||
(get-char-code-property char 'old-name))))
|
||||
(if (and name (assoc-string name names))
|
||||
(if (and name (gethash name names))
|
||||
(format
|
||||
"type \"C-x 8 RET %x\" or \"C-x 8 RET %s\""
|
||||
char name)
|
||||
|
@ -2923,10 +2923,10 @@ on encoding."
|
||||
(make-obsolete-variable 'nonascii-translation-table "do not use it." "23.1")
|
||||
|
||||
(defvar ucs-names nil
|
||||
"Alist of cached (CHAR-NAME . CHAR-CODE) pairs.")
|
||||
"Hash table of cached CHAR-NAME keys to CHAR-CODE values.")
|
||||
|
||||
(defun ucs-names ()
|
||||
"Return alist of (CHAR-NAME . CHAR-CODE) pairs cached in `ucs-names'."
|
||||
"Return table of CHAR-NAME keys and CHAR-CODE values cached in `ucs-names'."
|
||||
(or ucs-names
|
||||
(let ((ranges
|
||||
'((#x0000 . #x33FF)
|
||||
@ -2954,38 +2954,39 @@ on encoding."
|
||||
;; (#x20000 . #xDFFFF) CJK Ideograph Extension A, B, etc, unused
|
||||
(#xE0000 . #xE01FF)))
|
||||
(gc-cons-threshold 10000000)
|
||||
names)
|
||||
(dolist (range ranges)
|
||||
(let ((c (car range))
|
||||
(end (cdr range)))
|
||||
(while (<= c end)
|
||||
(names (make-hash-table :size 42943 :test #'equal)))
|
||||
(dolist (range ranges)
|
||||
(let ((c (car range))
|
||||
(end (cdr range)))
|
||||
(while (<= c end)
|
||||
(let ((new-name (get-char-code-property c 'name))
|
||||
(old-name (get-char-code-property c 'old-name)))
|
||||
;; In theory this code could end up pushing an "old-name" that
|
||||
;; shadows a "new-name" but in practice every time an
|
||||
;; `old-name' conflicts with a `new-name', the newer one has a
|
||||
;; higher code, so it gets pushed later!
|
||||
(if new-name (push (cons new-name c) names))
|
||||
(if old-name (push (cons old-name c) names))
|
||||
(setq c (1+ c))))))
|
||||
;; Special case for "BELL" which is apparently the only char which
|
||||
;; doesn't have a new name and whose old-name is shadowed by a newer
|
||||
;; char with that name.
|
||||
(setq ucs-names `(("BELL (BEL)" . 7) ,@names)))))
|
||||
;; In theory this code could end up pushing an "old-name" that
|
||||
;; shadows a "new-name" but in practice every time an
|
||||
;; `old-name' conflicts with a `new-name', the newer one has a
|
||||
;; higher code, so it gets pushed later!
|
||||
(if new-name (puthash new-name c names))
|
||||
(if old-name (puthash old-name c names))
|
||||
(setq c (1+ c))))))
|
||||
;; Special case for "BELL" which is apparently the only char which
|
||||
;; doesn't have a new name and whose old-name is shadowed by a newer
|
||||
;; char with that name.
|
||||
(puthash "BELL (BEL)" ?\a names)
|
||||
(setq ucs-names names))))
|
||||
|
||||
(defun mule--ucs-names-annotation (name)
|
||||
;; FIXME: It would be much better to add this annotation before rather than
|
||||
;; after the char name, so the annotations are aligned.
|
||||
;; FIXME: The default behavior of displaying annotations in italics
|
||||
;; doesn't work well here.
|
||||
(let ((char (assoc name ucs-names)))
|
||||
(when char (format " (%c)" (cdr char)))))
|
||||
(let ((char (gethash name ucs-names)))
|
||||
(when char (format " (%c)" char))))
|
||||
|
||||
(defun char-from-name (string &optional ignore-case)
|
||||
"Return a character as a number from its Unicode name STRING.
|
||||
If optional IGNORE-CASE is non-nil, ignore case in STRING.
|
||||
Return nil if STRING does not name a character."
|
||||
(or (cdr (assoc-string string (ucs-names) ignore-case))
|
||||
(or (gethash (if ignore-case (upcase string) string) (ucs-names))
|
||||
(let ((minus (string-match-p "-[0-9A-F]+\\'" string)))
|
||||
(when minus
|
||||
;; Parse names like "VARIATION SELECTOR-17" and "CJK
|
||||
|
@ -75,20 +75,20 @@ system, including many technical ones. Examples:
|
||||
(`(,seq ,re)
|
||||
(let ((count 0)
|
||||
(re (eval re t)))
|
||||
(dolist (pair (ucs-names))
|
||||
(let ((name (car pair))
|
||||
(char (cdr pair)))
|
||||
(when (and (characterp char) ;; Ignore char-ranges.
|
||||
(string-match re name))
|
||||
(let ((keys (if (stringp seq)
|
||||
(replace-match seq nil nil name)
|
||||
(funcall seq name char))))
|
||||
(if (listp keys)
|
||||
(dolist (x keys)
|
||||
(setq count (1+ count))
|
||||
(push (list x char) newrules))
|
||||
(setq count (1+ count))
|
||||
(push (list keys char) newrules))))))
|
||||
(maphash
|
||||
(lambda (name char)
|
||||
(when (and (characterp char) ;; Ignore char-ranges.
|
||||
(string-match re name))
|
||||
(let ((keys (if (stringp seq)
|
||||
(replace-match seq nil nil name)
|
||||
(funcall seq name char))))
|
||||
(if (listp keys)
|
||||
(dolist (x keys)
|
||||
(setq count (1+ count))
|
||||
(push (list x char) newrules))
|
||||
(setq count (1+ count))
|
||||
(push (list keys char) newrules)))))
|
||||
(ucs-names))
|
||||
;; (message "latin-ltx: %d mappings for %S" count re)
|
||||
))))
|
||||
(setq newrules (delete-dups newrules))
|
||||
@ -206,7 +206,7 @@ system, including many technical ones. Examples:
|
||||
|
||||
((lambda (name char)
|
||||
(let* ((base (concat (match-string 1 name) (match-string 3 name)))
|
||||
(basechar (cdr (assoc base (ucs-names)))))
|
||||
(basechar (gethash base (ucs-names))))
|
||||
(when (latin-ltx--ascii-p basechar)
|
||||
(string (if (match-end 2) ?^ ?_) basechar))))
|
||||
"\\(.*\\)SU\\(?:B\\|\\(PER\\)\\)SCRIPT \\(.*\\)")
|
||||
|
Loading…
Reference in New Issue
Block a user