mirror of
https://git.savannah.gnu.org/git/emacs.git
synced 2024-12-26 10:49:33 +00:00
New function ‘char-from-name’
This also fixes the mishandling of "\N{CJK COMPATIBILITY IDEOGRAPH-F900}", "\N{VARIATION SELECTOR-1}", etc. Problem reported by Eli Zaretskii in: http://lists.gnu.org/archive/html/emacs-devel/2016-04/msg00614.html * doc/lispref/nonascii.texi (Character Codes), etc/NEWS: Document this. * lisp/international/mule-cmds.el (char-from-name): New function. (read-char-by-name): Use it. Document that "BED" is treated as a name, not as a hexadecimal number. Reject out-of-range integers, floating-point numbers, and strings with trailing junk. * src/lread.c (character_name_to_code): Call char-from-name instead of inspecting ucs-names directly, so that we handle computed names like "VARIATION SELECTOR-1". Do not use an auto string, since char-from-name might GC. * test/src/lread-tests.el: Add tests for new behavior, and fix some old tests that were wrong.
This commit is contained in:
parent
f069d85450
commit
86d083438d
@ -420,6 +420,18 @@ codepoint can have.
|
||||
@end example
|
||||
@end defun
|
||||
|
||||
@defun char-from-name string &optional ignore-case
|
||||
This function returns the character whose Unicode name is @var{string}.
|
||||
If @var{ignore-case} is non-@code{nil}, case is ignored in @var{string}.
|
||||
This function returns @code{nil} if @var{string} does not name a character.
|
||||
|
||||
@example
|
||||
;; U+03A3
|
||||
(= (char-from-name "GREEK CAPITAL LETTER SIGMA") #x03A3)
|
||||
@result{} t
|
||||
@end example
|
||||
@end defun
|
||||
|
||||
@defun get-byte &optional pos string
|
||||
This function returns the byte at character position @var{pos} in the
|
||||
current buffer. If the current buffer is unibyte, this is literally
|
||||
|
4
etc/NEWS
4
etc/NEWS
@ -390,6 +390,10 @@ interpreting consecutive runs of numerical characters as numbers, and
|
||||
compares their numerical values. According to this predicate,
|
||||
"foo2.png" is smaller than "foo12.png".
|
||||
|
||||
+++
|
||||
** The new function 'char-from-name' converts a Unicode name string
|
||||
to the corresponding character code.
|
||||
|
||||
+++
|
||||
** New functions 'sxhash-eq' and 'sxhash-eql' return hash codes of a
|
||||
Lisp object suitable for use with 'eq' and 'eql' correspondingly. If
|
||||
|
@ -2978,6 +2978,27 @@ on encoding."
|
||||
(let ((char (assoc name ucs-names)))
|
||||
(when char (format " (%c)" (cdr char)))))
|
||||
|
||||
(defun char-from-name (string &optional ignore-case)
|
||||
"Return a character as a number from its Unicode name STRING.
|
||||
If optional IGNORE-CASE is non-nil, ignore case in STRING.
|
||||
Return nil if STRING does not name a character."
|
||||
(or (cdr (assoc-string string (ucs-names) ignore-case))
|
||||
(let ((minus (string-match-p "-[0-9A-F]+\\'" string)))
|
||||
(when minus
|
||||
;; Parse names like "VARIATION SELECTOR-17" and "CJK
|
||||
;; COMPATIBILITY IDEOGRAPH-F900" that are not in ucs-names.
|
||||
(ignore-errors
|
||||
(let* ((case-fold-search ignore-case)
|
||||
(vs (string-match-p "\\`VARIATION SELECTOR-" string))
|
||||
(minus-num (string-to-number (substring string minus)
|
||||
(if vs 10 16)))
|
||||
(vs-offset (if vs (if (< minus-num -16) #xE00EF #xFDFF) 0))
|
||||
(code (- vs-offset minus-num))
|
||||
(name (get-char-code-property code 'name)))
|
||||
(when (eq t (compare-strings string nil nil name nil nil
|
||||
ignore-case))
|
||||
code)))))))
|
||||
|
||||
(defun read-char-by-name (prompt)
|
||||
"Read a character by its Unicode name or hex number string.
|
||||
Display PROMPT and read a string that represents a character by its
|
||||
@ -2991,9 +3012,11 @@ preceded by an asterisk `*' and use completion, it will show all
|
||||
the characters whose names include that substring, not necessarily
|
||||
at the beginning of the name.
|
||||
|
||||
This function also accepts a hexadecimal number of Unicode code
|
||||
point or a number in hash notation, e.g. #o21430 for octal,
|
||||
#x2318 for hex, or #10r8984 for decimal."
|
||||
Accept a name like \"CIRCULATION FUNCTION\", a hexadecimal
|
||||
number like \"2A10\", or a number in hash notation (e.g.,
|
||||
\"#x2a10\" for hex, \"10r10768\" for decimal, or \"#o25020\" for
|
||||
octal). Treat otherwise-ambiguous strings like \"BED\" (U+1F6CF)
|
||||
as names, not numbers."
|
||||
(let* ((enable-recursive-minibuffers t)
|
||||
(completion-ignore-case t)
|
||||
(input
|
||||
@ -3006,13 +3029,13 @@ point or a number in hash notation, e.g. #o21430 for octal,
|
||||
(category . unicode-name))
|
||||
(complete-with-action action (ucs-names) string pred)))))
|
||||
(char
|
||||
(cond
|
||||
((string-match-p "\\`[0-9a-fA-F]+\\'" input)
|
||||
(string-to-number input 16))
|
||||
((string-match-p "\\`#" input)
|
||||
(read input))
|
||||
(t
|
||||
(cdr (assoc-string input (ucs-names) t))))))
|
||||
(cond
|
||||
((char-from-name input t))
|
||||
((string-match-p "\\`[0-9a-fA-F]+\\'" input)
|
||||
(ignore-errors (string-to-number input 16)))
|
||||
((string-match-p "\\`#\\([bBoOxX]\\|[0-9]+[rR]\\)[0-9a-zA-Z]+\\'"
|
||||
input)
|
||||
(ignore-errors (read input))))))
|
||||
(unless (characterp char)
|
||||
(error "Invalid character"))
|
||||
char))
|
||||
|
29
src/lread.c
29
src/lread.c
@ -2155,26 +2155,15 @@ grow_read_buffer (void)
|
||||
static int
|
||||
character_name_to_code (char const *name, ptrdiff_t name_len)
|
||||
{
|
||||
Lisp_Object code;
|
||||
/* For "U+XXXX", pass the leading '+' to string_to_number to reject
|
||||
monstrosities like "U+-0000". */
|
||||
Lisp_Object code
|
||||
= (name[0] == 'U' && name[1] == '+'
|
||||
? string_to_number (name + 1, 16, false)
|
||||
: call2 (Qchar_from_name, make_unibyte_string (name, name_len), Qt));
|
||||
|
||||
/* Code point as U+XXXX.... */
|
||||
if (name[0] == 'U' && name[1] == '+')
|
||||
{
|
||||
/* Pass the leading '+' to string_to_number, so that it
|
||||
rejects monstrosities such as negative values. */
|
||||
code = string_to_number (name + 1, 16, false);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Look up the name in the table returned by 'ucs-names'. */
|
||||
AUTO_STRING_WITH_LEN (namestr, name, name_len);
|
||||
Lisp_Object names = call0 (Qucs_names);
|
||||
code = CDR (Fassoc (namestr, names));
|
||||
}
|
||||
|
||||
if (! (INTEGERP (code)
|
||||
&& 0 <= XINT (code) && XINT (code) <= MAX_UNICODE_CHAR
|
||||
&& ! char_surrogate_p (XINT (code))))
|
||||
if (! RANGED_INTEGERP (0, code, MAX_UNICODE_CHAR)
|
||||
|| char_surrogate_p (XINT (code)))
|
||||
{
|
||||
AUTO_STRING (format, "\\N{%s}");
|
||||
AUTO_STRING_WITH_LEN (namestr, name, name_len);
|
||||
@ -4829,5 +4818,5 @@ that are loaded before your customizations are read! */);
|
||||
DEFSYM (Qrehash_size, "rehash-size");
|
||||
DEFSYM (Qrehash_threshold, "rehash-threshold");
|
||||
|
||||
DEFSYM (Qucs_names, "ucs-names");
|
||||
DEFSYM (Qchar_from_name, "char-from-name");
|
||||
}
|
||||
|
@ -28,15 +28,55 @@
|
||||
(ert-deftest lread-char-number ()
|
||||
(should (equal (read "?\\N{U+A817}") #xA817)))
|
||||
|
||||
(ert-deftest lread-char-name ()
|
||||
(ert-deftest lread-char-name-1 ()
|
||||
(should (equal (read "?\\N{SYLOTI NAGRI LETTER \n DHO}")
|
||||
#xA817)))
|
||||
(ert-deftest lread-char-name-2 ()
|
||||
(should (equal (read "?\\N{BED}") #x1F6CF)))
|
||||
(ert-deftest lread-char-name-3 ()
|
||||
(should (equal (read "?\\N{U+BED}") #xBED)))
|
||||
(ert-deftest lread-char-name-4 ()
|
||||
(should (equal (read "?\\N{VARIATION SELECTOR-1}") #xFE00)))
|
||||
(ert-deftest lread-char-name-5 ()
|
||||
(should (equal (read "?\\N{VARIATION SELECTOR-16}") #xFE0F)))
|
||||
(ert-deftest lread-char-name-6 ()
|
||||
(should (equal (read "?\\N{VARIATION SELECTOR-17}") #xE0100)))
|
||||
(ert-deftest lread-char-name-7 ()
|
||||
(should (equal (read "?\\N{VARIATION SELECTOR-256}") #xE01EF)))
|
||||
(ert-deftest lread-char-name-8 ()
|
||||
(should (equal (read "?\\N{CJK COMPATIBILITY IDEOGRAPH-F900}") #xF900)))
|
||||
(ert-deftest lread-char-name-9 ()
|
||||
(should (equal (read "?\\N{CJK COMPATIBILITY IDEOGRAPH-FAD9}") #xFAD9)))
|
||||
(ert-deftest lread-char-name-10 ()
|
||||
(should (equal (read "?\\N{CJK COMPATIBILITY IDEOGRAPH-2F800}") #x2F800)))
|
||||
(ert-deftest lread-char-name-11 ()
|
||||
(should (equal (read "?\\N{CJK COMPATIBILITY IDEOGRAPH-2FA1D}") #x2FA1D)))
|
||||
|
||||
(ert-deftest lread-char-invalid-number ()
|
||||
(should-error (read "?\\N{U+110000}") :type 'invalid-read-syntax))
|
||||
|
||||
(ert-deftest lread-char-invalid-name ()
|
||||
(ert-deftest lread-char-invalid-name-1 ()
|
||||
(should-error (read "?\\N{DOES NOT EXIST}")) :type 'invalid-read-syntax)
|
||||
(ert-deftest lread-char-invalid-name-2 ()
|
||||
(should-error (read "?\\N{VARIATION SELECTOR-0}")) :type 'invalid-read-syntax)
|
||||
(ert-deftest lread-char-invalid-name-3 ()
|
||||
(should-error (read "?\\N{VARIATION SELECTOR-257}"))
|
||||
:type 'invalid-read-syntax)
|
||||
(ert-deftest lread-char-invalid-name-4 ()
|
||||
(should-error (read "?\\N{VARIATION SELECTOR--0}"))
|
||||
:type 'invalid-read-syntax)
|
||||
(ert-deftest lread-char-invalid-name-5 ()
|
||||
(should-error (read "?\\N{CJK COMPATIBILITY IDEOGRAPH-F8FF}"))
|
||||
:type 'invalid-read-syntax)
|
||||
(ert-deftest lread-char-invalid-name-6 ()
|
||||
(should-error (read "?\\N{CJK COMPATIBILITY IDEOGRAPH-FADA}"))
|
||||
:type 'invalid-read-syntax)
|
||||
(ert-deftest lread-char-invalid-name-7 ()
|
||||
(should-error (read "?\\N{CJK COMPATIBILITY IDEOGRAPH-2F7FF}"))
|
||||
:type 'invalid-read-syntax)
|
||||
(ert-deftest lread-char-invalid-name-8 ()
|
||||
(should-error (read "?\\N{CJK COMPATIBILITY IDEOGRAPH-2FA1E}"))
|
||||
:type 'invalid-read-syntax)
|
||||
|
||||
(ert-deftest lread-char-non-ascii-name ()
|
||||
(should-error (read "?\\N{LATIN CAPITAL LETTER Ø}")
|
||||
@ -55,13 +95,13 @@
|
||||
(should-error (read "?\\N{U+DFFF}") :type 'invalid-read-syntax))
|
||||
|
||||
(ert-deftest lread-string-char-number-1 ()
|
||||
(should (equal (read "a\\N{U+A817}b") "a\uA817bx")))
|
||||
(should (equal (read "\"a\\N{U+A817}b\"") "a\uA817b")))
|
||||
(ert-deftest lread-string-char-number-2 ()
|
||||
(should-error (read "?\\N{0.5}") :type 'invalid-read-syntax))
|
||||
(ert-deftest lread-string-char-number-3 ()
|
||||
(should-error (read "?\\N{U+-0}") :type 'invalid-read-syntax))
|
||||
|
||||
(ert-deftest lread-string-char-name ()
|
||||
(should (equal (read "a\\N{SYLOTI NAGRI LETTER DHO}b") "a\uA817b")))
|
||||
(should (equal (read "\"a\\N{SYLOTI NAGRI LETTER DHO}b\"") "a\uA817b")))
|
||||
|
||||
;;; lread-tests.el ends here
|
||||
|
Loading…
Reference in New Issue
Block a user