mirror of
https://git.savannah.gnu.org/git/emacs.git
synced 2024-11-25 07:28:20 +00:00
Modify the coding system compound-text-with-extensions to conform to the spec of Compound Text.
This commit is contained in:
parent
4ce5a4ccd4
commit
6b4d96c2f0
@ -1,3 +1,20 @@
|
||||
2010-08-04 Kenichi Handa <handa@m17n.org>
|
||||
|
||||
* language/cyrillic.el: Don't add "microsoft-cp1251" to
|
||||
ctext-non-standard-encodings-alist here.
|
||||
|
||||
* international/mule.el (ctext-non-standard-encodings-alist): Add
|
||||
"koi8-r" and "microsoft-cp1251".
|
||||
(ctext-standard-encodings): New variable.
|
||||
(ctext-non-standard-encodings-table): List only elements for
|
||||
non-standard encodings.
|
||||
(ctext-pre-write-conversion): Adjusted for the above change.
|
||||
Check ctext-standard-encodings.
|
||||
|
||||
* international/mule-conf.el (compound-text): Doc fix.
|
||||
(ctext-no-compositions): Doc fix.
|
||||
(compound-text-with-extensions): Doc fix.
|
||||
|
||||
2010-07-23 Juanma Barranquero <lekktu@gmail.com>
|
||||
|
||||
* help-fns.el (find-lisp-object-file-name): Doc fix (bug#6494).
|
||||
|
@ -1410,9 +1410,10 @@ is treated as a character."
|
||||
:flags '(ascii-at-eol ascii-at-cntl designation single-shift composition))
|
||||
|
||||
(define-coding-system 'compound-text
|
||||
"Compound text based generic encoding for decoding unknown messages.
|
||||
|
||||
This coding system does not support extended segments of CTEXT."
|
||||
"Compound text based generic encoding.
|
||||
This coding system is an extension of X's \"Compound Text Encoding\".
|
||||
It encodes many characters using the normal ISO-2022 designation sequences,
|
||||
but it doesn't support extended segments of CTEXT."
|
||||
:coding-type 'iso-2022
|
||||
:mnemonic ?x
|
||||
:charset-list 'iso-2022
|
||||
@ -1432,7 +1433,7 @@ This coding system does not support extended segments of CTEXT."
|
||||
;; not have a mime-charset property, to prevent it from showing up
|
||||
;; close to the beginning of coding systems ordered by priority.
|
||||
(define-coding-system 'ctext-no-compositions
|
||||
"Compound text based generic encoding for decoding unknown messages.
|
||||
"Compound text based generic encoding.
|
||||
|
||||
Like `compound-text', but does not produce escape sequences for compositions."
|
||||
:coding-type 'iso-2022
|
||||
@ -1445,8 +1446,9 @@ Like `compound-text', but does not produce escape sequences for compositions."
|
||||
(define-coding-system 'compound-text-with-extensions
|
||||
"Compound text encoding with ICCCM Extended Segment extensions.
|
||||
|
||||
See the variable `ctext-non-standard-encodings-alist' for the
|
||||
detail about how extended segments are handled.
|
||||
See the variables `ctext-standard-encodings' and
|
||||
`ctext-non-standard-encodings-alist' for the detail about how
|
||||
extended segments are handled.
|
||||
|
||||
This coding system should be used only for X selections. It is inappropriate
|
||||
for decoding and encoding files, process I/O, etc."
|
||||
|
@ -1408,7 +1408,9 @@ This function is provided for backward compatibility."
|
||||
'(("big5-0" big5 2 big5)
|
||||
("ISO8859-14" iso-8859-14 1 latin-iso8859-14)
|
||||
("ISO8859-15" iso-8859-15 1 latin-iso8859-15)
|
||||
("gbk-0" gbk 2 chinese-gbk)))
|
||||
("gbk-0" gbk 2 chinese-gbk)
|
||||
("koi8-r" koi8-r 1 koi8-r)
|
||||
("microsoft-cp1251" windows-1251 1 windows-1251)))
|
||||
"Alist of non-standard encoding names vs the corresponding usages in CTEXT.
|
||||
|
||||
It controls how extended segments of a compound text are handled
|
||||
@ -1497,6 +1499,20 @@ Each element must be one of the names listed in the variable
|
||||
(goto-char (point-min))
|
||||
(- (point-max) (point)))))
|
||||
|
||||
(defvar ctext-standard-encodings
|
||||
'(ascii latin-jisx0201 katakana-jisx0201
|
||||
latin-iso8859-1 latin-iso8859-2 latin-iso8859-3 latin-iso8859-4
|
||||
greek-iso8859-7 arabic-iso8859-6 hebrew-iso8859-8 cyrillic-iso8859-5
|
||||
latin-iso8859-9
|
||||
chinese-gb2312 japanese-jisx0208 korean-ksc5601)
|
||||
"List of approved standard encodings (i.e. charsets) of X's Compound Text.
|
||||
Coding-system `compound-text-with-extensions' encodes a character
|
||||
belonging to any of those charsets using the normal ISO2022
|
||||
designation sequence unless the current language environment or
|
||||
the variable `ctext-non-standard-encodings' decide to use an extended
|
||||
segment of CTEXT for that character. See also the documentation
|
||||
of `ctext-non-standard-encodings-alist'.")
|
||||
|
||||
;; Return an alist of CHARSET vs CTEXT-USAGE-INFO generated from
|
||||
;; `ctext-non-standard-encodings' and a list specified by the key
|
||||
;; `ctext-non-standard-encodings' for the currrent language
|
||||
@ -1508,77 +1524,74 @@ Each element must be one of the names listed in the variable
|
||||
;; is encoded using UTF-8 encoding extention.
|
||||
|
||||
(defun ctext-non-standard-encodings-table ()
|
||||
(let (table)
|
||||
;; Setup charsets specified by the key
|
||||
;; `ctext-non-standard-encodings' for the current language
|
||||
;; environment and in `ctext-non-standard-encodings'.
|
||||
(dolist (encoding (append
|
||||
(get-language-info current-language-environment
|
||||
'ctext-non-standard-encodings)
|
||||
ctext-non-standard-encodings))
|
||||
(let* ((slot (assoc encoding ctext-non-standard-encodings-alist))
|
||||
(let* ((table (append ctext-non-standard-encodings
|
||||
(copy-sequence
|
||||
(get-language-info current-language-environment
|
||||
'ctext-non-standard-encodings))))
|
||||
(tail table)
|
||||
elt)
|
||||
(while tail
|
||||
(setq elt (car tail))
|
||||
(let* ((slot (assoc elt ctext-non-standard-encodings-alist))
|
||||
(charset (nth 3 slot)))
|
||||
(if (charsetp charset)
|
||||
(push (cons charset slot) table)
|
||||
(dolist (cs charset)
|
||||
(push (cons cs slot) table)))))
|
||||
|
||||
;; Next prepend charsets for ISO2022 designation sequence.
|
||||
(dolist (charset charset-list)
|
||||
(let ((final (plist-get (charset-plist charset) :iso-final-char)))
|
||||
(if (and (integerp final)
|
||||
(>= final #x40) (<= final #x7e)
|
||||
;; Exclude ascii and chinese-cns11643-X.
|
||||
(not (eq charset 'ascii))
|
||||
(not (string-match "cns11643" (symbol-name charset))))
|
||||
(push (cons charset nil) table))))
|
||||
|
||||
;; Returned reversed list so that the charsets specified by the
|
||||
;; key `ctext-non-standard-encodings' for the current language
|
||||
;; have the highest priority.
|
||||
(nreverse table)))
|
||||
(setcar tail (cons charset slot))
|
||||
(setcar tail (cons (car charset) slot))
|
||||
(dolist (cs (cdr charset))
|
||||
(setcdr tail
|
||||
(cons (cons (car cs) slot) (cdr tail)))
|
||||
(setq tail (cdr tail))))
|
||||
(setq tail (cdr tail))))
|
||||
table))
|
||||
|
||||
(defun ctext-pre-write-conversion (from to)
|
||||
"Encode characters between FROM and TO as Compound Text w/Extended Segments.
|
||||
|
||||
If FROM is a string, or if the current buffer is not the one set up for us
|
||||
by `encode-coding-string', generate a new temp buffer, insert the text,
|
||||
and convert it in the temporary buffer. Otherwise, convert in-place."
|
||||
If FROM is a string, generate a new temp buffer, insert the text,
|
||||
and convert it in the temporary buffer. Otherwise, convert
|
||||
in-place."
|
||||
(save-match-data
|
||||
;; Setup a working buffer if necessary.
|
||||
(when (stringp from)
|
||||
(set-buffer (generate-new-buffer " *temp"))
|
||||
(set-buffer-multibyte (multibyte-string-p from))
|
||||
(insert from))
|
||||
|
||||
;; Now we can encode the whole buffer.
|
||||
(let ((encoding-table (ctext-non-standard-encodings-table))
|
||||
last-coding-system-used
|
||||
last-pos last-encoding-info
|
||||
encoding-info end-pos ch)
|
||||
(goto-char (setq last-pos (point-min)))
|
||||
(setq end-pos (point-marker))
|
||||
(while (re-search-forward "[^\000-\177]+" nil t)
|
||||
;; Found a sequence of non-ASCII characters.
|
||||
(setq last-pos (match-beginning 0)
|
||||
ch (char-after last-pos)
|
||||
last-encoding-info (catch 'tag
|
||||
(dolist (elt encoding-table)
|
||||
(if (encode-char ch (car elt))
|
||||
(throw 'tag (cdr elt))))
|
||||
'utf-8))
|
||||
(set-marker end-pos (match-end 0))
|
||||
(goto-char (1+ last-pos))
|
||||
(catch 'tag
|
||||
(while t
|
||||
(setq encoding-info
|
||||
(if (< (point) end-pos)
|
||||
(catch 'tag
|
||||
(setq ch (following-char))
|
||||
(dolist (elt encoding-table)
|
||||
(if (encode-char ch (car elt))
|
||||
(throw 'tag (cdr elt))))
|
||||
'utf-8)))
|
||||
(insert from)
|
||||
(setq from 1 to (point-max)))
|
||||
(save-restriction
|
||||
(narrow-to-region from to)
|
||||
(let ((encoding-table (ctext-non-standard-encodings-table))
|
||||
(charset-list ctext-standard-encodings)
|
||||
last-coding-system-used
|
||||
last-pos last-encoding-info
|
||||
encoding-info end-pos ch charset)
|
||||
(dolist (elt encoding-table)
|
||||
(push (car elt) charset-list))
|
||||
(goto-char (setq last-pos from))
|
||||
(setq end-pos (point-marker))
|
||||
(while (re-search-forward "[^\000-\177]+" nil t)
|
||||
;; Found a sequence of non-ASCII characters.
|
||||
(setq last-pos (match-beginning 0)
|
||||
ch (char-after last-pos)
|
||||
charset (char-charset ch charset-list)
|
||||
last-encoding-info
|
||||
(if charset
|
||||
(or (cdr (assq charset encoding-table))
|
||||
charset)
|
||||
'utf-8))
|
||||
(set-marker end-pos (match-end 0))
|
||||
(goto-char (1+ last-pos))
|
||||
(while (marker-position end-pos)
|
||||
(if (< (point) end-pos)
|
||||
(progn
|
||||
(setq charset (char-charset (following-char) charset-list)
|
||||
encoding-info
|
||||
(if charset
|
||||
(or (cdr (assq charset encoding-table))
|
||||
charset)
|
||||
'utf-8))
|
||||
(forward-char 1))
|
||||
(setq encoding-info nil)
|
||||
(set-marker end-pos nil))
|
||||
(unless (eq last-encoding-info encoding-info)
|
||||
(cond ((consp last-encoding-info)
|
||||
;; Encode the previous range using an extended
|
||||
@ -1609,14 +1622,12 @@ and convert it in the temporary buffer. Otherwise, convert in-place."
|
||||
(save-excursion
|
||||
(goto-char last-pos)
|
||||
(insert "\e%G"))
|
||||
(insert "\e%@")))
|
||||
(insert "\e%@"))
|
||||
(t
|
||||
(put-text-property last-pos (point) 'charset charset)))
|
||||
(setq last-pos (point)
|
||||
last-encoding-info encoding-info))
|
||||
(if (< (point) end-pos)
|
||||
(forward-char 1)
|
||||
(throw 'tag nil)))))
|
||||
(set-marker end-pos nil)
|
||||
(goto-char (point-min))))
|
||||
last-encoding-info encoding-info))))
|
||||
(goto-char (point-min)))))
|
||||
;; Must return nil, as build_annotations_2 expects that.
|
||||
nil)
|
||||
|
||||
|
@ -239,13 +239,6 @@ Support for Russian using koi8-r and the russian-computer input method.")
|
||||
(documentation . "Support for Tajik using KOI8-T."))
|
||||
'("Cyrillic"))
|
||||
|
||||
(let ((elt `("microsoft-cp1251" windows-1251 1
|
||||
,(get 'encode-windows-1251 'translation-table)))
|
||||
(slot (assoc "microsoft-cp1251" ctext-non-standard-encodings-alist)))
|
||||
(if slot
|
||||
(setcdr slot (cdr elt))
|
||||
(push elt ctext-non-standard-encodings-alist)))
|
||||
|
||||
(set-language-info-alist
|
||||
"Bulgarian" `((coding-system windows-1251)
|
||||
(coding-priority windows-1251)
|
||||
|
Loading…
Reference in New Issue
Block a user