mirror of
https://git.savannah.gnu.org/git/emacs.git
synced 2024-12-03 08:30:09 +00:00
f89cb6b636
* lisp/language/japan-util.el (setup-japanese-environment-internal): Prefer UTF-8 for Cygwin and other Posix hosts; prefer Codepage 932 on DOS/Windows. (Bug#69493)
327 lines
14 KiB
EmacsLisp
327 lines
14 KiB
EmacsLisp
;;; japan-util.el --- utilities for Japanese -*- lexical-binding: t; -*-
|
||
|
||
;; Copyright (C) 2001-2024 Free Software Foundation, Inc.
|
||
;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
|
||
;; 2005, 2006, 2007, 2008, 2009, 2010, 2011
|
||
;; National Institute of Advanced Industrial Science and Technology (AIST)
|
||
;; Registration Number H14PRO021
|
||
|
||
;; Keywords: mule, multilingual, Japanese
|
||
|
||
;; This file is part of GNU Emacs.
|
||
|
||
;; GNU Emacs is free software: you can redistribute it and/or modify
|
||
;; it under the terms of the GNU General Public License as published by
|
||
;; the Free Software Foundation, either version 3 of the License, or
|
||
;; (at your option) any later version.
|
||
|
||
;; GNU Emacs is distributed in the hope that it will be useful,
|
||
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
;; GNU General Public License for more details.
|
||
|
||
;; You should have received a copy of the GNU General Public License
|
||
;; along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>.
|
||
|
||
;;; Commentary:
|
||
|
||
;;; Code:
|
||
|
||
;;;###autoload
|
||
(defun setup-japanese-environment-internal ()
  "Set up coding-system preference and character widths for Japanese.
Give priority to `japanese-cp932' when `system-type' is `windows-nt'
or `ms-dos', and to `utf-8' on every other system, then install the
`ja_JP' CJK character width table."
  (let ((preferred (if (memq system-type '(windows-nt ms-dos))
                       'japanese-cp932
                     'utf-8)))
    (prefer-coding-system preferred))
  (use-cjk-char-width-table 'ja_JP))
|
||
|
||
(defconst japanese-kana-table
  '((?あ ?ア ?ｱ) (?い ?イ ?ｲ) (?う ?ウ ?ｳ) (?え ?エ ?ｴ) (?お ?オ ?ｵ)
    (?か ?カ ?ｶ) (?き ?キ ?ｷ) (?く ?ク ?ｸ) (?け ?ケ ?ｹ) (?こ ?コ ?ｺ)
    (?さ ?サ ?ｻ) (?し ?シ ?ｼ) (?す ?ス ?ｽ) (?せ ?セ ?ｾ) (?そ ?ソ ?ｿ)
    (?た ?タ ?ﾀ) (?ち ?チ ?ﾁ) (?つ ?ツ ?ﾂ) (?て ?テ ?ﾃ) (?と ?ト ?ﾄ)
    (?な ?ナ ?ﾅ) (?に ?ニ ?ﾆ) (?ぬ ?ヌ ?ﾇ) (?ね ?ネ ?ﾈ) (?の ?ノ ?ﾉ)
    (?は ?ハ ?ﾊ) (?ひ ?ヒ ?ﾋ) (?ふ ?フ ?ﾌ) (?へ ?ヘ ?ﾍ) (?ほ ?ホ ?ﾎ)
    (?ま ?マ ?ﾏ) (?み ?ミ ?ﾐ) (?む ?ム ?ﾑ) (?め ?メ ?ﾒ) (?も ?モ ?ﾓ)
    (?や ?ヤ ?ﾔ) (?ゆ ?ユ ?ﾕ) (?よ ?ヨ ?ﾖ)
    (?ら ?ラ ?ﾗ) (?り ?リ ?ﾘ) (?る ?ル ?ﾙ) (?れ ?レ ?ﾚ) (?ろ ?ロ ?ﾛ)
    (?わ ?ワ ?ﾜ) (?ゐ ?ヰ "ｲ") (?ゑ ?ヱ "ｴ") (?を ?ヲ ?ｦ)
    (?ん ?ン ?ﾝ)
    (?が ?ガ "ｶﾞ") (?ぎ ?ギ "ｷﾞ") (?ぐ ?グ "ｸﾞ") (?げ ?ゲ "ｹﾞ") (?ご ?ゴ "ｺﾞ")
    (?ざ ?ザ "ｻﾞ") (?じ ?ジ "ｼﾞ") (?ず ?ズ "ｽﾞ") (?ぜ ?ゼ "ｾﾞ") (?ぞ ?ゾ "ｿﾞ")
    (?だ ?ダ "ﾀﾞ") (?ぢ ?ヂ "ﾁﾞ") (?づ ?ヅ "ﾂﾞ") (?で ?デ "ﾃﾞ") (?ど ?ド "ﾄﾞ")
    (?ば ?バ "ﾊﾞ") (?び ?ビ "ﾋﾞ") (?ぶ ?ブ "ﾌﾞ") (?べ ?ベ "ﾍﾞ") (?ぼ ?ボ "ﾎﾞ")
    (?ぱ ?パ "ﾊﾟ") (?ぴ ?ピ "ﾋﾟ") (?ぷ ?プ "ﾌﾟ") (?ぺ ?ペ "ﾍﾟ") (?ぽ ?ポ "ﾎﾟ")
    (?ぁ ?ァ ?ｧ) (?ぃ ?ィ ?ｨ) (?ぅ ?ゥ ?ｩ) (?ぇ ?ェ ?ｪ) (?ぉ ?ォ ?ｫ)
    (?っ ?ッ ?ｯ)
    (?ゃ ?ャ ?ｬ) (?ゅ ?ュ ?ｭ) (?ょ ?ョ ?ｮ)
    (?ゎ ?ヮ "ﾜ")
    (?ゝ ?ヽ) (?ゞ ?ヾ)
    ("う゛" ?ヴ "ｳﾞ") (nil ?ヵ "ｶ") (nil ?ヶ "ｹ"))
  "Japanese JISX0208 Kana character table.
Each element is of the form (HIRAGANA KATAKANA HANKAKU-KATAKANA), where
HIRAGANA and KATAKANA belong to `japanese-jisx0208',
HANKAKU-KATAKANA belongs to `japanese-jisx0201-kana'.
HIRAGANA may be nil, and HANKAKU-KATAKANA may be omitted.  Either of
them may also be a string instead of a character; a string of two
characters means the kana is written as that sequence (a base kana
followed by a sound mark).")
|
||
|
||
;; Put properties 'katakana, 'hiragana, and 'jisx0201 to each Japanese
;; kana character for conversion among them.  A table element that is
;; a string of two characters is recorded instead as a
;; 'kana-composition entry on its first character, mapping the second
;; character to the KATAKANA of that row.
(dolist (entry japanese-kana-table)
  (let ((hira (car entry))
        (kata (nth 1 entry))
        (hankaku (nth 2 entry)))
    ;; HIRAGANA column.
    (cond
     ((null hira))
     ((not (stringp hira))
      (put-char-code-property hira 'katakana kata)
      (put-char-code-property hira 'jisx0201 hankaku))
     ((> (length hira) 1)
      (let ((base (aref hira 0)))
        (put-char-code-property
         base 'kana-composition
         (cons (cons (aref hira 1) kata)
               (get-char-code-property base 'kana-composition))))))
    ;; KATAKANA column.
    (when (integerp kata)
      (put-char-code-property kata 'hiragana hira)
      (put-char-code-property kata 'jisx0201 hankaku))
    ;; HANKAKU-KATAKANA column.
    (cond
     ((null hankaku))
     ((not (stringp hankaku))
      (put-char-code-property hankaku 'hiragana hira)
      (put-char-code-property hankaku 'katakana kata)
      (put-char-code-property hankaku 'jisx0208 kata))
     ((> (length hankaku) 1)
      (let ((base (aref hankaku 0)))
        (put-char-code-property
         base 'kana-composition
         (cons (cons (aref hankaku 1) kata)
               (get-char-code-property base 'kana-composition))))))))
|
||
|
||
(defconst japanese-symbol-table
  ;; The first entry is IDEOGRAPHIC SPACE (U+3000) -> ASCII space; both
  ;; are written as escapes because they are invisible.
  '((?\u3000 ?\s) (?， ?,) (?． ?.) (?、 nil ?､) (?。 nil ?｡) (?・ nil ?･)
    (?： ?:) (?； ?\;) (?？ ??) (?！ ?!) (?゛ nil ?ﾞ) (?゜ nil ?ﾟ)
    (?´ ?') (?｀ ?`) (?＾ ?^) (?＿ ?_) (?ー nil ?ｰ) (?— ?-) (?‐ ?-)
    (?／ ?/) (?＼ ?\\) (?〜 ?~) (?｜ ?|) (?‘ ?`) (?’ ?') (?“ ?\") (?” ?\")
    (?\（ ?\() (?\） ?\)) (?\［ ?\[) (?\］ ?\]) (?\｛ ?{) (?\｝ ?})
    (?〈 ?<) (?〉 ?>) (?\「 nil ?\｢) (?\」 nil ?\｣)
    (?＋ ?+) (?− ?-) (?＝ ?=) (?＜ ?<) (?＞ ?>)
    (?′ ?') (?″ ?\") (?￥ ?\\) (?＄ ?$) (?％ ?%) (?＃ ?#) (?＆ ?&) (?＊ ?*)
    (?＠ ?@)
    ;; cp932-2-byte
    (#x2015 ?-) (#xFF5E ?~) (#xFF0D ?-))
  "Japanese JISX0208 and CP932 symbol character table.
Each element is of the form (SYMBOL ASCII HANKAKU), where SYMBOL
belongs to `japanese-jisx0208' or `cp932', ASCII belongs to `ascii',
and HANKAKU belongs to `japanese-jisx0201-kana'.
ASCII is nil when SYMBOL has only a HANKAKU counterpart, and HANKAKU
may be omitted.")
|
||
|
||
;; Put properties 'jisx0208, 'jisx0201, and 'ascii to each Japanese
;; symbol and ASCII characters for conversion among them.  The reverse
;; 'jisx0208 mapping is recorded only when the symbol can be encoded in
;; `japanese-jisx0208'.
(dolist (entry japanese-symbol-table)
  (let ((zenkaku (car entry))
        (ascii (nth 1 entry))
        (hankaku (nth 2 entry)))
    (when ascii
      (put-char-code-property zenkaku 'ascii ascii)
      (when (encode-char zenkaku 'japanese-jisx0208)
        (put-char-code-property ascii 'jisx0208 zenkaku)))
    (when hankaku
      (put-char-code-property zenkaku 'jisx0201 hankaku)
      (when (encode-char zenkaku 'japanese-jisx0208)
        (put-char-code-property hankaku 'jisx0208 zenkaku)))))
|
||
|
||
(defconst japanese-alpha-numeric-table
  '((?０ . ?0) (?１ . ?1) (?２ . ?2) (?３ . ?3) (?４ . ?4)
    (?５ . ?5) (?６ . ?6) (?７ . ?7) (?８ . ?8) (?９ . ?9)
    (?Ａ . ?A) (?Ｂ . ?B) (?Ｃ . ?C) (?Ｄ . ?D) (?Ｅ . ?E)
    (?Ｆ . ?F) (?Ｇ . ?G) (?Ｈ . ?H) (?Ｉ . ?I) (?Ｊ . ?J)
    (?Ｋ . ?K) (?Ｌ . ?L) (?Ｍ . ?M) (?Ｎ . ?N) (?Ｏ . ?O)
    (?Ｐ . ?P) (?Ｑ . ?Q) (?Ｒ . ?R) (?Ｓ . ?S) (?Ｔ . ?T)
    (?Ｕ . ?U) (?Ｖ . ?V) (?Ｗ . ?W) (?Ｘ . ?X) (?Ｙ . ?Y) (?Ｚ . ?Z)
    (?ａ . ?a) (?ｂ . ?b) (?ｃ . ?c) (?ｄ . ?d) (?ｅ . ?e)
    (?ｆ . ?f) (?ｇ . ?g) (?ｈ . ?h) (?ｉ . ?i) (?ｊ . ?j)
    (?ｋ . ?k) (?ｌ . ?l) (?ｍ . ?m) (?ｎ . ?n) (?ｏ . ?o)
    (?ｐ . ?p) (?ｑ . ?q) (?ｒ . ?r) (?ｓ . ?s) (?ｔ . ?t)
    (?ｕ . ?u) (?ｖ . ?v) (?ｗ . ?w) (?ｘ . ?x) (?ｙ . ?y) (?ｚ . ?z))
  "Japanese JISX0208 alpha numeric character table.
Each element is of the form (ALPHANUMERIC . ASCII), where ALPHANUMERIC
belongs to `japanese-jisx0208', ASCII belongs to `ascii'.")
|
||
|
||
;; Put properties 'jisx0208 and 'ascii to each Japanese alpha numeric
;; and ASCII characters for conversion between them.
(dolist (pair japanese-alpha-numeric-table)
  (let ((zenkaku (car pair))
        (ascii (cdr pair)))
    (put-char-code-property zenkaku 'ascii ascii)
    (put-char-code-property ascii 'jisx0208 zenkaku)))
|
||
|
||
(defun japanese-string-conversion (str func &rest args)
  "Convert string STR by FUNC and return the resulting string.
STR is inserted into a temporary buffer, and FUNC is called in that
buffer with the start and end positions of the inserted text followed
by ARGS.  The value is the buffer text after FUNC returns.
Each call uses its own temporary buffer, so FUNC may itself call this
function without disturbing the text being converted."
  (with-temp-buffer
    (insert str)
    (apply func (point-min) (point) args)
    (buffer-string)))
|
||
|
||
;;;###autoload
|
||
(defun japanese-katakana (obj &optional hankaku)
  "Return the Katakana counterpart of OBJ.
OBJ may be a character or a string.  A string yields a new string;
OBJ itself is not altered.  A character that has no Katakana
counterpart is returned as is.
Optional argument HANKAKU non-nil means to convert to `hankaku'
Katakana \(`japanese-jisx0201-kana').  In that case the value for a
character OBJ may be a string, when two Katakanas are necessary to
represent OBJ."
  (cond
   ((stringp obj)
    (japanese-string-conversion obj 'japanese-katakana-region hankaku))
   ;; A clause with only a test returns the property when it is non-nil.
   ((get-char-code-property obj (if hankaku 'jisx0201 'katakana)))
   (t obj)))
|
||
|
||
;;;###autoload
|
||
(defun japanese-hiragana (obj)
  "Return the Hiragana counterpart of OBJ.
OBJ may be a character or a string.  A string yields a new string;
OBJ itself is not altered.  A character that has no Hiragana
counterpart is returned as is."
  (if (not (stringp obj))
      (let ((hira (get-char-code-property obj 'hiragana)))
        (if hira hira obj))
    (japanese-string-conversion obj 'japanese-hiragana-region)))
|
||
|
||
;;;###autoload
|
||
(defun japanese-hankaku (obj &optional ascii-only)
  "Return the `hankaku' counterpart of OBJ.
OBJ may be a character or a string.  A string yields a new string;
OBJ itself is not altered.  A character that has no `hankaku'
counterpart is returned as is.
For a character, the `jisx0201' counterpart is preferred and the
`ascii' one is used otherwise.  Optional argument ASCII-ONLY non-nil
means to consider only the ASCII counterpart."
  (cond
   ((stringp obj)
    (japanese-string-conversion obj 'japanese-hankaku-region ascii-only))
   ((and (not ascii-only)
         (get-char-code-property obj 'jisx0201)))
   ((get-char-code-property obj 'ascii))
   (t obj)))
|
||
|
||
;;;###autoload
|
||
(defun japanese-zenkaku (obj)
  "Return the `zenkaku' counterpart of OBJ.
OBJ may be a character or a string.  A string yields a new string;
OBJ itself is not altered.  A character that has no `jisx0208'
counterpart is returned as is."
  (if (stringp obj)
      (japanese-string-conversion obj 'japanese-zenkaku-region)
    (let ((wide (get-char-code-property obj 'jisx0208)))
      (if wide wide obj))))
|
||
|
||
(defun japanese-replace-region (from to string)
  "Replace the text between FROM and TO with STRING.
STRING is handed to `insert'.  It is inserted at FROM first, and then
the old text, which now follows it, is deleted.  Point is left right
after the inserted text."
  (goto-char from)
  (insert string)
  ;; The text that was between FROM and TO now starts at point.
  (let ((old-length (- to from)))
    (delete-char old-length)))
|
||
|
||
;;;###autoload
|
||
(defun japanese-katakana-region (from to &optional hankaku)
  "Convert Japanese `hiragana' chars in the region to `katakana' chars.
The region is the text between FROM and TO.
Optional argument HANKAKU non-nil means to convert to `hankaku katakana'
character of which charset is `japanese-jisx0201-kana'.
Unless HANKAKU is non-nil, a character and the one following it that
together are registered as a `kana-composition' are replaced by the
single composed character."
  (interactive "r\nP")
  (let ((target (if hankaku 'jisx0201 'katakana)))
    (save-restriction
      (narrow-to-region from to)
      (save-excursion
        (goto-char (point-min))
        (while (re-search-forward "\\cH\\|\\cK" nil t)
          (let* ((ch (preceding-char))
                 (pairs (unless hankaku
                          (get-char-code-property ch 'kana-composition)))
                 (hit (and pairs (assq (following-char) pairs))))
            (if hit
                ;; Replace the matched char together with the next one.
                (japanese-replace-region (match-beginning 0) (1+ (point))
                                         (cdr hit))
              (let ((kata (get-char-code-property ch target)))
                (when kata
                  (japanese-replace-region (match-beginning 0) (point)
                                           kata))))))))))
|
||
|
||
|
||
;;;###autoload
|
||
(defun japanese-hiragana-region (from to)
  "Convert Japanese `katakana' chars in the region to `hiragana' chars.
The region is the text between FROM and TO.  Characters of the
categories `K' and `k' are examined.
A character and the one following it that together are registered as
a `kana-composition' are replaced by the `hiragana' counterpart of the
composed character."
  (interactive "r")
  (save-restriction
    (narrow-to-region from to)
    (save-excursion
      (goto-char (point-min))
      (while (re-search-forward "\\cK\\|\\ck" nil t)
        (let* ((ch (preceding-char))
               (pairs (get-char-code-property ch 'kana-composition))
               (hit (and pairs (assq (following-char) pairs))))
          (if hit
              ;; Replace the matched char together with the next one.
              (japanese-replace-region
               (match-beginning 0) (1+ (point))
               (get-char-code-property (cdr hit) 'hiragana))
            (let ((hira (get-char-code-property ch 'hiragana)))
              (when hira
                (japanese-replace-region (match-beginning 0) (point)
                                         hira)))))))))
|
||
|
||
;;;###autoload
|
||
(defun japanese-hankaku-region (from to &optional ascii-only)
  "Convert Japanese `zenkaku' chars in the region to `hankaku' chars.
The region is the text between FROM and TO.
`Zenkaku' chars belong to `japanese-jisx0208'.
`Hankaku' chars belong to `ascii' or `japanese-jisx0201-kana'.
Optional argument ASCII-ONLY non-nil means to convert only to ASCII char.
Characters that have no `hankaku' counterpart are left as they are."
  (interactive "r\nP")
  (save-restriction
    (narrow-to-region from to)
    (save-excursion
      (goto-char (point-min))
      (while (re-search-forward "\\cj" nil t)
        (let* ((wide (preceding-char))
               (narrow (if ascii-only
                           (get-char-code-property wide 'ascii)
                         (or (get-char-code-property wide 'jisx0201)
                             (get-char-code-property wide 'ascii)))))
          (when narrow
            (japanese-replace-region (match-beginning 0) (match-end 0)
                                     narrow)))))))
|
||
|
||
;;;###autoload
|
||
(defun japanese-zenkaku-region (from to &optional katakana-only)
  "Convert `hankaku' chars in the region to Japanese `zenkaku' chars.
The region is the text between FROM and TO.
`Zenkaku' chars belong to `japanese-jisx0208'.
`Hankaku' chars belong to `ascii' or `japanese-jisx0201-kana'.
Optional argument KATAKANA-ONLY non-nil means to convert only KATAKANA char.
A character and the one following it that together are registered as
a `kana-composition' are replaced by the single composed character.
Characters that have no `zenkaku' counterpart are left untouched."
  (interactive "r\nP")
  (let ((regexp (if katakana-only "\\ck" "\\ca\\|\\ck")))
    (save-restriction
      (narrow-to-region from to)
      (save-excursion
        (goto-char (point-min))
        (while (re-search-forward regexp nil t)
          (let* ((hankaku (preceding-char))
                 (composition
                  (get-char-code-property hankaku 'kana-composition))
                 (slot (and composition
                            (assq (following-char) composition))))
            (if slot
                ;; Replace the matched char together with the next one.
                (japanese-replace-region (match-beginning 0) (1+ (point))
                                         (cdr slot))
              ;; Look the property up directly.  `japanese-zenkaku' falls
              ;; back to its argument, so testing its value would rewrite
              ;; every unmapped character with itself, needlessly
              ;; modifying the buffer and dropping text properties.
              (let ((zenkaku (get-char-code-property hankaku 'jisx0208)))
                (when zenkaku
                  (japanese-replace-region (match-beginning 0) (match-end 0)
                                           zenkaku))))))))))
|
||
|
||
;;;###autoload
|
||
(defun read-hiragana-string (prompt &optional initial-input)
  "Read a Hiragana string from the minibuffer, prompting with string PROMPT.
If non-nil, second arg INITIAL-INPUT is a string to insert before reading.
The reading is done by `read-multilingual-string' with the input
method \"japanese-hiragana\"."
  (let ((input-method "japanese-hiragana"))
    (read-multilingual-string prompt initial-input input-method)))
|
||
|
||
;;
|
||
(provide 'japan-util)
|
||
|
||
;;; japan-util.el ends here
|