Use UTF-8 for most files with non-ASCII characters.
* admin/notes/unicode (etc/tutorials/TUTORIAL.ko, leim/quail/hanja.el)
(leim/quail/hanja3.el, leim/quail/symbol-ksc.el):
Now utf-8, not iso-2022-7bit. Also, files that contain non-UTF-8
characters are now encoded in utf-8-emacs, not iso-2022-7bit.
* etc/tutorials/TUTORIAL.ko, tutorials/TUTORIAL.th:
Switch from iso-2022-7bit to utf-8.
* leim/quail/cyrillic.el, leim/quail/czech.el, leim/quail/ethiopic.el:
* leim/quail/greek.el, leim/quail/hanja.el, leim/quail/hanja3.el:
* leim/quail/hebrew.el, leim/quail/lao.el, leim/quail/lrt.el:
* leim/quail/slovak.el, leim/quail/symbol-ksc.el, leim/quail/thai.el:
* leim/quail/tibetan.el, leim/quail/viqr.el, leim/quail/vntelex.el:
* leim/quail/vnvni.el, leim/quail/welsh.el:
* lisp/international/latin1-disp.el, lisp/international/mule-util.el:
* lisp/language/cyril-util.el, lisp/language/european.el:
* lisp/language/ind-util.el, lisp/language/lao-util.el, lisp/language/thai.el:
* lisp/language/tibet-util.el, lisp/language/tibetan.el:
* lisp/language/viet-util.el:
Switch from iso-2022-7bit to utf-8 or (if needed) utf-8-emacs.
Fixes: debbugs:13936
2013-04-02 01:18:40 +00:00
|
|
|
|
;;; lao-util.el --- utilities for Lao -*- coding: utf-8; -*-
|
1997-02-26 12:22:56 +00:00
|
|
|
|
|
2014-01-01 07:43:34 +00:00
|
|
|
|
;; Copyright (C) 2001-2014 Free Software Foundation, Inc.
|
2008-01-07 01:33:37 +00:00
|
|
|
|
;; Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
|
2011-01-02 23:50:46 +00:00
|
|
|
|
;; 2007, 2008, 2009, 2010, 2011
|
2005-05-16 07:03:28 +00:00
|
|
|
|
;; National Institute of Advanced Industrial Science and Technology (AIST)
|
|
|
|
|
;; Registration Number H14PRO021
|
2003-09-08 12:53:41 +00:00
|
|
|
|
;; Copyright (C) 2003
|
|
|
|
|
;; National Institute of Advanced Industrial Science and Technology (AIST)
|
|
|
|
|
;; Registration Number H13PRO009
|
1997-02-26 12:22:56 +00:00
|
|
|
|
|
2002-06-12 16:46:15 +00:00
|
|
|
|
;; Keywords: multilingual, Lao, i18n
|
1997-02-26 12:22:56 +00:00
|
|
|
|
|
|
|
|
|
;; This file is part of GNU Emacs.
|
|
|
|
|
|
2008-05-06 04:29:13 +00:00
|
|
|
|
;; GNU Emacs is free software: you can redistribute it and/or modify
|
1997-02-26 12:22:56 +00:00
|
|
|
|
;; it under the terms of the GNU General Public License as published by
|
2008-05-06 04:29:13 +00:00
|
|
|
|
;; the Free Software Foundation, either version 3 of the License, or
|
|
|
|
|
;; (at your option) any later version.
|
1997-02-26 12:22:56 +00:00
|
|
|
|
|
|
|
|
|
;; GNU Emacs is distributed in the hope that it will be useful,
|
|
|
|
|
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
|
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
|
;; GNU General Public License for more details.
|
|
|
|
|
|
|
|
|
|
;; You should have received a copy of the GNU General Public License
|
2008-05-06 04:29:13 +00:00
|
|
|
|
;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
|
1997-02-26 12:22:56 +00:00
|
|
|
|
|
2001-07-15 19:53:53 +00:00
|
|
|
|
;;; Commentary:
|
|
|
|
|
|
1997-02-26 12:22:56 +00:00
|
|
|
|
;;; Code:
|
|
|
|
|
|
1999-12-15 00:47:53 +00:00
|
|
|
|
;; Setting information of Thai characters.
|
|
|
|
|
|
|
|
|
|
(defconst lao-category-table (make-category-table))
|
|
|
|
|
(define-category ?c "Lao consonant" lao-category-table)
|
|
|
|
|
(define-category ?s "Lao semi-vowel" lao-category-table)
|
|
|
|
|
(define-category ?v "Lao upper/lower vowel" lao-category-table)
|
|
|
|
|
(define-category ?t "Lao tone" lao-category-table)
|
|
|
|
|
|
Use UTF-8 for most files with non-ASCII characters.
* admin/notes/unicode (etc/tutorials/TUTORIAL.ko, leim/quail/hanja.el)
(leim/quail/hanja3.el, leim/quail/symbol-ksc.el):
Now utf-8, not iso-2022-7bit. Also, files that contain non-UTF-8
characters are now encoded in utf-8-emacs, not iso-2022-7bit.
* etc/tutorials/TUTORIAL.ko, tutorials/TUTORIAL.th:
Switch from iso-2022-7bit to utf-8.
* leim/quail/cyrillic.el, leim/quail/czech.el, leim/quail/ethiopic.el:
* leim/quail/greek.el, leim/quail/hanja.el, leim/quail/hanja3.el:
* leim/quail/hebrew.el, leim/quail/lao.el, leim/quail/lrt.el:
* leim/quail/slovak.el, leim/quail/symbol-ksc.el, leim/quail/thai.el:
* leim/quail/tibetan.el, leim/quail/viqr.el, leim/quail/vntelex.el:
* leim/quail/vnvni.el, leim/quail/welsh.el:
* lisp/international/latin1-disp.el, lisp/international/mule-util.el:
* lisp/language/cyril-util.el, lisp/language/european.el:
* lisp/language/ind-util.el, lisp/language/lao-util.el, lisp/language/thai.el:
* lisp/language/tibet-util.el, lisp/language/tibetan.el:
* lisp/language/viet-util.el:
Switch from iso-2022-7bit to utf-8 or (if needed) utf-8-emacs.
Fixes: debbugs:13936
2013-04-02 01:18:40 +00:00
|
|
|
|
(let ((l '((?ກ consonant "LETTER KOR KAI'" "CHICKEN")
|
|
|
|
|
(?ຂ consonant "LETTER KHOR KHAI'" "EGG")
|
|
|
|
|
(? invalid nil)
|
|
|
|
|
(?ຄ consonant "LETTER QHOR QHWARGN" "BUFFALO")
|
|
|
|
|
(? invalid nil)
|
1997-02-26 12:22:56 +00:00
|
|
|
|
(? invalid nil)
|
Use UTF-8 for most files with non-ASCII characters.
* admin/notes/unicode (etc/tutorials/TUTORIAL.ko, leim/quail/hanja.el)
(leim/quail/hanja3.el, leim/quail/symbol-ksc.el):
Now utf-8, not iso-2022-7bit. Also, files that contain non-UTF-8
characters are now encoded in utf-8-emacs, not iso-2022-7bit.
* etc/tutorials/TUTORIAL.ko, tutorials/TUTORIAL.th:
Switch from iso-2022-7bit to utf-8.
* leim/quail/cyrillic.el, leim/quail/czech.el, leim/quail/ethiopic.el:
* leim/quail/greek.el, leim/quail/hanja.el, leim/quail/hanja3.el:
* leim/quail/hebrew.el, leim/quail/lao.el, leim/quail/lrt.el:
* leim/quail/slovak.el, leim/quail/symbol-ksc.el, leim/quail/thai.el:
* leim/quail/tibetan.el, leim/quail/viqr.el, leim/quail/vntelex.el:
* leim/quail/vnvni.el, leim/quail/welsh.el:
* lisp/international/latin1-disp.el, lisp/international/mule-util.el:
* lisp/language/cyril-util.el, lisp/language/european.el:
* lisp/language/ind-util.el, lisp/language/lao-util.el, lisp/language/thai.el:
* lisp/language/tibet-util.el, lisp/language/tibetan.el:
* lisp/language/viet-util.el:
Switch from iso-2022-7bit to utf-8 or (if needed) utf-8-emacs.
Fixes: debbugs:13936
2013-04-02 01:18:40 +00:00
|
|
|
|
(?ງ consonant "LETTER NGOR NGUU" "SNAKE")
|
|
|
|
|
(?ຈ consonant "LETTER JOR JUA" "BUDDHIST NOVICE")
|
|
|
|
|
(?ຉ invalid nil)
|
|
|
|
|
(?ຊ consonant "LETTER XOR X\"ARNG" "ELEPHANT")
|
|
|
|
|
(? invalid nil)
|
|
|
|
|
(?ຌ invalid nil)
|
|
|
|
|
(?ຍ consonant "LETTER YOR YUNG" "MOSQUITO")
|
|
|
|
|
(?ຎ invalid nil)
|
|
|
|
|
(?ຎ invalid nil)
|
|
|
|
|
(?ຎ invalid nil)
|
|
|
|
|
(?ຎ invalid nil)
|
|
|
|
|
(?ຎ invalid nil)
|
|
|
|
|
(?ຎ invalid nil)
|
|
|
|
|
(?ດ consonant "LETTER DOR DANG" "NOSE")
|
|
|
|
|
(?ຕ consonant "LETTER TOR TAR" "EYE")
|
|
|
|
|
(?ຖ consonant "LETTER THOR THUNG" "TO ASK,QUESTION")
|
|
|
|
|
(?ທ consonant "LETTER DHOR DHARM" "FLAG")
|
|
|
|
|
(?ຘ invalid nil)
|
|
|
|
|
(?ນ consonant "LETTER NOR NOK" "BIRD")
|
|
|
|
|
(?ບ consonant "LETTER BOR BED" "FISHHOOK")
|
|
|
|
|
(?ປ consonant "LETTER POR PAR" "FISH")
|
|
|
|
|
(?ຜ consonant "LETTER HPOR HPER\"" "BEE")
|
|
|
|
|
(?ຝ consonant "LETTER FHOR FHAR" "WALL")
|
|
|
|
|
(?ພ consonant "LETTER PHOR PHUU" "MOUNTAIN")
|
|
|
|
|
(?ຟ consonant "LETTER FOR FAI" "FIRE")
|
|
|
|
|
(?ຠ invalid nil)
|
|
|
|
|
(?ມ consonant "LETTER MOR MAR\"" "HORSE")
|
|
|
|
|
(?ຢ consonant "LETTER GNOR GNAR" "MEDICINE")
|
|
|
|
|
(?ຣ consonant "LETTER ROR ROD" "CAR")
|
|
|
|
|
(? invalid nil)
|
|
|
|
|
(?ລ consonant "LETTER LOR LIING" "MONKEY")
|
|
|
|
|
(? invalid nil)
|
|
|
|
|
(?ວ consonant "LETTER WOR WII" "HAND FAN")
|
|
|
|
|
(?ຨ invalid nil)
|
|
|
|
|
(?ຩ invalid nil)
|
|
|
|
|
(?ສ consonant "LETTER SOR SEA" "TIGER")
|
|
|
|
|
(?ຫ consonant "LETTER HHOR HHAI" "JAR")
|
|
|
|
|
(?ຬ invalid nil)
|
|
|
|
|
(?ອ consonant "LETTER OR OOW" "TAKE")
|
|
|
|
|
(?ຮ consonant "LETTER HOR HEA" "BOAT")
|
|
|
|
|
(?ຯ special "ELLIPSIS")
|
|
|
|
|
(?ະ vowel-base "VOWEL SIGN SARA A")
|
|
|
|
|
(?ັ vowel-upper "VOWEL SIGN MAI KAN")
|
|
|
|
|
(?າ vowel-base "VOWEL SIGN SARA AR")
|
|
|
|
|
(?ຳ vowel-base "VOWEL SIGN SARA AM")
|
|
|
|
|
(?ິ vowel-upper "VOWEL SIGN SARA I")
|
|
|
|
|
(?ີ vowel-upper "VOWEL SIGN SARA II")
|
|
|
|
|
(?ຶ vowel-upper "VOWEL SIGN SARA EU")
|
|
|
|
|
(?ື vowel-upper "VOWEL SIGN SARA UR")
|
|
|
|
|
(?ຸ vowel-lower "VOWEL SIGN SARA U")
|
|
|
|
|
(?ູ vowel-lower "VOWEL SIGN SARA UU")
|
|
|
|
|
(?຺ invalid nil)
|
|
|
|
|
(?ົ vowel-upper "VOWEL SIGN MAI KONG")
|
|
|
|
|
(?ຼ semivowel-lower "SEMIVOWEL SIGN LO")
|
|
|
|
|
(?ຽ vowel-base "SEMIVOWEL SIGN SARA IA")
|
|
|
|
|
(? invalid nil)
|
|
|
|
|
(? invalid nil)
|
|
|
|
|
(?ເ vowel-base "VOWEL SIGN SARA EE")
|
|
|
|
|
(?ແ vowel-base "VOWEL SIGN SARA AA")
|
|
|
|
|
(?ໂ vowel-base "VOWEL SIGN SARA OO")
|
|
|
|
|
(?ໃ vowel-base "VOWEL SIGN SARA EI MAI MUAN\"")
|
|
|
|
|
(?ໄ vowel-base "VOWEL SIGN SARA AI MAI MAY")
|
|
|
|
|
(? invalid nil)
|
|
|
|
|
(?ໆ special "KO LA (REPETITION)")
|
|
|
|
|
(? invalid nil)
|
|
|
|
|
(?່ tone "TONE MAI EK")
|
|
|
|
|
(?້ tone "TONE MAI THO")
|
|
|
|
|
(?໊ tone "TONE MAI TI")
|
|
|
|
|
(?໋ tone "TONE MAI JADTAWAR")
|
|
|
|
|
(?໌ tone "CANCELLATION MARK")
|
|
|
|
|
(?ໍ vowel-upper "VOWEL SIGN SARA OR")
|
|
|
|
|
(?໎ invalid nil)
|
|
|
|
|
(? invalid nil)
|
|
|
|
|
(?໐ special "DIGIT ZERO")
|
|
|
|
|
(?໑ special "DIGIT ONE")
|
|
|
|
|
(?໒ special "DIGIT TWO")
|
|
|
|
|
(?໓ special "DIGIT THREE")
|
|
|
|
|
(?໔ special "DIGIT FOUR")
|
|
|
|
|
(?໕ special "DIGIT FIVE")
|
|
|
|
|
(?໖ special "DIGIT SIX")
|
|
|
|
|
(?໗ special "DIGIT SEVEN")
|
|
|
|
|
(?໘ special "DIGIT EIGHT")
|
|
|
|
|
(?໙ special "DIGIT NINE")
|
|
|
|
|
(? invalid nil)
|
|
|
|
|
(? invalid nil)
|
|
|
|
|
(?ໜ consonant "LETTER NHOR NHUU" "MOUSE")
|
|
|
|
|
(?ໝ consonant "LETTER MHOR MHAR" "DOG")
|
|
|
|
|
(?ໞ invalid nil)))
|
1997-02-26 12:22:56 +00:00
|
|
|
|
elm)
|
|
|
|
|
(while l
|
1999-12-15 00:47:53 +00:00
|
|
|
|
(setq elm (car l) l (cdr l))
|
|
|
|
|
(let ((char (car elm))
|
|
|
|
|
(ptype (nth 1 elm)))
|
|
|
|
|
(cond ((eq ptype 'consonant)
|
|
|
|
|
(modify-category-entry char ?c lao-category-table))
|
|
|
|
|
((memq ptype '(vowel-upper vowel-lower))
|
|
|
|
|
(modify-category-entry char ?v lao-category-table))
|
|
|
|
|
((eq ptype 'semivowel-lower)
|
|
|
|
|
(modify-category-entry char ?s lao-category-table))
|
|
|
|
|
((eq ptype 'tone)
|
|
|
|
|
(modify-category-entry char ?t lao-category-table)))
|
|
|
|
|
(put-char-code-property char 'phonetic-type ptype)
|
|
|
|
|
(put-char-code-property char 'name (nth 2 elm))
|
|
|
|
|
(put-char-code-property char 'meaning (nth 3 elm)))))
|
|
|
|
|
|
|
|
|
|
;; The general composing rules are as follows:
|
|
|
|
|
;;
|
|
|
|
|
;; T
|
|
|
|
|
;; V T V T
|
|
|
|
|
;; CV -> C, CT -> C, CVT -> C, Cv -> C, CvT -> C
|
|
|
|
|
;; v v
|
|
|
|
|
;; T
|
2002-11-07 06:31:37 +00:00
|
|
|
|
;; V T V T
|
|
|
|
|
;; CsV -> C, CsT -> C, CsVT -> C, Csv -> C, CsvT -> C
|
|
|
|
|
;; s s s s s
|
|
|
|
|
;; v v
|
1999-12-15 00:47:53 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
;; where C: consonant, V: vowel upper, v: vowel lower,
|
|
|
|
|
;; T: tone mark, s: semivowel lower
|
|
|
|
|
|
|
|
|
|
(defvar lao-composition-pattern
|
|
|
|
|
"\\cc\\(\\ct\\|\\cv\\ct?\\|\\cs\\(\\ct\\|\\cv\\ct?\\)?\\)"
|
|
|
|
|
"Regular expression matching a Lao composite sequence.")
|
|
|
|
|
|
|
|
|
|
;;;###autoload
|
|
|
|
|
(defun lao-compose-string (str)
|
|
|
|
|
(with-category-table lao-category-table
|
|
|
|
|
(let ((idx 0))
|
|
|
|
|
(while (setq idx (string-match lao-composition-pattern str idx))
|
|
|
|
|
(compose-string str idx (match-end 0))
|
|
|
|
|
(setq idx (match-end 0))))
|
|
|
|
|
str))
|
|
|
|
|
|
|
|
|
|
;;; LRT: Lao <-> Roman Transcription
|
|
|
|
|
|
|
|
|
|
;; Upper vowels and tone-marks are put on the letter.
|
|
|
|
|
;; Semi-vowel-sign-lo and lower vowels are put under the letter.
|
|
|
|
|
|
|
|
|
|
(defconst lao-transcription-consonant-alist
|
|
|
|
|
(sort '(;; single consonants
|
Use UTF-8 for most files with non-ASCII characters.
* admin/notes/unicode (etc/tutorials/TUTORIAL.ko, leim/quail/hanja.el)
(leim/quail/hanja3.el, leim/quail/symbol-ksc.el):
Now utf-8, not iso-2022-7bit. Also, files that contain non-UTF-8
characters are now encoded in utf-8-emacs, not iso-2022-7bit.
* etc/tutorials/TUTORIAL.ko, tutorials/TUTORIAL.th:
Switch from iso-2022-7bit to utf-8.
* leim/quail/cyrillic.el, leim/quail/czech.el, leim/quail/ethiopic.el:
* leim/quail/greek.el, leim/quail/hanja.el, leim/quail/hanja3.el:
* leim/quail/hebrew.el, leim/quail/lao.el, leim/quail/lrt.el:
* leim/quail/slovak.el, leim/quail/symbol-ksc.el, leim/quail/thai.el:
* leim/quail/tibetan.el, leim/quail/viqr.el, leim/quail/vntelex.el:
* leim/quail/vnvni.el, leim/quail/welsh.el:
* lisp/international/latin1-disp.el, lisp/international/mule-util.el:
* lisp/language/cyril-util.el, lisp/language/european.el:
* lisp/language/ind-util.el, lisp/language/lao-util.el, lisp/language/thai.el:
* lisp/language/tibet-util.el, lisp/language/tibetan.el:
* lisp/language/viet-util.el:
Switch from iso-2022-7bit to utf-8 or (if needed) utf-8-emacs.
Fixes: debbugs:13936
2013-04-02 01:18:40 +00:00
|
|
|
|
("k" . "ກ")
|
|
|
|
|
("kh" . "ຂ")
|
|
|
|
|
("qh" . "ຄ")
|
|
|
|
|
("ng" . "ງ")
|
|
|
|
|
("j" . "ຈ")
|
|
|
|
|
("s" . "ສ")
|
|
|
|
|
("x" . "ຊ")
|
|
|
|
|
("y" . "ຍ")
|
|
|
|
|
("d" . "ດ")
|
|
|
|
|
("t" . "ຕ")
|
|
|
|
|
("th" . "ຖ")
|
|
|
|
|
("dh" . "ທ")
|
|
|
|
|
("n" . "ນ")
|
|
|
|
|
("b" . "ບ")
|
|
|
|
|
("p" . "ປ")
|
|
|
|
|
("hp" . "ຜ")
|
|
|
|
|
("fh" . "ຝ")
|
|
|
|
|
("ph" . "ພ")
|
|
|
|
|
("f" . "ຟ")
|
|
|
|
|
("m" . "ມ")
|
|
|
|
|
("gn" . "ຢ")
|
|
|
|
|
("l" . "ລ")
|
|
|
|
|
("r" . "ຣ")
|
|
|
|
|
("v" . "ວ")
|
|
|
|
|
("w" . "ວ")
|
|
|
|
|
("hh" . "ຫ")
|
|
|
|
|
("O" . "ອ")
|
|
|
|
|
("h" . "ຮ")
|
|
|
|
|
("nh" . "ໜ")
|
|
|
|
|
("mh" . "ໝ")
|
|
|
|
|
("lh" . ["ຫຼ"])
|
1999-12-15 00:47:53 +00:00
|
|
|
|
;; double consonants
|
Use UTF-8 for most files with non-ASCII characters.
* admin/notes/unicode (etc/tutorials/TUTORIAL.ko, leim/quail/hanja.el)
(leim/quail/hanja3.el, leim/quail/symbol-ksc.el):
Now utf-8, not iso-2022-7bit. Also, files that contain non-UTF-8
characters are now encoded in utf-8-emacs, not iso-2022-7bit.
* etc/tutorials/TUTORIAL.ko, tutorials/TUTORIAL.th:
Switch from iso-2022-7bit to utf-8.
* leim/quail/cyrillic.el, leim/quail/czech.el, leim/quail/ethiopic.el:
* leim/quail/greek.el, leim/quail/hanja.el, leim/quail/hanja3.el:
* leim/quail/hebrew.el, leim/quail/lao.el, leim/quail/lrt.el:
* leim/quail/slovak.el, leim/quail/symbol-ksc.el, leim/quail/thai.el:
* leim/quail/tibetan.el, leim/quail/viqr.el, leim/quail/vntelex.el:
* leim/quail/vnvni.el, leim/quail/welsh.el:
* lisp/international/latin1-disp.el, lisp/international/mule-util.el:
* lisp/language/cyril-util.el, lisp/language/european.el:
* lisp/language/ind-util.el, lisp/language/lao-util.el, lisp/language/thai.el:
* lisp/language/tibet-util.el, lisp/language/tibetan.el:
* lisp/language/viet-util.el:
Switch from iso-2022-7bit to utf-8 or (if needed) utf-8-emacs.
Fixes: debbugs:13936
2013-04-02 01:18:40 +00:00
|
|
|
|
("ngh" . ["ຫງ"])
|
|
|
|
|
("yh" . ["ຫຽ"])
|
|
|
|
|
("wh" . ["ຫວ"])
|
|
|
|
|
("hl" . ["ຫລ"])
|
|
|
|
|
("hy" . ["ຫຍ"])
|
|
|
|
|
("hn" . ["ຫນ"])
|
|
|
|
|
("hm" . ["ຫມ"])
|
1999-12-15 00:47:53 +00:00
|
|
|
|
)
|
|
|
|
|
(function (lambda (x y) (> (length (car x)) (length (car y)))))))
|
|
|
|
|
|
|
|
|
|
(defconst lao-transcription-semi-vowel-alist
|
Use UTF-8 for most files with non-ASCII characters.
* admin/notes/unicode (etc/tutorials/TUTORIAL.ko, leim/quail/hanja.el)
(leim/quail/hanja3.el, leim/quail/symbol-ksc.el):
Now utf-8, not iso-2022-7bit. Also, files that contain non-UTF-8
characters are now encoded in utf-8-emacs, not iso-2022-7bit.
* etc/tutorials/TUTORIAL.ko, tutorials/TUTORIAL.th:
Switch from iso-2022-7bit to utf-8.
* leim/quail/cyrillic.el, leim/quail/czech.el, leim/quail/ethiopic.el:
* leim/quail/greek.el, leim/quail/hanja.el, leim/quail/hanja3.el:
* leim/quail/hebrew.el, leim/quail/lao.el, leim/quail/lrt.el:
* leim/quail/slovak.el, leim/quail/symbol-ksc.el, leim/quail/thai.el:
* leim/quail/tibetan.el, leim/quail/viqr.el, leim/quail/vntelex.el:
* leim/quail/vnvni.el, leim/quail/welsh.el:
* lisp/international/latin1-disp.el, lisp/international/mule-util.el:
* lisp/language/cyril-util.el, lisp/language/european.el:
* lisp/language/ind-util.el, lisp/language/lao-util.el, lisp/language/thai.el:
* lisp/language/tibet-util.el, lisp/language/tibetan.el:
* lisp/language/viet-util.el:
Switch from iso-2022-7bit to utf-8 or (if needed) utf-8-emacs.
Fixes: debbugs:13936
2013-04-02 01:18:40 +00:00
|
|
|
|
'(("r" . "ຼ")))
|
1999-12-15 00:47:53 +00:00
|
|
|
|
|
|
|
|
|
(defconst lao-transcription-vowel-alist
|
Use UTF-8 for most files with non-ASCII characters.
* admin/notes/unicode (etc/tutorials/TUTORIAL.ko, leim/quail/hanja.el)
(leim/quail/hanja3.el, leim/quail/symbol-ksc.el):
Now utf-8, not iso-2022-7bit. Also, files that contain non-UTF-8
characters are now encoded in utf-8-emacs, not iso-2022-7bit.
* etc/tutorials/TUTORIAL.ko, tutorials/TUTORIAL.th:
Switch from iso-2022-7bit to utf-8.
* leim/quail/cyrillic.el, leim/quail/czech.el, leim/quail/ethiopic.el:
* leim/quail/greek.el, leim/quail/hanja.el, leim/quail/hanja3.el:
* leim/quail/hebrew.el, leim/quail/lao.el, leim/quail/lrt.el:
* leim/quail/slovak.el, leim/quail/symbol-ksc.el, leim/quail/thai.el:
* leim/quail/tibetan.el, leim/quail/viqr.el, leim/quail/vntelex.el:
* leim/quail/vnvni.el, leim/quail/welsh.el:
* lisp/international/latin1-disp.el, lisp/international/mule-util.el:
* lisp/language/cyril-util.el, lisp/language/european.el:
* lisp/language/ind-util.el, lisp/language/lao-util.el, lisp/language/thai.el:
* lisp/language/tibet-util.el, lisp/language/tibetan.el:
* lisp/language/viet-util.el:
Switch from iso-2022-7bit to utf-8 or (if needed) utf-8-emacs.
Fixes: debbugs:13936
2013-04-02 01:18:40 +00:00
|
|
|
|
(sort '(("a" . "ະ")
|
|
|
|
|
("ar" . "າ")
|
|
|
|
|
("i" . "ິ")
|
|
|
|
|
("ii" . "ີ")
|
|
|
|
|
("eu" . "ຶ")
|
|
|
|
|
("ur" . "ື")
|
|
|
|
|
("u" . "ຸ")
|
|
|
|
|
("uu" . "ູ")
|
|
|
|
|
("e" . ["ເະ"])
|
|
|
|
|
("ee" . "ເ")
|
|
|
|
|
("ae" . ["ແະ"])
|
|
|
|
|
("aa" . "ແ")
|
|
|
|
|
("o" . ["ໂະ"])
|
|
|
|
|
("oo" . "ໂ")
|
|
|
|
|
("oe" . ["ເາະ"])
|
|
|
|
|
("or" . "ໍ")
|
|
|
|
|
("er" . ["ເິ"])
|
|
|
|
|
("ir" . ["ເີ"])
|
|
|
|
|
("ua" . ["ົວະ"])
|
|
|
|
|
("uaa" . ["ົວ"])
|
|
|
|
|
("ie" . ["ເັຽະ"])
|
|
|
|
|
("ia" . ["ເັຽ"])
|
|
|
|
|
("ea" . ["ເຶອ"])
|
|
|
|
|
("eaa" . ["ເືອ"])
|
|
|
|
|
("ai" . "ໄ")
|
|
|
|
|
("ei" . "ໃ")
|
|
|
|
|
("ao" . ["ເົາ"])
|
|
|
|
|
("aM" . "ຳ"))
|
1999-12-15 00:47:53 +00:00
|
|
|
|
(function (lambda (x y) (> (length (car x)) (length (car y)))))))
|
|
|
|
|
|
|
|
|
|
;; Maa-sakod is put at the tail.
|
|
|
|
|
(defconst lao-transcription-maa-sakod-alist
|
Use UTF-8 for most files with non-ASCII characters.
* admin/notes/unicode (etc/tutorials/TUTORIAL.ko, leim/quail/hanja.el)
(leim/quail/hanja3.el, leim/quail/symbol-ksc.el):
Now utf-8, not iso-2022-7bit. Also, files that contain non-UTF-8
characters are now encoded in utf-8-emacs, not iso-2022-7bit.
* etc/tutorials/TUTORIAL.ko, tutorials/TUTORIAL.th:
Switch from iso-2022-7bit to utf-8.
* leim/quail/cyrillic.el, leim/quail/czech.el, leim/quail/ethiopic.el:
* leim/quail/greek.el, leim/quail/hanja.el, leim/quail/hanja3.el:
* leim/quail/hebrew.el, leim/quail/lao.el, leim/quail/lrt.el:
* leim/quail/slovak.el, leim/quail/symbol-ksc.el, leim/quail/thai.el:
* leim/quail/tibetan.el, leim/quail/viqr.el, leim/quail/vntelex.el:
* leim/quail/vnvni.el, leim/quail/welsh.el:
* lisp/international/latin1-disp.el, lisp/international/mule-util.el:
* lisp/language/cyril-util.el, lisp/language/european.el:
* lisp/language/ind-util.el, lisp/language/lao-util.el, lisp/language/thai.el:
* lisp/language/tibet-util.el, lisp/language/tibetan.el:
* lisp/language/viet-util.el:
Switch from iso-2022-7bit to utf-8 or (if needed) utf-8-emacs.
Fixes: debbugs:13936
2013-04-02 01:18:40 +00:00
|
|
|
|
'(("k" . "ກ")
|
|
|
|
|
("g" . "ງ")
|
|
|
|
|
("y" . "ຍ")
|
|
|
|
|
("d" . "ດ")
|
|
|
|
|
("n" . "ນ")
|
|
|
|
|
("b" . "ບ")
|
|
|
|
|
("m" . "ມ")
|
|
|
|
|
("v" . "ວ")
|
|
|
|
|
("w" . "ວ")
|
1999-12-15 00:47:53 +00:00
|
|
|
|
))
|
|
|
|
|
|
|
|
|
|
(defconst lao-transcription-tone-alist
|
Use UTF-8 for most files with non-ASCII characters.
* admin/notes/unicode (etc/tutorials/TUTORIAL.ko, leim/quail/hanja.el)
(leim/quail/hanja3.el, leim/quail/symbol-ksc.el):
Now utf-8, not iso-2022-7bit. Also, files that contain non-UTF-8
characters are now encoded in utf-8-emacs, not iso-2022-7bit.
* etc/tutorials/TUTORIAL.ko, tutorials/TUTORIAL.th:
Switch from iso-2022-7bit to utf-8.
* leim/quail/cyrillic.el, leim/quail/czech.el, leim/quail/ethiopic.el:
* leim/quail/greek.el, leim/quail/hanja.el, leim/quail/hanja3.el:
* leim/quail/hebrew.el, leim/quail/lao.el, leim/quail/lrt.el:
* leim/quail/slovak.el, leim/quail/symbol-ksc.el, leim/quail/thai.el:
* leim/quail/tibetan.el, leim/quail/viqr.el, leim/quail/vntelex.el:
* leim/quail/vnvni.el, leim/quail/welsh.el:
* lisp/international/latin1-disp.el, lisp/international/mule-util.el:
* lisp/language/cyril-util.el, lisp/language/european.el:
* lisp/language/ind-util.el, lisp/language/lao-util.el, lisp/language/thai.el:
* lisp/language/tibet-util.el, lisp/language/tibetan.el:
* lisp/language/viet-util.el:
Switch from iso-2022-7bit to utf-8 or (if needed) utf-8-emacs.
Fixes: debbugs:13936
2013-04-02 01:18:40 +00:00
|
|
|
|
'(("'" . "່")
|
|
|
|
|
("\"" . "້")
|
|
|
|
|
("^" . "໊")
|
|
|
|
|
("+" . "໋")
|
|
|
|
|
("~" . "໌")))
|
1999-12-15 00:47:53 +00:00
|
|
|
|
|
|
|
|
|
(defconst lao-transcription-punctuation-alist
|
Use UTF-8 for most files with non-ASCII characters.
* admin/notes/unicode (etc/tutorials/TUTORIAL.ko, leim/quail/hanja.el)
(leim/quail/hanja3.el, leim/quail/symbol-ksc.el):
Now utf-8, not iso-2022-7bit. Also, files that contain non-UTF-8
characters are now encoded in utf-8-emacs, not iso-2022-7bit.
* etc/tutorials/TUTORIAL.ko, tutorials/TUTORIAL.th:
Switch from iso-2022-7bit to utf-8.
* leim/quail/cyrillic.el, leim/quail/czech.el, leim/quail/ethiopic.el:
* leim/quail/greek.el, leim/quail/hanja.el, leim/quail/hanja3.el:
* leim/quail/hebrew.el, leim/quail/lao.el, leim/quail/lrt.el:
* leim/quail/slovak.el, leim/quail/symbol-ksc.el, leim/quail/thai.el:
* leim/quail/tibetan.el, leim/quail/viqr.el, leim/quail/vntelex.el:
* leim/quail/vnvni.el, leim/quail/welsh.el:
* lisp/international/latin1-disp.el, lisp/international/mule-util.el:
* lisp/language/cyril-util.el, lisp/language/european.el:
* lisp/language/ind-util.el, lisp/language/lao-util.el, lisp/language/thai.el:
* lisp/language/tibet-util.el, lisp/language/tibetan.el:
* lisp/language/viet-util.el:
Switch from iso-2022-7bit to utf-8 or (if needed) utf-8-emacs.
Fixes: debbugs:13936
2013-04-02 01:18:40 +00:00
|
|
|
|
'(("\\0" . "໐")
|
|
|
|
|
("\\1" . "໑")
|
|
|
|
|
("\\2" . "໒")
|
|
|
|
|
("\\3" . "໓")
|
|
|
|
|
("\\4" . "໔")
|
|
|
|
|
("\\5" . "໕")
|
|
|
|
|
("\\6" . "໖")
|
|
|
|
|
("\\7" . "໗")
|
|
|
|
|
("\\8" . "໘")
|
|
|
|
|
("\\9" . "໙")
|
|
|
|
|
("\\\\" . "ໆ")
|
|
|
|
|
("\\$" . "ຯ")))
|
1999-12-15 00:47:53 +00:00
|
|
|
|
|
|
|
|
|
(defconst lao-transcription-pattern
|
|
|
|
|
(concat
|
|
|
|
|
"\\("
|
|
|
|
|
(mapconcat 'car lao-transcription-consonant-alist "\\|")
|
|
|
|
|
"\\)\\("
|
|
|
|
|
(mapconcat 'car lao-transcription-semi-vowel-alist "\\|")
|
|
|
|
|
"\\)?\\(\\("
|
|
|
|
|
(mapconcat 'car lao-transcription-vowel-alist "\\|")
|
|
|
|
|
"\\)\\("
|
|
|
|
|
(mapconcat 'car lao-transcription-maa-sakod-alist "\\|")
|
|
|
|
|
"\\)?\\("
|
|
|
|
|
(mapconcat (lambda (x) (regexp-quote (car x)))
|
|
|
|
|
lao-transcription-tone-alist "\\|")
|
|
|
|
|
"\\)?\\)?\\|"
|
|
|
|
|
(mapconcat (lambda (x) (regexp-quote (car x)))
|
|
|
|
|
lao-transcription-punctuation-alist "\\|")
|
|
|
|
|
)
|
|
|
|
|
"Regexp of Roman transcription pattern for one Lao syllable.")
|
|
|
|
|
|
|
|
|
|
(defconst lao-transcription-pattern
|
|
|
|
|
(concat
|
|
|
|
|
"\\("
|
|
|
|
|
(regexp-opt (mapcar 'car lao-transcription-consonant-alist))
|
|
|
|
|
"\\)\\("
|
|
|
|
|
(regexp-opt (mapcar 'car lao-transcription-semi-vowel-alist))
|
|
|
|
|
"\\)?\\(\\("
|
|
|
|
|
(regexp-opt (mapcar 'car lao-transcription-vowel-alist))
|
|
|
|
|
"\\)\\("
|
|
|
|
|
(regexp-opt (mapcar 'car lao-transcription-maa-sakod-alist))
|
|
|
|
|
"\\)?\\("
|
|
|
|
|
(regexp-opt (mapcar 'car lao-transcription-tone-alist))
|
|
|
|
|
"\\)?\\)?\\|"
|
|
|
|
|
(regexp-opt (mapcar 'car lao-transcription-punctuation-alist))
|
|
|
|
|
)
|
|
|
|
|
"Regexp of Roman transcription pattern for one Lao syllable.")
|
|
|
|
|
|
|
|
|
|
(defconst lao-vowel-reordering-rule
|
Use UTF-8 for most files with non-ASCII characters.
* admin/notes/unicode (etc/tutorials/TUTORIAL.ko, leim/quail/hanja.el)
(leim/quail/hanja3.el, leim/quail/symbol-ksc.el):
Now utf-8, not iso-2022-7bit. Also, files that contain non-UTF-8
characters are now encoded in utf-8-emacs, not iso-2022-7bit.
* etc/tutorials/TUTORIAL.ko, tutorials/TUTORIAL.th:
Switch from iso-2022-7bit to utf-8.
* leim/quail/cyrillic.el, leim/quail/czech.el, leim/quail/ethiopic.el:
* leim/quail/greek.el, leim/quail/hanja.el, leim/quail/hanja3.el:
* leim/quail/hebrew.el, leim/quail/lao.el, leim/quail/lrt.el:
* leim/quail/slovak.el, leim/quail/symbol-ksc.el, leim/quail/thai.el:
* leim/quail/tibetan.el, leim/quail/viqr.el, leim/quail/vntelex.el:
* leim/quail/vnvni.el, leim/quail/welsh.el:
* lisp/international/latin1-disp.el, lisp/international/mule-util.el:
* lisp/language/cyril-util.el, lisp/language/european.el:
* lisp/language/ind-util.el, lisp/language/lao-util.el, lisp/language/thai.el:
* lisp/language/tibet-util.el, lisp/language/tibetan.el:
* lisp/language/viet-util.el:
Switch from iso-2022-7bit to utf-8 or (if needed) utf-8-emacs.
Fixes: debbugs:13936
2013-04-02 01:18:40 +00:00
|
|
|
|
'(("ະ" (0 ?ະ) (0 ?ັ))
|
|
|
|
|
("າ" (0 ?າ))
|
|
|
|
|
("ິ" (0 ?ີ))
|
|
|
|
|
("ີ" (0 ?ີ))
|
|
|
|
|
("ຶ" (0 ?ຶ))
|
|
|
|
|
("ື" (0 ?ື))
|
|
|
|
|
("ຸ" (0 ?ຸ))
|
|
|
|
|
("ູ" (0 ?ູ))
|
|
|
|
|
("ເະ" (?ເ 0 ?ະ) (?ເ 0 ?ັ))
|
|
|
|
|
("ເ" (?ເ 0))
|
|
|
|
|
("ແະ" (?ແ 0 ?ະ) (?ແ 0 ?ັ))
|
|
|
|
|
("ແ" (?ແ 0))
|
|
|
|
|
("ໂະ" (?ໂ 0 ?ະ) (0 ?ົ) (?ຍ ?ໂ 0 ?ັ) (?ວ ?ໂ 0 ?ັ))
|
|
|
|
|
("ໂ" (?ໂ 0))
|
|
|
|
|
("ເາະ" (?ເ 0 ?າ ?ະ) (0 ?ັ ?ອ))
|
|
|
|
|
("ໍ" (0 ?ໍ) (0 ?ອ))
|
|
|
|
|
("ເິ" (?ເ 0 ?ິ))
|
|
|
|
|
("ເີ" (?ເ 0 ?ີ))
|
|
|
|
|
("ົວະ" (0 ?ົ ?ວ ?ະ) (0 ?ັ ?ວ))
|
|
|
|
|
("ົວ" (0 ?ົ ?ວ) (0 ?ວ))
|
|
|
|
|
("ເັຽະ" (?ເ 0 ?ັ ?ຽ ?ະ) (0 ?ັ ?ຽ))
|
|
|
|
|
("ເັຽ" (?ເ 0 ?ັ ?ຽ) (0 ?ຽ))
|
|
|
|
|
("ເຶອ" (?ເ 0 ?ຶ ?ອ))
|
|
|
|
|
("ເືອ" (?ເ 0 ?ື ?ອ))
|
|
|
|
|
("ໄ" (?ໄ 0))
|
|
|
|
|
("ໃ" (?ໃ 0))
|
|
|
|
|
("ເົາ" (?ເ 0 ?ົ ?າ))
|
|
|
|
|
("ຳ" (0 ?ຳ)))
|
1999-12-15 00:47:53 +00:00
|
|
|
|
"Alist of Lao vowel string vs the corresponding re-ordering rule.
|
|
|
|
|
Each element has this form:
|
|
|
|
|
(VOWEL NO-MAA-SAKOD-RULE WITH-MAA-SAKOD-RULE (MAA-SAKOD-0 RULE-0) ...)
|
|
|
|
|
|
Use UTF-8 for most files with non-ASCII characters.
* admin/notes/unicode (etc/tutorials/TUTORIAL.ko, leim/quail/hanja.el)
(leim/quail/hanja3.el, leim/quail/symbol-ksc.el):
Now utf-8, not iso-2022-7bit. Also, files that contain non-UTF-8
characters are now encoded in utf-8-emacs, not iso-2022-7bit.
* etc/tutorials/TUTORIAL.ko, tutorials/TUTORIAL.th:
Switch from iso-2022-7bit to utf-8.
* leim/quail/cyrillic.el, leim/quail/czech.el, leim/quail/ethiopic.el:
* leim/quail/greek.el, leim/quail/hanja.el, leim/quail/hanja3.el:
* leim/quail/hebrew.el, leim/quail/lao.el, leim/quail/lrt.el:
* leim/quail/slovak.el, leim/quail/symbol-ksc.el, leim/quail/thai.el:
* leim/quail/tibetan.el, leim/quail/viqr.el, leim/quail/vntelex.el:
* leim/quail/vnvni.el, leim/quail/welsh.el:
* lisp/international/latin1-disp.el, lisp/international/mule-util.el:
* lisp/language/cyril-util.el, lisp/language/european.el:
* lisp/language/ind-util.el, lisp/language/lao-util.el, lisp/language/thai.el:
* lisp/language/tibet-util.el, lisp/language/tibetan.el:
* lisp/language/viet-util.el:
Switch from iso-2022-7bit to utf-8 or (if needed) utf-8-emacs.
Fixes: debbugs:13936
2013-04-02 01:18:40 +00:00
|
|
|
|
VOWEL is a vowel string (e.g. \"ເັຽະ\").
|
1999-12-15 00:47:53 +00:00
|
|
|
|
|
|
|
|
|
NO-MAA-SAKOD-RULE is a rule to re-order and modify VOWEL following a
|
|
|
|
|
consonant. It is a list vowel characters or 0. The element 0
|
|
|
|
|
indicate the place to embed a consonant.
|
|
|
|
|
|
|
|
|
|
Optional WITH-MAA-SAKOD-RULE is a rule to re-order and modify VOWEL
|
2011-11-17 17:40:48 +00:00
|
|
|
|
following a consonant and preceding a maa-sakod character. If it is
|
2011-11-14 23:59:56 +00:00
|
|
|
|
nil, NO-MAA-SAKOD-RULE is used. The maa-sakod character is always
|
1999-12-15 00:47:53 +00:00
|
|
|
|
appended at the tail.
|
|
|
|
|
|
Use UTF-8 for most files with non-ASCII characters.
* admin/notes/unicode (etc/tutorials/TUTORIAL.ko, leim/quail/hanja.el)
(leim/quail/hanja3.el, leim/quail/symbol-ksc.el):
Now utf-8, not iso-2022-7bit. Also, files that contain non-UTF-8
characters are now encoded in utf-8-emacs, not iso-2022-7bit.
* etc/tutorials/TUTORIAL.ko, tutorials/TUTORIAL.th:
Switch from iso-2022-7bit to utf-8.
* leim/quail/cyrillic.el, leim/quail/czech.el, leim/quail/ethiopic.el:
* leim/quail/greek.el, leim/quail/hanja.el, leim/quail/hanja3.el:
* leim/quail/hebrew.el, leim/quail/lao.el, leim/quail/lrt.el:
* leim/quail/slovak.el, leim/quail/symbol-ksc.el, leim/quail/thai.el:
* leim/quail/tibetan.el, leim/quail/viqr.el, leim/quail/vntelex.el:
* leim/quail/vnvni.el, leim/quail/welsh.el:
* lisp/international/latin1-disp.el, lisp/international/mule-util.el:
* lisp/language/cyril-util.el, lisp/language/european.el:
* lisp/language/ind-util.el, lisp/language/lao-util.el, lisp/language/thai.el:
* lisp/language/tibet-util.el, lisp/language/tibetan.el:
* lisp/language/viet-util.el:
Switch from iso-2022-7bit to utf-8 or (if needed) utf-8-emacs.
Fixes: debbugs:13936
2013-04-02 01:18:40 +00:00
|
|
|
|
For instance, rule `(\"ເືອ\" (?ເ t ?ື ?ອ))' tells that this vowel
|
|
|
|
|
string following a consonant `ກ' should be re-ordered as \"ເກືອ\".
|
1999-12-15 00:47:53 +00:00
|
|
|
|
|
|
|
|
|
Optional (MAA-SAKOD-n RULE-n) are rules specially applied to maa-sakod
|
|
|
|
|
character MAA-SAKOD-n.")
|
|
|
|
|
|
|
|
|
|
;;;###autoload
|
|
|
|
|
(defun lao-transcribe-single-roman-syllable-to-lao (from to &optional str)
|
|
|
|
|
"Transcribe a Romanized Lao syllable in the region FROM and TO to Lao string.
|
|
|
|
|
Only the first syllable is transcribed.
|
|
|
|
|
The value has the form: (START END LAO-STRING), where
|
2011-12-05 08:55:25 +00:00
|
|
|
|
START and END are the beginning and end positions of the Roman Lao syllable,
|
1999-12-15 00:47:53 +00:00
|
|
|
|
LAO-STRING is the Lao character transcription of it.
|
|
|
|
|
|
|
|
|
|
Optional 3rd arg STR, if non-nil, is a string to search for Roman Lao
|
|
|
|
|
syllable. In that case, FROM and TO are indexes to STR."
|
|
|
|
|
(if str
|
|
|
|
|
(if (setq from (string-match lao-transcription-pattern str from))
|
|
|
|
|
(progn
|
|
|
|
|
(if (>= from to)
|
|
|
|
|
(setq from nil)
|
|
|
|
|
(setq to (match-end 0)))))
|
|
|
|
|
(save-excursion
|
|
|
|
|
(goto-char from)
|
|
|
|
|
(if (setq to (re-search-forward lao-transcription-pattern to t))
|
|
|
|
|
(setq from (match-beginning 0))
|
|
|
|
|
(setq from nil))))
|
|
|
|
|
(if from
|
|
|
|
|
(let* ((consonant (match-string 1 str))
|
|
|
|
|
(semivowel (match-string 3 str))
|
|
|
|
|
(vowel (match-string 5 str))
|
|
|
|
|
(maa-sakod (match-string 8 str))
|
|
|
|
|
(tone (match-string 9 str))
|
|
|
|
|
lao-consonant lao-semivowel lao-vowel lao-maa-sakod lao-tone
|
|
|
|
|
clen cidx)
|
|
|
|
|
(setq to (match-end 0))
|
|
|
|
|
(if (not consonant)
|
|
|
|
|
(setq str (cdr (assoc (match-string 0 str)
|
|
|
|
|
lao-transcription-punctuation-alist)))
|
|
|
|
|
(setq lao-consonant
|
|
|
|
|
(cdr (assoc consonant lao-transcription-consonant-alist)))
|
|
|
|
|
(if (vectorp lao-consonant)
|
|
|
|
|
(setq lao-consonant (aref lao-consonant 0)))
|
|
|
|
|
(setq clen (length lao-consonant))
|
|
|
|
|
(if semivowel
|
|
|
|
|
;; Include semivowel in STR.
|
|
|
|
|
(setq lao-semivowel
|
|
|
|
|
(cdr (assoc semivowel lao-transcription-semi-vowel-alist))
|
|
|
|
|
str (if (= clen 1)
|
|
|
|
|
(concat lao-consonant lao-semivowel)
|
|
|
|
|
(concat (substring lao-consonant 0 1) lao-semivowel
|
|
|
|
|
(substring lao-consonant 1))))
|
|
|
|
|
(setq str lao-consonant))
|
|
|
|
|
(if vowel
|
|
|
|
|
(let (rule)
|
|
|
|
|
(setq lao-vowel
|
|
|
|
|
(cdr (assoc vowel lao-transcription-vowel-alist)))
|
|
|
|
|
(if (vectorp lao-vowel)
|
|
|
|
|
(setq lao-vowel (aref lao-vowel 0)))
|
|
|
|
|
(setq rule (assoc lao-vowel lao-vowel-reordering-rule))
|
|
|
|
|
(if (null maa-sakod)
|
|
|
|
|
(setq rule (nth 1 rule))
|
|
|
|
|
(setq lao-maa-sakod
|
|
|
|
|
(cdr (assoc maa-sakod lao-transcription-maa-sakod-alist))
|
|
|
|
|
rule
|
|
|
|
|
(or (cdr (assq (aref lao-maa-sakod 0) (nthcdr 2 rule)))
|
|
|
|
|
(nth 2 rule)
|
|
|
|
|
(nth 1 rule))))
|
|
|
|
|
(or rule
|
|
|
|
|
(error "Lao vowel %S has no re-ordering rule" lao-vowel))
|
|
|
|
|
(setq lao-consonant str str "")
|
|
|
|
|
(while rule
|
|
|
|
|
(if (= (car rule) 0)
|
|
|
|
|
(setq str (concat str lao-consonant)
|
|
|
|
|
cidx (length str))
|
|
|
|
|
(setq str (concat str (list (car rule)))))
|
|
|
|
|
(setq rule (cdr rule)))
|
|
|
|
|
(or cidx
|
|
|
|
|
(error "Lao vowel %S has malformed re-ordering rule" vowel))
|
|
|
|
|
;; Set CIDX to after upper or lower vowel if any.
|
|
|
|
|
(let ((len (length str)))
|
|
|
|
|
(while (and (< cidx len)
|
|
|
|
|
(memq (get-char-code-property (aref str cidx)
|
|
|
|
|
'phonetic-type)
|
|
|
|
|
'(vowel-lower vowel-upper)))
|
|
|
|
|
(setq cidx (1+ cidx))))
|
|
|
|
|
(if lao-maa-sakod
|
|
|
|
|
(setq str (concat str lao-maa-sakod)))
|
|
|
|
|
(if tone
|
|
|
|
|
(setq lao-tone
|
|
|
|
|
(cdr (assoc tone lao-transcription-tone-alist))
|
|
|
|
|
str (concat (substring str 0 cidx) lao-tone
|
|
|
|
|
(substring str cidx)))))))
|
|
|
|
|
(list from to (lao-compose-string str)))))
|
|
|
|
|
|
|
|
|
|
;;;###autoload
|
|
|
|
|
(defun lao-transcribe-roman-to-lao-string (str)
|
|
|
|
|
"Transcribe Romanized Lao string STR to Lao character string."
|
|
|
|
|
(let ((from 0)
|
|
|
|
|
(to (length str))
|
|
|
|
|
(lao-str "")
|
|
|
|
|
val)
|
|
|
|
|
(while (setq val (lao-transcribe-single-roman-syllable-to-lao from to str))
|
|
|
|
|
(let ((start (car val))
|
|
|
|
|
(end (nth 1 val))
|
|
|
|
|
(lao (nth 2 val)))
|
|
|
|
|
(if (> start from)
|
|
|
|
|
(setq lao-str (concat lao-str (substring str from start) lao))
|
|
|
|
|
(setq lao-str (concat lao-str lao)))
|
|
|
|
|
(setq from end)))
|
|
|
|
|
(if (< from to)
|
|
|
|
|
(concat lao-str (substring str from to))
|
|
|
|
|
lao-str)))
|
|
|
|
|
|
2001-12-07 18:21:04 +00:00
|
|
|
|
;;;###autoload
|
2008-08-29 10:57:29 +00:00
|
|
|
|
(defun lao-composition-function (gstring)
|
|
|
|
|
(if (= (lgstring-char-len gstring) 1)
|
2008-08-29 07:59:39 +00:00
|
|
|
|
(compose-gstring-for-graphic gstring)
|
2008-08-29 10:57:29 +00:00
|
|
|
|
(or (font-shape-gstring gstring)
|
2008-08-29 07:59:39 +00:00
|
|
|
|
(let ((glyph-len (lgstring-glyph-len gstring))
|
|
|
|
|
(i 0)
|
|
|
|
|
glyph)
|
|
|
|
|
(while (and (< i glyph-len)
|
|
|
|
|
(setq glyph (lgstring-glyph gstring i)))
|
|
|
|
|
(setq i (1+ i)))
|
2008-08-29 10:57:29 +00:00
|
|
|
|
(compose-glyph-string-relative gstring 0 i 0.1)))))
|
1997-02-26 12:22:56 +00:00
|
|
|
|
|
2000-05-03 03:28:39 +00:00
|
|
|
|
;;;###autoload
|
|
|
|
|
(defun lao-compose-region (from to)
|
|
|
|
|
(interactive "r")
|
|
|
|
|
(save-restriction
|
|
|
|
|
(narrow-to-region from to)
|
|
|
|
|
(goto-char (point-min))
|
|
|
|
|
(with-category-table lao-category-table
|
|
|
|
|
(while (re-search-forward lao-composition-pattern nil t)
|
|
|
|
|
(compose-region (match-beginning 0) (point))))))
|
|
|
|
|
|
1997-02-26 12:22:56 +00:00
|
|
|
|
;;
|
1997-06-18 13:06:17 +00:00
|
|
|
|
(provide 'lao-util)
|
1997-02-26 12:22:56 +00:00
|
|
|
|
|
|
|
|
|
;;; lao-util.el ends here
|