1
0
mirror of https://git.savannah.gnu.org/git/emacs.git synced 2024-11-24 07:20:37 +00:00
emacs/admin/unidata/uvs.el
2024-01-02 09:47:10 +08:00

226 lines
8.7 KiB
EmacsLisp

;;; uvs.el --- utility for UVS (format 14) cmap subtables in OpenType fonts -*- lexical-binding:t -*-
;; Copyright (C) 2014-2024 Free Software Foundation, Inc.
;; Author: YAMAMOTO Mitsuharu <mituharu@math.s.chiba-u.ac.jp>
;; This file is part of GNU Emacs.
;; GNU Emacs is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.
;; GNU Emacs is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>.
;;; Commentary:
;; To extract a C array definition of a UVS table for the Adobe-Japan1
;; character collection from an IVD Sequences file, execute
;; $ emacs -batch -l uvs.el \
;; --eval '(uvs-print-table-ivd "IVD_Sequences.txt" "Adobe-Japan1")' \
;; > uvs.h
;;; Code:
(defun uvs-fields-total-size (fields)
(apply #'+ (mapcar (lambda (field) (get field 'uvs-field-size)) fields)))
;;; Fields in Format 14 header.
(defconst uvs-format-14-header-fields
'(format length num-var-selector-records))
(put 'format 'uvs-field-size 2)
(put 'length 'uvs-field-size 4)
(put 'num-var-selector-records 'uvs-field-size 4)
(defconst uvs-format-14-header-size
(uvs-fields-total-size uvs-format-14-header-fields))
;;; Fields in Variation Selector Record.
(defconst uvs-variation-selector-record-fields
'(var-selector default-uvs-offset non-default-uvs-offset))
(put 'var-selector 'uvs-field-size 3)
(put 'default-uvs-offset 'uvs-field-size 4)
(put 'non-default-uvs-offset 'uvs-field-size 4)
(defconst uvs-variation-selector-record-size
(uvs-fields-total-size uvs-variation-selector-record-fields))
;;; Fields in Non-Default UVS Table.
(defconst uvs-non-default-uvs-table-header-fields '(num-uvs-mappings))
(put 'num-uvs-mappings 'uvs-field-size 4)
(defconst uvs-non-default-uvs-table-header-size
(uvs-fields-total-size uvs-non-default-uvs-table-header-fields))
;;; Fields in UVS Mapping.
(defconst uvs-uvs-mapping-fields '(unicode-value glyph-id))
(put 'unicode-value 'uvs-field-size 3)
(put 'glyph-id 'uvs-field-size 2)
(defconst uvs-uvs-mapping-size
(uvs-fields-total-size uvs-uvs-mapping-fields))
(defun uvs-alist-from-ivd (collection-id sequence-id-to-glyph-function)
"Create UVS alist from IVD Sequences and COLLECTION-ID.
The IVD (Ideographic Variation Database) Sequences are obtained
from the contents of the current buffer, and should be in the
form of IVD_Sequences.txt specified in Unicode Technical Standard
#37. COLLECTION-ID is a string specifying the identifier of the
collection to extract (e.g., \"Adobe-Japan1\").
SEQUENCE-ID-TO-GLYPH-FUNC is a function to convert an identifier
string of the sequence to a glyph number. UVS alist is of the
following form:
((SELECTOR1 . ((BASE11 . GLYPH11) (BASE12 . GLYPH12) ...))
(SELECTOR2 . ((BASE21 . GLYPH21) (BASE22 . GLYPH22) ...)) ...),
where selectors and bases are sorted in ascending order."
(let (uvs-alist)
(goto-char (point-min))
(while (re-search-forward
(concat "^[[:blank:]]*"
"\\([[:xdigit:]]+\\) \\([[:xdigit:]]+\\)"
"[[:blank:]]*;[[:blank:]]*"
"\\(?:" (regexp-quote collection-id) "\\)"
"[[:blank:]]*;[[:blank:]]*"
"\\([^\n[:blank:]]+\\)"
"[[:blank:]]*$")
nil t)
(let* ((base (string-to-number (match-string 1) 16))
(selector (string-to-number (match-string 2) 16))
(sequence-id (match-string 3))
(glyph (funcall sequence-id-to-glyph-function sequence-id)))
(let ((selector-bgs (assq selector uvs-alist))
(base-glyph (cons base glyph)))
(if selector-bgs
(setcdr selector-bgs (cons base-glyph (cdr selector-bgs)))
(push (cons selector (list base-glyph)) uvs-alist)))))
(dolist (selector-bgs uvs-alist)
(setcdr selector-bgs
(sort (cdr selector-bgs)
(lambda (bg1 bg2) (< (car bg1) (car bg2))))))
(sort uvs-alist (lambda (sb1 sb2) (< (car sb1) (car sb2))))))
(defun uvs-int-to-bytes (value size)
"Convert integer VALUE to a list of SIZE bytes.
The most significant byte comes first."
(let (result)
(dotimes (_ size)
(push (logand value #xff) result)
(setq value (ash value -8)))
result))
(defun uvs-insert-fields-as-bytes (fields &rest values)
"Insert VALUES for FIELDS as a sequence of bytes to the current buffer.
VALUES and FIELDS are lists of integers and field symbols,
respectively. Byte length of each value is determined by the
`uvs-field-size' property of the corresponding field."
(while fields
(let ((field (car fields))
(value (car values)))
(insert (apply #'unibyte-string
(uvs-int-to-bytes value (get field 'uvs-field-size))))
(setq fields (cdr fields) values (cdr values)))))
(defun uvs-insert-alist-as-bytes (uvs-alist)
"Insert UVS-ALIST as a sequence of bytes to the current buffer."
(let* ((nrecords (length uvs-alist)) ; # of selectors
(total-nmappings
(apply #'+ (mapcar
(lambda (selector-bgs) (length (cdr selector-bgs)))
uvs-alist)))
(non-default-offset
(+ uvs-format-14-header-size
(* uvs-variation-selector-record-size nrecords))))
(uvs-insert-fields-as-bytes uvs-format-14-header-fields
14
(+ uvs-format-14-header-size
(* uvs-variation-selector-record-size
nrecords)
(* uvs-non-default-uvs-table-header-size
nrecords)
(* uvs-uvs-mapping-size total-nmappings))
nrecords)
(dolist (selector-bgs uvs-alist)
(uvs-insert-fields-as-bytes uvs-variation-selector-record-fields
(car selector-bgs)
0 ; No Default UVS Tables.
non-default-offset)
(setq non-default-offset
(+ non-default-offset
uvs-non-default-uvs-table-header-size
(* (length (cdr selector-bgs)) uvs-uvs-mapping-size))))
(dolist (selector-bgs uvs-alist)
(uvs-insert-fields-as-bytes uvs-non-default-uvs-table-header-fields
(length (cdr selector-bgs)))
(dolist (base-glyph (cdr selector-bgs))
(uvs-insert-fields-as-bytes uvs-uvs-mapping-fields
(car base-glyph)
(cdr base-glyph))))))
(defun uvs-dump (&optional bytes-per-line separator separator-eol bol-prefix)
"Print the current buffer as in representation of C array contents."
(or bytes-per-line (setq bytes-per-line 8))
(or separator (setq separator ", "))
(or separator-eol (setq separator-eol ","))
(or bol-prefix (setq bol-prefix " "))
(goto-char (point-min))
(while (> (- (point-max) (point)) bytes-per-line)
(princ bol-prefix)
(princ (mapconcat (lambda (byte) (format "0x%02x" byte))
(string-to-unibyte
(buffer-substring (point) (+ (point) bytes-per-line)))
separator))
(princ separator-eol)
(terpri)
(forward-char bytes-per-line))
(princ bol-prefix)
(princ (mapconcat (lambda (byte) (format "0x%02x" byte))
(string-to-unibyte
(buffer-substring (point) (point-max)))
separator))
(terpri))
(defun uvs-print-table-ivd (filename collection-id
&optional sequence-id-to-glyph-func)
"Print a C array definition of a UVS table for IVD Sequences.
FILENAME specifies the IVD Sequences file. COLLECTION-ID is a
string specifying the identifier of the collection to
extract (e.g., \"Adobe-Japan1\"). SEQUENCE-ID-TO-GLYPH-FUNC is a
function to convert an identifier string of the sequence to a
glyph number, and nil means to convert \"CID\\+[0-9]+\" to the
corresponding number."
(or sequence-id-to-glyph-func
(setq sequence-id-to-glyph-func
(lambda (sequence-id)
(string-match "\\`CID\\+\\([[:digit:]]+\\)\\'" sequence-id)
(string-to-number (match-string 1 sequence-id)))))
(let ((uvs-alist
(with-temp-buffer
(insert-file-contents filename)
(uvs-alist-from-ivd collection-id
sequence-id-to-glyph-func))))
(set-binary-mode 'stdout t)
(princ "\
/* This file was automatically generated from admin/unidata/IVD_Sequences.txt
by the script admin/unidata/uvs.el. It is version-controlled
because otherwise the first build on macOS from the source
repository would need to be headless.
FIXME: Make it convenient to do headless bootstrap builds on macOS,
so that this file need not be put into the source repository. */
")
(princ
(format "static const unsigned char mac_uvs_table_%s_bytes[] =\n {\n"
(replace-regexp-in-string "[^_[:alnum:]]" "_"
(downcase collection-id))))
(with-temp-buffer
(set-buffer-multibyte nil)
(uvs-insert-alist-as-bytes uvs-alist)
(uvs-dump))
(princ " };\n")))
;;; uvs.el ends here