diff --git a/.arch-inventory b/.arch-inventory index 5f451d088a3..8d55bbd3512 100644 --- a/.arch-inventory +++ b/.arch-inventory @@ -1,4 +1,5 @@ -precious ^(config\.status)$ +# Generated files +precious ^(config\.status|config\.cache)$ # Build-in-place makes these directories, so just ignore them precious ^(info)$ diff --git a/ChangeLog b/ChangeLog index 1a771c60604..80df0d84058 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +2004-06-12 Juri Linkov + + * info/dir: Move menu help lines from `* Menu:' to file header. + Describe the purpose of a red *. + 2004-05-04 Dave Love * configure.in: Don't use `extrasub'. diff --git a/etc/.arch-inventory b/etc/.arch-inventory index f242618b2bd..6ab642ca65c 100644 --- a/etc/.arch-inventory +++ b/etc/.arch-inventory @@ -1,6 +1,9 @@ # Unlike most emacs dirs, etc has a simple non-autoconf-generated makefile source ^(Makefile)$ +# Generated files (DOC-X is generated on windows) +backup ^(DOC(|-[0-9.]*|-X))$ + # Install-in-place on NT makes this directory, so just ignore it backup ^(icons)$ diff --git a/etc/NEWS b/etc/NEWS index 18761ea1461..ca4b17ad824 100644 --- a/etc/NEWS +++ b/etc/NEWS @@ -90,8 +90,18 @@ types any more. Add -DUSE_LISP_UNION_TYPE if you want union types. * Changes in Emacs 21.4 +** Passing resources on the command line now works on MS Windows. +You can use --xrm to pass resource settings to Emacs, overriding any +existing values. For example: + + emacs --xrm "Emacs.Background:red" --xrm "Emacs.Geometry:100x20" + +will start up Emacs on an initial frame of 100x20 with red background, +irrespective of geometry or background setting on the Windows registry. + ** New features in evaluation commands ++++ *** The function `eval-defun' (C-M-x) called on defface reinitializes the face to the value specified in the defface expression. @@ -436,14 +446,24 @@ restores the previous value of `buffer-invisibility-spec'. ** New command `kill-whole-line' kills an entire line at once. By default, it is bound to C-S-. +** New commands to operate on pairs of open and close characters: +`insert-pair', `delete-pair', `raise-sexp'. + +** A prefix argument of C-M-q in Emacs Lisp mode pretty-printifies the +list starting after point. + ** Dired mode: *** New faces dired-header, dired-mark, dired-marked, dired-flagged, dired-ignored, dired-directory, dired-symlink, dired-warning introduced for Dired mode instead of font-lock faces. -*** New Dired command `dired-compare-directories' to mark files with -different file attributes in two dired buffers. +*** New Dired command `dired-compare-directories' marks files +with different file attributes in two dired buffers. + ++++ +*** New Dired command `dired-do-touch' (bound to T) changes timestamps +of marked files with the value entered in the minibuffer. +++ *** In Dired's ! command (dired-do-shell-command), `*' and `?' now @@ -459,7 +479,7 @@ types of files. The variable `dired-view-command-alist' controls what external viewers to use and when. *** In Dired, the w command now copies the current line's file name -into the kill ring. +into the kill ring. With a zero prefix arg, copies absolute file names. +++ ** Dired-x: @@ -470,15 +490,39 @@ marks omitted files. The variable dired-omit-files-p is obsoleted, use the mode toggling function instead. ** Info mode: + +*** A numeric prefix argument of `info' selects an Info buffer +with the number appended to the *info* buffer name. + +*** New command `Info-history' (bound to L) displays a menu of visited nodes. + +*** New command `Info-toc' (bound to T) creates a node with table of contents +from the tree structure of menus of the current Info file. + +*** New command `info-apropos' searches the indices of the known +Info files on your system for a string, and builds a menu of the +possible matches. + +*** New command `Info-copy-current-node-name' (bound to w) copies +the current Info node name into the kill ring. With a zero prefix +arg, puts the node name inside the `info' function call. + +*** New command `Info-search-case-sensitively' (bound to S). + +*** New command `Info-search-next' (unbound) repeats the last search +without prompting for a new search string. + +*** New face `info-xref-visited' distinguishes visited nodes from unvisited +and a new option `Info-fontify-visited-nodes' to control this. + +*** http and ftp links in Info are now operational: they look like cross +references and following them calls `browse-url'. + +++ *** Info now hides node names in menus and cross references by default. If you prefer the old behavior, you can set the new user option `Info-hide-note-references' to nil. -*** The new command `info-apropos' searches the indices of the known -Info files on your system for a string, and builds a menu of the -possible matches. - *** Images in Info pages are supported. Info pages show embedded images, in Emacs frames with image support. Info documentation that includes images, processed with makeinfo @@ -708,11 +752,12 @@ and windows-1251 are preloaded since the former is so common and the latter is used by GNU locales. ** The utf-8/16 coding systems have been enhanced. -By default, untranslatable utf-8 sequences (mostly representing CJK -characters) are simply composed into single quasi-characters. User -option `utf-translate-cjk' arranges to translate many utf-8 CJK -character sequences into real Emacs characters in a similar way to the -Mule-UCS system. This uses significant space, so is not the default. +By default, untranslatable utf-8 sequences are simply composed into +single quasi-characters. User option `utf-translate-cjk-mode' (it is +turned on by default) arranges to translate many utf-8 CJK character +sequences into real Emacs characters in a similar way to the Mule-UCS +system. As this loads a fairly big data on demand, people who are not +interested in CJK characters may want to customize it to nil. You can augment/amend the CJK translation via hash tables `ucs-mule-cjk-to-unicode' and `ucs-unicode-to-mule-cjk'. The utf-8 coding system now also encodes characters from most of Emacs's @@ -2100,6 +2145,15 @@ configuration files. * Lisp Changes in Emacs 21.4 ++++ +** Cleaner way to enter key sequences. + +You can enter a constant key sequence in a more natural format, the +same one used for saving keyboard macros, using the macro `kbd'. For +example, + +(kbd "C-x C-f") => "\^x\^f" + ** The sentinel is now called when a network process is deleted with delete-process. The status message passed to the sentinel for a deleted network process is "deleted". The message passed to the @@ -2110,10 +2164,12 @@ changed to "connection broken by remote peer". undo-outer-limit, garbage collection empties it. This is to prevent it from using up the available memory and choking Emacs. +--- ** New function quail-find-key returns a list of keys to type in the current input method to input a character. -** New functions posn-at-point and posn-at-x-y returns ++++ +** New functions posn-at-point and posn-at-x-y return click-event-style position information for a given visible buffer position or for a given window pixel coordinate. diff --git a/etc/TODO b/etc/TODO index ca8835645d8..afc84185c55 100644 --- a/etc/TODO +++ b/etc/TODO @@ -185,10 +185,6 @@ to the FSF. ** Make the Custom themes support do useful things. -** Investigate using GNU Lightning or similar system for incremental - compilation of selected bytecode functions to subrs. Converting CCL - programs to native code is probably the first thing to try, though. - ** Add support for SVG (Scalable Vector Graphics) rendering to Emacs. diff --git a/leim/.arch-inventory b/leim/.arch-inventory new file mode 100644 index 00000000000..3e125ae6e9c --- /dev/null +++ b/leim/.arch-inventory @@ -0,0 +1,4 @@ +# Auto-generated files, which ignore. +precious ^(stamp-subdir|changed\..*|leim-list\.el)$ + +# arch-tag: a4cda8ae-2a52-4d85-bd29-14e25c7ed2a2 diff --git a/leim/quail/.arch-inventory b/leim/quail/.arch-inventory new file mode 100644 index 00000000000..8e90362b19d --- /dev/null +++ b/leim/quail/.arch-inventory @@ -0,0 +1,4 @@ +# Auto-generated lisp files, which ignore. +precious ^([A-Z0-9].*|tsang-.*|quick-.*)\.el$ + +# arch-tag: 3d0d3e6b-f7c3-4dce-9135-a72ba7fe095d diff --git a/lib-src/.arch-inventory b/lib-src/.arch-inventory new file mode 100644 index 00000000000..0e0621a8dc3 --- /dev/null +++ b/lib-src/.arch-inventory @@ -0,0 +1,10 @@ +# Ignore binaries +backup ^(test-distrib|make-docfile|profile|digest-doc|movemail|cvtmail|fakemail|yow|emacsserver|hexl|update-game-score|etags|ctags|emacsclient|b2m|ebrowse)$ + +# Building actually makes a copy/link of the source file +precious ^(ctags\.c)$ + +# Windows generates this +backup ^(DOC)$ + +# arch-tag: da33b3d6-170d-4fe5-9eb8-ed2753bc9b4f diff --git a/lisp/.arch-inventory b/lisp/.arch-inventory index 5341c2d8fec..9bd88350a95 100644 --- a/lisp/.arch-inventory +++ b/lisp/.arch-inventory @@ -1,4 +1,7 @@ # Auto-generated lisp files, which ignore precious ^(loaddefs|finder-inf|cus-load)\.el$ +# Something generated during a windows build?!? +precious ^(Makefile\.unix)$ + # arch-tag: fc62dc9f-3a91-455b-b8e7-d49df66beee0 diff --git a/lisp/ChangeLog b/lisp/ChangeLog index 6dcc5c8fbbe..055bb70de3b 100644 --- a/lisp/ChangeLog +++ b/lisp/ChangeLog @@ -1,3 +1,149 @@ +2004-06-14 Kenichi Handa + + * international/code-pages.el (windows-1256, cp1125): Fix tables + for several characters. + + * international/utf-8.el (ccl-encode-mule-utf-8): Fix previous + change. + +2004-06-13 Richard M. Stallman + + * textmodes/paragraphs.el (sentence-end): Add 0x5397d as close brace. + + * emulation/pc-select.el: Doc fixes: say "PC Selection mode", + not "`pc-selection-mode'". + + * emacs-lisp/bytecomp.el: Put `...' around symbols in warning messages. + + * simple.el (previous-matching-history-element): Specify a default. + + * hexl.el (hexl-mode): Catch errors in hexl-goto-address. + + * cus-face.el (custom-declare-face): Simplify code. + + * abbrev.el (abbrev-mode, edit-abbrevs-map): Doc fixes. + +2004-06-13 Luc Teirlinck + + * files.el (before-save-hook): Add `time-stamp' to the options. + + * time-stamp.el (time-stamp): Recommend adding it to + `before-save-hook', rather than `write-file-functions' + Make a similar change in `Commentary' section. + +2004-06-13 Kai Grossjohann + + * diff-mode.el (diff-current-defun): If at start of hunk, use + position of first change. + +2004-06-13 Lars Hansen + + * dired-x.el (dired-mark-omitted): Bind to "*O". + +2004-06-12 Karl Fogel + + * bookmark.el (bookmark-bmenu-relocate): New function, as + suggested by David J. Biesack . + (bookmark-bmenu-mode-map): Bind `bookmark-bmenu-relocate' to "R". + (bookmark-bmenu-mode): Describe binding in doc string. + (bookmark-set-filename): Save the bookmark list if it's time. + +2004-06-13 Kenichi Handa + + * international/utf-8.el (ccl-decode-mule-utf-8): Fix previous + change. + (ccl-untranslated-to-ucs): Fix typo. + +2004-06-12 Karl Chen (tiny change) + + * progmodes/python.el (python-open-block-statement-p): Fix + indentation after a block opening that contains a comment. + +2004-06-12 J,Ai(Br,At(Bme Marant (tiny change) + + * bindings.el (completion-ignored-extensions): Add file extensions + of Python byte-compiled files. + +2004-06-12 Juri Linkov + + * info.el (Info-goto-node): Add autoload. + (Info-toc): Add substring-no-properties on Info file name. + (Info-mode, info, Info-toc, Info-mode-menu): Doc fix. + (Info-mode-map): Bind L to Info-history, T to Info-toc. + +2004-06-12 Kenichi Handa + + * international/mule-cmds.el (set-language-environment): Load + subst tables if necessary. + + * international/mule.el (decode-char): Load subst tables if + necessary. + (encode-char): Likewise. + + * international/utf-16.el (utf-16-decode-ucs): Handle a surrogate + pair correctly. Call ccl-mule-utf-untrans for untranslable chars. + (utf-16le-decode-loop): Set r5 to -1 before loop. + (utf-16be-decode-loop): Likewise. + (ccl-decode-mule-utf-16le): Add EOF processing block. + (ccl-decode-mule-utf-16be): Likewise. + (ccl-decode-mule-utf-16le-with-signature): Likewise. + (ccl-decode-mule-utf-16be-with-signature): Likewise. + (ccl-decode-mule-utf-16): Likewise. Set r5 to -1 initially. + (ccl-mule-utf-16-encode-untrans): New CCL. + (utf-16-decode-to-ucs): Handle pre-read character. + (utf-16le-encode-loop): Handle surrogate pair. + (utf-16be-encode-loop): Likewise. + (ccl-encode-mule-utf-16le-with-signature): Adjusted for the change + of utf-16le-encode-loop. + (ccl-encode-mule-utf-16be-with-signature): Adjusted for the change + of utf-16be-encode-loop. + (mule-utf-16-post-read-conversion): Call + utf-8-post-read-conversion at first. + (mule-utf-16[{le|be}], mule-utf-16{le|be}-with-signature): Include + CJK charsets in safe-charsets if utf-translate-cjk-mode is on. + Add post-read-conversion and pre-write-conversion. + + * international/utf-8.el (utf-translate-cjk-charsets): New + variable. + (utf-translate-cjk-unicode-range): New variable. + (utf-translate-cjk-load-tables): New function. + (utf-lookup-subst-table-for-decode): New function. + (utf-lookup-subst-table-for-encode): New function. + (utf-translate-cjk-mode): Init-value changed to t. Don't load + tables here. Update safe-charsets of utf-* coding systems. + (ccl-mule-utf-untrans): New CCL. + (ccl-decode-mule-utf-8): Call ccl-mule-utf-untrans. Use `repeat' + at end of each branch. + (ccl-mule-utf-8-encode-untrans): New CCL. + (ccl-encode-mule-utf-8): Call ccl-mule-utf-8-encode-untrans. + (ccl-untranslated-to-ucs): Handle 2-byte encoding. Set r1 to the + length of encoding. Don't return r0. + (utf-8-compose): New arg hash-table. Handle 2-byte encoding. + (utf-8-post-read-conversion): Narrow to region properly. If + utf-translate-cjk-mode is on, load tables if necessary. Call + utf-8-compose with hash-table arg if necessary. Call + XXX-compose-region instead of XXX-post-read-convesion. + (utf-8-pre-write-conversion): New function. + (mule-utf-8): Include CJK charsets in safe-charsets if + utf-translate-cjk-mode is on. Add pre-write-conversion. + + * international/characters.el: Temporarily set + utf-translate-cjk-mode to nil. + + * language/devan-util.el (devanagari-compose-region): Add + autoload cookie. + + * international/ccl.el (ccl-dump-call): Fix printing the + subroutine name. + +2004-06-11 Luc Teirlinck + + * dired.el (dired-revert): If buffer is marked unmodified before + reverting, keep it marked unmodified. + Adapt to new conventions for commenting out code. + (dired-make-relative): Adapt to new conventions for commenting out + code. + 2004-06-10 Miles Bader * eshell/esh-module.el (eshell-load-defgroups): Bind @@ -2169,17 +2315,21 @@ (desktop-buffer-info-misc-data): Rename to Info-desktop-buffer-misc-data and move to info.el. (desktop-read): Add message about number of buffers restored/failed. + * dired.el (dired-restore-desktop-buffer) Move from desktop.el. Add parameters. Pause to display error only when desktop-missing-file-warning is non-nil. (dired-desktop-buffer-misc-data): Move from desktop.el. Add parameter. (dired-mode): Bind desktop-buffer-misc-data-function. + * info.el (Info-restore-desktop-buffer): Move from desktop.el. Add Parameters. (Info-desktop-buffer-misc-data): Move from desktop.el. Add parameter. (Info-mode): Bind desktop-buffer-misc-data-function. + * mail/rmail.el (rmail-restore-desktop-buffer): Move from desktop.el. Add Parameters. + * mh-e/mh-e.el (mh-restore-desktop-buffer): Move from desktop.el. Add Parameters. diff --git a/lisp/abbrev.el b/lisp/abbrev.el index 1e3eea0e359..3be0014fd0e 100644 --- a/lisp/abbrev.el +++ b/lisp/abbrev.el @@ -37,9 +37,9 @@ define global abbrevs instead." :group 'convenience) (defun abbrev-mode (&optional arg) - "Toggle abbrev mode. + "Toggle Abbrev mode in the current buffer. With argument ARG, turn abbrev mode on iff ARG is positive. -In abbrev mode, inserting an abbreviation causes it to expand +In Abbrev mode, inserting an abbreviation causes it to expand and be replaced by its expansion." (interactive "P") (setq abbrev-mode @@ -48,18 +48,19 @@ and be replaced by its expansion." (force-mode-line-update)) (defcustom abbrev-mode nil - "Toggle abbrev mode. + "Enable or disable Abbrev mode. Non-nil means automatically expand abbrevs as they are inserted. +Setting this variable with `setq' changes it for the current buffer. Changing it with \\[customize] sets the default value. -Use the command `abbrev-mode' to enable or disable Abbrev mode in the current -buffer." +Interactively, use the command `abbrev-mode' +to enable or disable Abbrev mode in the current buffer." :type 'boolean :group 'abbrev-mode) (defvar edit-abbrevs-map nil - "Keymap used in edit-abbrevs.") + "Keymap used in `edit-abbrevs'.") (if edit-abbrevs-map nil (setq edit-abbrevs-map (make-sparse-keymap)) diff --git a/lisp/bindings.el b/lisp/bindings.el index 2518c9bae08..68c4ec433f7 100644 --- a/lisp/bindings.el +++ b/lisp/bindings.el @@ -524,7 +524,9 @@ is okay. See `mode-line-format'.") ;; files you do want to see, not just TeX stuff. -- fx ".toc" ".aux" ".cp" ".fn" ".ky" ".pg" ".tp" ".vr" - ".cps" ".fns" ".kys" ".pgs" ".tps" ".vrs"))) + ".cps" ".fns" ".kys" ".pgs" ".tps" ".vrs" + ;; Python byte-compiled + ".pyc" ".pyo"))) ;; Suffixes used for executables. (setq exec-suffixes diff --git a/lisp/bookmark.el b/lisp/bookmark.el index 3ed66f229a8..b25c261c1e7 100644 --- a/lisp/bookmark.el +++ b/lisp/bookmark.el @@ -376,7 +376,11 @@ That is, all information but the name." (if cell (setcdr cell filename) (nconc (bookmark-get-bookmark-record bookmark) - (list (cons 'filename filename)))))) + (list (cons 'filename filename)))) + (setq bookmark-alist-modification-count + (1+ bookmark-alist-modification-count)) + (if (bookmark-time-to-save-p) + (bookmark-save)))) (defun bookmark-get-position (bookmark) @@ -1491,6 +1495,7 @@ method buffers use to resolve name collisions." (define-key bookmark-bmenu-mode-map "m" 'bookmark-bmenu-mark) (define-key bookmark-bmenu-mode-map "l" 'bookmark-bmenu-load) (define-key bookmark-bmenu-mode-map "r" 'bookmark-bmenu-rename) + (define-key bookmark-bmenu-mode-map "R" 'bookmark-bmenu-relocate) (define-key bookmark-bmenu-mode-map "t" 'bookmark-bmenu-toggle-filenames) (define-key bookmark-bmenu-mode-map "a" 'bookmark-bmenu-show-annotation) (define-key bookmark-bmenu-mode-map "A" 'bookmark-bmenu-show-all-annotations) @@ -1589,6 +1594,7 @@ Bookmark names preceded by a \"*\" have annotations. so the bookmark menu bookmark remains visible in its window. \\[bookmark-bmenu-switch-other-window] -- switch the other window to this bookmark. \\[bookmark-bmenu-rename] -- rename this bookmark \(prompts for new name\). +\\[bookmark-bmenu-relocate] -- relocate this bookmark's file \(prompts for new file\). \\[bookmark-bmenu-delete] -- mark this bookmark to be deleted, and move down. \\[bookmark-bmenu-delete-backwards] -- mark this bookmark to be deleted, and move up. \\[bookmark-bmenu-execute-deletions] -- delete bookmarks marked with `\\[bookmark-bmenu-delete]'. @@ -2041,6 +2047,15 @@ To carry out the deletions that you've marked, use \\\\ (let ((bmrk (bookmark-bmenu-bookmark))) (message (bookmark-location bmrk))))) +(defun bookmark-bmenu-relocate () + "Change the file path of the bookmark on the current line, + prompting with completion for the new path." + (interactive) + (if (bookmark-bmenu-check-position) + (let ((bmrk (bookmark-bmenu-bookmark)) + (thispoint (point))) + (bookmark-relocate bmrk) + (goto-char thispoint)))) ;;; Menu bar stuff. Prefix is "bookmark-menu". diff --git a/lisp/cus-face.el b/lisp/cus-face.el index b5716da161a..0bd3387d3c7 100644 --- a/lisp/cus-face.el +++ b/lisp/cus-face.el @@ -40,15 +40,11 @@ (unless (facep face) ;; If the user has already created the face, respect that. (let ((value (or (get face 'saved-face) spec)) - (frames (frame-list)) - (have-window-system (memq initial-window-system '(x w32))) - frame) + (have-window-system (memq initial-window-system '(x w32)))) ;; Create global face. (make-empty-face face) ;; Create frame-local faces - (while frames - (setq frame (car frames) - frames (cdr frames)) + (dolist (frame (frame-list)) (face-spec-set face value frame) (when (memq (window-system frame) '(x w32)) (setq have-window-system t))) diff --git a/lisp/diff-mode.el b/lisp/diff-mode.el index 9b00eae050d..26ff5441baf 100644 --- a/lisp/diff-mode.el +++ b/lisp/diff-mode.el @@ -1248,9 +1248,12 @@ If the prefix arg is bigger than 8 (for example with \\[universal-argument] \\[u (defun diff-current-defun () "Find the name of function at point. For use in `add-log-current-defun-function'." - (destructuring-bind (buf line-offset pos src dst &optional switched) - (diff-find-source-location) - (save-excursion + (save-excursion + (when (looking-at diff-hunk-header-re) + (forward-line 1) + (while (and (looking-at " ") (not (zerop (forward-line 1)))))) + (destructuring-bind (buf line-offset pos src dst &optional switched) + (diff-find-source-location) (beginning-of-line) (or (when (memq (char-after) '(?< ?-)) ;; Cursor is pointing at removed text. This could be a removed diff --git a/lisp/dired-x.el b/lisp/dired-x.el index caef06a64fb..6b44b73b170 100644 --- a/lisp/dired-x.el +++ b/lisp/dired-x.el @@ -239,7 +239,7 @@ to nil: a pipe using `zcat' or `gunzip -c' will be used." ;;; KEY BINDINGS. (define-key dired-mode-map "\M-o" 'dired-omit-mode) -(define-key dired-mode-map "\M-O" 'dired-mark-omitted) +(define-key dired-mode-map "*O" 'dired-mark-omitted) (define-key dired-mode-map "\M-(" 'dired-mark-sexp) (define-key dired-mode-map "*(" 'dired-mark-sexp) (define-key dired-mode-map "*." 'dired-mark-extension) diff --git a/lisp/dired.el b/lisp/dired.el index 3d3fd34b5ac..e5e23dfe2d6 100644 --- a/lisp/dired.el +++ b/lisp/dired.el @@ -880,7 +880,8 @@ Must also be called after dired-actual-switches have changed. Should not fail even on completely garbaged buffers. Preserves old cursor, marks/flags, hidden-p." (widen) ; just in case user narrowed - (let ((opoint (point)) + (let ((modflag (buffer-modified-p)) + (opoint (point)) (ofile (dired-get-filename nil t)) (mark-alist nil) ; save marked files (hidden-subdirs (dired-remember-hidden)) @@ -907,9 +908,10 @@ Preserves old cursor, marks/flags, hidden-p." (save-excursion ; hide subdirs that were hidden (dolist (dir hidden-subdirs) (if (dired-goto-subdir dir) - (dired-hide-subdir 1))))) + (dired-hide-subdir 1)))) + (unless modflag (restore-buffer-modified-p nil))) ;; outside of the let scope -;;; Might as well not override the user if the user changed this. +;;; Might as well not override the user if the user changed this. ;;; (setq buffer-read-only t) ) @@ -1707,7 +1709,7 @@ DIR must be a directory name, not a file name." (setq dir (expand-file-name dir))) (if (string-match (concat "^" (regexp-quote dir)) file) (substring file (match-end 0)) -;;; (or no-error +;;; (or no-error ;;; (error "%s: not in directory tree growing at %s" file dir)) file)) diff --git a/lisp/emacs-lisp/bytecomp.el b/lisp/emacs-lisp/bytecomp.el index 8e20925c70d..a4ae751cab7 100644 --- a/lisp/emacs-lisp/bytecomp.el +++ b/lisp/emacs-lisp/bytecomp.el @@ -1008,11 +1008,11 @@ Each function's symbol gets added to `byte-compile-noruntime-functions'." (when (nth 2 new))) (byte-compile-set-symbol-position (car form)) (if (memq 'obsolete byte-compile-warnings) - (byte-compile-warn "%s is an obsolete function%s; %s" (car form) + (byte-compile-warn "`%s' is an obsolete function%s; %s" (car form) (if when (concat " since " when) "") (if (stringp (car new)) (car new) - (format "use %s instead." (car new))))) + (format "use `%s' instead." (car new))))) (funcall (or handler 'byte-compile-normal-call) form))) ;; Compiler options @@ -2076,7 +2076,7 @@ list that represents a doc string reference. (defun byte-compile-file-form-defsubst (form) (when (assq (nth 1 form) byte-compile-unresolved-functions) (setq byte-compile-current-form (nth 1 form)) - (byte-compile-warn "defsubst %s was used before it was defined" + (byte-compile-warn "defsubst `%s' was used before it was defined" (nth 1 form))) (byte-compile-file-form (macroexpand form byte-compile-macro-environment)) @@ -2206,7 +2206,7 @@ list that represents a doc string reference. (not (assq (nth 1 form) byte-compile-initial-macro-environment))) (byte-compile-warn - "%s defined multiple times, as both function and macro" + "`%s' defined multiple times, as both function and macro" (nth 1 form))) (setcdr that-one nil)) (this-one @@ -2215,14 +2215,14 @@ list that represents a doc string reference. ;; byte-compiler macros in byte-run.el... (not (assq (nth 1 form) byte-compile-initial-macro-environment))) - (byte-compile-warn "%s %s defined multiple times in this file" + (byte-compile-warn "%s `%s' defined multiple times in this file" (if macrop "macro" "function") (nth 1 form)))) ((and (fboundp name) (eq (car-safe (symbol-function name)) (if macrop 'lambda 'macro))) (when (memq 'redefine byte-compile-warnings) - (byte-compile-warn "%s %s being redefined as a %s" + (byte-compile-warn "%s `%s' being redefined as a %s" (if macrop "function" "macro") (nth 1 form) (if macrop "macro" "function"))) @@ -2695,7 +2695,7 @@ If FORM is a lambda or a macro, byte-compile it as a function." (handler (get fn 'byte-compile))) (byte-compile-set-symbol-position fn) (when (byte-compile-const-symbol-p fn) - (byte-compile-warn "%s called as a function" fn)) + (byte-compile-warn "`%s' called as a function" fn)) (if (and handler (or (not (byte-compile-version-cond byte-compile-compatibility)) @@ -2730,9 +2730,9 @@ If FORM is a lambda or a macro, byte-compile it as a function." (if (or (not (symbolp var)) (byte-compile-const-symbol-p var (not (eq base-op 'byte-varref)))) (byte-compile-warn - (cond ((eq base-op 'byte-varbind) "attempt to let-bind %s %s") - ((eq base-op 'byte-varset) "variable assignment to %s %s") - (t "variable reference to %s %s")) + (cond ((eq base-op 'byte-varbind) "attempt to let-bind %s `%s'") + ((eq base-op 'byte-varset) "variable assignment to %s `%s'") + (t "variable reference to %s `%s'")) (if (symbolp var) "constant" "nonvariable") (prin1-to-string var)) (if (and (get var 'byte-obsolete-variable) @@ -2740,11 +2740,11 @@ If FORM is a lambda or a macro, byte-compile it as a function." (not (eq var byte-compile-not-obsolete-var))) (let* ((ob (get var 'byte-obsolete-variable)) (when (cdr ob))) - (byte-compile-warn "%s is an obsolete variable%s; %s" var + (byte-compile-warn "`%s' is an obsolete variable%s; %s" var (if when (concat " since " when) "") (if (stringp (car ob)) (car ob) - (format "use %s instead." (car ob)))))) + (format "use `%s' instead." (car ob)))))) (if (memq 'free-vars byte-compile-warnings) (if (eq base-op 'byte-varbind) (push var byte-compile-bound-variables) @@ -2753,11 +2753,11 @@ If FORM is a lambda or a macro, byte-compile it as a function." (if (eq base-op 'byte-varset) (or (memq var byte-compile-free-assignments) (progn - (byte-compile-warn "assignment to free variable %s" var) + (byte-compile-warn "assignment to free variable `%s'" var) (push var byte-compile-free-assignments))) (or (memq var byte-compile-free-references) (progn - (byte-compile-warn "reference to free variable %s" var) + (byte-compile-warn "reference to free variable `%s'" var) (push var byte-compile-free-references)))))))) (let ((tmp (assq var byte-compile-variables))) (unless tmp @@ -2958,7 +2958,7 @@ If FORM is a lambda or a macro, byte-compile it as a function." (defun byte-compile-subr-wrong-args (form n) (byte-compile-set-symbol-position (car form)) - (byte-compile-warn "%s called with %d arg%s, but requires %s" + (byte-compile-warn "`%s' called with %d arg%s, but requires %s" (car form) (length (cdr form)) (if (= 1 (length (cdr form))) "" "s") n) ;; get run-time wrong-number-of-args error. @@ -3124,7 +3124,7 @@ If FORM is a lambda or a macro, byte-compile it as a function." (if (and (consp (car body)) (not (eq 'byte-code (car (car body))))) (byte-compile-warn - "A quoted lambda form is the second argument of fset. This is probably + "A quoted lambda form is the second argument of `fset'. This is probably not what you want, as that lambda cannot be compiled. Consider using the syntax (function (lambda (...) ...)) instead."))))) (byte-compile-two-args form)) @@ -3507,7 +3507,7 @@ being undefined will be suppressed." (byte-compile-set-symbol-position 'condition-case) (unless (symbolp var) (byte-compile-warn - "%s is not a variable-name or nil (in condition-case)" var)) + "`%s' is not a variable-name or nil (in condition-case)" var)) (byte-compile-push-constant var) (byte-compile-push-constant (byte-compile-top-level (nth 2 form) for-effect)) @@ -3525,13 +3525,13 @@ being undefined will be suppressed." (setq syms (cdr syms))) ok)))) (byte-compile-warn - "%s is not a condition name or list of such (in condition-case)" + "`%s' is not a condition name or list of such (in condition-case)" (prin1-to-string condition))) ;; ((not (or (eq condition 't) ;; (and (stringp (get condition 'error-message)) ;; (consp (get condition 'error-conditions))))) ;; (byte-compile-warn -;; "%s is not a known condition name (in condition-case)" +;; "`%s' is not a known condition name (in condition-case)" ;; condition)) ) (setq compiled-clauses @@ -3627,7 +3627,7 @@ being undefined will be suppressed." (and (eq fun 'defconst) (null (cddr form)))) (let ((ncall (length (cdr form)))) (byte-compile-warn - "%s called with %d argument%s, but %s %s" + "`%s' called with %d argument%s, but %s %s" fun ncall (if (= 1 ncall) "" "s") (if (< ncall 2) "requires" "accepts only") @@ -3644,7 +3644,7 @@ being undefined will be suppressed." `(push ',var current-load-list)) (when (> (length form) 3) (when (and string (not (stringp string))) - (byte-compile-warn "third arg to %s %s is not a string: %s" + (byte-compile-warn "third arg to `%s %s' is not a string: %s" fun var string)) `(put ',var 'variable-documentation ,string)) (if (cddr form) ; `value' provided diff --git a/lisp/emulation/pc-select.el b/lisp/emulation/pc-select.el index c7ea973467f..188e335687c 100644 --- a/lisp/emulation/pc-select.el +++ b/lisp/emulation/pc-select.el @@ -61,7 +61,7 @@ ;; Eli Barzilay (eli@cs.bgu.ac.il) suggested the sexps functions and ;; keybindings. ;; -;; Ok, some details about the idea of pc-selection-mode: +;; Ok, some details about the idea of PC Selection mode: ;; ;; o The standard keys for moving around (right, left, up, down, home, end, ;; prior, next, called "move-keys" from now on) will always de-activate @@ -114,23 +114,23 @@ This gives mostly Emacs-like behaviour with only the selection keys enabled." :group 'pc-select) (defvar pc-select-saved-settings-alist nil - "The values of the variables before `pc-selection-mode' was toggled on. -When `pc-selection-mode' is toggled on, it sets quite a few variables + "The values of the variables before PC Selection mode was toggled on. +When PC Selection mode is toggled on, it sets quite a few variables for its own purposes. This alist holds the original values of the -variables `pc-selection-mode' had set, so that these variables can be -restored to their original values when `pc-selection-mode' is toggled off.") +variables PC Selection mode had set, so that these variables can be +restored to their original values when PC Selection mode is toggled off.") (defvar pc-select-map nil - "The keymap used as the global map when `pc-selection-mode' is on." ) + "The keymap used as the global map when PC Selection mode is on." ) (defvar pc-select-saved-global-map nil - "The global map that was in effect when `pc-selection-mode' was toggled on.") + "The global map that was in effect when PC Selection mode was toggled on.") (defvar pc-select-key-bindings-alist nil - "This alist holds all the key bindings `pc-selection-mode' sets.") + "This alist holds all the key bindings PC Selection mode sets.") (defvar pc-select-default-key-bindings nil - "These key bindings always get set by `pc-selection-mode'.") + "These key bindings always get set by PC Selection mode.") (unless pc-select-default-key-bindings (let ((lst @@ -250,7 +250,7 @@ These key bindings get installed when running in a tty, but only if (defvar pc-select-old-M-delete-binding nil "Holds the old mapping of [M-delete] in the `function-key-map'. This variable holds the value associated with [M-delete] in the -`function-key-map' before `pc-selection-mode' had changed that +`function-key-map' before PC Selection mode had changed that association.") ;;;; @@ -842,7 +842,7 @@ M-LEFT and M-RIGHT move back or forward one word or sexp, disabling the mark. S-M-LEFT and S-M-RIGHT move back or forward one word or sexp, leaving the mark behind. To control whether these keys move word-wise or sexp-wise set the variable `pc-select-meta-moves-sexps' after loading pc-select.el but before -turning `pc-selection-mode' on. +turning PC Selection mode on. C-DOWN and C-UP move back or forward a paragraph, disabling the mark. S-C-DOWN and S-C-UP move back or forward a paragraph, leaving the mark behind. @@ -864,7 +864,7 @@ C-INSERT copies the region into the kill ring (`copy-region-as-kill'). In addition, certain other PC bindings are imitated (to avoid this, set the variable `pc-select-selection-keys-only' to t after loading pc-select.el -but before calling `pc-selection-mode'): +but before calling PC Selection mode): F6 other-window DELETE delete-char @@ -974,7 +974,8 @@ but before calling `pc-selection-mode'): Change mark behaviour to emulate Motif, MAC or MS-Windows cut and paste style, and cursor movement commands. This mode enables Delete Selection mode and Transient Mark mode. -You must modify via \\[customize] for this variable to have an effect." +Setting this variable directly does not take effect; +you must modify it using \\[customize] or \\[pc-selection-mode]." :set (lambda (symbol value) (pc-selection-mode (if value 1 -1))) :initialize 'custom-initialize-default diff --git a/lisp/eshell/.arch-inventory b/lisp/eshell/.arch-inventory new file mode 100644 index 00000000000..b5d82cdd6fc --- /dev/null +++ b/lisp/eshell/.arch-inventory @@ -0,0 +1,4 @@ +# Generated files +precious ^(esh-groups)\.el$ + +# arch-tag: 8dc7bfaa-6ca6-4be0-915a-1e539c3dabfb diff --git a/lisp/files.el b/lisp/files.el index 4ee6da4e544..dc84c79df84 100644 --- a/lisp/files.el +++ b/lisp/files.el @@ -3024,7 +3024,7 @@ the last real save, but optional arg FORCE non-nil means delete anyway." (defcustom before-save-hook nil "Normal hook that is run before a buffer is saved to its file." - :options '(copyright-update) + :options '(copyright-update time-stamp) :type 'hook :group 'files) diff --git a/lisp/hexl.el b/lisp/hexl.el index cc36c37602e..883700933a8 100644 --- a/lisp/hexl.el +++ b/lisp/hexl.el @@ -217,7 +217,9 @@ You can use \\[hexl-find-file] to visit a file in Hexl mode. (set-buffer-modified-p modified)) (make-local-variable 'hexl-max-address) (setq hexl-max-address max-address) - (hexl-goto-address original-point)) + (condition-case nil + (hexl-goto-address original-point) + (error nil))) ;; We do not turn off the old major mode; instead we just ;; override most of it. That way, we can restore it perfectly. @@ -405,7 +407,7 @@ This function is indented to be used as eldoc callback." Signal error if ADDRESS out of range." (interactive "nAddress: ") (if (or (< address 0) (> address hexl-max-address)) - (error "Out of hexl region")) + (error "Out of hexl region")) (goto-char (hexl-address-to-marker address))) (defun hexl-goto-hex-address (hex-address) diff --git a/lisp/info.el b/lisp/info.el index 14183383743..43e1dafcc6f 100644 --- a/lisp/info.el +++ b/lisp/info.el @@ -469,7 +469,8 @@ with the top-level Info directory. In interactive use, a non-numeric prefix argument directs this command to read a file name from the minibuffer. -A numeric prefix argument appends the number to the buffer name. +A numeric prefix argument selects an Info buffer with the prefix number +appended to the Info buffer name. The search path for Info files is in the variable `Info-directory-list'. The top-level Info directory is made by combining all the files named `dir' @@ -1315,6 +1316,7 @@ any double quotes or backslashes must be escaped (\\\",\\\\)." ;; Go to an info node specified with a filename-and-nodename string ;; of the sort that is found in pointers in nodes. +;;;###autoload (defun Info-goto-node (nodename &optional fork) "Go to info node named NODENAME. Give just NODENAME or (FILENAME)NODENAME. If NODENAME is of the form (FILENAME)NODENAME, the node is in the Info file @@ -1672,7 +1674,8 @@ If SAME-FILE is non-nil, do not move to a different Info file." (goto-char (or p (point-min))))) (defun Info-toc () - "Go to a node with table of contents of the current Info file." + "Go to a node with table of contents of the current Info file. +Table of contents is created from the tree structure of menus." (interactive) (let ((curr-file Info-current-file) (curr-node Info-current-node) @@ -1687,7 +1690,7 @@ If SAME-FILE is non-nil, do not move to a different Info file." (insert "*Note Top::\n") (Info-insert-toc (nth 2 (assoc "Top" node-list)) ; get Top nodes - node-list 0 curr-file)) + node-list 0 (substring-no-properties curr-file))) (if (not (bobp)) (let ((Info-hide-note-references 'hide) (Info-fontify-visited-nodes nil)) @@ -2786,6 +2789,7 @@ if point is in a menu item description, follow that menu item." (define-key Info-mode-map "h" 'Info-help) (define-key Info-mode-map "i" 'Info-index) (define-key Info-mode-map "l" 'Info-last) + (define-key Info-mode-map "L" 'Info-history) (define-key Info-mode-map "m" 'Info-menu) (define-key Info-mode-map "n" 'Info-next) (define-key Info-mode-map "p" 'Info-prev) @@ -2796,6 +2800,7 @@ if point is in a menu item description, follow that menu item." (define-key Info-mode-map "\M-s" 'Info-search) (define-key Info-mode-map "\M-n" 'clone-buffer) (define-key Info-mode-map "t" 'Info-top-node) + (define-key Info-mode-map "T" 'Info-toc) (define-key Info-mode-map "u" 'Info-up) ;; For consistency with dired-copy-filename-as-kill. (define-key Info-mode-map "w" 'Info-copy-current-node-name) @@ -2843,9 +2848,9 @@ if point is in a menu item description, follow that menu item." ["Last" Info-last :active Info-history :help "Go to the last node you were at"] ["History" Info-history :active Info-history-list - :help "Go to the history buffer"] + :help "Go to menu of visited nodes"] ["Table of Contents" Info-toc - :help "Go to the buffer with a table of contents"] + :help "Go to table of contents"] ("Index..." ["Lookup a String" Info-index :help "Look for a string in the index items"] @@ -2990,15 +2995,15 @@ Selecting other nodes: \\[Info-directory] Go to the Info directory node. \\[Info-follow-reference] Follow a cross reference. Reads name of reference. \\[Info-last] Move to the last node you were at. -\\[Info-history] Go to the history buffer. -\\[Info-toc] Go to the buffer with a table of contents. -\\[Info-index] Look up a topic in this file's Index and move to that node. -\\[Info-index-next] (comma) Move to the next match from a previous \\\\[Info-index] command. -\\[info-apropos] Look for a string in the indices of all manuals. +\\[Info-history] Go to menu of visited nodes. +\\[Info-toc] Go to table of contents of the current Info file. \\[Info-top-node] Go to the Top node of this file. \\[Info-final-node] Go to the final node in this file. \\[Info-backward-node] Go backward one node, considering all nodes as forming one sequence. \\[Info-forward-node] Go forward one node, considering all nodes as forming one sequence. +\\[Info-index] Look up a topic in this file's Index and move to that node. +\\[Info-index-next] (comma) Move to the next match from a previous \\\\[Info-index] command. +\\[info-apropos] Look for a string in the indices of all manuals. Moving within a node: \\[Info-scroll-up] Normally, scroll forward a full screen. @@ -3015,15 +3020,15 @@ Advanced commands: \\[Info-copy-current-node-name] Put name of current info node in the kill ring. \\[clone-buffer] Select a new cloned Info buffer in another window. \\[Info-edit] Edit contents of selected node. -1 Pick first item in node's menu. -2, 3, 4, 5 Pick second ... fifth item in node's menu. +1 .. 9 Pick first ... ninth item in node's menu. + Every third `*' is highlighted to help pick the right number. \\[Info-goto-node] Move to node specified by name. You may include a filename as well, as (FILENAME)NODENAME. \\[universal-argument] \\[info] Move to new Info file with completion. +\\[universal-argument] N \\[info] Select Info buffer with prefix number in the name *info*. \\[Info-search] Search through this Info file for specified regexp, and select the node in which the next occurrence is found. -\\[Info-search-case-sensitively] Search through this Info file - for specified regexp case-sensitively. +\\[Info-search-case-sensitively] Search through this Info file for specified regexp case-sensitively. \\[Info-search-next] Search for another occurrence of regexp from a previous \\\\[Info-search] command. \\[Info-next-reference] Move cursor to next cross-reference or menu item. diff --git a/lisp/international/ccl.el b/lisp/international/ccl.el index e9e46bb0c6c..e55f1fbcf36 100644 --- a/lisp/international/ccl.el +++ b/lisp/international/ccl.el @@ -1120,7 +1120,8 @@ (insert (format "write r%d (%d remaining)\n" rrr cc))) (defun ccl-dump-call (ignore cc) - (insert (format "call subroutine #%d\n" cc))) + (let ((subroutine (car (ccl-get-next-code)))) + (insert (format "call subroutine `%s'\n" subroutine)))) (defun ccl-dump-write-const-string (rrr cc) (if (= rrr 0) diff --git a/lisp/international/characters.el b/lisp/international/characters.el index 809c457c6e1..c33bd2eb43f 100644 --- a/lisp/international/characters.el +++ b/lisp/international/characters.el @@ -33,6 +33,11 @@ ;;; Code: +;; We must set utf-translate-cjk-mode to nil while loading this file +;; to avoid translating CJK characters in decode-char. +(defvar saved-utf-translate-cjk-mode utf-translate-cjk-mode) +(setq utf-translate-cjk-mode nil) + ;;; Predefined categories. ;; For each character set. @@ -1277,6 +1282,10 @@ (put-charset-property (car l) 'nospace-between-words t) (setq l (cdr l)))) + +(setq utf-translate-cjk-mode saved-utf-translate-cjk-mode) +(makunbound 'saved-utf-translate-cjk-mode) + ;;; Local Variables: ;;; coding: iso-2022-7bit ;;; End: diff --git a/lisp/international/code-pages.el b/lisp/international/code-pages.el index 64c430f9383..0f8cdbf5713 100644 --- a/lisp/international/code-pages.el +++ b/lisp/international/code-pages.el @@ -2970,22 +2970,22 @@ Return an updated `non-iso-charset-alist'." (cp-make-coding-system windows-1256 [?\€ - ?\٠ + ?\پ ?\‚ - ?\١ + ?\ƒ ?\„ ?\… ?\† ?\‡ - ?\٢ - ?\٣ - ?\٤ + ?\ˆ + ?\‰ + ?\ٹ ?\‹ - ?\٥ - ?\٦ - ?\٧ - ?\٨ - ?\٩ + ?\Œ + ?\چ + ?\ژ + ?\ڈ + ?\گ ?\‘ ?\’ ?\“ @@ -2993,110 +2993,110 @@ Return an updated `non-iso-charset-alist'." ?\• ?\– ?\— - ?\؛ + ?\ک ?\™ - ?\؟ + ?\ڑ ?\› - ?\ء - ?\آ - ?\أ - ?\Ÿ + ?\œ + ?\‌ + ?\‍ + ?\ں ?\  - ?\ؤ - ?\إ + ?\، + ?\¢ ?\£ ?\¤ - ?\ئ + ?\¥ ?\¦ ?\§ - ?\ا + ?\¨ ?\© - ?\ب + ?\ھ ?\« ?\¬ ?\­ ?\® - ?\پ + ?\¯ ?\° ?\± - ?\ة - ?\ت - ?\ث + ?\² + ?\³ + ?\´ ?\µ ?\¶ ?\· - ?\ج - ?\چ - ?\ح + ?\¸ + ?\¹ + ?\؛ ?\» + ?\¼ + ?\½ + ?\¾ + ?\؟ + ?\ہ + ?\ء + ?\آ + ?\أ + ?\ؤ + ?\إ + ?\ئ + ?\ا + ?\ب + ?\ة + ?\ت + ?\ث + ?\ج + ?\ح ?\خ ?\د ?\ذ ?\ر - ?\À ?\ز - ?\ - ?\ژ ?\س ?\ش ?\ص - ?\Ç - ?\È - ?\É - ?\Ê - ?\Ë ?\ض + ?\× ?\ط - ?\Î - ?\Ï - ?\ㄓ + ?\ظ ?\ع ?\غ ?\ـ - ?\Ô ?\ف ?\ق - ?\× ?\ك - ?\Ù - ?\گ - ?\Û - ?\Ü + ?\à ?\ل + ?\â ?\م ?\ن - ?\à ?\ه - ?\â - ?\ځ ?\و - ?\ى - ?\ي ?\ç ?\è ?\é ?\ê ?\ë - ?\ً - ?\ٌ + ?\ى + ?\ي ?\î ?\ï + ?\ً + ?\ٌ ?\ٍ ?\َ + ?\ô ?\ُ ?\ِ - ?\ô - ?\ّ - ?\ْ ?\÷ - nil + ?\ّ ?\ù - nil + ?\ْ ?\û ?\ü ?\‎ ?\‏ - ?\ÿ] + ?\ے] nil ?a) ;; Arabic (cp-make-coding-system @@ -4430,11 +4430,11 @@ Return an updated `non-iso-charset-alist'." ?\і ?\Ї ?\ї - ?\÷ - ?\± + ?\· + ?\√ ?\№ ?\¤ - ?\■ + ?\■ ?\ ]) (define-coding-system-alias 'ruscii 'cp1125) ;; Original name for cp1125, says Serhii Hlodin diff --git a/lisp/international/mule-cmds.el b/lisp/international/mule-cmds.el index c9c462f028e..95177fdb954 100644 --- a/lisp/international/mule-cmds.el +++ b/lisp/international/mule-cmds.el @@ -1834,6 +1834,15 @@ specifies the character set for the major languages of Western Europe." (let ((func (get-language-info language-name 'setup-function))) (if (functionp func) (funcall func))) + (if (and utf-translate-cjk-mode + utf-translate-cjk-lang-env + (not (eq utf-translate-cjk-lang-env language-name)) + (catch 'tag + (dolist (charset (get-language-info language-name 'charset)) + (if (memq charset utf-translate-cjk-charsets) + (throw 'tag t))) + nil)) + (utf-translate-cjk-load-tables)) (run-hooks 'set-language-environment-hook) (force-mode-line-update t)) diff --git a/lisp/international/mule.el b/lisp/international/mule.el index c4c7be3a225..bbe83c2baf7 100644 --- a/lisp/international/mule.el +++ b/lisp/international/mule.el @@ -316,8 +316,7 @@ Optional argument RESTRICTION specifies a way to map the pair of CCS and CODE-POINT to a character. Currently not supported and just ignored." (cond ((eq ccs 'ucs) - (or (gethash code-point - (get 'utf-subst-table-for-decode 'translation-hash-table)) + (or (utf-lookup-subst-table-for-decode code-point) (let ((c (cond ((< code-point 160) code-point) @@ -361,8 +360,7 @@ code-point in CCS. Currently not supported and just ignored." (charset (car split)) trans) (cond ((eq ccs 'ucs) - (or (gethash char (get 'utf-subst-table-for-encode - 'translation-hash-table)) + (or (utf-lookup-subst-table-for-encode char) (let ((table (get 'utf-translation-table-for-encode 'translation-table))) (setq trans (aref table char)) diff --git a/lisp/international/utf-16.el b/lisp/international/utf-16.el index d924512b634..477cccc2bf9 100644 --- a/lisp/international/utf-16.el +++ b/lisp/international/utf-16.el @@ -48,99 +48,110 @@ ;; things below, sometimes with commonality abstracted into a let ;; binding for maintenance convenience. -;; We'd need new charsets distinct from ascii and eight-bit-control to -;; deal with untranslated sequences, since we can't otherwise -;; distinguish the bytes, as we can with utf-8. - -;; ;; Do a multibyte write for bytes in r3 and r4. -;; ;; Intended for untranslatable utf-16 sequences. -;; (define-ccl-program ccl-mule-utf-16-untrans -;; `(0 -;; (if (r3 < 128) -;; (r0 = ,(charset-id 'ascii)) -;; (if (r3 < 160) -;; (r0 = ,(charset-id 'eight-bit-control)) -;; (r0 = ,(charset-id 'eight-bit-graphic)))) -;; (if (r4 < 128) -;; (r0 = ,(charset-id 'ascii)) -;; (if (r4 < 160) -;; (r0 = ,(charset-id 'eight-bit-control)) -;; (r0 = ,(charset-id 'eight-bit-graphic)))) -;; (r1 = r4))) -;; "Do a multibyte write for bytes in r3 and r4. -;; First swap them if we're big endian, indicated by r5==0. -;; Intended for untranslatable utf-16 sequences.") - ;; Needed in macro expansion, so can't be let-bound. Zapped after use. (eval-and-compile (defconst utf-16-decode-ucs - ;; We have the unicode in r1. Output is charset ID in r0, code - ;; point in r1. - `((lookup-integer utf-subst-table-for-decode r1 r3) - (if r7 ; got a translation - ((r0 = r1) (r1 = r3)) - (if (r1 < 128) - (r0 = ,(charset-id 'ascii)) - (if (r1 < 160) - (r0 = ,(charset-id 'eight-bit-control)) - (if (r1 < 256) - ((r0 = ,(charset-id 'latin-iso8859-1)) - (r1 -= 128)) - (if (r1 < #x2500) - ((r0 = ,(charset-id 'mule-unicode-0100-24ff)) - (r1 -= #x100) - (r2 = (((r1 / 96) + 32) << 7)) - (r1 %= 96) - (r1 += (r2 + 32))) - (if (r1 < #x3400) - ((r0 = ,(charset-id 'mule-unicode-2500-33ff)) - (r1 -= #x2500) - (r2 = (((r1 / 96) + 32) << 7)) - (r1 %= 96) - (r1 += (r2 + 32))) - (if (r1 < #xd800) ; 2 untranslated bytes - ;; ;; Assume this is rare, so don't worry about the - ;; ;; overhead of the call. - ;; (call mule-utf-16-untrans) - ((r0 = ,(charset-id 'mule-unicode-e000-ffff)) - (r1 = 15037)) ; U+fffd - (if (r1 < #xe000) ; surrogate - ;; ((call mule-utf-16-untrans) - ;; (write-multibyte-character r0 r1) - ;; (read r3 r4) - ;; (call mule-utf-16-untrans)) - ((read r3 r4) - (r0 = ,(charset-id 'mule-unicode-e000-ffff)) - (r1 = 15037)) - ((r0 = ,(charset-id 'mule-unicode-e000-ffff)) - (r1 -= #xe000) - (r2 = (((r1 / 96) + 32) << 7)) - (r1 %= 96) - (r1 += (r2 + 32))))))))))))) + ;; If r5 is negative, r1 is a Unicode chacter code. Otherise, r5 is + ;; the first of a surrogate pair and r1 is the second of the pair. + ;; Output is charset ID in r0, code point in r1. R0 may be set to + ;; -1 in which case a caller should not write out r1. + `((if (r5 >= 0) + ((r0 = (r1 < #xDC00)) + (if ((r1 >= #xE000) | r0) + ;; Invalid second code of surrogate pair. + ((r0 = r5) + (call ccl-mule-utf-untrans)) + ((r1 -= #xDC00) + (r1 += (((r5 - #xD800) << 10) + #x10000)))) + (r5 = -1))) + (if (r1 < 128) + (r0 = ,(charset-id 'ascii)) + ((lookup-integer utf-subst-table-for-decode r1 r3) + (if r7 ; got a translation + ((r0 = r1) (r1 = r3)) + (if (r1 < 160) + (r0 = ,(charset-id 'eight-bit-control)) + (if (r1 < 256) + ((r0 = ,(charset-id 'latin-iso8859-1)) + (r1 -= 128)) + (if (r1 < #x2500) + ((r0 = ,(charset-id 'mule-unicode-0100-24ff)) + (r1 -= #x100) + (r2 = (((r1 / 96) + 32) << 7)) + (r1 %= 96) + (r1 += (r2 + 32))) + (if (r1 < #x3400) + ((r0 = ,(charset-id 'mule-unicode-2500-33ff)) + (r1 -= #x2500) + (r2 = (((r1 / 96) + 32) << 7)) + (r1 %= 96) + (r1 += (r2 + 32))) + (if (r1 < #xD800) + ;; We can't have this character. + ((r0 = r1) + (call ccl-mule-utf-untrans) + (r5 = -1) + (r0 = -1)) + (if (r1 < #xDC00) + ;; The first code of a surrogate pair. + ((r5 = r1) + (r0 = -1)) + (if (r1 < #xE000) + ;; The second code of a surrogate pair, invalid. + ((r0 = r1) + (call ccl-mule-utf-untrans) + (r5 = -1) + (r0 = -1)) + (if (r1 < #x10000) + ((r0 = ,(charset-id 'mule-unicode-e000-ffff)) + (r1 -= #xE000) + (r2 = (((r1 / 96) + 32) << 7)) + (r1 %= 96) + (r1 += (r2 + 32))) + ;; We can't have this character. + ((r0 = r1) + (call ccl-mule-utf-untrans) + (r5 = -1) + (r0 = -1))))))))))))))) (defconst utf-16le-decode-loop - `(loop - (read r3 r4) - (r1 = (r4 <8 r3)) - ,utf-16-decode-ucs - (translate-character utf-translation-table-for-decode r0 r1) - (write-multibyte-character r0 r1) - (repeat))) + `((r5 = -1) + (loop + (r3 = -1) + (read r3 r4) + (r1 = (r4 <8 r3)) + ,@utf-16-decode-ucs + (if (r0 >= 0) + ((translate-character utf-translation-table-for-decode r0 r1) + (write-multibyte-character r0 r1))) + (repeat)))) (defconst utf-16be-decode-loop - `(loop - (read r3 r4) - (r1 = (r3 <8 r4)) - ,@utf-16-decode-ucs - (translate-character utf-translation-table-for-decode r0 r1) - (write-multibyte-character r0 r1) - (repeat))) + `((r5 = -1) + (loop + (r3 = -1) + (read r3 r4) + (r1 = (r3 <8 r4)) + ,@utf-16-decode-ucs + (if (r0 >= 0) + ((translate-character utf-translation-table-for-decode r0 r1) + (write-multibyte-character r0 r1))) + (repeat)))) ) (define-ccl-program ccl-decode-mule-utf-16le `(2 ; 2 bytes -> 1 to 4 bytes - ,utf-16le-decode-loop) + ,utf-16le-decode-loop + ((if (r5 >= 0) + ((r0 = r5) + (call ccl-mule-utf-untrans))) + (if (r3 < 0) + nil + ((if (r3 < #xA0) + (r0 = ,(charset-id 'eight-bit-control)) + (r0 = ,(charset-id 'eight-bit-graphic))) + (write-multibyte-character r0 r3))))) "Decode UTF-16LE (little endian without signature bytes). Basic decoding is done into the charsets ascii, latin-iso8859-1 and mule-unicode-*. Un-representable Unicode characters are decoded as @@ -149,7 +160,13 @@ U+fffd. The result is run through the translation-table named (define-ccl-program ccl-decode-mule-utf-16be `(2 ; 2 bytes -> 1 to 4 bytes - ,utf-16be-decode-loop) + ,utf-16be-decode-loop + ((if (r5 >= 0) + ((r0 = r5) + (call ccl-mule-utf-untrans))) + (if (r3 >= 0) + ((r0 = r3) + (call ccl-mule-utf-untrans))))) "Decode UTF-16BE (big endian without signature bytes). Basic decoding is done into the charsets ascii, latin-iso8859-1 and mule-unicode-*. Un-representable Unicode characters are @@ -158,91 +175,218 @@ name `utf-translation-table-for-decode'.") (define-ccl-program ccl-decode-mule-utf-16le-with-signature `(2 - ((read r3 r4) - ,utf-16le-decode-loop)) + ((r3 = -1) + (read r3 r4) + ,@utf-16le-decode-loop) + (if (r3 >= 0) + ((r0 = r3) + (call ccl-mule-utf-untrans)))) "Like ccl-decode-utf-16le but skip the first 2-byte BOM.") (define-ccl-program ccl-decode-mule-utf-16be-with-signature `(2 - ((read r3 r4) - ,utf-16be-decode-loop)) + ((r3 = -1) + (read r3 r4) + ,@utf-16be-decode-loop) + (if (r3 >= 0) + ((r0 = r3) + (call ccl-mule-utf-untrans)))) "Like ccl-decode-utf-16be but skip the first 2-byte BOM.") (define-ccl-program ccl-decode-mule-utf-16 `(2 - ((read r3 r4) + ((r3 = -1) + (read r3 r4) (r1 = (r3 <8 r4)) + (r5 = -1) (if (r1 == #xFFFE) ;; R1 is a BOM for little endian. We keep this character as ;; is temporarily. It is removed by post-read-conversion ;; function. (,@utf-16-decode-ucs (write-multibyte-character r0 r1) - ,utf-16le-decode-loop) + ,@utf-16le-decode-loop) ((if (r1 == #xFEFF) ;; R1 is a BOM for big endian, but we can't keep that ;; character in the output because it can't be ;; distinguished with the normal U+FEFF. So, we keep ;; #xFFFF instead. ((r1 = #xFFFF) - ,@utf-16-decode-ucs) - ;; R1 a normal Unicode character. + ,@utf-16-decode-ucs + (write-multibyte-character r0 r1)) + ;; R1 is a normal Unicode character. (,@utf-16-decode-ucs - (translate-character utf-translation-table-for-decode r0 r1))) - (write-multibyte-character r0 r1) - ,utf-16be-decode-loop)))) + (if (r0 >= 0) + ((translate-character utf-translation-table-for-decode r0 r1) + (write-multibyte-character r0 r1))))) + ,@utf-16be-decode-loop))) + (if (r3 >= 0) + ((r0 = r3) + (call ccl-mule-utf-untrans)))) "Like ccl-decode-utf-16be/le but check the first BOM.") (makunbound 'utf-16-decode-ucs) ; done with it (makunbound 'utf-16le-decode-loop) (makunbound 'utf-16be-decode-loop) +;; UTF-16 decoder generates an UTF-8 sequence represented by a +;; sequence eight-bit-control/graphic chars for an invalid byte (the +;; last byte of an odd length source) and an untranslatable character +;; (including an invalid surrogate-pair code-point). +;; +;; This CCL parses that sequence (the first byte is already in r1), +;; and if the sequence represents an untranslatable character, it sets +;; r1 to the original invalid code or untranslated Unicode character +;; code, sets r2 to -1 (to prevent r2 and r3 are written), set2 r5 to +;; -1 (to tell the caller that there's no pre-read character). +;; +;; If the sequence represents an invalid byte, it sets r1 to -1, r2 to +;; the byte, sets r3 and r5 to -1. +;; +;; Otherwise, don't change r1, set r2 and r3 to already read +;; eight-bit-control/graphic characters (if any), set r5 and r6 to the +;; last character that invalidates the UTF-8 form. +;; +;; Note: For UTF-8 validation, we only check if a character is +;; eight-bit-control/graphic or not. It may result in incorrect +;; handling of random binary data, but such a data can't be encoded by +;; UTF-16 anyway. At least, UTF-16 decoder doesn't generate such a +;; sequence even if a source contains invalid byte-sequence. + +(define-ccl-program ccl-mule-utf-16-encode-untrans + `(0 + ((r2 = -1) + ;; Read the 2nd byte. + (read-multibyte-character r5 r6) + (r0 = (r5 != ,(charset-id 'eight-bit-control))) + (if ((r5 != ,(charset-id 'eight-bit-graphic)) & r0) + ((r2 = r1) + (r3 = -1) + (r1 = -1) + (end))) ; invalid UTF-8 + + (r3 = -1) + (r2 = r6) + (if (r1 <= #xE0) + ;; 2-byte UTF-8, i.e. originally an invalid byte. + ((r2 &= #x3F) + (r2 |= ((r1 & #x1F) << 6)) + (r1 = -1) + (r5 = -1) + (end))) + + ;; Read the 3rd byte. + (read-multibyte-character r5 r6) + (r0 = (r5 != ,(charset-id 'eight-bit-control))) + (if ((r5 != ,(charset-id 'eight-bit-graphic)) & r0) + ((end))) ; invalid UTF-8 + + (if (r1 < #xF0) ; valid 3-byte UTF-8 + ((r1 = ((r1 & #x0F) << 12)) + (r1 |= ((r2 & #x3F) << 6)) + (r1 |= (r6 & #x3F)) + (r2 = -1) + (r5 = -1) + (end))) + + (r3 = r6) + ;; Read the 4th byte. + (read-multibyte-character r5 r6) + (r0 = (r5 != ,(charset-id 'eight-bit-control))) + (if ((r5 != ,(charset-id 'eight-bit-graphic)) & r0) + (end)) ; livalid UTF-8 + + ;; valid 4-byte UTF-8 + (r1 = ((r1 & #x07) << 18)) + (r1 |= ((r2 & #x3F) << 12)) + (r1 |= ((r3 & #x3F) << 6)) + (r1 |= (r6 & #x3F)) + (r2 = -1) + (r5 = -1) + (end)) + + (if (r1 >= 0) + ((write r1) + (if (r2 >= 0) + ((write r2) + (if (r3 >= 0) + (write r3)))))))) + (eval-and-compile (defconst utf-16-decode-to-ucs - ;; CCL which, given the result of a multibyte read in r0 and r1, - ;; sets r0 to the character's Unicode if the charset is one of the - ;; basic utf-8 coding system ones. Otherwise set to U+fffd. - `(if (r0 == ,(charset-id 'ascii)) - (r0 = r1) - (if (r0 == ,(charset-id 'latin-iso8859-1)) - (r0 = (r1 + 128)) - (if (r0 == ,(charset-id 'eight-bit-control)) - (r0 = r1) - (if (r0 == ,(charset-id 'eight-bit-graphic)) - (r0 = r1) - ((r2 = (r1 & #x7f)) - (r1 >>= 7) - (r3 = ((r1 - 32) * 96)) - (r3 += (r2 - 32)) - (if (r0 == ,(charset-id 'mule-unicode-0100-24ff)) - (r0 = (r3 + #x100)) - (if (r0 == ,(charset-id 'mule-unicode-2500-33ff)) - (r0 = (r3 + #x2500)) - (if (r0 == ,(charset-id 'mule-unicode-e000-ffff)) - (r0 = (r3 + #xe000)) - (r0 = #xfffd)))))))))) + ;; Read a character and set r1 to the corresponding Unicode code. + ;; If r5 is not negative, it means that we have already read a + ;; character into r5 and r6. + ;; If an invalid eight-bit-control/graphic sequence is found, r2 and + ;; r3 may contain a byte to written out, r5 and r6 may contain a + ;; pre-read character. Usually they are set to -1. + `((if (r5 < 0) + (read-multibyte-character r0 r1) + ((r0 = r5) + (r1 = r6) + (r5 = -1))) + (lookup-character utf-subst-table-for-encode r0 r1) + (r2 = -1) + (if (r7 > 0) + (r1 = r0) + ((translate-character utf-translation-table-for-encode r0 r1) + (if (r0 == ,(charset-id 'ascii)) + nil + (if (r0 == ,(charset-id 'latin-iso8859-1)) + (r1 += 128) + (if (r0 == ,(charset-id 'eight-bit-control)) + nil + (if (r0 == ,(charset-id 'eight-bit-graphic)) + (call ccl-mule-utf-16-encode-untrans) + ((r2 = ((r1 & #x7f) - 32)) + (r3 = ((((r1 >> 7) - 32) * 96) + r2)) + (r2 = -1) + (r5 = -1) + (if (r0 == ,(charset-id 'mule-unicode-0100-24ff)) + (r1 = (r3 + #x100)) + (if (r0 == ,(charset-id 'mule-unicode-2500-33ff)) + (r1 = (r3 + #x2500)) + (if (r0 == ,(charset-id 'mule-unicode-e000-ffff)) + (r1 = (r3 + #xe000)) + (r1 = #xfffd))))))))))))) (defconst utf-16le-encode-loop - `(loop - (read-multibyte-character r0 r1) - (lookup-character utf-subst-table-for-encode r0 r1) - (if (r7 == 0) - ((translate-character utf-translation-table-for-encode r0 r1) - ,utf-16-decode-to-ucs)) - (write (r0 & 255)) - (write (r0 >> 8)) - (repeat))) + `((r5 = -1) + (loop + ,@utf-16-decode-to-ucs + (if (r1 >= #x10000) + ((r1 -= #x10000) + (r0 = ((r1 >> 10) + #xD800)) + (write (r0 & 255)) + (write (r0 >> 8)) + (r1 = ((r1 & #x3FF) + #xDC00)))) + (if (r1 >= 0) + ((write (r1 & 255)) + (write (r1 >> 8)))) + (if (r2 >= 0) + ((write r2) + (if (r3 >= 0) + (write r3)))) + (repeat)))) (defconst utf-16be-encode-loop - `(loop - (read-multibyte-character r0 r1) - (lookup-character utf-subst-table-for-encode r0 r1) - (if (r7 == 0) - ((translate-character utf-translation-table-for-encode r0 r1) - ,utf-16-decode-to-ucs)) - (write (r0 >> 8)) - (write (r0 & 255)) - (repeat))) + `((r5 = -1) + (loop + ,@utf-16-decode-to-ucs + (if (r1 >= #x10000) + ((r1 -= #x10000) + (r0 = ((r1 >> 10) + #xD800)) + (write (r0 >> 8)) + (write (r0 & 255)) + (r1 = ((r1 & #x3FF) + #xDC00)))) + (if (r1 >= 0) + ((write (r1 >> 8)) + (write (r1 & 255)))) + (if (r2 >= 0) + ((write r2) + (if (r3 >= 0) + (write r3)))) + (repeat)))) ) @@ -270,7 +414,7 @@ Others are encoded as U+FFFD.") `(1 ((write #xFF) (write #xFE) - ,utf-16le-encode-loop)) + ,@utf-16le-encode-loop)) "Encode to UTF-16 (little endian with signature). Characters from the charsets ascii, eight-bit-control, eight-bit-graphic, latin-iso8859-1 and mule-unicode-* are encoded @@ -282,7 +426,7 @@ Others are encoded as U+FFFD.") `(1 ((write #xFE) (write #xFF) - ,utf-16be-encode-loop)) + ,@utf-16be-encode-loop)) "Encode to UTF-16 (big endian with signature). Characters from the charsets ascii, eight-bit-control, eight-bit-graphic, latin-iso8859-1 and mule-unicode-* are encoded @@ -296,6 +440,7 @@ Others are encoded as U+FFFD.") (defun mule-utf-16-post-read-conversion (length) (when (> length 0) + (setq length (utf-8-post-read-conversion length)) (let ((char (following-char))) (cond ((= char (decode-char 'ucs #xFFFE)) (delete-char 1) @@ -329,29 +474,34 @@ if they are re-encoded. On encoding (e.g. writing a file), Emacs characters not belonging to any of the character sets listed above are encoded into the byte -sequence representing U+FFFD (REPLACEMENT CHARACTER).")) +sequence representing U+FFFD (REPLACEMENT CHARACTER).") + (props `((safe-charsets + ascii + eight-bit-control + eight-bit-graphic + latin-iso8859-1 + mule-unicode-0100-24ff + mule-unicode-2500-33ff + mule-unicode-e000-ffff + ,@(if utf-translate-cjk-mode + utf-translate-cjk-charsets)) + (valid-codes (0 . 255)) + (mime-text-unsuitable . t) + (pre-write-conversion . utf-8-pre-write-conversion) + (dependency unify-8859-on-encoding-mode + unify-8859-on-decoding-mode + utf-fragment-on-decoding + utf-translate-cjk-mode)))) (make-coding-system 'mule-utf-16le 4 ?u ; Mule-UCS uses ?U, but code-pages uses that for koi8-u. (concat "UTF-16LE encoding for Emacs-supported Unicode characters." doc) - '(ccl-decode-mule-utf-16le . ccl-encode-mule-utf-16le) - '((safe-charsets - ascii - eight-bit-control - latin-iso8859-1 - mule-unicode-0100-24ff - mule-unicode-2500-33ff - mule-unicode-e000-ffff) - (mime-charset . utf-16le) - (mime-text-unsuitable . t) - (valid-codes (0 . 255)) - (dependency unify-8859-on-encoding-mode - unify-8859-on-decoding-mode - utf-fragment-on-decoding - utf-translate-cjk-mode))) + `(,@props + (post-read-conversion . utf-8-post-read-conversion) + (mime-charset . utf-16le))) (make-coding-system 'mule-utf-16be 4 ?u @@ -360,19 +510,9 @@ sequence representing U+FFFD (REPLACEMENT CHARACTER).")) doc) '(ccl-decode-mule-utf-16be . ccl-encode-mule-utf-16be) - '((safe-charsets - ascii - eight-bit-control - latin-iso8859-1 - mule-unicode-0100-24ff - mule-unicode-2500-33ff - mule-unicode-e000-ffff) - (mime-charset . utf-16be) - (valid-codes (0 . 255)) - (dependency unify-8859-on-encoding-mode - unify-8859-on-decoding-mode - utf-fragment-on-decoding - utf-translate-cjk-mode))) + `(,@props + (post-read-conversion . utf-8-post-read-conversion) + (mime-charset . utf-16be))) (make-coding-system 'mule-utf-16le-with-signature 4 ?u @@ -382,21 +522,10 @@ sequence representing U+FFFD (REPLACEMENT CHARACTER).")) '(ccl-decode-mule-utf-16le-with-signature . ccl-encode-mule-utf-16le-with-signature) - '((safe-charsets - ascii - eight-bit-control - latin-iso8859-1 - mule-unicode-0100-24ff - mule-unicode-2500-33ff - mule-unicode-e000-ffff) + `(,@props + (post-read-conversion . utf-8-post-read-conversion) (coding-category . coding-category-utf-16-le) - (mime-charset . utf-16) - (mime-text-unsuitable . t) - (valid-codes (0 . 255)) - (dependency unify-8859-on-encoding-mode - unify-8859-on-decoding-mode - utf-fragment-on-decoding - utf-translate-cjk-mode))) + (mime-charset . utf-16))) (make-coding-system 'mule-utf-16be-with-signature 4 ?u @@ -406,20 +535,10 @@ sequence representing U+FFFD (REPLACEMENT CHARACTER).")) '(ccl-decode-mule-utf-16be-with-signature . ccl-encode-mule-utf-16be-with-signature) - '((safe-charsets - ascii - eight-bit-control - latin-iso8859-1 - mule-unicode-0100-24ff - mule-unicode-2500-33ff - mule-unicode-e000-ffff) + `(,@props + (post-read-conversion . utf-8-post-read-conversion) (coding-category . coding-category-utf-16-be) - (mime-charset . utf-16) - (valid-codes (0 . 255)) - (dependency unify-8859-on-encoding-mode - unify-8859-on-decoding-mode - utf-fragment-on-decoding - utf-translate-cjk-mode))) + (mime-charset . utf-16))) (make-coding-system 'mule-utf-16 4 ?u @@ -428,22 +547,10 @@ sequence representing U+FFFD (REPLACEMENT CHARACTER).")) doc) '(ccl-decode-mule-utf-16 . ccl-encode-mule-utf-16be-with-signature) - '((safe-charsets - ascii - eight-bit-control - latin-iso8859-1 - mule-unicode-0100-24ff - mule-unicode-2500-33ff - mule-unicode-e000-ffff) + `(,@props + (post-read-conversion . mule-utf-16-post-read-conversion) (coding-category . coding-category-utf-16-be) - (mime-charset . utf-16) - (mime-text-unsuitable . t) - (valid-codes (0 . 255)) - (dependency unify-8859-on-encoding-mode - unify-8859-on-decoding-mode - utf-fragment-on-decoding - utf-translate-cjk-mode) - (post-read-conversion . mule-utf-16-post-read-conversion))) + (mime-charset . utf-16))) ) (define-coding-system-alias 'utf-16le 'mule-utf-16le) diff --git a/lisp/international/utf-8.el b/lisp/international/utf-8.el index d4dd7b6c882..77a51abb43f 100644 --- a/lisp/international/utf-8.el +++ b/lisp/international/utf-8.el @@ -190,9 +190,102 @@ Setting this variable outside customize has no effect." :type 'boolean :group 'mule) + +(defconst utf-translate-cjk-charsets '(chinese-gb2312 + chinese-big5-1 chinese-big5-2 + japanese-jisx0208 japanese-jisx0212 + korean-ksc5601) + "List of charsets supported by `utf-translate-cjk-mode'.") + +(defconst utf-translate-cjk-unicode-range + '((#x2e80 . #xd7a3) + (#xff00 . #xffef)) + "List of Unicode code ranges supported by `utf-translate-cjk-mode'.") + +;; Return non-nil if CODE-POINT is in `utf-translate-cjk-unicode-range'. +(defsubst utf-translate-cjk-substitutable-p (code-point) + (let ((tail utf-translate-cjk-unicode-range) + elt) + (while tail + (setq elt (car tail) tail (cdr tail)) + (if (and (>= code-point (car elt)) (<= code-point (cdr elt))) + (setq tail nil) + (setq elt nil))) + elt)) + +(defvar utf-translate-cjk-lang-env nil + "Language environment in which tables for `utf-translate-cjk-mode' is loaded. +The value nil means that the tables are not yet loaded.") + +(defun utf-translate-cjk-load-tables () + "Load tables for `utf-translate-cjk-mode'." + ;; Fixme: Allow the use of the CJK charsets to be + ;; customized by reordering and possible omission. + (let ((redefined (< (hash-table-size ucs-mule-cjk-to-unicode) 43000))) + (if redefined + ;; Redefine them with realistic initial sizes and a + ;; smallish rehash size to avoid wasting significant + ;; space after they're built. + (setq ucs-mule-cjk-to-unicode + (make-hash-table :test 'eq :size 43000 :rehash-size 1000) + ucs-unicode-to-mule-cjk + (make-hash-table :test 'eq :size 21500 :rehash-size 1000))) + + ;; Load the files explicitly, to avoid having to keep + ;; around the large tables they contain (as well as the + ;; ones which get built). + (cond ((string= "Korean" current-language-environment) + (load "subst-jis") + (load "subst-big5") + (load "subst-gb2312") + (load "subst-ksc")) + ((string= "Chinese-BIG5" current-language-environment) + (load "subst-jis") + (load "subst-ksc") + (load "subst-gb2312") + (load "subst-big5")) + ((string= "Chinese-GB" current-language-environment) + (load "subst-jis") + (load "subst-ksc") + (load "subst-big5") + (load "subst-gb2312")) + (t + (load "subst-ksc") + (load "subst-gb2312") + (load "subst-big5") + (load "subst-jis"))) ; jis covers as much as big5, gb2312 + + (when redefined + (define-translation-hash-table 'utf-subst-table-for-decode + ucs-unicode-to-mule-cjk) + (define-translation-hash-table 'utf-subst-table-for-encode + ucs-mule-cjk-to-unicode) + (set-char-table-extra-slot (get 'utf-translation-table-for-encode + 'translation-table) + 1 ucs-mule-cjk-to-unicode)) + + (setq utf-translate-cjk-lang-env current-language-environment))) + +(defun utf-lookup-subst-table-for-decode (code-point) + (if (and utf-translate-cjk-mode + (not utf-translate-cjk-lang-env) + (utf-translate-cjk-substitutable-p code-point)) + (utf-translate-cjk-load-tables)) + (gethash code-point + (get 'utf-subst-table-for-decode 'translation-hash-table))) + + +(defun utf-lookup-subst-table-for-encode (char) + (if (and utf-translate-cjk-mode + (not utf-translate-cjk-lang-env) + (memq (char-charset char) utf-translate-cjk-charsets)) + (utf-translate-cjk-load-tables)) + (gethash char + (get 'utf-subst-table-for-encode 'translation-hash-table))) + (define-minor-mode utf-translate-cjk-mode "Whether the UTF based coding systems should decode/encode CJK characters. -Enabling this loads tables which allow the coding systems mule-utf-8, +Enabling this allows the coding systems mule-utf-8, mule-utf-16le and mule-utf-16be to encode characters in the charsets `korean-ksc5601', `chinese-gb2312', `chinese-big5-1', `chinese-big5-2', `japanese-jisx0208' and `japanese-jisx0212', and to @@ -203,49 +296,16 @@ according to the language environment in effect when this option is turned on: ksc5601 for Korean, gb2312 for Chinese-GB, big5 for Chinese-Big5 and jisx for other environments. -The tables are large (over 40000 entries), so this option is not the -default. Also, installing them may be rather slow." - :init-value nil +This option is on by default. If you are not interested in CJK +characters and want to avoid some overhead on encoding/decoding +by the above coding systems, you can customize this option to nil." + :init-value t :version "21.4" :type 'boolean - :set-after '(current-language-environment) :group 'mule :global t (if utf-translate-cjk-mode - ;; Fixme: Allow the use of the CJK charsets to be - ;; customized by reordering and possible omission. (progn - ;; Redefine them with realistic initial sizes and a - ;; smallish rehash size to avoid wasting significant - ;; space after they're built. - (setq ucs-mule-cjk-to-unicode - (make-hash-table :test 'eq :size 43000 :rehash-size 1000) - ucs-unicode-to-mule-cjk - (make-hash-table :test 'eq :size 21500 :rehash-size 1000)) - ;; Load the files explicitly, to avoid having to keep - ;; around the large tables they contain (as well as the - ;; ones which get built). - (cond - ((string= "Korean" current-language-environment) - (load "subst-jis") - (load "subst-big5") - (load "subst-gb2312") - (load "subst-ksc")) - ((string= "Chinese-BIG5" current-language-environment) - (load "subst-jis") - (load "subst-ksc") - (load "subst-gb2312") - (load "subst-big5")) - ((string= "Chinese-GB" current-language-environment) - (load "subst-jis") - (load "subst-ksc") - (load "subst-big5") - (load "subst-gb2312")) - (t - (load "subst-ksc") - (load "subst-gb2312") - (load "subst-big5") - (load "subst-jis"))) ; jis covers as much as big5, gb2312 (define-translation-hash-table 'utf-subst-table-for-decode ucs-unicode-to-mule-cjk) (define-translation-hash-table 'utf-subst-table-for-encode @@ -259,7 +319,58 @@ default. Also, installing them may be rather slow." (make-hash-table :test 'eq)) (set-char-table-extra-slot (get 'utf-translation-table-for-encode 'translation-table) - 1 nil))) + 1 nil)) + + ;; Update safe-chars of mule-utf-* coding systems. + (dolist (elt (coding-system-list t)) + (if (string-match "^mule-utf" (symbol-name elt)) + (let ((safe-charsets (coding-system-get elt 'safe-charsets)) + (safe-chars (coding-system-get elt 'safe-chars)) + (need-update nil)) + (dolist (charset utf-translate-cjk-charsets) + (unless (eq utf-translate-cjk-mode (memq charset safe-charsets)) + (setq safe-charsets + (if utf-translate-cjk-mode + (cons charset safe-charsets) + (delq charset safe-charsets)) + need-update t) + (aset safe-chars (make-char charset) utf-translate-cjk-mode))) + (when need-update + (coding-system-put elt 'safe-charsets safe-charsets) + (define-coding-system-internal elt)))))) + +(define-ccl-program ccl-mule-utf-untrans + ;; R0 is an untranslatable Unicode code-point (U+3500..U+DFFF or + ;; U+10000..U+10FFFF) or an invaid byte (#x00..#xFF). Write + ;; eight-bit-control/graphic sequence (2 to 4 chars) representing + ;; UTF-8 sequence of r0. Registers r4, r5, r6 are modified. + ;; + ;; This is a subrountine because we assume that this is called very + ;; rarely (so we don't have to worry about the overhead of the + ;; call). + `(0 + ((r5 = ,(charset-id 'eight-bit-control)) + (r6 = ,(charset-id 'eight-bit-graphic)) + (if (r0 < #x100) + ((r4 = ((r0 >> 6) | #xC0)) + (write-multibyte-character r6 r4)) + ((if (r0 < #x10000) + ((r4 = ((r0 >> 12) | #xE0)) + (write-multibyte-character r6 r4)) + ((r4 = ((r0 >> 18) | #xF0)) + (write-multibyte-character r6 r4) + (r4 = (((r0 >> 12) & #x3F) | #x80)) + (if (r4 < #xA0) + (write-multibyte-character r5 r4) + (write-multibyte-character r6 r4)))) + (r4 = (((r0 >> 6) & #x3F) | #x80)) + (if (r4 < #xA0) + (write-multibyte-character r5 r4) + (write-multibyte-character r6 r4)))) + (r4 = ((r0 & #x3F) | #x80)) + (if (r4 < #xA0) + (write-multibyte-character r5 r4) + (write-multibyte-character r6 r4))))) (define-ccl-program ccl-decode-mule-utf-8 ;; @@ -278,260 +389,210 @@ default. Also, installing them may be rather slow." ;; (>= 8000) | | ;; mule-unicode-2500-33ff | 3 | 4 ;; mule-unicode-e000-ffff | 3 | 4 + ;; -----------------------+----------------+--------------- + ;; invalid byte | 1 | 2 ;; ;; Thus magnification factor is two. ;; `(2 - ((r5 = ,(charset-id 'eight-bit-control)) - (r6 = ,(charset-id 'eight-bit-graphic)) + ((r6 = ,(charset-id 'latin-iso8859-1)) + (read r0) (loop - (r0 = -1) - (read r0) - - ;; 1byte encoding, i.e., ascii (if (r0 < #x80) - ((write r0)) - (if (r0 < #xc0) ; continuation byte (invalid here) - ((if (r0 < #xa0) - (write-multibyte-character r5 r0) - (write-multibyte-character r6 r0))) - ;; 2 byte encoding 00000yyyyyxxxxxx = 110yyyyy 10xxxxxx - (if (r0 < #xe0) - ((r1 = -1) - (read r1) + ;; 1-byte encoding, i.e., ascii + (write-read-repeat r0)) + (if (r0 < #xc2) + ;; continuation byte (invalid here) or 1st byte of overlong + ;; 2-byte sequence. + ((call ccl-mule-utf-untrans) + (r6 = ,(charset-id 'latin-iso8859-1)) + (read r0) + (repeat))) - (if ((r1 & #b11000000) != #b10000000) - ;; Invalid 2-byte sequence - ((if (r0 < #xa0) - (write-multibyte-character r5 r0) - (write-multibyte-character r6 r0)) - (if (r1 < #x80) - (write r1) - (if (r1 < #xa0) - (write-multibyte-character r5 r1) - (write-multibyte-character r6 r1)))) + ;; Read the 2nd byte. + (read r1) + (if ((r1 & #b11000000) != #b10000000) ; Invalid 2nd byte + ((call ccl-mule-utf-untrans) + (r6 = ,(charset-id 'latin-iso8859-1)) + ;; Handle it in the next loop. + (r0 = r1) + (repeat))) - ((r3 = r0) ; save in case of overlong sequence - (r2 = r1) - (r0 &= #x1f) - (r0 <<= 6) - (r1 &= #x3f) - (r1 += r0) - ;; Now r1 holds scalar value + (if (r0 < #xe0) + ;; 2-byte encoding 00000yyyyyxxxxxx = 110yyyyy 10xxxxxx + ((r1 &= #x3F) + (r1 |= ((r0 & #x1F) << 6)) + ;; Now r2 holds scalar value. We don't have to check + ;; `overlong sequence' because r0 >= 0xC2. - (if (r1 < 128) ; `overlong sequence' - ((if (r3 < #xa0) - (write-multibyte-character r5 r3) - (write-multibyte-character r6 r3)) - (if (r2 < #x80) - (write r2) - (if (r2 < #xa0) - (write-multibyte-character r5 r2) - (write-multibyte-character r6 r2)))) + (if (r1 >= 256) + ;; mule-unicode-0100-24ff (< 0800) + ((r0 = ,(charset-id 'mule-unicode-0100-24ff)) + (r1 -= #x0100) + (r2 = (((r1 / 96) + 32) << 7)) + (r1 %= 96) + (r1 += (r2 + 32)) + (translate-character + utf-translation-table-for-decode r0 r1) + (write-multibyte-character r0 r1) + (read r0) + (repeat)) + (if (r1 >= 160) + ;; latin-iso8859-1 + ((r1 -= 128) + (write-multibyte-character r6 r1) + (read r0) + (repeat)) + ;; eight-bit-control + ((r0 = ,(charset-id 'eight-bit-control)) + (write-multibyte-character r0 r1) + (read r0) + (repeat)))))) - ;; eight-bit-control - (if (r1 < 160) - ((write-multibyte-character r5 r1)) + ;; Read the 3rd bytes. + (read r2) + (if ((r2 & #b11000000) != #b10000000) ; Invalid 3rd byte + ((call ccl-mule-utf-untrans) + (r0 = r1) + (call ccl-mule-utf-untrans) + (r6 = ,(charset-id 'latin-iso8859-1)) + ;; Handle it in the next loop. + (r0 = r2) + (repeat))) - ;; latin-iso8859-1 - (if (r1 < 256) - ((r0 = ,(charset-id 'latin-iso8859-1)) - (r1 -= 128) - (write-multibyte-character r0 r1)) + (if (r0 < #xF0) + ;; 3byte encoding + ;; zzzzyyyyyyxxxxxx = 1110zzzz 10yyyyyy 10xxxxxx + ((r3 = ((r0 & #xF) << 12)) + (r3 |= ((r1 & #x3F) << 6)) + (r3 |= (r2 & #x3F)) - ;; mule-unicode-0100-24ff (< 0800) - ((r0 = ,(charset-id 'mule-unicode-0100-24ff)) - (r1 -= #x0100) - (r2 = (((r1 / 96) + 32) << 7)) - (r1 %= 96) - (r1 += (r2 + 32)) - (translate-character - utf-translation-table-for-decode r0 r1) - (write-multibyte-character r0 r1)))))))) + (if (r3 < #x800) ; `overlong sequence' + ((call ccl-mule-utf-untrans) + (r0 = r1) + (call ccl-mule-utf-untrans) + (r0 = r2) + (call ccl-mule-utf-untrans) + (r6 = ,(charset-id 'latin-iso8859-1)) + (read r0) + (repeat))) - ;; 3byte encoding - ;; zzzzyyyyyyxxxxxx = 1110zzzz 10yyyyyy 10xxxxxx - (if (r0 < #xf0) - ((r1 = -1) - (r2 = -1) - (read r1 r2) + (if (r3 < #x2500) + ;; mule-unicode-0100-24ff (>= 0800) + ((r0 = ,(charset-id 'mule-unicode-0100-24ff)) + (r3 -= #x0100) + (r3 //= 96) + (r1 = (r7 + 32)) + (r1 += ((r3 + 32) << 7)) + (translate-character + utf-translation-table-for-decode r0 r1) + (write-multibyte-character r0 r1) + (read r0) + (repeat))) - ;; This is set to 1 if the encoding is invalid. - (r4 = 0) + (if (r3 < #x3400) + ;; mule-unicode-2500-33ff + ((r0 = r3) ; don't zap r3 + (lookup-integer utf-subst-table-for-decode r0 r1) + (if (r7 == 0) + ((r0 = ,(charset-id 'mule-unicode-2500-33ff)) + (r3 -= #x2500) + (r3 //= 96) + (r1 = (r7 + 32)) + (r1 += ((r3 + 32) << 7)))) + (write-multibyte-character r0 r1) + (read r0) + (repeat))) - (r3 = (r1 & #b11000000)) - (r3 |= ((r2 >> 2) & #b00110000)) - (if (r3 != #b10100000) - (r4 = 1) - ((r3 = ((r0 & #x0f) << 12)) - (r3 += ((r1 & #x3f) << 6)) - (r3 += (r2 & #x3f)) - (if (r3 < #x0800) - (r4 = 1)))) + (if (r3 < #xE000) + ;; Try to convert to CJK chars, else + ;; keep them as eight-bit-{control|graphic}. + ((r0 = r3) + (lookup-integer utf-subst-table-for-decode r3 r1) + (if r7 + ;; got a translation + ((write-multibyte-character r3 r1) + (read r0) + (repeat)) + ((call ccl-mule-utf-untrans) + (r6 = ,(charset-id 'latin-iso8859-1)) + (read r0) + (repeat))))) - (if (r4 != 0) - ;; Invalid 3-byte sequence - ((if (r0 < #xa0) - (write-multibyte-character r5 r0) - (write-multibyte-character r6 r0)) - (if (r1 < #x80) - (write r1) - (if (r1 < #xa0) - (write-multibyte-character r5 r1) - (write-multibyte-character r6 r1))) - (if (r2 < #x80) - (write r2) - (if (r2 < #xa0) - (write-multibyte-character r5 r2) - (write-multibyte-character r6 r2)))) + ;; mule-unicode-e000-ffff + ;; Fixme: fffe and ffff are invalid. + (r0 = r3) ; don't zap r3 + (lookup-integer utf-subst-table-for-decode r0 r1) + (if (r7 == 0) + ((r0 = ,(charset-id 'mule-unicode-e000-ffff)) + (r3 -= #xe000) + (r3 //= 96) + (r1 = (r7 + 32)) + (r1 += ((r3 + 32) << 7)))) + (write-multibyte-character r0 r1) + (read r0) + (repeat))) - ;; mule-unicode-0100-24ff (>= 0800) - ((if (r3 < #x2500) - ((r0 = ,(charset-id 'mule-unicode-0100-24ff)) - (r3 -= #x0100) - (r3 //= 96) - (r1 = (r7 + 32)) - (r1 += ((r3 + 32) << 7)) - (translate-character - utf-translation-table-for-decode r0 r1) - (write-multibyte-character r0 r1)) + ;; Read the 4th bytes. + (read r3) + (if ((r3 & #b11000000) != #b10000000) ; Invalid 4th byte + ((call ccl-mule-utf-untrans) + (r0 = r1) + (call ccl-mule-utf-untrans) + (r0 = r2) + (call ccl-mule-utf-untrans) + (r6 = ,(charset-id 'latin-iso8859-1)) + ;; Handle it in the next loop. + (r0 = r3) + (repeat))) - ;; mule-unicode-2500-33ff - (if (r3 < #x3400) - ((r4 = r3) ; don't zap r3 - (lookup-integer utf-subst-table-for-decode r4 r5) - (if r7 - ;; got a translation - ((write-multibyte-character r4 r5) - ;; Zapped through register starvation. - (r5 = ,(charset-id 'eight-bit-control))) - ((r0 = ,(charset-id 'mule-unicode-2500-33ff)) - (r3 -= #x2500) - (r3 //= 96) - (r1 = (r7 + 32)) - (r1 += ((r3 + 32) << 7)) - (write-multibyte-character r0 r1)))) + (if (r0 < #xF8) + ;; 4-byte encoding: + ;; wwwzzzzzzyyyyyyxxxxxx = 11110www 10zzzzzz 10yyyyyy 10xxxxxx + ;; keep those bytes as eight-bit-{control|graphic} + ;; Fixme: allow lookup in utf-subst-table-for-decode. + ((r4 = ((r0 & #x7) << 18)) + (r4 |= ((r1 & #x3F) << 12)) + (r4 |= ((r2 & #x3F) << 6)) + (r4 |= (r3 & #x3F)) - ;; U+3400 .. U+D7FF - ;; Try to convert to CJK chars, else keep - ;; them as eight-bit-{control|graphic}. - (if (r3 < #xd800) - ((r4 = r3) ; don't zap r3 - (lookup-integer utf-subst-table-for-decode r4 r5) - (if r7 - ;; got a translation - ((write-multibyte-character r4 r5) - ;; Zapped through register starvation. - (r5 = ,(charset-id 'eight-bit-control))) - ;; #xe0 <= r0 < #xf0, so r0 is eight-bit-graphic - ((r3 = r6) - (write-multibyte-character r3 r0) - (if (r1 < #xa0) - (r3 = r5)) - (write-multibyte-character r3 r1) - (if (r2 < #xa0) - (r3 = r5) - (r3 = r6)) - (write-multibyte-character r3 r2)))) + (if (r4 < #x10000) ; `overlong sequence' + ((call ccl-mule-utf-untrans) + (r0 = r1) + (call ccl-mule-utf-untrans) + (r0 = r2) + (call ccl-mule-utf-untrans) + (r0 = r3) + (call ccl-mule-utf-untrans)) + ((r0 = r4) + (call ccl-mule-utf-untrans)))) - ;; Surrogates, U+D800 .. U+DFFF - (if (r3 < #xe000) - ((r3 = r6) - (write-multibyte-character r3 r0) ; eight-bit-graphic - (if (r1 < #xa0) - (r3 = r5)) - (write-multibyte-character r3 r1) - (if (r2 < #xa0) - (r3 = r5) - (r3 = r6)) - (write-multibyte-character r3 r2)) - - ;; mule-unicode-e000-ffff - ;; Fixme: fffe and ffff are invalid. - ((r4 = r3) ; don't zap r3 - (lookup-integer utf-subst-table-for-decode r4 r5) - (if r7 - ;; got a translation - ((write-multibyte-character r4 r5) - ;; Zapped through register starvation. - (r5 = ,(charset-id 'eight-bit-control))) - ((r0 = ,(charset-id 'mule-unicode-e000-ffff)) - (r3 -= #xe000) - (r3 //= 96) - (r1 = (r7 + 32)) - (r1 += ((r3 + 32) << 7)) - (write-multibyte-character r0 r1))))))))))) - - (if (r0 < #xfe) - ;; 4byte encoding - ;; keep those bytes as eight-bit-{control|graphic} - ;; Fixme: allow lookup in utf-subst-table-for-decode. - ((r1 = -1) - (r2 = -1) - (r3 = -1) - (read r1 r2 r3) - ;; r0 > #xf0, thus eight-bit-graphic - (write-multibyte-character r6 r0) - (if (r1 < #xa0) - (if (r1 < #x80) ; invalid byte - (write r1) - (write-multibyte-character r5 r1)) - (write-multibyte-character r6 r1)) - (if (r2 < #xa0) - (if (r2 < #x80) ; invalid byte - (write r2) - (write-multibyte-character r5 r2)) - (write-multibyte-character r6 r2)) - (if (r3 < #xa0) - (if (r3 < #x80) ; invalid byte - (write r3) - (write-multibyte-character r5 r3)) - (write-multibyte-character r6 r3)) - (if (r0 >= #xf8) ; 5- or 6-byte encoding - ((r0 = -1) - (read r0) - (if (r0 < #xa0) - (if (r0 < #x80) ; invalid byte - (write r0) - (write-multibyte-character r5 r0)) - (write-multibyte-character r6 r0)) - (if (r0 >= #xfc) ; 6-byte - ((r0 = -1) - (read r0) - (if (r0 < #xa0) - (if (r0 < #x80) ; invalid byte - (write r0) - (write-multibyte-character r5 r0)) - (write-multibyte-character r6 r0))))))) - ;; else invalid byte >= #xfe - (write-multibyte-character r6 r0)))))) + ;; Unsupported sequence. + ((call ccl-mule-utf-untrans) + (r0 = r1) + (call ccl-mule-utf-untrans) + (r0 = r2) + (call ccl-mule-utf-untrans) + (r0 = r3) + (call ccl-mule-utf-untrans))) + (r6 = ,(charset-id 'latin-iso8859-1)) + (read r0) (repeat))) + ;; At EOF... (if (r0 >= 0) - ((if (r0 < #x80) - (write r0) - (if (r0 < #xa0) - (write-multibyte-character r5 r0) - ((write-multibyte-character r6 r0)))) + ;; r0 >= #x80 + ((call ccl-mule-utf-untrans) (if (r1 >= 0) - ((if (r1 < #x80) - (write r1) - (if (r1 < #xa0) - (write-multibyte-character r5 r1) - ((write-multibyte-character r6 r1)))) + ((r0 = r1) + (call ccl-mule-utf-untrans) (if (r2 >= 0) - ((if (r2 < #x80) - (write r2) - (if (r2 < #xa0) - (write-multibyte-character r5 r2) - ((write-multibyte-character r6 r2)))) + ((r0 = r2) + (call ccl-mule-utf-untrans) (if (r3 >= 0) - (if (r3 < #x80) - (write r3) - (if (r3 < #xa0) - (write-multibyte-character r5 r3) - ((write-multibyte-character r6 r3)))))))))))) + ((r0 = r3) + (call ccl-mule-utf-untrans)))))))))) "CCL program to decode UTF-8. Basic decoding is done into the charsets ascii, latin-iso8859-1 and @@ -540,164 +601,203 @@ mule-unicode-*, but see also `utf-fragmentation-table' and Encodings of un-representable Unicode characters are decoded asis into eight-bit-control and eight-bit-graphic characters.") +(define-ccl-program ccl-mule-utf-8-encode-untrans + ;; UTF-8 decoder generates an UTF-8 sequence represented by a + ;; sequence eight-bit-control/graphic chars for an untranslatable + ;; character and an invalid byte. + ;; + ;; This CCL parses that sequence (the first byte is already in r1), + ;; writes out the original bytes of that sequence, and sets r5 to + ;; -1. + ;; + ;; If the eight-bit-control/graphic sequence is shorter than what r1 + ;; suggests, it sets r5 and r6 to the last character read that + ;; should be handled by the next loop of a caller. + ;; + ;; Note: For UTF-8 validation, we only check if a character is + ;; eight-bit-control/graphic or not. It may result in incorrect + ;; handling of random binary data, but such a data can't be encoded + ;; by UTF-8 anyway. At least, UTF-8 decoders doesn't generate such + ;; a sequence even if a source contains invalid byte-sequence. + `(0 + (;; Read the 2nd byte. + (read-multibyte-character r5 r6) + (r0 = (r5 != ,(charset-id 'eight-bit-control))) + (if ((r5 != ,(charset-id 'eight-bit-graphic)) & r0) + ((write r1) ; invalid UTF-8 + (r1 = -1) + (end))) + + (if (r1 <= #xC3) + ;; 2-byte sequence for an originally invalid byte. + ((r6 &= #x3F) + (r6 |= ((r1 & #x1F) << 6)) + (write r6) + (r5 = -1) + (end))) + + (write r1 r6) + (r2 = r1) + (r1 = -1) + ;; Read the 3rd byte. + (read-multibyte-character r5 r6) + (r0 = (r5 != ,(charset-id 'eight-bit-control))) + (if ((r5 != ,(charset-id 'eight-bit-graphic)) & r0) + (end)) ; invalid UTF-8 + (write r6) + (if (r2 < #xF0) + ;; 3-byte sequence for an untranslated character. + ((r5 = -1) + (end))) + ;; Read the 4th byte. + (read-multibyte-character r5 r6) + (r0 = (r5 != ,(charset-id 'eight-bit-control))) + (if ((r5 != ,(charset-id 'eight-bit-graphic)) & r0) + (end)) ; invalid UTF-8 + ;; 4-byte sequence for an untranslated character. + (write r6) + (r5 = -1) + (end)) + + ;; At EOF... + ((r5 = -1) + (if (r1 >= 0) + (write r1))))) + (define-ccl-program ccl-encode-mule-utf-8 `(1 ((r5 = -1) (loop (if (r5 < 0) - ((r1 = -1) - (read-multibyte-character r0 r1) - (translate-character utf-translation-table-for-encode r0 r1)) - (;; We have already done read-multibyte-character. - (r0 = r5) + (read-multibyte-character r0 r1) + ;; Pre-read character is in r5 (charset-ID) and r6 (code-point). + ((r0 = r5) (r1 = r6) (r5 = -1))) + (translate-character utf-translation-table-for-encode r0 r1) (if (r0 == ,(charset-id 'ascii)) - (write r1) + (write-repeat r1)) - (if (r0 == ,(charset-id 'latin-iso8859-1)) - ;; r1 scalar utf-8 - ;; 0000 0yyy yyxx xxxx 110y yyyy 10xx xxxx - ;; 20 0000 0000 1010 0000 1100 0010 1010 0000 - ;; 7f 0000 0000 1111 1111 1100 0011 1011 1111 - ((r0 = (((r1 & #x40) >> 6) | #xc2)) - (r1 &= #x3f) - (r1 |= #x80) - (write r0 r1)) + (if (r0 == ,(charset-id 'latin-iso8859-1)) + ;; r1 scalar utf-8 + ;; 0000 0yyy yyxx xxxx 110y yyyy 10xx xxxx + ;; 20 0000 0000 1010 0000 1100 0010 1010 0000 + ;; 7f 0000 0000 1111 1111 1100 0011 1011 1111 + ((write ((r1 >> 6) | #xc2)) + (r1 &= #x3f) + (r1 |= #x80) + (write-repeat r1))) - (if (r0 == ,(charset-id 'mule-unicode-0100-24ff)) - ((r0 = ((((r1 & #x3f80) >> 7) - 32) * 96)) - ;; #x3f80 == (0011 1111 1000 0000)b - (r1 &= #x7f) - (r1 += (r0 + 224)) ; 240 == -32 + #x0100 - ;; now r1 holds scalar value - (if (r1 < #x0800) - ;; 2byte encoding - ((r0 = (((r1 & #x07c0) >> 6) | #xc0)) - ;; #x07c0 == (0000 0111 1100 0000)b - (r1 &= #x3f) - (r1 |= #x80) - (write r0 r1)) - ;; 3byte encoding - ((r0 = (((r1 & #xf000) >> 12) | #xe0)) - (r2 = ((r1 & #x3f) | #x80)) - (r1 &= #x0fc0) - (r1 >>= 6) - (r1 |= #x80) - (write r0 r1 r2)))) + (if (r0 == ,(charset-id 'mule-unicode-0100-24ff)) + ((r0 = ((((r1 & #x3f80) >> 7) - 32) * 96)) + ;; #x3f80 == (0011 1111 1000 0000)b + (r1 &= #x7f) + (r1 += (r0 + 224)) ; 240 == -32 + #x0100 + ;; now r1 holds scalar value + (if (r1 < #x0800) + ;; 2byte encoding + ((write ((r1 >> 6) | #xC0)) + (r1 &= #x3F) + (r1 |= #x80) + (write-repeat r1)) + ;; 3byte encoding + ((write ((r1 >> 12) | #xE0)) + (write (((r1 & #x0FC0) >> 6) | #x80)) + (r1 &= #x3F) + (r1 |= #x80) + (write-repeat r1))))) - (if (r0 == ,(charset-id 'mule-unicode-2500-33ff)) - ((r0 = ((((r1 & #x3f80) >> 7) - 32) * 96)) - (r1 &= #x7f) - (r1 += (r0 + 9440)) ; 9440 == -32 + #x2500 - (r0 = (((r1 & #xf000) >> 12) | #xe0)) - (r2 = ((r1 & #x3f) | #x80)) - (r1 &= #x0fc0) - (r1 >>= 6) - (r1 |= #x80) - (write r0 r1 r2)) + (if (r0 == ,(charset-id 'mule-unicode-2500-33ff)) + ((r0 = ((((r1 & #x3f80) >> 7) - 32) * 96)) + (r1 &= #x7f) + (r1 += (r0 + 9440)) ; 9440 == -32 + #x2500 + ;; now r1 holds scalar value + (write ((r1 >> 12) | #xE0)) + (write (((r1 & #x0FC0) >> 6) | #x80)) + (r1 &= #x3F) + (r1 |= #x80) + (write-repeat r1))) - (if (r0 == ,(charset-id 'mule-unicode-e000-ffff)) - ((r0 = ((((r1 & #x3f80) >> 7) - 32) * 96)) - (r1 &= #x7f) - (r1 += (r0 + 57312)) ; 57312 == -32 + #xe000 - (r0 = (((r1 & #xf000) >> 12) | #xe0)) - (r2 = ((r1 & #x3f) | #x80)) - (r1 &= #x0fc0) - (r1 >>= 6) - (r1 |= #x80) - (write r0 r1 r2)) + (if (r0 == ,(charset-id 'mule-unicode-e000-ffff)) + ((r0 = ((((r1 & #x3f80) >> 7) - 32) * 96)) + (r1 &= #x7f) + (r1 += (r0 + 57312)) ; 57312 == -32 + #xe000 + ;; now r1 holds scalar value + (write ((r1 >> 12) | #xE0)) + (write (((r1 & #x0FC0) >> 6) | #x80)) + (r1 &= #x3F) + (r1 |= #x80) + (write-repeat r1))) - (if (r0 == ,(charset-id 'eight-bit-control)) - ;; r1 scalar utf-8 - ;; 0000 0yyy yyxx xxxx 110y yyyy 10xx xxxx - ;; 80 0000 0000 1000 0000 1100 0010 1000 0000 - ;; 9f 0000 0000 1001 1111 1100 0010 1001 1111 - ((write #xc2) - (write r1)) + (if (r0 == ,(charset-id 'eight-bit-control)) + ;; r1 scalar utf-8 + ;; 0000 0yyy yyxx xxxx 110y yyyy 10xx xxxx + ;; 80 0000 0000 1000 0000 1100 0010 1000 0000 + ;; 9f 0000 0000 1001 1111 1100 0010 1001 1111 + ((write #xC2) + (write-repeat r1))) - (if (r0 == ,(charset-id 'eight-bit-graphic)) - ;; r1 scalar utf-8 - ;; 0000 0yyy yyxx xxxx 110y yyyy 10xx xxxx - ;; a0 0000 0000 1010 0000 1100 0010 1010 0000 - ;; ff 0000 0000 1111 1111 1101 1111 1011 1111 - ((write r1) - (r1 = -1) - (read-multibyte-character r0 r1) - (if (r0 != ,(charset-id 'eight-bit-graphic)) - (if (r0 != ,(charset-id 'eight-bit-control)) - ((r5 = r0) - (r6 = r1)))) - (if (r5 < 0) - ((read-multibyte-character r0 r2) - (if (r0 != ,(charset-id 'eight-bit-graphic)) - (if (r0 != ,(charset-id 'eight-bit-control)) - ((r5 = r0) - (r6 = r2)))) - (if (r5 < 0) - (write r1 r2) - (if (r1 < #xa0) - (write r1) - ((write #xc2) - (write r1))))))) + (if (r0 == ,(charset-id 'eight-bit-graphic)) + ;; r1 scalar utf-8 + ;; 0000 0yyy yyxx xxxx 110y yyyy 10xx xxxx + ;; a0 0000 0000 1010 0000 1100 0010 1010 0000 + ;; ff 0000 0000 1111 1111 1101 1111 1011 1111 + ((r0 = (r1 >= #xC0)) + (r0 &= (r1 <= #xC3)) + (r4 = (r1 >= #xE1)) + (r4 &= (r1 <= #xF7)) + (r0 |= r4) + (if r0 + ((call ccl-mule-utf-8-encode-untrans) + (repeat)) + (write-repeat r1)))) - ((lookup-character utf-subst-table-for-encode r0 r1) - (if r7 ; lookup succeeded - ((r1 = (((r0 & #xf000) >> 12) | #xe0)) - (r2 = ((r0 & #x3f) | #x80)) - (r0 &= #x0fc0) - (r0 >>= 6) - (r0 |= #x80) - (write r1 r0 r2)) - ;; Unsupported character. - ;; Output U+FFFD, which is `ef bf bd' in UTF-8. - ((write #xef) - (write #xbf) - (write #xbd))))))))))) - (repeat))) - (if (r1 >= #xa0) - (write r1) - (if (r1 >= #x80) - ((write #xc2) - (write r1))))) + (lookup-character utf-subst-table-for-encode r0 r1) + (if r7 ; lookup succeeded + (if (r0 < #x800) + ;; 2byte encoding + ((write ((r0 >> 6) | #xC0)) + (r0 = ((r0 & #x3F) | #x80)) + (write-repeat r0)) + ;; 3byte encoding + ((write ((r0 >> 12) | #xE0)) + (write (((r0 & #x0FC0) >> 6) | #x80)) + (r0 = ((r0 & #x3F) | #x80)) + (write-repeat r0)))) + ;; Unsupported character. + ;; Output U+FFFD, which is `ef bf bd' in UTF-8. + (write #xef) + (write #xbf) + (write-repeat #xbd)))) "CCL program to encode into UTF-8.") (define-ccl-program ccl-untranslated-to-ucs `(0 - (if (r0 < #xf0) ; 3-byte encoding, as above - ((r4 = 0) - (r3 = (r1 & #b11000000)) - (r3 |= ((r2 >> 2) & #b00110000)) - (if (r3 != #b10100000) - (r4 = 1) - ((r3 = ((r0 & #x0f) << 12)) - (r3 += ((r1 & #x3f) << 6)) - (r3 += (r2 & #x3f)) - (if (r3 < #x0800) - (r4 = 1)))) - (if (r4 != 0) - (r0 = 0) - (r0 = r3))) - (if (r0 < #xf8) ; 4-byte (Mule-UCS recipe) - ((r4 = (r1 >> 6)) - (if (r4 != #b10) - (r0 = 0) - ((r4 = (r2 >> 6)) - (if (r4 != #b10) - (r0 = 0) - ((r4 = (r3 >> 6)) - (if (r4 != #b10) - (r0 = 0) - ((r1 = ((r1 & #x3F) << 12)) - (r2 = ((r2 & #x3F) << 6)) - (r3 &= #x3F) - (r0 = (((((r0 & #x07) << 18) | r1) | r2) | r3))))))))) - (r0 = 0)))) - "Decode 3- or 4-byte sequences in r0, r1, r2 [,r3] to unicodes in r0. -r0 == 0 for invalid sequence.") + (if (r1 == 0) + nil + (if (r0 <= #xC3) ; 2-byte encoding + ((r0 = ((r0 & #x3) << 6)) + (r0 |= (r1 & #x3F)) + (r1 = 2)) + (if (r2 == 0) + (r1 = 0) + (if (r0 < #xF0) ; 3-byte encoding, as above + ((r0 = ((r0 & #xF) << 12)) + (r0 |= ((r1 & #x3F) << 6)) + (r0 |= (r2 & #x3F)) + (r1 = 3)) + (if (r3 == 0) + (r1 = 0) + ((r0 = ((r0 & #x7) << 18)) + (r0 |= ((r1 & #x3F) << 12)) + (r0 |= ((r2 & #x3F) << 6)) + (r0 |= (r3 & #x3F)) + (r1 = 4)))))))) + "Decode 2-, 3-, or 4-byte sequences in r0, r1, r2 [,r3] to unicodes in r0. +Set r1 to the byte length. r0 == 0 for invalid sequence.") (defvar utf-8-ccl-regs (make-vector 8 0)) @@ -708,33 +808,47 @@ Only for 3- or 4-byte sequences." (aset utf-8-ccl-regs 1 (or (char-after (1+ (point))) 0)) (aset utf-8-ccl-regs 2 (or (char-after (+ 2 (point))) 0)) (aset utf-8-ccl-regs 3 (or (char-after (+ 3 (point))) 0)) - (ccl-execute 'ccl-untranslated-to-ucs utf-8-ccl-regs) - (aref utf-8-ccl-regs 0)) + (ccl-execute 'ccl-untranslated-to-ucs utf-8-ccl-regs)) (defun utf-8-help-echo (window object position) (format "Untranslated Unicode U+%04X" (get-char-property position 'untranslated-utf-8 object))) -;; We compose the untranslatable sequences into a single character. +;; We compose the untranslatable sequences into a single character, +;; and move point to the next character. ;; This is infelicitous for editing, because there's currently no ;; mechanism for treating compositions as atomic, but is OK for ;; display. They are composed to U+FFFD with help-echo which ;; indicates the unicodes they represent. This function GCs too much. -(defsubst utf-8-compose () - "Put a suitable composition on an untranslatable sequence. -Return the sequence's length." - (let* ((u (utf-8-untranslated-to-ucs)) - (l (unless (zerop u) - (if (>= u #x10000) - 4 - 3)))) - (when l - (put-text-property (point) (min (point-max) (+ l (point))) - 'untranslated-utf-8 u) - (put-text-property (point) (min (point-max) (+ l (point))) - 'help-echo 'utf-8-help-echo) - (compose-region (point) (+ l (point)) ?$,3u=(B) - l))) + +;; If utf-translate-cjk-mode is non-nil, this function is called with +;; HASH-TABLE which translates CJK characters into some of CJK +;; charsets. + +(defsubst utf-8-compose (hash-table) + "Put a suitable composition on an untranslatable sequence at point. +If HASH-TABLE is non-nil, try to translate CJK characters by it at first. +Move point to the end of the sequence." + (utf-8-untranslated-to-ucs) + (let ((l (aref utf-8-ccl-regs 1)) + ch) + (if (> l 0) + (if (and hash-table + (setq ch (gethash (aref utf-8-ccl-regs 0) hash-table))) + (progn + (insert ch) + (delete-region (point) (min (point-max) (+ l (point))))) + (setq ch (aref utf-8-ccl-regs 0)) + (put-text-property (point) (min (point-max) (+ l (point))) + 'untranslated-utf-8 ch) + (put-text-property (point) (min (point-max) (+ l (point))) + 'help-echo 'utf-8-help-echo) + (if (= l 2) + (put-text-property (point) (min (point-max) (+ l (point))) + 'display (format "\\%03o" ch)) + (compose-region (point) (+ l (point)) ?$,3u=(B)) + (forward-char l)) + (forward-char 1)))) (defcustom utf-8-compose-scripts nil "*Non-nil means compose various scripts on decoding utf-8 text." @@ -744,38 +858,63 @@ Return the sequence's length." (defun utf-8-post-read-conversion (length) "Compose untranslated utf-8 sequences into single characters. +If `utf-translate-cjk-mode' is non-nil, tries to translate CJK characters. Also compose particular scripts if `utf-8-compose-scripts' is non-nil." (save-excursion - ;; Can't do eval-when-compile to insert a multibyte constant - ;; version of the string in the loop, since it's always loaded as - ;; unibyte from a byte-compiled file. - (let ((range (string-as-multibyte "^\xe1-\xf7"))) - (while (and (skip-chars-forward range) - (not (eobp))) - (forward-char (utf-8-compose))))) - ;; Fixme: Takahashi-san implies it may not work this easily. I - ;; asked why but didn't get a reply. -- fx - (when (and utf-8-compose-scripts (> length 1)) - ;; These currently have definitions which cover the relevant - ;; unicodes. We could avoid loading thai-util &c by checking - ;; whether the region contains any characters with the appropriate - ;; categories. There aren't yet Unicode-based rules for Tibetan. - (save-excursion (setq length (diacritic-post-read-conversion length))) - (save-excursion (setq length (thai-post-read-conversion length))) - (save-excursion (setq length (lao-post-read-conversion length))) - (save-excursion (setq length (devanagari-post-read-conversion length))) - (save-excursion (setq length (malayalam-post-read-conversion length))) - (save-excursion (setq length (tamil-post-read-conversion length)))) - length) + (save-restriction + (narrow-to-region (point) (+ (point) length)) + ;; Can't do eval-when-compile to insert a multibyte constant + ;; version of the string in the loop, since it's always loaded as + ;; unibyte from a byte-compiled file. + (let ((range (string-as-multibyte "^\xc0-\xc3\xe1-\xf7")) + hash-table ch) + (when utf-translate-cjk-mode + (if (not utf-translate-cjk-lang-env) + ;; Check these characters: + ;; "U+2e80-U+33ff", "U+ff00-U+ffef" + ;; We may have to translate them to CJK charsets. + (let ((range2 "$,29@(B-$,2G$,3r`(B-$,3u/(B")) + (skip-chars-forward (concat range range2)) + (unless (eobp) + (utf-translate-cjk-load-tables) + (setq range (concat range range2))) + (setq hash-table (get 'utf-subst-table-for-decode + 'translation-hash-table))))) + (while (and (skip-chars-forward range) + (not (eobp))) + (setq ch (following-char)) + (if (< ch 256) + (utf-8-compose hash-table) + (if (and hash-table + (setq ch (gethash (encode-char ch 'ucs) hash-table))) + (progn + (insert ch) + (delete-char 1)) + (forward-char 1))))) -;; ucs-tables is preloaded -;; (defun utf-8-pre-write-conversion (beg end) -;; "Semi-dummy pre-write function effectively to autoload ucs-tables." -;; ;; Ensure translation-table is loaded. -;; (require 'ucs-tables) -;; ;; Don't do this again. -;; (coding-system-put 'mule-utf-8 'pre-write-conversion nil) -;; nil) + (when (and utf-8-compose-scripts (> length 1)) + ;; These currently have definitions which cover the relevant + ;; unicodes. We could avoid loading thai-util &c by checking + ;; whether the region contains any characters with the appropriate + ;; categories. There aren't yet Unicode-based rules for Tibetan. + (diacritic-compose-region (point-max) (point-min)) + (thai-compose-region (point-max) (point-min)) + (lao-compose-region (point-max) (point-min)) + (devanagari-compose-region (point-max) (point-min)) + (malayalam-compose-region (point-max) (point-min)) + (tamil-compose-region (point-max) (point-min))) + (- (point-max) (point-min))))) + +(defun utf-8-pre-write-conversion (beg end) + "Prepare for `utf-translate-cjk-mode' to encode text between BEG and END. +This is used as a post-read-conversion of utf-8 coding system." + (if (and utf-translate-cjk-mode + (not utf-translate-cjk-lang-env) + (save-excursion + (goto-char beg) + (re-search-forward "\\cc\\|\\cj\\|\\ch" end t))) + (utf-translate-cjk-load-tables)) + nil) (make-coding-system 'mule-utf-8 4 ?u @@ -797,18 +936,20 @@ any of the character sets listed above are encoded into the UTF-8 byte sequence representing U+FFFD (REPLACEMENT CHARACTER)." '(ccl-decode-mule-utf-8 . ccl-encode-mule-utf-8) - '((safe-charsets + `((safe-charsets ascii eight-bit-control eight-bit-graphic latin-iso8859-1 mule-unicode-0100-24ff mule-unicode-2500-33ff - mule-unicode-e000-ffff) + mule-unicode-e000-ffff + ,@(if utf-translate-cjk-mode + utf-translate-cjk-charsets)) (mime-charset . utf-8) (coding-category . coding-category-utf-8) (valid-codes (0 . 255)) -;; (pre-write-conversion . utf-8-pre-write-conversion) + (pre-write-conversion . utf-8-pre-write-conversion) (post-read-conversion . utf-8-post-read-conversion) (translation-table-for-encode . utf-translation-table-for-encode) (dependency unify-8859-on-encoding-mode diff --git a/lisp/language/devan-util.el b/lisp/language/devan-util.el index 20bcffdad49..24b9d40eec0 100644 --- a/lisp/language/devan-util.el +++ b/lisp/language/devan-util.el @@ -60,6 +60,7 @@ "\\)") "Regexp matching a composable sequence of Devanagari characters.") +;;;###autoload (defun devanagari-compose-region (from to) (interactive "r") (save-excursion diff --git a/lisp/progmodes/python.el b/lisp/progmodes/python.el index cee59a6e3e1..274480a36de 100644 --- a/lisp/progmodes/python.el +++ b/lisp/progmodes/python.el @@ -323,7 +323,8 @@ BOS non-nil means point is known to be at beginning of statement." line-end)) (save-excursion (python-end-of-statement)) t) - (not (python-in-string/comment))))) + (not (progn (goto-char (match-beginning 0)) + (python-in-string/comment)))))) (defun python-close-block-statement-p (&optional bos) "Return non-nil if current line is a statement closing a block. diff --git a/lisp/simple.el b/lisp/simple.el index 94557956de0..9cd630b94ec 100644 --- a/lisp/simple.el +++ b/lisp/simple.el @@ -953,7 +953,8 @@ See also `minibuffer-history-case-insensitive-variables'." nil minibuffer-local-map nil - 'minibuffer-history-search-history))) + 'minibuffer-history-search-history + (car minibuffer-history-search-history)))) ;; Use the last regexp specified, by default, if input is empty. (list (if (string= regexp "") (if minibuffer-history-search-history diff --git a/lisp/textmodes/paragraphs.el b/lisp/textmodes/paragraphs.el index 1b2628760e1..e9cc4f397de 100644 --- a/lisp/textmodes/paragraphs.el +++ b/lisp/textmodes/paragraphs.el @@ -171,7 +171,7 @@ followed by two spaces, unless it's inside some sort of quotes or parenthesis. See Info node `Sentences'." (or sentence-end (concat (if sentence-end-without-period "\\w \\|") - "\\([.?!][]\"'\xd0c9)}]*" + "\\([.?!][]\"'\xd0c9\x5397d)}]*" (if sentence-end-double-space "\\($\\| $\\|\t\\| \\)" "\\($\\|[\t ]\\)") "\\|[" sentence-end-without-space "]+\\)" diff --git a/lisp/time-stamp.el b/lisp/time-stamp.el index b6e76ee5394..f574144f4b0 100644 --- a/lisp/time-stamp.el +++ b/lisp/time-stamp.el @@ -5,7 +5,7 @@ ;; This file is part of GNU Emacs. -;; Maintainer's Time-stamp: <2003-02-01 09:26:25 gildea> +;; Maintainer's Time-stamp: <2004-06-13 19:04:36 teirllm> ;; Maintainer: Stephen Gildea ;; Keywords: tools @@ -32,7 +32,7 @@ ;; See the top of `time-stamp.el' for another example. ;; To use time-stamping, add this line to your .emacs file: -;; (add-hook 'write-file-hooks 'time-stamp) +;; (add-hook 'before-save-hook 'time-stamp) ;; Now any time-stamp templates in your files will be updated automatically. ;; See the documentation for the functions `time-stamp' @@ -242,7 +242,8 @@ of the time-stamped file itself.") "Update the time stamp string(s) in the buffer. A template in a file can be automatically updated with a new time stamp every time you save the file. Add this line to your .emacs file: - (add-hook 'write-file-hooks 'time-stamp) + (add-hook 'before-save-hook 'time-stamp) +or customize `before-save-hook' through Custom. Normally the template must appear in the first 8 lines of a file and look like one of the following: Time-stamp: <> @@ -318,7 +319,6 @@ template." (setq start (time-stamp-once start search-limit ts-start ts-end ts-format format-lines end-lines)) (setq ts-count (1- ts-count)))) - ;; be sure to return nil so can be used on write-file-hooks nil) (defun time-stamp-once (start search-limit ts-start ts-end diff --git a/lispref/.arch-inventory b/lispref/.arch-inventory new file mode 100644 index 00000000000..2f5e6b9eea0 --- /dev/null +++ b/lispref/.arch-inventory @@ -0,0 +1,4 @@ +# Generated files +precious ^(config\.status|config\.cache)$ + +# arch-tag: dde817a2-94ff-4c6e-838c-bb5b33e7f0df diff --git a/man/ChangeLog b/man/ChangeLog index 47bfea09bda..8418bb43d13 100644 --- a/man/ChangeLog +++ b/man/ChangeLog @@ -1,3 +1,22 @@ +2004-06-13 Luc Teirlinck + + * autotype.texi (Copyrights, Timestamps): Recommend + `before-save-hook' instead of `write-file-functions'. + +2004-06-13 Richard M. Stallman + + * custom.texi (Init Syntax): Explain about vars that do special + things when set with setq or with Custom. + (Init Examples): Add line-number-mode example. + +2004-06-13 Lars Hansen + + * dired-x.texi (dired-mark-omitted): Update keybinding. + +2004-06-12 Juri Linkov + + * dired.texi (Operating on Files): Add dired-do-touch. + 2004-06-10 Kim F. Storm * pcl-cvs.texi (Viewing differences): Add 'd y'. diff --git a/man/autotype.texi b/man/autotype.texi index 5b24f26f2f5..16e88851c4b 100644 --- a/man/autotype.texi +++ b/man/autotype.texi @@ -456,16 +456,19 @@ wrong version of the GNU General Public License (@pxref{(emacs)Copying}) is foun that is updated too. An interesting application for this function is to have it be called -automatically every time a file is saved. This is accomplished by putting -@code{(add-hook 'write-file-functions 'copyright-update)} into your @file{~/.emacs} -file (@pxref{(emacs)Init File}). +automatically every time a file is saved. This is accomplished by +putting @code{(add-hook 'before-save-hook 'copyright-update)} into +your @file{~/.emacs} file (@pxref{(emacs)Init File}). Alternative, +you can do @kbd{M-x customize-variable @key{RET} before-save-hook +@key{RET}}. @code{copyright-update} is conveniently listed as an +option in the customization buffer. @vindex copyright-query The variable @code{copyright-query} controls whether to update the copyright or whether to ask about it. When this is @code{nil} updating is only done with @kbd{M-x copyright-update}. When this is @code{function} you are queried whenever @code{copyright-update} is called as a function, -such as in the @code{write-file-functions} feature mentioned above. Otherwise +such as in the @code{before-save-hook} feature mentioned above. Otherwise you are always queried. @@ -522,11 +525,13 @@ The ``interpreter'' used is @code{executable-self-display} with argument @cindex timestamps @findex time-stamp -@vindex write-file-functions +@vindex before-save-hook The @code{time-stamp} command can be used to update automatically a template in a file with a new time stamp every time you save the file. -Customize the hook @code{write-file-functions} to add the function -@code{time-stamp} to arrange this. +Customize the hook @code{before-save-hook} to add the function +@code{time-stamp} to arrange this. It you use Custom to do this, +then @code{time-stamp} is conveniently listed as an option in the +customization buffer. @vindex time-stamp-active @vindex time-stamp-format diff --git a/man/custom.texi b/man/custom.texi index 2502ae2a43d..614fa2442fc 100644 --- a/man/custom.texi +++ b/man/custom.texi @@ -1984,9 +1984,20 @@ arguments, all surrounded by parentheses. For example, @code{(setq fill-column 60)} calls the function @code{setq} to set the variable @code{fill-column} (@pxref{Filling}) to 60. - The second argument to @code{setq} is an expression for the new value of -the variable. This can be a constant, a variable, or a function call -expression. In @file{.emacs}, constants are used most of the time. They can be: + You can set any Lisp variable with @code{setq}, but with certain +variables @code{setq} won't do what you probably want in the +@file{.emacs} file. Some variables automatically become buffer-local +when set with @code{setq}; what you want in @file{.emacs} is to set +the default value, using @code{setq-default}. Some customizable minor +mode variables do special things to enable the mode when you set them +with Customize, but ordinary @code{setq} won't do that; to enable the +mode in your @file{.emacs} file, call the minor mode command. The +following section has examples of both of these methods. + + The second argument to @code{setq} is an expression for the new +value of the variable. This can be a constant, a variable, or a +function call expression. In @file{.emacs}, constants are used most +of the time. They can be: @table @asis @item Numbers: @@ -2106,6 +2117,14 @@ which supports most of the languages of Western Europe. (set-language-environment "Latin-1") @end example +@need 1500 +@item +Turn off Line Number mode, a global minor mode. + +@example +(line-number-mode 0) +@end example + @need 1500 @item Turn on Auto Fill mode automatically in Text mode and related modes. diff --git a/man/dired-x.texi b/man/dired-x.texi index 8ac41ec34ba..d51fda342df 100644 --- a/man/dired-x.texi +++ b/man/dired-x.texi @@ -397,8 +397,8 @@ Marked files are never omitted. @findex dired-omit-mode (@code{dired-omit-mode}) Toggle between displaying and omitting ``uninteresting'' files. -@item M-O -@kindex M-O +@item * O +@kindex * O @findex dired-mark-omitted (@code{dired-mark-omitted}) Mark ``uninteresting'' files. @end table diff --git a/man/dired.texi b/man/dired.texi index fd269811b9f..57fa240c39a 100644 --- a/man/dired.texi +++ b/man/dired.texi @@ -589,6 +589,12 @@ The variable @code{dired-chown-program} specifies the name of the program to use to do the work (different systems put @code{chown} in different places). +@findex dired-do-touch +@kindex T @r{(Dired)} +@cindex changing file time (in Dired) +@item T @var{timestamp} @key{RET} +Change the time of the specified files (@code{dired-do-touch}). + @findex dired-do-print @kindex P @r{(Dired)} @cindex printing files (in Dired) diff --git a/man/programs.texi b/man/programs.texi index 84f3e6f14f8..9c081a7315b 100644 --- a/man/programs.texi +++ b/man/programs.texi @@ -65,7 +65,6 @@ and you can select it by typing @kbd{M-x @var{l}-mode @key{RET}}. @cindex Perl mode @cindex Icon mode -@cindex Awk mode @cindex Makefile mode @cindex Tcl mode @cindex CPerl mode @@ -82,7 +81,7 @@ and you can select it by typing @kbd{M-x @var{l}-mode @key{RET}}. @cindex PostScript mode The existing programming language major modes include Lisp, Scheme (a variant of Lisp) and the Scheme-based DSSSL expression language, Ada, -Awk, C, C++, Delphi (Object Pascal), Fortran (free format and fixed +AWK, C, C++, Delphi (Object Pascal), Fortran (free format and fixed format), Icon, IDL (CORBA), IDLWAVE, Java, Metafont (@TeX{}'s companion for font creation), Modula2, Objective-C, Octave, Pascal, Perl, Pike, PostScript, Prolog, Simula, Tcl, and VHDL. There is @@ -104,7 +103,7 @@ whitespace consists of spaces or tabs. Use @kbd{C-b C-d} to delete a tab character before point, in these modes. Separate manuals are available for the modes for Ada (@pxref{Top, , Ada -Mode, ada-mode, Ada Mode}), C/C++/Objective C/Java/Corba IDL +Mode, ada-mode, Ada Mode}), C/C++/Objective C/Java/Corba IDL/Pike/AWK (@pxref{Top, , CC Mode, ccmode, CC Mode}) and the IDLWAVE modes (@pxref{Top, , IDLWAVE, idlwave, IDLWAVE User Manual}). @@ -446,15 +445,16 @@ modes and C and related modes.) @key{TAB} with a numeric argument reindents the current line as usual, then reindents by the same amount all the lines in the parenthetical grouping starting on the current line. It is clever, though, and does not alter lines that start -inside strings, or C preprocessor lines when in C mode. +inside strings. Neither does it alter C preprocessor lines when in C +mode, but it does reindent any continuation lines that may be attached +to them. @findex indent-code-rigidly You can also perform this operation on the region, using the command @kbd{M-x indent-code-rigidly}. It rigidly shifts all the lines in the region sideways, like @code{indent-rigidly} does (@pxref{Indentation Commands}). It doesn't alter the indentation of lines that start -inside a comment or a string, unless the region starts inside that -comment or string. +inside a string, unless the region also starts inside that string. @node Lisp Indent @subsection Customizing Lisp Indentation @@ -507,14 +507,15 @@ declaration (@code{c-indent-defun}). @kindex C-M-q @r{(C mode)} @findex c-indent-exp Reindent each line in the balanced expression that follows point -(@code{c-indent-exp}). A prefix argument inhibits error checking and -warning messages about invalid syntax. +(@code{c-indent-exp}). A prefix argument inhibits warning messages +about invalid syntax. @item @key{TAB} @findex c-indent-command Reindent the current line, and/or in some cases insert a tab character (@code{c-indent-command}). +@vindex c-tab-always-indent If @code{c-tab-always-indent} is @code{t}, this command always reindents the current line and does nothing else. This is the default. @@ -524,8 +525,7 @@ otherwise, it inserts a tab (or the equivalent number of spaces, if @code{indent-tabs-mode} is @code{nil}). Any other value (not @code{nil} or @code{t}) means always reindent the -line, and also insert a tab if within a comment, a string, or a -preprocessor directive. +line, and also insert a tab if within a comment or a string. @end table To reindent the whole current buffer, type @kbd{C-x h C-M-\}. This @@ -539,18 +539,19 @@ to the front of the block and then reindents it all. @subsection Customizing C Indentation @cindex style (for indentation) - C mode and related modes use a simple yet flexible mechanism for -customizing indentation. The mechanism works in two steps: first it -classifies the line syntactically according to its contents and context; -second, it associates each kind of syntactic construct with an -indentation offset based on your selected @dfn{style}. + C mode and related modes use a flexible mechanism for customizing +indentation. C mode indents a source line in two steps: first it +classifies the line syntactically according to its contents and +context; second, it determines the indentation offset associated by +your selected @dfn{style} with the syntactic construct and adds this +onto the indentation of the @dfn{anchor statement}. @table @kbd -@item M-x c-set-style @key{RET} @var{style} @key{RET} -Select predefined indentation style @var{style}. +@item C-c . @key{RET} @var{style} @key{RET} +Select a predefined style @var{style} (@code{c-set-style}). @end table - A style is a named collection of indentation customizations that can + A @dfn{style} is a named collection of customizations that can be used in C mode and the related modes. Emacs comes with several predefined styles, including @code{gnu}, @code{k&r}, @code{bsd}, @code{stroustrup}, @code{linux}, @code{python}, @code{java}, @@ -561,19 +562,21 @@ modes. To find out what a style looks like, select it and reindent some code, e.g., by typing @key{C-M-q} at the start of a function definition. +@kindex C-c . @r{(C mode)} @findex c-set-style - To choose a style for the current buffer, use the command @kbd{M-x -c-set-style}. Specify a style name as an argument (case is not -significant). This command affects the current buffer only, and it -affects only future invocations of the indentation commands; it does -not reindent the code in the buffer. To reindent the whole buffer in -the new style, you can type @kbd{C-x h C-M-\}. + To choose a style for the current buffer, use the command @kbd{C-c +.}. Specify a style name as an argument (case is not significant). +This command affects the current buffer only, and it affects only +future invocations of the indentation commands; it does not reindent +the code in the buffer. To reindent the whole buffer in the new +style, you can type @kbd{C-x h C-M-\}. @vindex c-default-style You can also set the variable @code{c-default-style} to specify the -default style for various major modes. Its value should be an alist, -in which each element specifies one major mode and which indentation -style to use for it. For example, +default style for various major modes. Its value should be either the +style's name (a string) or an alist, in which each element specifies +one major mode and which indentation style to use for it. For +example, @example (setq c-default-style @@ -848,18 +851,20 @@ also do spell checking on comments with Flyspell Prog mode The comment commands in this table insert, kill and align comments. They are described in this section and following sections. -@table @kbd -@item M-; +@table @asis +@item @kbd{M-;} Insert or realign comment on current line; alternatively, comment or uncomment the region (@code{comment-dwim}). -@item C-u M-; +@item @kbd{C-u M-;} Kill comment on current line (@code{comment-kill}). -@item C-x ; +@item @kbd{C-x ;} Set comment column (@code{comment-set-column}). -@item C-M-j +@item @kbd{C-M-j} +@itemx @kbd{M-j} Like @key{RET} followed by inserting and aligning a comment (@code{comment-indent-new-line}). -@item M-x comment-region +@item @kbd{M-x comment-region} +@itemx @kbd{C-c C-c} (in C-like modes) Add or remove comment delimiters on all the lines in the region. @end table @@ -937,17 +942,20 @@ is indented like a line of code. @subsection Multiple Lines of Comments @kindex C-M-j +@kindex M-j @cindex blank lines in programs @findex comment-indent-new-line If you are typing a comment and wish to continue it on another line, -you can use the command @kbd{C-M-j} (@code{comment-indent-new-line}). -This terminates the comment you are typing, creates a new blank line -afterward, and begins a new comment indented under the old one. When -Auto Fill mode is on, going past the fill column while typing a comment -causes the comment to be continued in just this fashion. If point is -not at the end of the line when @kbd{C-M-j} is typed, the text on -the rest of the line becomes part of the new comment line. +you can use the command @kbd{C-M-j} or @kbd{M-j} +(@code{comment-indent-new-line}). This terminates the comment you are +typing, creates a new blank line afterward, and begins a new comment +indented under the old one. When Auto Fill mode is on, going past the +fill column while typing a comment causes the comment to be continued +in just this fashion. If point is not at the end of the line when you +type the command, the text on the rest of the line becomes part of the +new comment line. +@kindex C-c C-c (C mode) @findex comment-region To turn existing lines into comment lines, use the @kbd{M-x comment-region} command. It adds comment delimiters to the lines that start @@ -970,12 +978,13 @@ if within a defun, it must be three. @vindex comment-column @kindex C-x ; @findex comment-set-column - The comment column is stored in the variable @code{comment-column}. You -can set it to a number explicitly. Alternatively, the command @kbd{C-x ;} -(@code{comment-set-column}) sets the comment column to the column point is -at. @kbd{C-u C-x ;} sets the comment column to match the last comment -before point in the buffer, and then does a @kbd{M-;} to align the -current line's comment under the previous one. + The @dfn{comment column}, the column at which Emacs tries to place +comments, is stored in the variable @code{comment-column}. You can +set it to a number explicitly. Alternatively, the command @kbd{C-x ;} +(@code{comment-set-column}) sets the comment column to the column +point is at. @kbd{C-u C-x ;} sets the comment column to match the +last comment before point in the buffer, and then does a @kbd{M-;} to +align the current line's comment under the previous one. The variable @code{comment-column} is per-buffer: setting the variable in the normal fashion affects only the current buffer, but there is a @@ -990,7 +999,7 @@ Make sure this regexp does not match the null string. It may match more than the comment starting delimiter in the strictest sense of the word; for example, in C mode the value of the variable is @c This stops M-q from breaking the line inside that @code. -@code{@w{"/\\*+ *\\|//+ *""}}, which matches extra stars and spaces +@code{@w{"/\\*+ *\\|//+ *"}}, which matches extra stars and spaces after the @samp{/*} itself, and accepts C++ style comments also. (Note that @samp{\\} is needed in Lisp syntax to include a @samp{\} in the string, which is needed to deny the first star its special meaning @@ -1006,21 +1015,21 @@ into the comment. In C mode, @code{comment-start} has the value @vindex comment-padding The variable @code{comment-padding} specifies how many spaces -@code{comment-region} should insert on each line between the -comment delimiter and the line's original text. The default is 1, -to insert one space. +@code{comment-region} should insert on each line between the comment +delimiter and the line's original text. The default is 1, to insert +one space. @code{nil} means 0. Alternatively, @code{comment-padding} +can hold the actual string to insert. @vindex comment-multi-line The variable @code{comment-multi-line} controls how @kbd{C-M-j} -(@code{indent-new-comment-line}) behaves when used inside a comment. If -@code{comment-multi-line} is @code{nil}, as it normally is, then the -comment on the starting line is terminated and a new comment is started -on the new following line. If @code{comment-multi-line} is not -@code{nil}, then the new following line is set up as part of the same -comment that was found on the starting line. This is done by not -inserting a terminator on the old line, and not inserting a starter on -the new line. In languages where multi-line comments work, the choice -of value for this variable is a matter of taste. +(@code{indent-new-comment-line}) behaves when used inside a comment. +Specifically, when @code{comment-multi-line} is @code{nil} (the +default value), the command inserts a comment terminator, begins a new +line, and finally inserts a comment starter. Otherwise it does not +insert the terminator and starter, so it effectively continues the +current comment across multiple lines. In languages that allow +multi-line comments, the choice of value for this variable is a matter +of taste. @vindex comment-indent-function The variable @code{comment-indent-function} should contain a function @@ -1064,7 +1073,7 @@ symbol---which Info files to look in, and which indices to search. You can also use @kbd{M-x info-lookup-file} to look for documentation for a file name. - This feature currently supports the modes Awk, Autoconf, Bison, C, + This feature currently supports the modes AWK, Autoconf, Bison, C, Emacs Lisp, LaTeX, M4, Makefile, Octave, Perl, Scheme, and Texinfo, provided you have installed the relevant Info files, which are typically available with the appropriate GNU package. @@ -1081,7 +1090,7 @@ still useful to read manual pages. @findex manual-entry You can read the man page for an operating system command, library -function, or system call, with the @kbd{M-x manual-entry} command. It +function, or system call, with the @kbd{M-x man} command. It runs the @code{man} program to format the man page; if the system permits, it runs @code{man} asynchronously, so that you can keep on editing while the page is being formatted. (On MS-DOS and MS-Windows @@ -1393,25 +1402,27 @@ Mode}). The Foldout package provides folding-editor features @cindex CORBA IDL mode @cindex Objective C mode @cindex C++ mode +@cindex AWK mode @cindex mode, Java @cindex mode, C +@cindex mode, C++ @cindex mode, Objective C @cindex mode, CORBA IDL @cindex mode, Pike +@cindex mode, AWK This section gives a brief description of the special features -available in C, C++, Objective-C, Java, CORBA IDL, and Pike modes. +available in C, C++, Objective-C, Java, CORBA IDL, Pike and AWK modes. (These are called ``C mode and related modes.'') @xref{Top, , CC Mode, ccmode, CC Mode}, for a more extensive description of these modes and their special features. @menu -* Motion in C:: Commands to move by C statements, etc. -* Electric C:: Colon and other chars can automatically reindent. -* Hungry Delete:: A more powerful DEL command. -* Other C Commands:: Filling comments, viewing expansion of macros, - and other neat features. -* Comments in C:: Options for customizing comment style. +* Motion in C:: Commands to move by C statements, etc. +* Electric C:: Colon and other chars can automatically reindent. +* Hungry Delete:: A more powerful DEL command. +* Other C Commands:: Filling comments, viewing expansion of macros, + and other neat features. @end menu @node Motion in C @@ -1421,15 +1432,29 @@ and their special features. related modes. @table @code +@item M-x c-beginning-of-defun +@itemx M-x c-end-of-defun +@findex c-beginning-of-defun +@findex c-end-of-defun +Move point to the beginning or end of the current function or +top-level definition. These are found by searching for the least +enclosing braces. (By contrast, @code{beginning-of-defun} and +@code{end-of-defun} search for braces in column zero.) If you are +editing code where the opening brace of a function isn't placed in +column zero, you may wish to bind @code{C-M-a} and @code{C-M-e} to +these commands. @xref{Moving by Defuns}. + @item C-c C-u @kindex C-c C-u @r{(C mode)} @findex c-up-conditional Move point back to the containing preprocessor conditional, leaving the mark behind. A prefix argument acts as a repeat count. With a negative argument, move point forward to the end of the containing -preprocessor conditional. When going backwards, @code{#elif} is treated -like @code{#else} followed by @code{#if}. When going forwards, -@code{#elif} is ignored.@refill +preprocessor conditional. + +@samp{#elif} is equivalent to @samp{#else} followed by @samp{#if}, so +the function will stop at a @samp{#elif} when going backward, but not +when going forward. @item C-c C-p @kindex C-c C-p @r{(C mode)} @@ -1446,27 +1471,22 @@ behind. A prefix argument acts as a repeat count. With a negative argument, move backward. @item M-a -@kindex ESC a +@kindex M-a (C mode) @findex c-beginning-of-statement Move point to the beginning of the innermost C statement (@code{c-beginning-of-statement}). If point is already at the beginning of a statement, move to the beginning of the preceding statement. With prefix argument @var{n}, move back @var{n} @minus{} 1 statements. -If point is within a string or comment, or next to a comment (only -whitespace between them), this command moves by sentences instead of -statements. - -When called from a program, this function takes three optional -arguments: the numeric prefix argument, a buffer position limit -(don't move back before that place), and a flag that controls whether -to do sentence motion when inside of a comment. +In comments or in strings which span more than one line, this command +moves by sentences instead of statements. @item M-e -@kindex ESC e +@kindex M-e (C mode) @findex c-end-of-statement -Move point to the end of the innermost C statement; like @kbd{M-a} -except that it moves in the other direction (@code{c-end-of-statement}). +Move point to the end of the innermost C statement or sentence; like +@kbd{M-a} except that it moves in the other direction +(@code{c-end-of-statement}). @item M-x c-backward-into-nomenclature @findex c-backward-into-nomenclature @@ -1530,12 +1550,14 @@ Insert a double colon scope operator at point, without reindenting the line or adding any newlines (@code{c-scope-operator}). @end table +@vindex c-electric-pound-behavior The electric @kbd{#} key reindents the line if it appears to be the beginning of a preprocessor directive. This happens when the value of @code{c-electric-pound-behavior} is @code{(alignleft)}. You can turn this feature off by setting @code{c-electric-pound-behavior} to @code{nil}. +@vindex c-hanging-braces-alist The variable @code{c-hanging-braces-alist} controls the insertion of newlines before and after inserted braces. It is an association list with elements of the following form: @code{(@var{syntactic-symbol} @@ -1550,6 +1572,7 @@ to determine where newlines are inserted: either before the brace, after, or both. If not found, the default is to insert a newline both before and after braces. +@vindex c-hanging-colons-alist The variable @code{c-hanging-colons-alist} controls the insertion of newlines before and after inserted colons. It is an association list with elements of the following form: @code{(@var{syntactic-symbol} @@ -1562,6 +1585,7 @@ where newlines are inserted: either before the brace, after, or both. If the syntactic symbol is not found in this list, no newlines are inserted. +@vindex c-cleanup-list Electric characters can also delete newlines automatically when the auto-newline feature is enabled. This feature makes auto-newline more acceptable, by deleting the newlines in the most common cases where you @@ -1613,6 +1637,7 @@ whitespace. @node Hungry Delete @subsection Hungry Delete Feature in C +@cindex hungry deletion (C Mode) When the @dfn{hungry-delete} feature is enabled (indicated by @samp{/h} or @samp{/ah} in the mode line after the mode name), a single @@ -1642,6 +1667,21 @@ hungry-delete feature is enabled. @subsection Other Commands for C Mode @table @kbd +@item M-x c-context-line-break +@findex c-context-line-break +This command inserts a line break and indents the new line in a manner +appropriate to the context. In normal code, it does the work of +@kbd{C-j} (@code{newline-and-indent}), in a C preprocessor line it +additionally inserts a @samp{\} at the line break, and within comments +it's like @kbd{M-j} (@code{c-indent-new-comment-line}). + +@code{c-context-line-break} isn't bound to a key by default, but it +needs a binding to be useful. The following code will bind it to +@kbd{C-j}. +@example +(define-key c-mode-base-map "\C-j" 'c-context-line-break) +@end example + @item C-M-h Put mark at the end of a function definition, and put point at the beginning (@code{c-mark-function}). @@ -1702,6 +1742,7 @@ directs how the line is indented. @itemx M-x global-cwarn-mode @findex cwarn-mode @findex global-cwarn-mode +@vindex global-cwarn-mode @cindex CWarn mode @cindex suspicious constructions in C, C++ CWarn minor mode highlights certain suspicious C and C++ constructions: @@ -1741,42 +1782,6 @@ to a C/C++ source file, or vice versa. The variable names. @end table -@node Comments in C -@subsection Comments in C Modes - - C mode and related modes use a number of variables for controlling -comment format. - -@table @code -@item c-comment-only-line-offset -@vindex c-comment-only-line-offset -Extra offset for line which contains only the start of a comment. It -can be either an integer or a cons cell of the form -@code{(@var{non-anchored-offset} . @var{anchored-offset})}, where -@var{non-anchored-offset} is the amount of offset given to -non-column-zero anchored comment-only lines, and @var{anchored-offset} -is the amount of offset to give column-zero anchored comment-only lines. -Just an integer as value is equivalent to @code{(@var{val} . 0)}. - -@item c-comment-start-regexp -@vindex c-comment-start-regexp -This buffer-local variable specifies how to recognize the start of a comment. - -@item c-hanging-comment-ender-p -@vindex c-hanging-comment-ender-p -If this variable is @code{nil}, @code{c-fill-paragraph} leaves the -comment terminator of a block comment on a line by itself. The default -value is @code{t}, which puts the comment-end delimiter @samp{*/} at the -end of the last line of the comment text. - -@item c-hanging-comment-starter-p -@vindex c-hanging-comment-starter-p -If this variable is @code{nil}, @code{c-fill-paragraph} leaves the -starting delimiter of a block comment on a line by itself. The default -value is @code{t}, which puts the comment-start delimiter @samp{/*} at -the beginning of the first line of the comment text. -@end table - @node Fortran @section Fortran Mode @cindex Fortran mode diff --git a/src/.arch-inventory b/src/.arch-inventory index 99b0d6cc539..a98d4c9932f 100644 --- a/src/.arch-inventory +++ b/src/.arch-inventory @@ -4,4 +4,6 @@ source ^\.(gdbinit|dbxinit)$ # Auto-generated files, which ignore precious ^(config\.stamp|config\.h|epaths\.h)$ +backup ^(stamp-oldxmenu|prefix-args|temacs|emacs|emacs-[0-9.]*)$ + # arch-tag: 277cc7ae-b3f5-44af-abf1-84c073164543 diff --git a/src/ChangeLog b/src/ChangeLog index 04133ca728c..5104ca9545c 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,82 @@ +2004-06-13 Richard M. Stallman + + * regex.h (CHAR_CLASS_MAX_LENGTH, re_wctype_t, re_wchar_t) + (re_wctype, re_iswctype, re_wctype_to_bit): + Non-function definitions moved here from regex.c. + + * regex.c (re_wctype, re_iswctype): Function defs longer static. + (CHAR_CLASS_MAX_LENGTH, re_wctype_t, re_wchar_t) + (re_wctype, re_iswctype, re_wctype_to_bit): + Non-function definitions moved to regex.h. + + * window.c (Fselect_window): Doc fix. + + * syntax.c: Include regex.h. + (skip_chars): New arg HANDLE_ISO_CLASSES. Callers changed. + If requested, make a list of classes, then check the scanned + chars for membership in them. + (in_classes): New function. + Doc fix. + + * keyboard.c (cmd_error): Don't call any_kboard_state + if inside a recursive edit level. + +2004-06-13 Lorentey K,Aa(Broly + + * keyboard.c (command_loop): Call any_kboard_state before + command_loop_2 when at top level. + +2004-06-13 Andreas Schwab + + * print.c (print_object): Always use %ld for printing EMACS_INT. + + * keyboard.c (cancel_hourglass_unwind): Return a value. + (modify_event_symbol): Always use %ld for printing EMACS_INT. + (Fexecute_extended_command): Likewise. + + * syntax.h (SYNTAX_ENTRY_FOLLOW_PARENT): Rename local variable to + avoid clashes. + (SYNTAX): Likewise. + (SYNTAX_WITH_FLAGS): Likewise. + (SYNTAX_MATCH): Likewise. + + * syntax.c (char_quoted): Avoid warning about undefined operation. + (find_defun_start): Likewise. + (scan_lists): Likewise. + (INC_FROM): Likewise. + (scan_sexps_forward): Likewise. + + * image.c: Include . + + * xfaces.c (face_attr_equal_p): Declare parameters. + +2004-06-13 Kenichi Handa + + * ccl.c (CCL_READ_CHAR): If hit EOF, set REG to -1. + +2004-06-12 Matthew Mundell + + * eval.c (Fdefun): Signal an error if NAME is not a symbol. + +2004-06-12 Kenichi Handa + + * ccl.c (CCL_CALL_FOR_MAP_INSTRUCTION): Save eof_ic in + ccl_prog_stack_struct and update it. + (CCL_INVALID_CMD): If CCL_DEBUG is defined, call ccl_debug_hook. + (CCL_READ_CHAR): Get instruction counter from eof_ic, not from + ccl->eof_ic on EOF. + (ccl_debug_hook): New function. + (struct ccl_prog_stack): New member eof_ic. + (ccl_driver): Handle EOF in subrountine call correctly. + +2004-06-11 Kenichi Handa + + * coding.c (decode_coding_string): Check CODING_FINISH_INTERRUPT. + +2004-06-11 Kim F. Storm + + * emacs.c (shut_down_emacs): Inhibit redisplay during shutdown. + 2004-06-11 Juanma Barranquero * keyboard.c (Fposn_at_point): Doc fix. diff --git a/src/ccl.c b/src/ccl.c index 4c3528075b4..5bff1f3a0ad 100644 --- a/src/ccl.c +++ b/src/ccl.c @@ -626,14 +626,17 @@ do \ { \ ccl_prog = ccl_prog_stack_struct[0].ccl_prog; \ ic = ccl_prog_stack_struct[0].ic; \ + eof_ic = ccl_prog_stack_struct[0].eof_ic; \ } \ CCL_INVALID_CMD; \ } \ ccl_prog_stack_struct[stack_idx].ccl_prog = ccl_prog; \ ccl_prog_stack_struct[stack_idx].ic = (ret_ic); \ + ccl_prog_stack_struct[stack_idx].eof_ic = eof_ic; \ stack_idx++; \ ccl_prog = called_ccl.prog; \ ic = CCL_HEADER_MAIN; \ + eof_ic = XFASTINT (ccl_prog[CCL_HEADER_EOF]); \ goto ccl_repeat; \ } \ while (0) @@ -710,6 +713,8 @@ while (0) /* Terminate CCL program because of invalid command. Should not occur in the normal case. */ +#ifndef CCL_DEBUG + #define CCL_INVALID_CMD \ do \ { \ @@ -718,6 +723,19 @@ do \ } \ while(0) +#else + +#define CCL_INVALID_CMD \ +do \ + { \ + ccl_debug_hook (this_ic); \ + ccl->status = CCL_STAT_INVALID_CMD; \ + goto ccl_error_handler; \ + } \ +while(0) + +#endif + /* Encode one character CH to multibyte form and write to the current output buffer. If CH is less than 256, CH is written as is. */ #define CCL_WRITE_CHAR(ch) \ @@ -809,7 +827,8 @@ while(0) } \ else if (ccl->last_block) \ { \ - ic = ccl->eof_ic; \ + REG = -1; \ + ic = eof_ic; \ goto ccl_repeat; \ } \ else \ @@ -854,12 +873,20 @@ while(0) #define CCL_DEBUG_BACKTRACE_LEN 256 int ccl_backtrace_table[CCL_DEBUG_BACKTRACE_LEN]; int ccl_backtrace_idx; + +int +ccl_debug_hook (int ic) +{ + return ic; +} + #endif struct ccl_prog_stack { Lisp_Object *ccl_prog; /* Pointer to an array of CCL code. */ int ic; /* Instruction Counter. */ + int eof_ic; /* Instruction Counter to jump on EOF. */ }; /* For the moment, we only support depth 256 of stack. */ @@ -888,8 +915,10 @@ ccl_driver (ccl, source, destination, src_bytes, dst_bytes, consumed) sequence. For that conversion, we remember how many more bytes we must keep in DESTINATION in this variable. */ int extra_bytes = ccl->eight_bit_control; + int eof_ic = ccl->eof_ic; + int eof_hit = 0; - if (ic >= ccl->eof_ic) + if (ic >= eof_ic) ic = CCL_HEADER_MAIN; if (ccl->buf_magnification == 0) /* We can't produce any bytes. */ @@ -1093,15 +1122,18 @@ ccl_driver (ccl, source, destination, src_bytes, dst_bytes, consumed) { ccl_prog = ccl_prog_stack_struct[0].ccl_prog; ic = ccl_prog_stack_struct[0].ic; + eof_ic = ccl_prog_stack_struct[0].eof_ic; } CCL_INVALID_CMD; } ccl_prog_stack_struct[stack_idx].ccl_prog = ccl_prog; ccl_prog_stack_struct[stack_idx].ic = ic; + ccl_prog_stack_struct[stack_idx].eof_ic = eof_ic; stack_idx++; ccl_prog = XVECTOR (AREF (slot, 1))->contents; ic = CCL_HEADER_MAIN; + eof_ic = XFASTINT (ccl_prog[CCL_HEADER_EOF]); } break; @@ -1131,6 +1163,9 @@ ccl_driver (ccl, source, destination, src_bytes, dst_bytes, consumed) stack_idx--; ccl_prog = ccl_prog_stack_struct[stack_idx].ccl_prog; ic = ccl_prog_stack_struct[stack_idx].ic; + eof_ic = ccl_prog_stack_struct[stack_idx].eof_ic; + if (eof_hit) + ic = eof_ic; break; } if (src) @@ -1367,7 +1402,8 @@ ccl_driver (ccl, source, destination, src_bytes, dst_bytes, consumed) src--; if (ccl->last_block) { - ic = ccl->eof_ic; + ic = eof_ic; + eof_hit = 1; goto ccl_repeat; } else diff --git a/src/coding.c b/src/coding.c index 328507de499..ed4b131b3a9 100644 --- a/src/coding.c +++ b/src/coding.c @@ -6316,6 +6316,7 @@ encode_coding_string (str, coding, nocopy) produced += coding->produced; produced_char += coding->produced_char; if (result == CODING_FINISH_NORMAL + || result == CODING_FINISH_INTERRUPT || (result == CODING_FINISH_INSUFFICIENT_SRC && coding->consumed == 0)) break; diff --git a/src/emacs.c b/src/emacs.c index 0fbc6f86b5f..5425d5d64a2 100644 --- a/src/emacs.c +++ b/src/emacs.c @@ -207,6 +207,8 @@ extern Lisp_Object Vinitial_window_system; extern Lisp_Object Vauto_save_list_file_name; +extern Lisp_Object Vinhibit_redisplay; + #ifdef USG_SHARED_LIBRARIES /* If nonzero, this is the place to put the end of the writable segment at startup. */ @@ -2009,6 +2011,9 @@ shut_down_emacs (sig, no_x, stuff) /* Prevent running of hooks from now on. */ Vrun_hooks = Qnil; + /* Don't update display from now on. */ + Vinhibit_redisplay = Qt; + /* If we are controlling the terminal, reset terminal modes. */ #ifdef EMACS_HAVE_TTY_PGRP { diff --git a/src/eval.c b/src/eval.c index e1da1def446..096755f9c77 100644 --- a/src/eval.c +++ b/src/eval.c @@ -617,6 +617,7 @@ usage: (defun NAME ARGLIST [DOCSTRING] BODY...) */) register Lisp_Object defn; fn_name = Fcar (args); + CHECK_SYMBOL (fn_name); defn = Fcons (Qlambda, Fcdr (args)); if (!NILP (Vpurify_flag)) defn = Fpurecopy (defn); diff --git a/src/image.c b/src/image.c index 3c53903b4b4..41762030b9e 100644 --- a/src/image.c +++ b/src/image.c @@ -23,6 +23,7 @@ Boston, MA 02111-1307, USA. */ #include #include #include +#include #ifdef HAVE_UNISTD_H #include diff --git a/src/keyboard.c b/src/keyboard.c index 1c4e9fe0c4a..de2bcf825a0 100644 --- a/src/keyboard.c +++ b/src/keyboard.c @@ -1147,7 +1147,8 @@ cmd_error (data) Vinhibit_quit = Qnil; #ifdef MULTI_KBOARD - any_kboard_state (); + if (command_loop_level == 0 && minibuf_level == 0) + any_kboard_state (); #endif return make_number (0); @@ -6261,12 +6262,8 @@ modify_event_symbol (symbol_num, modifiers, symbol_kind, name_alist_or_stem, { int len = SBYTES (name_alist_or_stem); char *buf = (char *) alloca (len + 50); - if (sizeof (int) == sizeof (EMACS_INT)) - sprintf (buf, "%s-%d", SDATA (name_alist_or_stem), - (int)XINT (symbol_int) + 1); - else if (sizeof (long) == sizeof (EMACS_INT)) - sprintf (buf, "%s-%ld", SDATA (name_alist_or_stem), - (long)XINT (symbol_int) + 1); + sprintf (buf, "%s-%ld", SDATA (name_alist_or_stem), + (long) XINT (symbol_int) + 1); value = intern (buf); } else if (name_table != 0 && name_table[symbol_num]) @@ -9790,23 +9787,9 @@ DEFUN ("execute-extended-command", Fexecute_extended_command, Sexecute_extended_ else if (CONSP (prefixarg) && XINT (XCAR (prefixarg)) == 4) strcpy (buf, "C-u "); else if (CONSP (prefixarg) && INTEGERP (XCAR (prefixarg))) - { - if (sizeof (int) == sizeof (EMACS_INT)) - sprintf (buf, "%d ", XINT (XCAR (prefixarg))); - else if (sizeof (long) == sizeof (EMACS_INT)) - sprintf (buf, "%ld ", (long) XINT (XCAR (prefixarg))); - else - abort (); - } + sprintf (buf, "%ld ", (long) XINT (XCAR (prefixarg))); else if (INTEGERP (prefixarg)) - { - if (sizeof (int) == sizeof (EMACS_INT)) - sprintf (buf, "%d ", XINT (prefixarg)); - else if (sizeof (long) == sizeof (EMACS_INT)) - sprintf (buf, "%ld ", (long) XINT (prefixarg)); - else - abort (); - } + sprintf (buf, "%ld ", (long) XINT (prefixarg)); /* This isn't strictly correct if execute-extended-command is bound to anything else. Perhaps it should use diff --git a/src/minibuf.c b/src/minibuf.c index 6dd55c5ea76..2f05a56d7dd 100644 --- a/src/minibuf.c +++ b/src/minibuf.c @@ -1290,20 +1290,21 @@ is used to further constrain the set of candidates. */) XSETFASTINT (zero, 0); /* Ignore this element if it fails to match all the regexps. */ - { - int count = SPECPDL_INDEX (); - specbind (Qcase_fold_search, completion_ignore_case ? Qt : Qnil); - for (regexps = Vcompletion_regexp_list; CONSP (regexps); - regexps = XCDR (regexps)) - { - tem = Fstring_match (XCAR (regexps), eltstring, zero); - if (NILP (tem)) - break; - } - unbind_to (count, Qnil); - if (CONSP (regexps)) - continue; - } + if (CONSP (Vcompletion_regexp_list)) + { + int count = SPECPDL_INDEX (); + specbind (Qcase_fold_search, completion_ignore_case ? Qt : Qnil); + for (regexps = Vcompletion_regexp_list; CONSP (regexps); + regexps = XCDR (regexps)) + { + tem = Fstring_match (XCAR (regexps), eltstring, zero); + if (NILP (tem)) + break; + } + unbind_to (count, Qnil); + if (CONSP (regexps)) + continue; + } /* Ignore this element if there is a predicate and the predicate doesn't like it. */ @@ -1541,20 +1542,21 @@ are ignored unless STRING itself starts with a space. */) XSETFASTINT (zero, 0); /* Ignore this element if it fails to match all the regexps. */ - { - int count = SPECPDL_INDEX (); - specbind (Qcase_fold_search, completion_ignore_case ? Qt : Qnil); - for (regexps = Vcompletion_regexp_list; CONSP (regexps); - regexps = XCDR (regexps)) - { - tem = Fstring_match (XCAR (regexps), eltstring, zero); - if (NILP (tem)) - break; - } - unbind_to (count, Qnil); - if (CONSP (regexps)) - continue; - } + if (CONSP (Vcompletion_regexp_list)) + { + int count = SPECPDL_INDEX (); + specbind (Qcase_fold_search, completion_ignore_case ? Qt : Qnil); + for (regexps = Vcompletion_regexp_list; CONSP (regexps); + regexps = XCDR (regexps)) + { + tem = Fstring_match (XCAR (regexps), eltstring, zero); + if (NILP (tem)) + break; + } + unbind_to (count, Qnil); + if (CONSP (regexps)) + continue; + } /* Ignore this element if there is a predicate and the predicate doesn't like it. */ @@ -1789,19 +1791,20 @@ the values STRING, PREDICATE and `lambda'. */) return call3 (alist, string, predicate, Qlambda); /* Reject this element if it fails to match all the regexps. */ - { - int count = SPECPDL_INDEX (); - specbind (Qcase_fold_search, completion_ignore_case ? Qt : Qnil); - for (regexps = Vcompletion_regexp_list; CONSP (regexps); - regexps = XCDR (regexps)) - { - if (NILP (Fstring_match (XCAR (regexps), - SYMBOLP (tem) ? string : tem, - Qnil))) - return unbind_to (count, Qnil); - } - unbind_to (count, Qnil); - } + if (CONSP (Vcompletion_regexp_list)) + { + int count = SPECPDL_INDEX (); + specbind (Qcase_fold_search, completion_ignore_case ? Qt : Qnil); + for (regexps = Vcompletion_regexp_list; CONSP (regexps); + regexps = XCDR (regexps)) + { + if (NILP (Fstring_match (XCAR (regexps), + SYMBOLP (tem) ? string : tem, + Qnil))) + return unbind_to (count, Qnil); + } + unbind_to (count, Qnil); + } /* Finally, check the predicate. */ if (!NILP (predicate)) diff --git a/src/print.c b/src/print.c index 0e07cd6fdd3..a91276d409a 100644 --- a/src/print.c +++ b/src/print.c @@ -1822,7 +1822,7 @@ print_object (obj, printcharfun, escapeflag) PRINTCHAR ('#'); PRINTCHAR ('&'); - sprintf (buf, "%d", XBOOL_VECTOR (obj)->size); + sprintf (buf, "%ld", (long) XBOOL_VECTOR (obj)->size); strout (buf, -1, -1, printcharfun, 0); PRINTCHAR ('\"'); @@ -1875,7 +1875,7 @@ print_object (obj, printcharfun, escapeflag) else if (WINDOWP (obj)) { strout ("#sequence_number)); + sprintf (buf, "%ld", (long) XFASTINT (XWINDOW (obj)->sequence_number)); strout (buf, -1, -1, printcharfun, 0); if (!NILP (XWINDOW (obj)->buffer)) { @@ -1896,8 +1896,8 @@ print_object (obj, printcharfun, escapeflag) PRINTCHAR (' '); strout (SDATA (SYMBOL_NAME (h->weak)), -1, -1, printcharfun, 0); PRINTCHAR (' '); - sprintf (buf, "%d/%d", XFASTINT (h->count), - XVECTOR (h->next)->size); + sprintf (buf, "%ld/%ld", (long) XFASTINT (h->count), + (long) XVECTOR (h->next)->size); strout (buf, -1, -1, printcharfun, 0); } sprintf (buf, " 0x%lx", (unsigned long) h); @@ -2020,7 +2020,7 @@ print_object (obj, printcharfun, escapeflag) break; case Lisp_Misc_Intfwd: - sprintf (buf, "#", *XINTFWD (obj)->intvar); + sprintf (buf, "#", (long) *XINTFWD (obj)->intvar); strout (buf, -1, -1, printcharfun, 0); break; diff --git a/src/process.c b/src/process.c index d2789e0e42e..71f38afc776 100644 --- a/src/process.c +++ b/src/process.c @@ -3619,6 +3619,8 @@ FLAGS is the current flags of the interface. */) #endif #endif /* HAVE_SOCKETS */ +/* Turn off input and output for process PROC. */ + void deactivate_process (proc) Lisp_Object proc; diff --git a/src/regex.c b/src/regex.c index db69275c312..31a10ee92e0 100644 --- a/src/regex.c +++ b/src/regex.c @@ -1961,41 +1961,10 @@ struct range_table_work_area } \ } while (0) -#if WIDE_CHAR_SUPPORT -/* The GNU C library provides support for user-defined character classes - and the functions from ISO C amendement 1. */ -# ifdef CHARCLASS_NAME_MAX -# define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX -# else -/* This shouldn't happen but some implementation might still have this - problem. Use a reasonable default value. */ -# define CHAR_CLASS_MAX_LENGTH 256 -# endif -typedef wctype_t re_wctype_t; -typedef wchar_t re_wchar_t; -# define re_wctype wctype -# define re_iswctype iswctype -# define re_wctype_to_bit(cc) 0 -#else -# define CHAR_CLASS_MAX_LENGTH 9 /* Namely, `multibyte'. */ -# define btowc(c) c - -/* Character classes. */ -typedef enum { RECC_ERROR = 0, - RECC_ALNUM, RECC_ALPHA, RECC_WORD, - RECC_GRAPH, RECC_PRINT, - RECC_LOWER, RECC_UPPER, - RECC_PUNCT, RECC_CNTRL, - RECC_DIGIT, RECC_XDIGIT, - RECC_BLANK, RECC_SPACE, - RECC_MULTIBYTE, RECC_NONASCII, - RECC_ASCII, RECC_UNIBYTE -} re_wctype_t; - -typedef int re_wchar_t; +#if ! WIDE_CHAR_SUPPORT /* Map a string to the char class it names (if any). */ -static re_wctype_t +re_wctype_t re_wctype (str) re_char *str; { @@ -2021,7 +1990,7 @@ re_wctype (str) } /* True iff CH is in the char class CC. */ -static boolean +boolean re_iswctype (ch, cc) int ch; re_wctype_t cc; diff --git a/src/regex.h b/src/regex.h index 1818d5f9681..f969c9c5e74 100644 --- a/src/regex.h +++ b/src/regex.h @@ -562,6 +562,49 @@ extern void regfree _RE_ARGS ((regex_t *__preg)); } #endif /* C++ */ +/* For platform which support the ISO C amendement 1 functionality we + support user defined character classes. */ +#if WIDE_CHAR_SUPPORT +/* Solaris 2.5 has a bug: must be included before . */ +# include +# include +#endif + +#if WIDE_CHAR_SUPPORT +/* The GNU C library provides support for user-defined character classes + and the functions from ISO C amendement 1. */ +# ifdef CHARCLASS_NAME_MAX +# define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX +# else +/* This shouldn't happen but some implementation might still have this + problem. Use a reasonable default value. */ +# define CHAR_CLASS_MAX_LENGTH 256 +# endif +typedef wctype_t re_wctype_t; +typedef wchar_t re_wchar_t; +# define re_wctype wctype +# define re_iswctype iswctype +# define re_wctype_to_bit(cc) 0 +#else +# define CHAR_CLASS_MAX_LENGTH 9 /* Namely, `multibyte'. */ +# define btowc(c) c + +/* Character classes. */ +typedef enum { RECC_ERROR = 0, + RECC_ALNUM, RECC_ALPHA, RECC_WORD, + RECC_GRAPH, RECC_PRINT, + RECC_LOWER, RECC_UPPER, + RECC_PUNCT, RECC_CNTRL, + RECC_DIGIT, RECC_XDIGIT, + RECC_BLANK, RECC_SPACE, + RECC_MULTIBYTE, RECC_NONASCII, + RECC_ASCII, RECC_UNIBYTE +} re_wctype_t; + +typedef int re_wchar_t; + +#endif /* not WIDE_CHAR_SUPPORT */ + #endif /* regex.h */ /* diff --git a/src/syntax.c b/src/syntax.c index 4b26d04cd30..62612620f03 100644 --- a/src/syntax.c +++ b/src/syntax.c @@ -26,6 +26,7 @@ Boston, MA 02111-1307, USA. */ #include "buffer.h" #include "charset.h" #include "keymap.h" +#include "regex.h" /* Make syntax table lookup grant data in gl_state. */ #define SYNTAX_ENTRY_VIA_PROPERTY @@ -97,11 +98,12 @@ static int find_start_modiff; static int find_defun_start P_ ((int, int)); static int back_comment P_ ((int, int, int, int, int, int *, int *)); static int char_quoted P_ ((int, int)); -static Lisp_Object skip_chars P_ ((int, int, Lisp_Object, Lisp_Object)); +static Lisp_Object skip_chars P_ ((int, int, Lisp_Object, Lisp_Object, int)); static Lisp_Object scan_lists P_ ((int, int, int, int)); static void scan_sexps_forward P_ ((struct lisp_parse_state *, int, int, int, int, int, Lisp_Object, int)); +static int in_classes P_ ((int, Lisp_Object)); struct gl_state_s gl_state; /* Global state of syntax parser. */ @@ -292,8 +294,11 @@ char_quoted (charpos, bytepos) while (bytepos >= beg) { + int c; + UPDATE_SYNTAX_TABLE_BACKWARD (charpos); - code = SYNTAX (FETCH_CHAR (bytepos)); + c = FETCH_CHAR (bytepos); + code = SYNTAX (c); if (! (code == Scharquote || code == Sescape)) break; @@ -380,12 +385,16 @@ find_defun_start (pos, pos_byte) gl_state.use_global = 0; while (PT > BEGV) { + int c; + /* Open-paren at start of line means we may have found our defun-start. */ - if (SYNTAX (FETCH_CHAR (PT_BYTE)) == Sopen) + c = FETCH_CHAR (PT_BYTE); + if (SYNTAX (c) == Sopen) { SETUP_SYNTAX_TABLE (PT + 1, -1); /* Try again... */ - if (SYNTAX (FETCH_CHAR (PT_BYTE)) == Sopen) + c = FETCH_CHAR (PT_BYTE); + if (SYNTAX (c) == Sopen) break; /* Now fallback to the default value. */ gl_state.current_syntax_table = current_buffer->syntax_table; @@ -1314,13 +1323,13 @@ except that `]' is never special and `\\' quotes `^', `-' or `\\' (but not as the end of a range; quoting is never needed there). Thus, with arg "a-zA-Z", this skips letters stopping before first nonletter. With arg "^a-zA-Z", skips nonletters stopping before first letter. -Returns the distance traveled, either zero or positive. -Note that char classes, e.g. `[:alpha:]', are not currently supported; -they will be treated as literals. */) +Char classes, e.g. `[:alpha:]', are supported. + +Returns the distance traveled, either zero or positive. */) (string, lim) Lisp_Object string, lim; { - return skip_chars (1, 0, string, lim); + return skip_chars (1, 0, string, lim, 1); } DEFUN ("skip-chars-backward", Fskip_chars_backward, Sskip_chars_backward, 1, 2, 0, @@ -1330,7 +1339,7 @@ Returns the distance traveled, either zero or negative. */) (string, lim) Lisp_Object string, lim; { - return skip_chars (0, 0, string, lim); + return skip_chars (0, 0, string, lim, 1); } DEFUN ("skip-syntax-forward", Fskip_syntax_forward, Sskip_syntax_forward, 1, 2, 0, @@ -1342,7 +1351,7 @@ This function returns the distance traveled, either zero or positive. */) (syntax, lim) Lisp_Object syntax, lim; { - return skip_chars (1, 1, syntax, lim); + return skip_chars (1, 1, syntax, lim, 0); } DEFUN ("skip-syntax-backward", Fskip_syntax_backward, Sskip_syntax_backward, 1, 2, 0, @@ -1354,13 +1363,14 @@ This function returns the distance traveled, either zero or negative. */) (syntax, lim) Lisp_Object syntax, lim; { - return skip_chars (0, 1, syntax, lim); + return skip_chars (0, 1, syntax, lim, 0); } static Lisp_Object -skip_chars (forwardp, syntaxp, string, lim) +skip_chars (forwardp, syntaxp, string, lim, handle_iso_classes) int forwardp, syntaxp; Lisp_Object string, lim; + int handle_iso_classes; { register unsigned int c; unsigned char fastmap[0400]; @@ -1376,12 +1386,14 @@ skip_chars (forwardp, syntaxp, string, lim) int size_byte; const unsigned char *str; int len; + Lisp_Object iso_classes; CHECK_STRING (string); char_ranges = (int *) alloca (SCHARS (string) * (sizeof (int)) * 2); string_multibyte = STRING_MULTIBYTE (string); str = SDATA (string); size_byte = SBYTES (string); + iso_classes = Qnil; /* Adjust the multibyteness of the string to that of the buffer. */ if (multibyte != string_multibyte) @@ -1437,6 +1449,45 @@ skip_chars (forwardp, syntaxp, string, lim) fastmap[syntax_spec_code[c & 0377]] = 1; else { + if (handle_iso_classes && c == '[' + && i_byte < size_byte + && STRING_CHAR (str + i_byte, size_byte - i_byte) == ':') + { + const unsigned char *class_beg = str + i_byte + 1; + const unsigned char *class_end = class_beg; + const unsigned char *class_limit = str + size_byte; + /* Leave room for the null. */ + unsigned char class_name[CHAR_CLASS_MAX_LENGTH + 1]; + re_wctype_t cc; + + if (class_limit - class_beg > CHAR_CLASS_MAX_LENGTH) + class_limit = class_beg + CHAR_CLASS_MAX_LENGTH; + + while (class_end != class_limit + && ! (*class_end >= 0200 + || *class_end <= 040 + || (*class_end == ':' + && class_end[1] == ']'))) + class_end++; + + if (class_end == class_limit + || *class_end >= 0200 + || *class_end <= 040) + error ("Invalid ISO C character class"); + + bcopy (class_beg, class_name, class_end - class_beg); + class_name[class_end - class_beg] = 0; + + cc = re_wctype (class_name); + if (cc == 0) + error ("Invalid ISO C character class"); + + iso_classes = Fcons (make_number (cc), iso_classes); + + i_byte = class_end + 2 - str; + continue; + } + if (c == '\\') { if (i_byte == size_byte) @@ -1630,6 +1681,15 @@ skip_chars (forwardp, syntaxp, string, lim) stop = endp; } c = STRING_CHAR_AND_LENGTH (p, MAX_MULTIBYTE_LENGTH, nbytes); + + if (! NILP (iso_classes) && in_classes (c, iso_classes)) + { + if (negate) + break; + else + goto fwd_ok; + } + if (SINGLE_BYTE_CHAR_P (c)) { if (!fastmap[c]) @@ -1652,6 +1712,7 @@ skip_chars (forwardp, syntaxp, string, lim) if (!(negate ^ (i < n_char_ranges))) break; } + fwd_ok: p += nbytes, pos++, pos_byte += nbytes; } else @@ -1664,8 +1725,19 @@ skip_chars (forwardp, syntaxp, string, lim) p = GAP_END_ADDR; stop = endp; } + + if (!NILP (iso_classes) && in_classes (*p, iso_classes)) + { + if (negate) + break; + else + goto fwd_ok; + } + if (!fastmap[*p]) break; + + fwd_unibyte_ok: p++, pos++; } } @@ -1691,6 +1763,15 @@ skip_chars (forwardp, syntaxp, string, lim) p = prev_p - 1, c = *p, nbytes = 1; else c = STRING_CHAR (p, MAX_MULTIBYTE_LENGTH); + + if (! NILP (iso_classes) && in_classes (c, iso_classes)) + { + if (negate) + break; + else + goto back_ok; + } + if (SINGLE_BYTE_CHAR_P (c)) { if (!fastmap[c]) @@ -1705,6 +1786,7 @@ skip_chars (forwardp, syntaxp, string, lim) if (!(negate ^ (i < n_char_ranges))) break; } + back_ok: pos--, pos_byte -= nbytes; } else @@ -1717,8 +1799,19 @@ skip_chars (forwardp, syntaxp, string, lim) p = GPT_ADDR; stop = endp; } + + if (! NILP (iso_classes) && in_classes (p[-1], iso_classes)) + { + if (negate) + break; + else + goto fwd_ok; + } + if (!fastmap[p[-1]]) break; + + back_unibyte_ok: p--, pos--; } } @@ -1741,6 +1834,30 @@ skip_chars (forwardp, syntaxp, string, lim) return make_number (PT - start_point); } } + +/* Return 1 if character C belongs to one of the ISO classes + in the list ISO_CLASSES. Each class is represented by an + integer which is its type according to re_wctype. */ + +static int +in_classes (c, iso_classes) + int c; + Lisp_Object iso_classes; +{ + int fits_class = 0; + + while (! NILP (iso_classes)) + { + Lisp_Object elt; + elt = XCAR (iso_classes); + iso_classes = XCDR (iso_classes); + + if (re_iswctype (c, XFASTINT (elt))) + fits_class = 1; + } + + return fits_class; +} /* Jump over a comment, assuming we are at the beginning of one. FROM is the current position. @@ -2124,7 +2241,7 @@ scan_lists (from, count, depth, sexpflag) INC_BOTH (from, from_byte); UPDATE_SYNTAX_TABLE_FORWARD (from); if (from < stop && comstart_first - && SYNTAX_COMSTART_SECOND (FETCH_CHAR (from_byte)) + && (c = FETCH_CHAR (from_byte), SYNTAX_COMSTART_SECOND (c)) && parse_sexp_ignore_comments) { /* we have encountered a comment start sequence and we @@ -2449,7 +2566,7 @@ scan_lists (from, count, depth, sexpflag) Fcons (build_string ("Unbalanced parentheses"), Fcons (make_number (last_good), Fcons (make_number (from), Qnil)))); - + abort (); /* NOTREACHED */ } @@ -2588,8 +2705,8 @@ scan_sexps_forward (stateptr, from, from_byte, end, targetdepth, #define INC_FROM \ do { prev_from = from; \ prev_from_byte = from_byte; \ - prev_from_syntax \ - = SYNTAX_WITH_FLAGS (FETCH_CHAR (prev_from_byte)); \ + temp = FETCH_CHAR (prev_from_byte); \ + prev_from_syntax = SYNTAX_WITH_FLAGS (temp); \ INC_BOTH (from, from_byte); \ if (from < end) \ UPDATE_SYNTAX_TABLE_FORWARD (from); \ @@ -2664,7 +2781,8 @@ do { prev_from = from; \ curlevel->last = -1; SETUP_SYNTAX_TABLE (prev_from, 1); - prev_from_syntax = SYNTAX_WITH_FLAGS (FETCH_CHAR (prev_from_byte)); + temp = FETCH_CHAR (prev_from_byte); + prev_from_syntax = SYNTAX_WITH_FLAGS (temp); UPDATE_SYNTAX_TABLE_FORWARD (from); /* Enter the loop at a place appropriate for initial state. */ @@ -2743,7 +2861,8 @@ do { prev_from = from; \ while (from < end) { /* Some compilers can't handle this inside the switch. */ - temp = SYNTAX (FETCH_CHAR (from_byte)); + temp = FETCH_CHAR (from_byte); + temp = SYNTAX (temp); switch (temp) { case Scharquote: diff --git a/src/syntax.h b/src/syntax.h index f86ab8fc265..cb1eeb62687 100644 --- a/src/syntax.h +++ b/src/syntax.h @@ -68,16 +68,16 @@ enum syntaxcode #ifdef __GNUC__ #define SYNTAX_ENTRY_FOLLOW_PARENT(table, c) \ - ({ Lisp_Object tbl = table; \ - Lisp_Object temp = XCHAR_TABLE (tbl)->contents[(c)]; \ - while (NILP (temp)) \ + ({ Lisp_Object _syntax_tbl = (table); \ + Lisp_Object _syntax_temp = XCHAR_TABLE (_syntax_tbl)->contents[(c)]; \ + while (NILP (_syntax_temp)) \ { \ - tbl = XCHAR_TABLE (tbl)->parent; \ - if (NILP (tbl)) \ + _syntax_tbl = XCHAR_TABLE (_syntax_tbl)->parent; \ + if (NILP (_syntax_tbl)) \ break; \ - temp = XCHAR_TABLE (tbl)->contents[(c)]; \ + _syntax_temp = XCHAR_TABLE (_syntax_tbl)->contents[(c)]; \ } \ - temp; }) + _syntax_temp; }) #else extern Lisp_Object syntax_temp; extern Lisp_Object syntax_parent_lookup P_ ((Lisp_Object, int)); @@ -117,24 +117,24 @@ extern Lisp_Object syntax_parent_lookup P_ ((Lisp_Object, int)); #ifdef __GNUC__ #define SYNTAX(c) \ - ({ Lisp_Object temp; \ - temp = SYNTAX_ENTRY (c); \ - (CONSP (temp) \ - ? (enum syntaxcode) (XINT (XCAR (temp)) & 0xff) \ + ({ Lisp_Object _syntax_temp; \ + _syntax_temp = SYNTAX_ENTRY (c); \ + (CONSP (_syntax_temp) \ + ? (enum syntaxcode) (XINT (XCAR (_syntax_temp)) & 0xff) \ : Swhitespace); }) #define SYNTAX_WITH_FLAGS(c) \ - ({ Lisp_Object temp; \ - temp = SYNTAX_ENTRY (c); \ - (CONSP (temp) \ - ? XINT (XCAR (temp)) \ + ({ Lisp_Object _syntax_temp; \ + _syntax_temp = SYNTAX_ENTRY (c); \ + (CONSP (_syntax_temp) \ + ? XINT (XCAR (_syntax_temp)) \ : (int) Swhitespace); }) #define SYNTAX_MATCH(c) \ - ({ Lisp_Object temp; \ - temp = SYNTAX_ENTRY (c); \ - (CONSP (temp) \ - ? XCDR (temp) \ + ({ Lisp_Object _syntax_temp; \ + _syntax_temp = SYNTAX_ENTRY (c); \ + (CONSP (_syntax_temp) \ + ? XCDR (_syntax_temp) \ : Qnil); }) #else #define SYNTAX(c) \ diff --git a/src/window.c b/src/window.c index 03ca22dd525..ff0d810a9c8 100644 --- a/src/window.c +++ b/src/window.c @@ -3064,8 +3064,8 @@ defaults. */) DEFUN ("select-window", Fselect_window, Sselect_window, 1, 2, 0, doc: /* Select WINDOW. Most editing will apply to WINDOW's buffer. -If WINDOW is not already selected, also make WINDOW's buffer current. -Also make WINDOW the frame's selected window. +If WINDOW is not already selected, make WINDOW's buffer current +and make WINDOW the frame's selected window. Optional second arg NORECORD non-nil means do not put this buffer at the front of the list of recently selected ones. diff --git a/src/xfaces.c b/src/xfaces.c index 505e601c57d..171472dbb31 100644 --- a/src/xfaces.c +++ b/src/xfaces.c @@ -4915,6 +4915,7 @@ If FRAME is omitted or nil, use the selected frame. */) static INLINE int face_attr_equal_p (v1, v2) + Lisp_Object v1, v2; { /* Type can differ, e.g. when one attribute is unspecified, i.e. nil, and the other is specified. */