mirror of
https://git.savannah.gnu.org/git/emacs.git
synced 2024-12-25 10:47:00 +00:00
* lisp/character-fold.el: New file (Bug#20887)
(character-fold-to-regexp): New function. * lisp/replace.el (replace-search): Check value of `character-fold-search'. * lisp/isearch.el: Move character-folding code to character-fold.el (isearch-toggle-character-fold): New command. (isearch-mode-map): Bind it to "\M-sf". (isearch-mode): Check value of `character-fold-search'.
This commit is contained in:
parent
ce2e5c7f79
commit
795c918916
109
lisp/character-fold.el
Normal file
109
lisp/character-fold.el
Normal file
@ -0,0 +1,109 @@
|
||||
;;; chracter-fold.el --- matching unicode characters to their ascii similars -*- lexical-binding: t; -*-
|
||||
|
||||
;; Copyright (C) 2015 Free Software Foundation, Inc.
|
||||
|
||||
;; Maintainer: emacs-devel@gnu.org
|
||||
;; Keywords: matching
|
||||
|
||||
;; This file is part of GNU Emacs.
|
||||
|
||||
;; GNU Emacs is free software: you can redistribute it and/or modify
|
||||
;; it under the terms of the GNU General Public License as published by
|
||||
;; the Free Software Foundation, either version 3 of the License, or
|
||||
;; (at your option) any later version.
|
||||
|
||||
;; GNU Emacs is distributed in the hope that it will be useful,
|
||||
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
;; GNU General Public License for more details.
|
||||
|
||||
;; You should have received a copy of the GNU General Public License
|
||||
;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
;;; Code:
|
||||
|
||||
|
||||
;;;###autoload
|
||||
(defvar character-fold-search t
|
||||
"Non-nil if searches should fold similar characters.
|
||||
This means some characters will match entire groups of charactes.
|
||||
For instance, \" will match all variants of double quotes, and
|
||||
the letter a will match all of its accented versions (and then
|
||||
some).")
|
||||
|
||||
(defconst character-fold-table
|
||||
(eval-when-compile
|
||||
(let ((equiv (make-char-table 'character-fold-table)))
|
||||
;; Compile a list of all complex characters that each simple
|
||||
;; character should match.
|
||||
(map-char-table
|
||||
(lambda (i dec)
|
||||
(when (consp dec)
|
||||
;; Discard a possible formatting tag.
|
||||
(when (symbolp (car dec))
|
||||
(setq dec (cdr dec)))
|
||||
;; Skip trivial cases lika ?a decomposing to (?a).
|
||||
(unless (or (and (eq i (car dec))
|
||||
(not (cdr dec))))
|
||||
(let ((d dec) k found multiletter)
|
||||
(while (and d (not found))
|
||||
(setq k (pop d))
|
||||
;; Is k a number or letter, per unicode standard?
|
||||
(setq found (memq (get-char-code-property k 'general-category)
|
||||
'(Lu Ll Lt Lm Lo Nd Nl No))))
|
||||
(if found
|
||||
;; Check if the decomposition has more than one letter,
|
||||
;; because then we don't want the first letter to match
|
||||
;; the decomposition.
|
||||
(dolist (k d)
|
||||
(when (memq (get-char-code-property k 'general-category)
|
||||
'(Lu Ll Lt Lm Lo Nd Nl No))
|
||||
(setq multiletter t)))
|
||||
;; If there's no number or letter on the
|
||||
;; decomposition, take the first character in it.
|
||||
(setq found (car-safe dec)))
|
||||
;; Add i to the list of characters that k can
|
||||
;; represent. Also possibly add its decomposition, so we can
|
||||
;; match multi-char representations like (format "a%c" 769)
|
||||
(when (and found (not (eq i k)))
|
||||
(let ((chars (cons (char-to-string i) (aref equiv k))))
|
||||
(aset equiv k
|
||||
(if multiletter chars
|
||||
(cons (apply #'string dec) chars)))))))))
|
||||
(unicode-property-table-internal 'decomposition))
|
||||
(dolist (it '((?\" """ "“" "”" "”" "„" "⹂" "〞" "‟" "‟" "❞" "❝" "❠" "“" "„" "〝" "〟" "🙷" "🙶" "🙸" "«" "»")
|
||||
(?' "❟" "❛" "❜" "‘" "’" "‚" "‛" "‚" "" "❮" "❯" "‹" "›")
|
||||
(?` "❛" "‘" "‛" "" "❮" "‹")
|
||||
(?\s "\t" "\r" "\n")))
|
||||
(let ((idx (car it))
|
||||
(chars (cdr it)))
|
||||
(aset equiv idx (append chars (aref equiv idx)))))
|
||||
(map-char-table
|
||||
(lambda (i v) (let ((re (regexp-opt (cons (char-to-string i) v))))
|
||||
(if (consp i)
|
||||
(set-char-table-range equiv i re)
|
||||
(aset equiv i re))))
|
||||
equiv)
|
||||
equiv))
|
||||
"Used for folding characters of the same group during search.")
|
||||
|
||||
;;;###autoload
|
||||
(defun character-fold-to-regexp (string &optional lax)
|
||||
"Return a regexp matching anything that character-folds into STRING.
|
||||
If `character-fold-search' is nil, `regexp-quote' string.
|
||||
Otherwise, any character in STRING that has an entry in
|
||||
`character-fold-table' is replaced with that entry (which is a
|
||||
regexp) and other characters are `regexp-quote'd.
|
||||
If LAX is non-nil, any single whitespace character is allowed to
|
||||
match any number of times."
|
||||
(if character-fold-search
|
||||
(apply #'concat
|
||||
(mapcar (lambda (c) (let ((out (or (aref character-fold-table c)
|
||||
(regexp-quote (string c)))))
|
||||
(if (and lax (memq c '(?\s ?\t ?\r ?\n )))
|
||||
(concat out "+")
|
||||
out)))
|
||||
string))
|
||||
(regexp-quote string)))
|
||||
|
||||
;;; chracter-fold.el ends here
|
@ -272,79 +272,6 @@ Default value, nil, means edit the string instead."
|
||||
:version "23.1"
|
||||
:group 'isearch)
|
||||
|
||||
(defvar isearch-character-fold-search t
|
||||
"Non-nil if isearch should fold similar characters.
|
||||
This means some characters will match entire groups of charactes.
|
||||
For instance, \" will match all variants of double quotes, and
|
||||
the letter a will match all of its accented versions (and then
|
||||
some).")
|
||||
|
||||
(defconst isearch--character-fold-extras
|
||||
'((?\" """ "“" "”" "”" "„" "⹂" "〞" "‟" "‟" "❞" "❝" "❠" "“" "„" "〝" "〟" "🙷" "🙶" "🙸" "«" "»")
|
||||
(?' "❟" "❛" "❜" "‘" "’" "‚" "‛" "‚" "" "❮" "❯" "‹" "›")
|
||||
(?` "❛" "‘" "‛" "" "❮" "‹")
|
||||
;; `isearch-character-fold-search' doesn't interact with
|
||||
;; `isearch-lax-whitespace' yet. So we need to add this here.
|
||||
(?\s " " "\r" "\n"))
|
||||
"Extra entries to add to `isearch--character-fold-table'.
|
||||
Used to specify character folding not covered by unicode
|
||||
decomposition. Each car is a character and each cdr is a list of
|
||||
strings that it should match (itself excluded).")
|
||||
|
||||
(defvar isearch--character-fold-table
|
||||
(eval-when-compile
|
||||
(require 'subr-x)
|
||||
(let ((equiv (make-char-table 'character-fold-table)))
|
||||
;; Compile a list of all complex characters that each simple
|
||||
;; character should match.
|
||||
(dotimes (i (length equiv))
|
||||
(let ((dd (get-char-code-property i 'decomposition))
|
||||
d k found)
|
||||
;; Skip trivial cases (?a decomposes to (?a)).
|
||||
(unless (and (eq i (car dd)))
|
||||
;; Discard a possible formatting tag.
|
||||
(when (symbolp (car-safe dd))
|
||||
(setq dd (cdr dd)))
|
||||
;; Is k a number or letter, per unicode standard?
|
||||
(setq d dd)
|
||||
(while (and d (not found))
|
||||
(setq k (pop d))
|
||||
(setq found (and (characterp k)
|
||||
(memq (get-char-code-property k 'general-category)
|
||||
'(Lu Ll Lt Lm Lo Nd Nl No)))))
|
||||
;; If there's no number or letter on the
|
||||
;; decomposition, find the first character in it.
|
||||
(setq d dd)
|
||||
(while (and d (not found))
|
||||
(setq k (pop d))
|
||||
(setq found (characterp k)))
|
||||
;; Add i to the list of characters that k can
|
||||
;; represent. Also add its decomposition, so we can
|
||||
;; match multi-char representations like (format "a%c" 769)
|
||||
(when (and found (not (eq i k)))
|
||||
(aset equiv k (cons (apply #'string dd)
|
||||
(cons (char-to-string i)
|
||||
(aref equiv k))))))))
|
||||
(dotimes (i (length equiv))
|
||||
(when-let ((chars (append (cdr (assq i isearch--character-fold-extras))
|
||||
(aref equiv i))))
|
||||
(aset equiv i (regexp-opt (cons (char-to-string i) chars)))))
|
||||
equiv))
|
||||
"Used for folding characters of the same group during search.")
|
||||
|
||||
(defun isearch--character-folded-regexp (string)
|
||||
"Return a regexp matching anything that character-folds into STRING.
|
||||
If `isearch-character-fold-search' is nil, `regexp-quote' string.
|
||||
Otherwise, any character in STRING that has an entry in
|
||||
`isearch--character-fold-table' is replaced with that entry
|
||||
\(which is a regexp) and other characters are `regexp-quote'd."
|
||||
(if isearch-character-fold-search
|
||||
(apply #'concat
|
||||
(mapcar (lambda (c) (or (aref isearch--character-fold-table c)
|
||||
(regexp-quote (string c))))
|
||||
string))
|
||||
(regexp-quote string)))
|
||||
|
||||
(defcustom isearch-lazy-highlight t
|
||||
"Controls the lazy-highlighting during incremental search.
|
||||
When non-nil, all text in the buffer matching the current search
|
||||
@ -592,6 +519,7 @@ This is like `describe-bindings', but displays only Isearch keys."
|
||||
(define-key map "\M-sw" 'isearch-toggle-word)
|
||||
(define-key map "\M-s_" 'isearch-toggle-symbol)
|
||||
(define-key map "\M-s " 'isearch-toggle-lax-whitespace)
|
||||
(define-key map "\M-s'" #'isearch-toggle-character-fold)
|
||||
|
||||
(define-key map [?\M-%] 'isearch-query-replace)
|
||||
(define-key map [?\C-\M-%] 'isearch-query-replace-regexp)
|
||||
@ -907,6 +835,9 @@ See the command `isearch-forward-symbol' for more information."
|
||||
;; isearch-forward-regexp isearch-backward-regexp)
|
||||
;; "List of commands for which isearch-mode does not recursive-edit.")
|
||||
|
||||
(autoload 'character-fold-to-regexp "character-fold")
|
||||
(put 'character-fold-to-regexp 'isearch-message-prefix "char-fold ")
|
||||
(defvar character-fold-search)
|
||||
|
||||
(defun isearch-mode (forward &optional regexp op-fun recursive-edit word)
|
||||
"Start Isearch minor mode.
|
||||
@ -931,7 +862,8 @@ convert the search string to a regexp used by regexp search functions."
|
||||
;; Initialize global vars.
|
||||
(setq isearch-forward forward
|
||||
isearch-regexp regexp
|
||||
isearch-word word
|
||||
isearch-word (or word (and character-fold-search
|
||||
'character-fold-to-regexp))
|
||||
isearch-op-fun op-fun
|
||||
isearch-last-case-fold-search isearch-case-fold-search
|
||||
isearch-case-fold-search case-fold-search
|
||||
@ -1581,6 +1513,15 @@ Use `isearch-exit' to quit without signaling."
|
||||
(setq isearch-success t isearch-adjusted t)
|
||||
(isearch-update))
|
||||
|
||||
(defun isearch-toggle-character-fold ()
|
||||
"Toggle character folding in searching on or off."
|
||||
(interactive)
|
||||
(setq isearch-word (unless (eq isearch-word #'character-fold-to-regexp)
|
||||
#'character-fold-to-regexp))
|
||||
(if isearch-word (setq isearch-regexp nil))
|
||||
(setq isearch-success t isearch-adjusted t)
|
||||
(isearch-update))
|
||||
|
||||
(defun isearch-toggle-lax-whitespace ()
|
||||
"Toggle whitespace matching in searching on or off.
|
||||
In ordinary search, toggles the value of the variable
|
||||
@ -2680,11 +2621,6 @@ Can be changed via `isearch-search-fun-function' for special needs."
|
||||
're-search-backward-lax-whitespace))
|
||||
(isearch-regexp
|
||||
(if isearch-forward 're-search-forward 're-search-backward))
|
||||
(isearch-character-fold-search
|
||||
(lambda (string &optional bound noerror count)
|
||||
(funcall (if isearch-forward #'re-search-forward #'re-search-backward)
|
||||
(isearch--character-folded-regexp string)
|
||||
bound noerror count)))
|
||||
((and isearch-lax-whitespace search-whitespace-regexp)
|
||||
(if isearch-forward
|
||||
'search-forward-lax-whitespace
|
||||
|
@ -2012,8 +2012,9 @@ It is called with three arguments, as if it were
|
||||
;; outside of this function because then another I-search
|
||||
;; used after `recursive-edit' might override them.
|
||||
(let* ((isearch-regexp regexp-flag)
|
||||
(isearch-word delimited-flag)
|
||||
(isearch-character-fold-search replace-character-fold)
|
||||
(isearch-word (or delimited-flag
|
||||
(and replace-character-fold
|
||||
#'character-fold-to-regexp)))
|
||||
(isearch-lax-whitespace
|
||||
replace-lax-whitespace)
|
||||
(isearch-regexp-lax-whitespace
|
||||
|
Loading…
Reference in New Issue
Block a user