1
0
mirror of https://git.savannah.gnu.org/git/emacs.git synced 2025-01-27 19:31:38 +00:00

* lisp/character-fold.el: Remove special case-folding support

(character-fold-to-regexp): Remove special code for
case-folding.  Char-fold search still respects the
`case-fold-search' variable (i.e., f matches F).  This only
removes the code that was added to ensure that f also matched
all chars that F matched.  For instance, after this commit, f
no longer matches 𝔽.

This was necessary because the logic created a regexp with
2^(length of the string) redundant paths.  So, when a very
long string "almost" matched, Emacs took a very long time to
figure out that it didn't.  This became particularly relevant
because isearch's lazy-highlight does a search bounded by (1-
match-end) (which, in most circumstances, is a search that
almost matches).  A recipe for this can be found in bug#22090.
This commit is contained in:
Artur Malabarba 2015-12-04 15:12:10 +00:00
parent 3a9df7589a
commit 30f3432e95
2 changed files with 25 additions and 18 deletions

View File

@ -157,8 +157,6 @@ FROM is for internal use. It specifies an index in the STRING
from which to start."
(let* ((spaces 0)
(multi-char-table (char-table-extra-slot character-fold-table 0))
(lower-case-table (current-case-table))
(upper-case-table (char-table-extra-slot lower-case-table 0))
(i (or from 0))
(end (length string))
(out nil))
@ -178,21 +176,9 @@ from which to start."
(setq spaces 0))
(let ((regexp (or (aref character-fold-table c)
(regexp-quote (string c))))
(alist nil))
;; Long string. The regexp would probably be too long.
(unless (> end 50)
(setq alist (aref multi-char-table c))
(when case-fold-search
(let ((other-c (aref lower-case-table c)))
(when (or (not other-c)
(eq other-c c))
(setq other-c (aref upper-case-table c)))
(when other-c
(setq alist (append alist (aref multi-char-table other-c)))
(setq regexp (concat "\\(?:" regexp "\\|"
(or (aref character-fold-table other-c)
(regexp-quote (string other-c)))
"\\)"))))))
;; Long string. The regexp would probably be too long.
(alist (unless (> end 50)
(aref multi-char-table c))))
(push (let ((matched-entries nil)
(max-length 0))
(dolist (entry alist)
@ -229,7 +215,7 @@ from which to start."
(push (character-fold--make-space-string spaces) out))
(let ((regexp (apply #'concat (nreverse out))))
;; Limited by `MAX_BUF_SIZE' in `regex.c'.
(if (> (length regexp) 10000)
(if (> (length regexp) 5000)
(regexp-quote string)
regexp))))

View File

@ -98,6 +98,27 @@
;; (character-fold--test-match-exactly "a12" "xxyy")
))
(ert-deftest character-fold--speed-test ()
(dolist (string (append '("tty-set-up-initial-frame-face"
"tty-set-up-initial-frame-face-frame-faceframe-faceframe-faceframe-face")
(mapcar #'character-fold--random-word '(10 50 100
50 100))))
(message "Testing %s" string)
;; Make sure we didn't just fallback on the trivial search.
(should-not (string= (regexp-quote string)
(character-fold-to-regexp string)))
(with-temp-buffer
(save-excursion (insert string))
(let ((time (time-to-seconds (current-time))))
;; Our initial implementation of case-folding in char-folding
;; created a lot of redundant paths in the regexp. Because of
;; that, if a really long string "almost" matches, the regexp
;; engine took a long time to realise that it doesn't match.
(should-not (character-fold-search-forward (concat string "c") nil 'noerror))
;; Ensure it took less than a second.
(should (< (- (time-to-seconds (current-time))
time)
1))))))
(provide 'character-fold-tests)
;;; character-fold-tests.el ends here