mirror of
https://git.savannah.gnu.org/git/emacs.git
synced 2024-12-16 09:50:25 +00:00
More string-search optimisations
All-ASCII strings cannot have substrings with non-ASCII characters in them; use this fact to avoid searching entirely. * src/fns.c (Fstring_search): For multibyte non-ASCII needle and unibyte haystack, don't check if the haystack is all-ASCII; it's a waste of time. For multibyte non-ASCII needle and multibyte all-ASCII haystack, fail immediately. * test/src/fns-tests.el (string-search): Add more test cases.
This commit is contained in:
parent
74a35d16e2
commit
90aab73f8d
23
src/fns.c
23
src/fns.c
@ -5502,25 +5502,32 @@ Case is always significant and text properties are ignored. */)
|
||||
haybytes = SBYTES (haystack) - start_byte;
|
||||
|
||||
/* We can do a direct byte-string search if both strings have the
|
||||
same multibyteness, or if at least one of them consists of ASCII
|
||||
characters only. */
|
||||
same multibyteness, or if the needle consists of ASCII characters only. */
|
||||
if (STRING_MULTIBYTE (haystack)
|
||||
? (STRING_MULTIBYTE (needle)
|
||||
|| SCHARS (haystack) == SBYTES (haystack) || string_ascii_p (needle))
|
||||
: (!STRING_MULTIBYTE (needle)
|
||||
|| SCHARS (needle) == SBYTES (needle) || string_ascii_p (haystack)))
|
||||
res = memmem (haystart, haybytes,
|
||||
SSDATA (needle), SBYTES (needle));
|
||||
else if (STRING_MULTIBYTE (haystack)) /* unibyte needle */
|
||||
|| SCHARS (needle) == SBYTES (needle)))
|
||||
{
|
||||
if (STRING_MULTIBYTE (haystack) && STRING_MULTIBYTE (needle)
|
||||
&& SCHARS (haystack) == SBYTES (haystack)
|
||||
&& SCHARS (needle) != SBYTES (needle))
|
||||
/* Multibyte non-ASCII needle, multibyte ASCII haystack: impossible. */
|
||||
return Qnil;
|
||||
else
|
||||
res = memmem (haystart, haybytes,
|
||||
SSDATA (needle), SBYTES (needle));
|
||||
}
|
||||
else if (STRING_MULTIBYTE (haystack)) /* unibyte non-ASCII needle */
|
||||
{
|
||||
Lisp_Object multi_needle = string_to_multibyte (needle);
|
||||
res = memmem (haystart, haybytes,
|
||||
SSDATA (multi_needle), SBYTES (multi_needle));
|
||||
}
|
||||
else /* unibyte haystack, multibyte needle */
|
||||
else /* unibyte haystack, multibyte non-ASCII needle */
|
||||
{
|
||||
/* The only possible way we can find the multibyte needle in the
|
||||
unibyte stack (since we know that neither are pure-ASCII) is
|
||||
unibyte haystack (since we know that the needle is non-ASCII) is
|
||||
if they contain "raw bytes" (and no other non-ASCII chars.) */
|
||||
ptrdiff_t nbytes = SBYTES (needle);
|
||||
for (ptrdiff_t i = 0; i < nbytes; i++)
|
||||
|
@ -938,6 +938,13 @@
|
||||
(should (equal (string-search "\303" "aøb") nil))
|
||||
(should (equal (string-search "\270" "aøb") nil))
|
||||
(should (equal (string-search "ø" "\303\270") nil))
|
||||
(should (equal (string-search "ø" (make-string 32 ?a)) nil))
|
||||
(should (equal (string-search "ø" (string-to-multibyte (make-string 32 ?a)))
|
||||
nil))
|
||||
(should (equal (string-search "o" (string-to-multibyte
|
||||
(apply #'string
|
||||
(number-sequence ?a ?z))))
|
||||
14))
|
||||
|
||||
(should (equal (string-search "a\U00010f98z" "a\U00010f98a\U00010f98z") 2))
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user