diff --git a/etc/ChangeLog b/etc/ChangeLog index 40eb5f11252..72c8334d3df 100644 --- a/etc/ChangeLog +++ b/etc/ChangeLog @@ -1,3 +1,8 @@ +2014-08-25 Eli Zaretskii + + * NEWS: Mention that string-collate-* functions are supported on + MS-Windows as well. + 2014-08-08 Jan Nieuwenhuizen * compilation.txt (file): Add Guile backtrace example. diff --git a/etc/NEWS b/etc/NEWS index 58a749a79b4..afdaf1a2a3d 100644 --- a/etc/NEWS +++ b/etc/NEWS @@ -68,9 +68,9 @@ variable `read-hide-char'. ** The new functions `string-collate-lessp' and `string-collate-equalp' preserve the collation order as defined by the system's locale(1) -environment. For the time being this is implemented for POSIX systems -only, for other systems they fall back to their counterparts -`string-lessp' and `string-equal'. +environment. For the time being this is implemented for modern POSIX +systems and for MS-Windows; for other systems they fall back to their +counterparts `string-lessp' and `string-equal'. * Editing Changes in Emacs 24.5 diff --git a/src/ChangeLog b/src/ChangeLog index efd469ad053..bb678dc4843 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,19 @@ +2014-08-25 Eli Zaretskii + + Implement locale-sensitive string collation for MS-Windows. + * w32proc.c (get_lcid_callback, get_lcid, w32_compare_strings): + New functions. (Bug#18051) + + * w32.h (w32_compare_strings): Add prototype. + + * w32.c <g_b_init_compare_string_w>: New global flag. + (globals_of_w32): Initialize it. + + * sysdep.c (str_collate) [WINDOWSNT]: Implementation for MS-Windows. + + * fns.c (Fstring_collate_lessp, Fstring_collate_equalp) + [WINDOWSNT]: Call str_collate on MS-Windows. + 2014-08-25 Dmitry Antipov One more minor cleanup of font subsystem. diff --git a/src/fns.c b/src/fns.c index 616b54d193c..2e2acf84b95 100644 --- a/src/fns.c +++ b/src/fns.c @@ -364,7 +364,7 @@ If the environment variable \"LC_COLLATE\" is set in `process-environment', it overrides the setting of your current locale. 
*/)
   (Lisp_Object s1, Lisp_Object s2)
 {
-#ifdef __STDC_ISO_10646__
+#if defined __STDC_ISO_10646__ || defined WINDOWSNT
   /* Check parameters.  */
   if (SYMBOLP (s1))
     s1 = SYMBOL_NAME (s1);
@@ -375,9 +375,9 @@ it overrides the setting of your current locale. */)
 
   return (str_collate (s1, s2) < 0) ? Qt : Qnil;
 
-#else
+#else /* !__STDC_ISO_10646__, !WINDOWSNT */
   return Fstring_lessp (s1, s2);
-#endif /* __STDC_ISO_10646__ */
+#endif /* !__STDC_ISO_10646__, !WINDOWSNT */
 }
 
 DEFUN ("string-collate-equalp", Fstring_collate_equalp, Sstring_collate_equalp, 2, 2, 0,
@@ -401,7 +401,7 @@ If the environment variable \"LC_COLLATE\" is set in `process-environment',
 it overrides the setting of your current locale.  */)
   (Lisp_Object s1, Lisp_Object s2)
 {
-#ifdef __STDC_ISO_10646__
+#if defined __STDC_ISO_10646__ || defined WINDOWSNT
   /* Check parameters.  */
   if (SYMBOLP (s1))
     s1 = SYMBOL_NAME (s1);
@@ -412,9 +412,9 @@ it overrides the setting of your current locale. */)
 
   return (str_collate (s1, s2) == 0) ? Qt : Qnil;
 
-#else
+#else /* !__STDC_ISO_10646__, !WINDOWSNT */
   return Fstring_equal (s1, s2);
-#endif /* __STDC_ISO_10646__ */
+#endif /* !__STDC_ISO_10646__, !WINDOWSNT */
 }
 
 static Lisp_Object concat (ptrdiff_t nargs, Lisp_Object *args,
diff --git a/src/sysdep.c b/src/sysdep.c
index 856d668bb71..25bec264f46 100644
--- a/src/sysdep.c
+++ b/src/sysdep.c
@@ -3592,3 +3592,24 @@ str_collate (Lisp_Object s1, Lisp_Object s2)
   return res;
 }
 #endif /* __STDC_ISO_10646__ */
+
+#ifdef WINDOWSNT
+/* MS-Windows implementation of str_collate.  Compare the strings S1
+   and S2 by handing their text to w32_compare_strings, together with
+   the value of LC_COLLATE looked up in `process-environment' (or
+   NULL when that lookup does not yield a string).  The result of
+   w32_compare_strings is returned unchanged; that includes its
+   failure value, which it reports together with errno == EINVAL, so
+   callers that care about errors have to inspect errno.  */
+ptrdiff_t
+str_collate (Lisp_Object s1, Lisp_Object s2)
+{
+  Lisp_Object lc_collate =
+    Fgetenv_internal (build_string ("LC_COLLATE"), Vprocess_environment);
+  char *loc = STRINGP (lc_collate) ? SSDATA (lc_collate) : NULL;
+
+  /* SSDATA yields 'char *' text, which matches the 'const char *'
+     parameters declared for w32_compare_strings.  */
+  return w32_compare_strings (SSDATA (s1), SSDATA (s2), loc);
+}
+#endif /* WINDOWSNT */
diff --git a/src/w32.c b/src/w32.c
index 7cb9d8960c5..25549d79d7f 100644
--- a/src/w32.c
+++ b/src/w32.c
@@ -309,6 +309,8 @@ static BOOL g_b_init_set_named_security_info_w;
 static BOOL g_b_init_set_named_security_info_a;
 static BOOL g_b_init_get_adapters_info;
 
+BOOL g_b_init_compare_string_w;
+
 /*
   BEGIN: Wrapper functions around OpenProcessToken
   and other functions in advapi32.dll that are only
@@ -9068,6 +9070,7 @@ globals_of_w32 (void)
   g_b_init_set_named_security_info_w = 0;
   g_b_init_set_named_security_info_a = 0;
   g_b_init_get_adapters_info = 0;
+  g_b_init_compare_string_w = 0;
   num_of_processors = 0;
   /* The following sets a handler for shutdown notifications for
      console apps. This actually applies to Emacs in both console and
diff --git a/src/w32.h b/src/w32.h
index 94f7a962833..68ee14c70e3 100644
--- a/src/w32.h
+++ b/src/w32.h
@@ -210,6 +210,9 @@ extern int sys_link (const char *, const char *);
 extern int w32_memory_info (unsigned long long *, unsigned long long *,
                             unsigned long long *, unsigned long long *);
 
+/* Compare 2 UTF-8 strings in locale-dependent fashion.  */
+extern int w32_compare_strings (const char *, const char *, char *);
+
 #ifdef HAVE_GNUTLS
 #include
 
diff --git a/src/w32proc.c b/src/w32proc.c
index 426a656f566..ed62de02433 100644
--- a/src/w32proc.c
+++ b/src/w32proc.c
@@ -32,6 +32,7 @@ along with GNU Emacs. If not, see . */
 #include
 #include
 #include
+#include
 
 /* must include CRT headers *before* config.h */
 #include
@@ -3144,6 +3145,175 @@ If successful, the new layout id is returned, otherwise nil. */)
   return Fw32_get_keyboard_layout ();
 }
 
+/* Two variables to interface between get_lcid and the EnumLocales
+   callback function below.  */
+#ifndef LOCALE_NAME_MAX_LENGTH
+# define LOCALE_NAME_MAX_LENGTH 85
+#endif
+static LCID found_lcid;
+static char lname[3 * LOCALE_NAME_MAX_LENGTH + 1 + 1];
+
+/* Callback function for EnumLocales.  LOCALE_NUM_STR is the locale
+   ID as a string of hex digits.  Build the name LANG_CTRY from the
+   locale's abbreviated language and country names, and compare it,
+   ignoring letter-case, with the beginning of LNAME; the match must
+   end at a '.' or at the end of LNAME.  On a match, record the
+   locale ID in FOUND_LCID and return FALSE, which stops the
+   enumeration; otherwise return TRUE to continue with the next
+   locale.  */
+static BOOL CALLBACK
+get_lcid_callback (LPTSTR locale_num_str)
+{
+  char *endp;
+  char locval[2 * LOCALE_NAME_MAX_LENGTH + 1 + 1];
+  LCID try_lcid = strtoul (locale_num_str, &endp, 16);
+
+  if (GetLocaleInfo (try_lcid, LOCALE_SABBREVLANGNAME,
+                     locval, LOCALE_NAME_MAX_LENGTH))
+    {
+      strcat (locval, "_");
+      if (GetLocaleInfo (try_lcid, LOCALE_SABBREVCTRYNAME,
+                         locval + strlen (locval), LOCALE_NAME_MAX_LENGTH))
+        {
+          size_t locval_len = strlen (locval);
+
+          if (strnicmp (locval, lname, locval_len) == 0
+              && (lname[locval_len] == '.'
+                  || lname[locval_len] == '\0'))
+            {
+              found_lcid = try_lcid;
+              return FALSE;
+            }
+        }
+    }
+  return TRUE;
+}
+
+/* Return the Locale ID (LCID) number given the locale's name, a
+   string, in LOCALE_NAME.  This works by enumerating all the locales
+   supported by the system, until we find one whose name matches
+   LOCALE_NAME.  Return zero if no locale matches.  */
+static LCID
+get_lcid (const char *locale_name)
+{
+  /* A simple cache.  */
+  static LCID last_lcid;
+  static char last_locale[1000];
+
+  /* The code below is not thread-safe, as it uses static variables.
+     But this function is called only from the Lisp thread.  */
+  if (last_lcid > 0 && strcmp (locale_name, last_locale) == 0)
+    return last_lcid;
+
+  strncpy (lname, locale_name, sizeof (lname) - 1);
+  lname[sizeof (lname) - 1] = '\0';
+  found_lcid = 0;
+  EnumSystemLocales (get_lcid_callback, LCID_SUPPORTED);
+  /* Cache the result only if the name fits in LAST_LOCALE; the
+     callback matches just the beginning of LNAME, so LOCALE_NAME can
+     be arbitrarily long here.  */
+  if (found_lcid > 0 && strlen (locale_name) < sizeof (last_locale))
+    {
+      last_lcid = found_lcid;
+      strcpy (last_locale, locale_name);
+    }
+  return found_lcid;
+}
+
+#ifndef _NSLCMPERROR
+# define _NSLCMPERROR INT_MAX
+#endif
+
+/* Compare the UTF-8 encoded strings S1 and S2 using CompareStringW.
+   If LOCNAME is non-NULL and get_lcid finds a locale for it, compare
+   according to that locale; otherwise use the locale of the current
+   thread.  Return a negative value, zero, or a positive value if S1
+   is less than, equal to, or greater than S2.  On failure, set errno
+   to EINVAL and return _NSLCMPERROR.  */
+int
+w32_compare_strings (const char *s1, const char *s2, char *locname)
+{
+  LCID lcid = GetThreadLocale ();
+  wchar_t *string1_w, *string2_w;
+  int val, needed;
+  extern BOOL g_b_init_compare_string_w;
+  static int (WINAPI *pCompareStringW)(LCID, DWORD, LPCWSTR, int, LPCWSTR, int);
+
+  USE_SAFE_ALLOCA;
+
+  if (!g_b_init_compare_string_w)
+    {
+      if (os_subtype == OS_9X)
+        {
+          pCompareStringW = GetProcAddress (LoadLibrary ("Unicows.dll"),
+                                            "CompareStringW");
+          if (!pCompareStringW)
+            {
+              errno = EINVAL;
+              /* This return value is compatible with wcscoll and
+                 other MS CRT functions.  */
+              return _NSLCMPERROR;
+            }
+        }
+      else
+        pCompareStringW = CompareStringW;
+
+      g_b_init_compare_string_w = 1;
+    }
+
+  needed = pMultiByteToWideChar (CP_UTF8, MB_ERR_INVALID_CHARS, s1, -1, NULL, 0);
+  if (needed > 0)
+    {
+      SAFE_NALLOCA (string1_w, 1, needed + 1);
+      pMultiByteToWideChar (CP_UTF8, MB_ERR_INVALID_CHARS, s1, -1,
+                            string1_w, needed);
+    }
+  else
+    {
+      errno = EINVAL;
+      return _NSLCMPERROR;
+    }
+
+  needed = pMultiByteToWideChar (CP_UTF8, MB_ERR_INVALID_CHARS, s2, -1, NULL, 0);
+  if (needed > 0)
+    {
+      SAFE_NALLOCA (string2_w, 1, needed + 1);
+      pMultiByteToWideChar (CP_UTF8, MB_ERR_INVALID_CHARS, s2, -1,
+                            string2_w, needed);
+    }
+  else
+    {
+      SAFE_FREE ();
+      errno = EINVAL;
+      return _NSLCMPERROR;
+    }
+
+  if (locname)
+    {
+      /* Convert locale name string to LCID.  We don't want to use
+         LocaleNameToLCID because (a) it is only available since
+         Vista, and (b) it doesn't accept locale names returned by
+         'setlocale' and 'GetLocaleInfo'.  */
+      LCID new_lcid = get_lcid (locname);
+
+      if (new_lcid > 0)
+        lcid = new_lcid;
+    }
+
+  /* FIXME: Need a way to control the FLAGS argument, perhaps via the
+     CODESET part of LOCNAME.  In particular, ls-lisp will want
+     NORM_IGNORESYMBOLS and sometimes LINGUISTIC_IGNORECASE or
+     NORM_IGNORECASE.  */
+  val = pCompareStringW (lcid, 0, string1_w, -1, string2_w, -1);
+  SAFE_FREE ();
+  if (!val)
+    {
+      errno = EINVAL;
+      return _NSLCMPERROR;
+    }
+  return val - 2;
+}
+
 
 void
 syms_of_ntproc (void)