1
0
mirror of https://git.savannah.gnu.org/git/emacs.git synced 2025-01-15 17:00:26 +00:00

Implement locale-sensitive string collation for MS-Windows. (Bug#18051)

src/w32proc.c (get_lcid_callback, get_lcid, w32_compare_strings):
 New functions.
 src/w32.h (w32_compare_strings): Add prototype.
 src/w32.c <g_b_init_compare_string_w>: New global flag.
 (globals_of_w32): Initialize it.
 src/sysdep.c (str_collate) [WINDOWSNT]: Implementation for MS-Windows.
 src/fns.c (Fstring_collate_lessp, Fstring_collate_equalp)
 [WINDOWSNT]: Call str_collate on MS-Windows.

 etc/NEWS: Mention that string-collate-* functions are supported on
 MS-Windows as well.
This commit is contained in:
Eli Zaretskii 2014-08-25 18:55:46 +03:00
parent 8661ebaa6c
commit 015ea0ffdb
8 changed files with 202 additions and 9 deletions

View File

@ -1,3 +1,8 @@
2014-08-25 Eli Zaretskii <eliz@gnu.org>
* NEWS: Mention that string-collate-* functions are supported on
MS-Windows as well.
2014-08-08 Jan Nieuwenhuizen <janneke@gnu.org>
* compilation.txt (file): Add Guile backtrace example.

View File

@ -68,9 +68,9 @@ variable `read-hide-char'.
** The new functions `string-collate-lessp' and `string-collate-equalp'
preserve the collation order as defined by the system's locale(1)
environment. For the time being this is implemented for POSIX systems
only, for other systems they fall back to their counterparts
`string-lessp' and `string-equal'.
environment. For the time being this is implemented for modern POSIX
systems and for MS-Windows; on other systems they fall back to their
counterparts `string-lessp' and `string-equal'.
* Editing Changes in Emacs 24.5

View File

@ -1,3 +1,19 @@
2014-08-25 Eli Zaretskii <eliz@gnu.org>
Implement locale-sensitive string collation for MS-Windows.
* w32proc.c (get_lcid_callback, get_lcid, w32_compare_strings):
New functions. (Bug#18051)
* w32.h (w32_compare_strings): Add prototype.
* w32.c <g_b_init_compare_string_w>: New global flag.
(globals_of_w32): Initialize it.
* sysdep.c (str_collate) [WINDOWSNT]: Implementation for MS-Windows.
* fns.c (Fstring_collate_lessp, Fstring_collate_equalp)
[WINDOWSNT]: Call str_collate on MS-Windows.
2014-08-25 Dmitry Antipov <dmantipov@yandex.ru>
One more minor cleanup of font subsystem.

View File

@ -364,7 +364,7 @@ If the environment variable \"LC_COLLATE\" is set in `process-environment',
it overrides the setting of your current locale. */)
(Lisp_Object s1, Lisp_Object s2)
{
#ifdef __STDC_ISO_10646__
#if defined __STDC_ISO_10646__ || defined WINDOWSNT
/* Check parameters. */
if (SYMBOLP (s1))
s1 = SYMBOL_NAME (s1);
@ -375,9 +375,9 @@ it overrides the setting of your current locale. */)
return (str_collate (s1, s2) < 0) ? Qt : Qnil;
#else
#else /* !__STDC_ISO_10646__, !WINDOWSNT */
return Fstring_lessp (s1, s2);
#endif /* __STDC_ISO_10646__ */
#endif /* !__STDC_ISO_10646__, !WINDOWSNT */
}
DEFUN ("string-collate-equalp", Fstring_collate_equalp, Sstring_collate_equalp, 2, 2, 0,
@ -401,7 +401,7 @@ If the environment variable \"LC_COLLATE\" is set in `process-environment',
it overrides the setting of your current locale. */)
(Lisp_Object s1, Lisp_Object s2)
{
#ifdef __STDC_ISO_10646__
#if defined __STDC_ISO_10646__ || defined WINDOWSNT
/* Check parameters. */
if (SYMBOLP (s1))
s1 = SYMBOL_NAME (s1);
@ -412,9 +412,9 @@ it overrides the setting of your current locale. */)
return (str_collate (s1, s2) == 0) ? Qt : Qnil;
#else
#else /* !__STDC_ISO_10646__, !WINDOWSNT */
return Fstring_equal (s1, s2);
#endif /* __STDC_ISO_10646__ */
#endif /* !__STDC_ISO_10646__, !WINDOWSNT */
}
static Lisp_Object concat (ptrdiff_t nargs, Lisp_Object *args,

View File

@ -3592,3 +3592,15 @@ str_collate (Lisp_Object s1, Lisp_Object s2)
return res;
}
#endif /* __STDC_ISO_10646__ */
#ifdef WINDOWSNT
/* Compare the Lisp strings S1 and S2 by delegating to
   w32_compare_strings.  If LC_COLLATE has a string value in
   `process-environment', that value is passed as the locale name;
   otherwise a null locale name is passed and the choice of locale is
   left to w32_compare_strings.

   Return the value of w32_compare_strings unchanged.  */
ptrdiff_t
str_collate (Lisp_Object s1, Lisp_Object s2)
{
  Lisp_Object lc_collate =
    Fgetenv_internal (build_string ("LC_COLLATE"), Vprocess_environment);
  char *loc = STRINGP (lc_collate) ? SSDATA (lc_collate) : NULL;

  /* Use SSDATA, as for LOC above, so that the string arguments have
     the plain `char *' type w32_compare_strings is declared to
     take.  */
  return w32_compare_strings (SSDATA (s1), SSDATA (s2), loc);
}
#endif  /* WINDOWSNT */

View File

@ -309,6 +309,8 @@ static BOOL g_b_init_set_named_security_info_w;
static BOOL g_b_init_set_named_security_info_a;
static BOOL g_b_init_get_adapters_info;
/* Non-zero once w32_compare_strings has resolved the CompareStringW
   entry point.  Not static, because w32_compare_strings in w32proc.c
   references it through an extern declaration.  */
BOOL g_b_init_compare_string_w;
/*
BEGIN: Wrapper functions around OpenProcessToken
and other functions in advapi32.dll that are only
@ -9068,6 +9070,7 @@ globals_of_w32 (void)
g_b_init_set_named_security_info_w = 0;
g_b_init_set_named_security_info_a = 0;
g_b_init_get_adapters_info = 0;
g_b_init_compare_string_w = 0;
num_of_processors = 0;
/* The following sets a handler for shutdown notifications for
console apps. This actually applies to Emacs in both console and

View File

@ -210,6 +210,9 @@ extern int sys_link (const char *, const char *);
extern int w32_memory_info (unsigned long long *, unsigned long long *,
unsigned long long *, unsigned long long *);
/* Compare 2 null-terminated UTF-8 strings in locale-dependent
   fashion.  The third argument is the name of the locale to use, or
   NULL to use the locale of the calling thread.  On failure the
   function sets errno to EINVAL and returns _NSLCMPERROR.  */
extern int w32_compare_strings (const char *, const char *, char *);
#ifdef HAVE_GNUTLS
#include <gnutls/gnutls.h>

View File

@ -32,6 +32,7 @@ along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
#include <signal.h>
#include <sys/file.h>
#include <mbstring.h>
#include <locale.h>
/* must include CRT headers *before* config.h */
#include <config.h>
@ -3144,6 +3145,159 @@ If successful, the new layout id is returned, otherwise nil. */)
return Fw32_get_keyboard_layout ();
}
/* Two variables to interface between get_lcid and get_lcid_callback,
   the callback function that get_lcid passes to EnumSystemLocales.
   The callback receives only the locale ID string as its argument,
   so the name being looked up and the result of the lookup are
   passed through these file-scope variables.  */
/* Fallback value for headers that do not define this macro.  */
#ifndef LOCALE_NAME_MAX_LENGTH
# define LOCALE_NAME_MAX_LENGTH 85
#endif
/* Set by get_lcid_callback to the LCID of the matching locale;
   get_lcid resets it to zero before each enumeration.  */
static LCID found_lcid;
/* The locale name being looked up; get_lcid copies it here,
   truncating it if it does not fit.  */
static char lname[3 * LOCALE_NAME_MAX_LENGTH + 1 + 1];
/* Callback function for EnumSystemLocales, used by get_lcid.

   LOCALE_NUM_STR is the locale ID of one enumerated locale, as a
   hexadecimal string.  Build the name "LANGUAGE_COUNTRY" for that
   locale from its abbreviated language and country names, and compare
   it, ignoring letter-case, with the leading part of `lname'.  The
   names match if the character of `lname' that follows the compared
   part is either '.' or the terminating null.

   On a match, record the locale ID in `found_lcid' and return FALSE
   to stop the enumeration; otherwise return TRUE to continue it.  */
static BOOL CALLBACK
get_lcid_callback (LPTSTR locale_num_str)
{
  char candidate[2 * LOCALE_NAME_MAX_LENGTH + 1 + 1];
  LCID lcid = strtoul (locale_num_str, NULL, 16);
  size_t candidate_len;

  /* The language part.  */
  if (!GetLocaleInfo (lcid, LOCALE_SABBREVLANGNAME,
                      candidate, LOCALE_NAME_MAX_LENGTH))
    return TRUE;

  /* The separator and the country part, appended in place.  */
  strcat (candidate, "_");
  if (!GetLocaleInfo (lcid, LOCALE_SABBREVCTRYNAME,
                      candidate + strlen (candidate),
                      LOCALE_NAME_MAX_LENGTH))
    return TRUE;

  candidate_len = strlen (candidate);
  if (strnicmp (candidate, lname, candidate_len) != 0)
    return TRUE;

  /* Reject a mere prefix match: the requested name must end here, or
     continue with a ".CODESET" suffix.  */
  if (lname[candidate_len] != '.' && lname[candidate_len] != '\0')
    return TRUE;

  found_lcid = lcid;
  return FALSE;
}
/* Return the Locale ID (LCID) number given the locale's name, a
   string, in LOCALE_NAME.  This works by enumerating all the locales
   supported by the system, until we find one whose name matches
   LOCALE_NAME.  Return zero if no enumerated locale matches.

   Only the first sizeof (lname) - 1 bytes of LOCALE_NAME take part in
   the matching; a longer name is truncated before the enumeration.  */
static LCID
get_lcid (const char *locale_name)
{
  /* A simple one-entry cache holding the last successful lookup.  */
  static LCID last_lcid;
  static char last_locale[1000];

  /* The code below is not thread-safe, as it uses static variables.
     But this function is called only from the Lisp thread.  */
  if (last_lcid > 0 && strcmp (locale_name, last_locale) == 0)
    return last_lcid;

  strncpy (lname, locale_name, sizeof (lname) - 1);
  lname[sizeof (lname) - 1] = '\0';
  found_lcid = 0;
  EnumSystemLocales (get_lcid_callback, LCID_SUPPORTED);
  if (found_lcid > 0)
    {
      size_t name_len = strlen (locale_name);

      /* LOCALE_NAME can be longer than the cache buffer and still
         match, because whatever follows the '.' after the
         language_country part is not compared.  Cache only names
         that fit, so that an overlong name cannot overflow
         last_locale; such a name is simply looked up afresh on every
         call, and the previously cached pair stays valid.  */
      if (name_len < sizeof (last_locale))
        {
          memcpy (last_locale, locale_name, name_len + 1);
          last_lcid = found_lcid;
        }
    }
  return found_lcid;
}
/* The value w32_compare_strings returns when it fails.  Defined here
   for the case where the headers in use do not provide it.  */
#ifndef _NSLCMPERROR
# define _NSLCMPERROR INT_MAX
#endif
int
w32_compare_strings (const char *s1, const char *s2, char *locname)
{
LCID lcid = GetThreadLocale ();
wchar_t *string1_w, *string2_w;
int val, needed;
extern BOOL g_b_init_compare_string_w;
static int (WINAPI *pCompareStringW)(LCID, DWORD, LPCWSTR, int, LPCWSTR, int);
USE_SAFE_ALLOCA;
if (!g_b_init_compare_string_w)
{
if (os_subtype == OS_9X)
{
pCompareStringW = GetProcAddress (LoadLibrary ("Unicows.dll"),
"CompareStringW");
if (!pCompareStringW)
{
errno = EINVAL;
/* This return value is compatible with wcscoll and
other MS CRT functions. */
return _NSLCMPERROR;
}
}
else
pCompareStringW = CompareStringW;
g_b_init_compare_string_w = 1;
}
needed = pMultiByteToWideChar (CP_UTF8, MB_ERR_INVALID_CHARS, s1, -1, NULL, 0);
if (needed > 0)
{
SAFE_NALLOCA (string1_w, 1, needed + 1);
pMultiByteToWideChar (CP_UTF8, MB_ERR_INVALID_CHARS, s1, -1,
string1_w, needed);
}
else
{
errno = EINVAL;
return _NSLCMPERROR;
}
needed = pMultiByteToWideChar (CP_UTF8, MB_ERR_INVALID_CHARS, s2, -1, NULL, 0);
if (needed > 0)
{
SAFE_NALLOCA (string2_w, 1, needed + 1);
pMultiByteToWideChar (CP_UTF8, MB_ERR_INVALID_CHARS, s2, -1,
string2_w, needed);
}
else
{
SAFE_FREE ();
errno = EINVAL;
return _NSLCMPERROR;
}
if (locname)
{
/* Convert locale name string to LCID. We don't want to use
LocaleNameToLCID because (a) it is only available since
Vista, and (b) it doesn't accept locale names returned by
'setlocale' and 'GetLocaleInfo'. */
LCID new_lcid = get_lcid (locname);
if (new_lcid > 0)
lcid = new_lcid;
}
/* FIXME: Need a way to control the FLAGS argument, perhaps via the
CODESET part of LOCNAME. In particular, ls-lisp will want
NORM_IGNORESYMBOLS and sometimes LINGUISTIC_IGNORECASE or
NORM_IGNORECASE. */
val = pCompareStringW (lcid, 0, string1_w, -1, string2_w, -1);
SAFE_FREE ();
if (!val)
{
errno = EINVAL;
return _NSLCMPERROR;
}
return val - 2;
}
void
syms_of_ntproc (void)