1
0
mirror of https://git.savannah.gnu.org/git/emacs.git synced 2024-11-22 07:09:54 +00:00

Improve font search and handling on MS-Windows

* src/w32font.c: Add commentary about font search on MS-Windows.
(w32font_coverage_ok, add_font_entity_to_list)
(font_supported_scripts): Consider the coverage OK if a font has
only the SIP bit set, but also sets relevant codepage bits in the
CSB bits.
(font_supported_scripts): Fix script for USB bit 99.
* src/font.c (font_parse_fcname, font_parse_family_registry)
[HAVE_NTGUI]: Don't consider hyphenated suffixes of some Windows
fonts as not belonging to the family name.
* src/w32uniscribe.c (uniscribe_check_otf_1): Increase tags[]
array size, to avoid the E_OUTOFMEMORY error for some fonts.

* lisp/international/fontset.el (font-encoding-alist): Add
'unicode-sip'.
This commit is contained in:
Eli Zaretskii 2024-08-03 18:11:57 +03:00
parent ef8276d424
commit ff6954b9c8
4 changed files with 160 additions and 18 deletions

View File

@ -88,6 +88,7 @@
("iso10646-1$" . (unicode-bmp . nil))
("iso10646.indian-1" . (unicode-bmp . nil))
("unicode-bmp" . (unicode-bmp . nil))
("unicode-sip" . (unicode-sip . nil)) ; used by w32font.c
("abobe-symbol" . symbol)
("sisheng_cwnn" . chinese-sisheng)
("mulearabic-0" . arabic-digit)

View File

@ -1627,15 +1627,30 @@ font_parse_fcname (char *name, ptrdiff_t len, Lisp_Object font)
{
bool decimal = 0, size_found = 1;
for (q = p + 1; *q && *q != ':'; q++)
if (! c_isdigit (*q))
{
if (*q != '.' || decimal)
{
size_found = 0;
break;
}
decimal = 1;
}
{
#ifdef HAVE_NTGUI
/* MS-Windows has several CJK fonts whose name ends in
"-ExtB". It also has fonts whose names end in "-R" or
"-B", and one font whose name ends in "-SB". */
if (q == p + 1 && (strncmp (q, "ExtB", 4) == 0
|| strncmp (q, "R", 1) == 0
|| strncmp (q, "B", 1) == 0
|| strncmp (q, "SB", 2) == 0))
{
size_found = 0;
break;
}
#endif
if (! c_isdigit (*q))
{
if (*q != '.' || decimal)
{
size_found = 0;
break;
}
decimal = 1;
}
}
if (size_found)
{
family_end = p;
@ -2000,6 +2015,15 @@ font_parse_family_registry (Lisp_Object family, Lisp_Object registry, Lisp_Objec
len = SBYTES (family);
p0 = SSDATA (family);
p1 = strchr (p0, '-');
#ifdef HAVE_NTGUI
/* MS-Windows has fonts whose family name ends in "-ExtB" and
other suffixes which include a hyphen. */
if (p1 && (strcmp (p1, "-ExtB") == 0
|| strcmp (p1, "-R") == 0
|| strcmp (p1, "-B") == 0
|| strcmp (p1, "-SB") == 0))
p1 = NULL;
#endif
if (p1)
{
if ((*p0 != '*' && p1 - p0 > 0)

View File

@ -809,6 +809,93 @@ w32font_otf_drive (struct font *font, Lisp_Object features,
bool alternate_subst);
*/
/* Notes about the way fonts are found on MS-Windows when we have a
character unsupported by the default font.
Since we don't use Fontconfig on MS-Windows, we cannot efficiently
search for fonts which support certain characters, because Windows
doesn't store this information anywhere, and we can only know whether
a font supports some character if we actually open the font, which is
expensive and slow. Instead, we rely on font information Windows
exposes to the API we use to enumerate available fonts,
EnumFontFamiliesEx. This information includes two bitmapped attributes:
USB (which stands for Unicode Subset Bitfields) -- this is an array
of 4 32-bit values, 128 bits in total, where each bit
corresponds to some block (sometimes several related blocks) of
Unicode codepoints which the font claims to support.
CSB (which stands for Codepage Bitfields) -- this is an array of 2
32-bit values (64 bits), where each bit corresponds to some
codepage whose characters the font claims to support.
When Emacs needs to find a font for a character, it enumerates the
available fonts, filtering the fonts by examining these bitmaps and a
few other font attributes. The script of the character is converted
to the corresponding bits in USB, and a font that has any of these
bits set is deemed as a candidate; see font_supported_scripts, which
is called by font_matches_spec. The problem with this strategy is
twofold:
- Some Unicode blocks have no USB bits. For the scripts
corresponding to those blocks we use a small cache of fonts known
to support those scripts. This cache is calculated once, and need
not be recalculated as long as no fonts are installed or deleted
(it can be saved in your init file and reused for the following
sessions). See the function w32-find-non-USB-fonts. Note that
for that function to work well, 'script-representative-chars'
should include the important characters for each script which has
no USB bits defined.
- Some fonts claim support for a block, but don't support it well.
Other fonts support some blocks very well, but don't set the
corresponding USB bits for the blocks. For these we use some
heuristics:
. For the few fonts that claim coverage, but don't provide it, we
either recognize them by name and reject their false claims, or
let users set face-ignored-fonts to ignore those fonts.
. For fonts that support some blocks very well, but don't set
their USB bits, we examine the CSB bits instead. This is
particularly important for some CJK fonts with good support in
the SIP area: they only set the SIP bit (bit 57) in the USB. We
consider those as candidates for CJK scripts ('han', 'kana',
etc.) if the CSB bits are set for the corresponding CJK
codepages.
Eventually, some characters could still appear as "tofu" (a box with
the character's hex codepoint), even though a font might be available
on the system which supports the character. This is because the
above strategy, with all its heuristics and tricks, sometimes fails.
For example, it could fail if the system has several fonts installed
whose coverage of some blocks is incomplete -- Emacs could select
such a font based on its USB bits, and realize the font has no glyph
for a character only when it's too late. This happens because when
several fonts claim coverage of the same Unicode block, Emacs on
Windows has no way of preferring one over the other, if they all
support the same values of size, weight, and slant. So Emacs usually
selects the first such candidate, which could lack glyphs for the
characters Emacs needs to display. Since we avoid naming non-free
Windows fonts in Emacs's sources, this cannot be fixed in the
default fontset setup provided by Emacs: we cannot arrange for the
"good" fonts to be used in all such cases, because that would mean
naming those fonts. The solution for these issues is to customize the
default fontset using set-fontset-font, to force Emacs to use a font
known to support some characters.
One other Windows-specific issue is the fact that some Windows fonts
have hyphens in their names. Emacs generally follows the XLFD
specifications, where a hyphen is used as separator between segments
of a font spec. There are a few places in the code in font.c where
Emacs handles such font names specially, and it currently knows about
font names documented for Windows versions up to and including 11.
See this page for the latest update:
https://learn.microsoft.com/en-us/typography/fonts/windows_11_font_list
If more fonts are added to Windows that have hyphens in their names,
the code in font.c will need to be updated. */
/* Internal implementation of w32font_list.
Additional parameter opentype_only restricts the returned fonts to
opentype fonts, which can be used with the Uniscribe backend. */
@ -1455,22 +1542,34 @@ static int
w32font_coverage_ok (FONTSIGNATURE * coverage, BYTE charset)
{
DWORD subrange1 = coverage->fsUsb[1];
DWORD codepages0 = coverage->fsCsb[0];
#define SUBRANGE1_HAN_MASK 0x08000000
#define SUBRANGE1_HANGEUL_MASK 0x01000000
#define SUBRANGE1_JAPANESE_MASK (0x00060000 | SUBRANGE1_HAN_MASK)
#define SUBRANGE1_SIP_MASK 0x02000000
/* We consider the coverage to be OK if either (a) subrange1 has the
bits set that correspond to CHARSET, or (b) subrange1 indicates SIP
support and codepages0 has one or more bits set corresponding to
CHARSET. */
if (charset == GB2312_CHARSET || charset == CHINESEBIG5_CHARSET)
{
return (subrange1 & SUBRANGE1_HAN_MASK) == SUBRANGE1_HAN_MASK;
return ((subrange1 & SUBRANGE1_HAN_MASK) == SUBRANGE1_HAN_MASK
|| ((subrange1 & SUBRANGE1_SIP_MASK) != 0
&& (codepages0 & CSB_CHINESE) != 0));
}
else if (charset == SHIFTJIS_CHARSET)
{
return (subrange1 & SUBRANGE1_JAPANESE_MASK) == SUBRANGE1_JAPANESE_MASK;
return ((subrange1 & SUBRANGE1_JAPANESE_MASK) == SUBRANGE1_JAPANESE_MASK
|| ((subrange1 & SUBRANGE1_SIP_MASK) != 0
&& (codepages0 & CSB_JAPANESE) != 0));
}
else if (charset == HANGEUL_CHARSET)
{
return (subrange1 & SUBRANGE1_HANGEUL_MASK) == SUBRANGE1_HANGEUL_MASK;
return ((subrange1 & SUBRANGE1_HANGEUL_MASK) == SUBRANGE1_HANGEUL_MASK
|| ((subrange1 & SUBRANGE1_SIP_MASK) != 0
&& (codepages0 & CSB_KOREAN) != 0));
}
return 1;
@ -1620,11 +1719,18 @@ add_font_entity_to_list (ENUMLOGFONTEX *logical_font,
}
/* unicode-sip fonts must contain characters in Unicode plane 2,
so look for bit 57 (surrogates) in the Unicode subranges, plus
the bits for CJK ranges that include those characters. */
the bits for CJK ranges that include those characters or CJK
bits in code-page bit fields. */
else if (EQ (spec_charset, Qunicode_sip))
{
if (!(physical_font->ntmFontSig.fsUsb[1] & 0x02000000)
|| !(physical_font->ntmFontSig.fsUsb[1] & 0x28000000))
if (!((physical_font->ntmFontSig.fsUsb[1] & 0x02000000)
&& ((physical_font->ntmFontSig.fsUsb[1] & 0x28000000)
/* Some CJK fonts with very good coverage of SIP
characters have only the 0x02000000 bit in USB
set, so we allow them if their code-page bits
indicate support for CJK character sets. */
|| (physical_font->ntmFontSig.fsCsb[0]
& (CSB_CHINESE | CSB_JAPANESE | CSB_KOREAN)))))
return 1;
}
@ -2328,7 +2434,18 @@ font_supported_scripts (FONTSIGNATURE * sig)
SUBRANGE (53, Qphags_pa);
/* 54: Enclosed CJK letters and months, 55: CJK Compatibility. */
SUBRANGE (56, Qhangul);
/* 57: Surrogates. */
/* 57: Non-BMP. Processed specially: Several fonts that support CJK
Ideographs Extensions and other extensions, set just this bit and
Latin, and nothing else. */
if (subranges[57 / 32] & (1U << (57 % 32)))
{
if ((sig->fsCsb[0] & CSB_CHINESE))
supported = Fcons (Qhan, supported);
if ((sig->fsCsb[0] & CSB_JAPANESE))
supported = Fcons (Qkana, supported);
if ((sig->fsCsb[0] & CSB_KOREAN))
supported = Fcons (Qhangul, supported);
}
SUBRANGE (58, Qphoenician);
SUBRANGE (59, Qhan); /* There are others, but this is the main one. */
SUBRANGE (59, Qideographic_description); /* Windows lumps this in. */
@ -2385,7 +2502,7 @@ font_supported_scripts (FONTSIGNATURE * sig)
SUBRANGE (97, Qglagolitic);
SUBRANGE (98, Qtifinagh);
/* 99: Yijing Hexagrams. */
SUBRANGE (99, Qhan);
SUBRANGE (99, Qcjk_misc);
SUBRANGE (100, Qsyloti_nagri);
SUBRANGE (101, Qlinear_b);
SUBRANGE (101, Qaegean_number);

View File

@ -895,7 +895,7 @@ uniscribe_check_otf_1 (HDC context, Lisp_Object script, Lisp_Object lang,
Lisp_Object features[2], int *retval)
{
SCRIPT_CACHE cache = NULL;
OPENTYPE_TAG tags[32], script_tag, lang_tag;
OPENTYPE_TAG tags[128], script_tag, lang_tag;
int max_tags = ARRAYELTS (tags);
int ntags, i, ret = 0;
HRESULT rslt;