mirror of
https://git.savannah.gnu.org/git/emacs.git
synced 2024-11-26 07:33:47 +00:00
Improve font search and handling on MS-Windows
* src/w32font.c: Add commentary about font search on MS-Windows. (w32font_coverage_ok, add_font_entity_to_list) (font_supported_scripts): Consider the coverage OK if a font has only the SIP bit set, but also sets relevant codepage bits in the CSB bits. (font_supported_scripts): Fix script for USB bit 99. * src/font.c (font_parse_fcname, font_parse_family_registry) [HAVE_NTGUI]: Don't consider hyphenated suffixes of some Windows fonts as not belonging to the family name. * src/w32uniscribe.c (uniscribe_check_otf_1): Increase tags[] array size, to avoid the E_OUTOFMEMORY error for some fonts. * lisp/international/fontset.el (font-encoding-alist): Add 'unicode-sip'.
This commit is contained in:
parent
ef8276d424
commit
ff6954b9c8
@ -88,6 +88,7 @@
|
||||
("iso10646-1$" . (unicode-bmp . nil))
|
||||
("iso10646.indian-1" . (unicode-bmp . nil))
|
||||
("unicode-bmp" . (unicode-bmp . nil))
|
||||
("unicode-sip" . (unicode-sip . nil)) ; used by w32font.c
|
||||
("abobe-symbol" . symbol)
|
||||
("sisheng_cwnn" . chinese-sisheng)
|
||||
("mulearabic-0" . arabic-digit)
|
||||
|
42
src/font.c
42
src/font.c
@ -1627,15 +1627,30 @@ font_parse_fcname (char *name, ptrdiff_t len, Lisp_Object font)
|
||||
{
|
||||
bool decimal = 0, size_found = 1;
|
||||
for (q = p + 1; *q && *q != ':'; q++)
|
||||
if (! c_isdigit (*q))
|
||||
{
|
||||
if (*q != '.' || decimal)
|
||||
{
|
||||
size_found = 0;
|
||||
break;
|
||||
}
|
||||
decimal = 1;
|
||||
}
|
||||
{
|
||||
#ifdef HAVE_NTGUI
|
||||
/* MS-Windows has several CJK fonts whose name ends in
|
||||
"-ExtB". It also has fonts whose names end in "-R" or
|
||||
"-B", and one font whose name ends in "-SB". */
|
||||
if (q == p + 1 && (strncmp (q, "ExtB", 4) == 0
|
||||
|| strncmp (q, "R", 1) == 0
|
||||
|| strncmp (q, "B", 1) == 0
|
||||
|| strncmp (q, "SB", 2) == 0))
|
||||
{
|
||||
size_found = 0;
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
if (! c_isdigit (*q))
|
||||
{
|
||||
if (*q != '.' || decimal)
|
||||
{
|
||||
size_found = 0;
|
||||
break;
|
||||
}
|
||||
decimal = 1;
|
||||
}
|
||||
}
|
||||
if (size_found)
|
||||
{
|
||||
family_end = p;
|
||||
@ -2000,6 +2015,15 @@ font_parse_family_registry (Lisp_Object family, Lisp_Object registry, Lisp_Objec
|
||||
len = SBYTES (family);
|
||||
p0 = SSDATA (family);
|
||||
p1 = strchr (p0, '-');
|
||||
#ifdef HAVE_NTGUI
|
||||
/* MS-Windows has fonts whose family name ends in "-ExtB" and
|
||||
other suffixes which include a hyphen. */
|
||||
if (p1 && (strcmp (p1, "-ExtB") == 0
|
||||
|| strcmp (p1, "-R") == 0
|
||||
|| strcmp (p1, "-B") == 0
|
||||
|| strcmp (p1, "-SB") == 0))
|
||||
p1 = NULL;
|
||||
#endif
|
||||
if (p1)
|
||||
{
|
||||
if ((*p0 != '*' && p1 - p0 > 0)
|
||||
|
133
src/w32font.c
133
src/w32font.c
@ -809,6 +809,93 @@ w32font_otf_drive (struct font *font, Lisp_Object features,
|
||||
bool alternate_subst);
|
||||
*/
|
||||
|
||||
/* Notes about the way fonts are found on MS-Windows when we have a
|
||||
character unsupported by the default font.
|
||||
|
||||
Since we don't use Fontconfig on MS-Windows, we cannot efficiently
|
||||
search for fonts which support certain characters, because Windows
|
||||
doesn't store this information anywhere, and we can only know whether
|
||||
a font supports some character if we actually open the font, which is
|
||||
expensive and slow. Instead, we rely on font information Windows
|
||||
exposes to the API we use to enumerate available fonts,
|
||||
EnumFontFamiliesEx. This information includes two bitmapped attributes:
|
||||
|
||||
USB (which stands for Unicode Subset Bitfields) -- this is an array
|
||||
of 4 32-bit values, 128 bits in total, where each bit
|
||||
corresponds to some block (sometimes several related blocks) of
|
||||
Unicode codepoints which the font claims to support.
|
||||
CSB (which stands for Codepage Bitfields) -- this is an array of 2
|
||||
32-bit values (64 bits), where each bit corresponds to some
|
||||
codepage whose characters the font claims to support.
|
||||
|
||||
When Emacs needs to find a font for a character, it enumerates the
|
||||
available fonts, filtering the fonts by examining these bitmaps and a
|
||||
few other font attributes. The script of the character is converted
|
||||
to the corresponding bits in USB, and a font that has any of these
|
||||
bits set is deemed as a candidate; see font_supported_scripts, which
|
||||
is called by font_matches_spec. The problem with this strategy is
|
||||
twofold:
|
||||
|
||||
- Some Unicode blocks have no USB bits. For the scripts
|
||||
corresponding to those blocks we use a small cache of fonts known
|
||||
to support those scripts. This cache is calculated once, and need
|
||||
not be recalculated as long as no fonts are installed or deleted
|
||||
(it can be saved in your init file and reused for the following
|
||||
sessions). See the function w32-find-non-USB-fonts. Note that
|
||||
for that function to work well, 'script-representative-chars'
|
||||
should include the important characters for each script which has
|
||||
no USB bits defined.
|
||||
|
||||
- Some fonts claim support for a block, but don't support it well.
|
||||
Other fonts support some blocks very well, but don't set the
|
||||
corresponding USB bits for the blocks. For these we use some
|
||||
heuristics:
|
||||
|
||||
. For a few fonts that claim coverage, but don't provide it, we
|
||||
either recognize them by name and reject their false claims, or
|
||||
let users set face-ignored-fonts to ignore those fonts.
|
||||
|
||||
. For fonts that support some blocks very well, but don't set
|
||||
their USB bits, we examine the CSB bits instead. This is
|
||||
particularly important for some CJK fonts with good support in
|
||||
the SIP area: they only set the SIP bit (bit 57) in the USB. We
|
||||
consider those as candidates for CJK scripts ('han', 'kana',
|
||||
etc.) if the CSB bits are set for the corresponding CJK
|
||||
codepages.
|
||||
|
||||
Eventually, some characters could still appear as "tofu" (a box with
|
||||
the character's hex codepoint), even though a font might be available
|
||||
on the system which supports the character. This is because the
|
||||
above strategy, with all its heuristics and tricks, sometimes fails.
|
||||
For example, it could fail if the system has several fonts installed
|
||||
whose coverage of some blocks is incomplete -- Emacs could select
|
||||
such a font based on its USB bits, and realize the font has no glyph
|
||||
for a character only when it's too late. This happens because when
|
||||
several fonts claim coverage of the same Unicode block, Emacs on
|
||||
Windows has no way of preferring one over the other, if they all
|
||||
support the same values of size, weight, and slant. So Emacs usually
|
||||
selects the first such candidate, which could lack glyphs for the
|
||||
characters Emacs needs to display. Since we avoid naming non-free
|
||||
Windows fonts in Emacs's sources, this cannot be fixed in the
|
||||
default fontset setup provided by Emacs: we cannot arrange for the
|
||||
"good" fonts to be used in all such cases, because that would mean
|
||||
naming those fonts. The solution for these issues is to customize the
|
||||
default fontset using set-fontset-font, to force Emacs to use a font
|
||||
known to support some characters.
|
||||
|
||||
One other Windows-specific issue is the fact that some Windows fonts
|
||||
have hyphens in their names. Emacs generally follows the XLFD
|
||||
specifications, where a hyphen is used as separator between segments
|
||||
of a font spec. There are a few places in the code in font.c where
|
||||
Emacs handles such font names specially, and it currently knows about
|
||||
font names documented for Windows versions up to and including 11.
|
||||
See this page for the latest update:
|
||||
|
||||
https://learn.microsoft.com/en-us/typography/fonts/windows_11_font_list
|
||||
|
||||
If more fonts are added to Windows that have hyphens in their names,
|
||||
the code in font.c will need to be updated. */
|
||||
|
||||
/* Internal implementation of w32font_list.
|
||||
Additional parameter opentype_only restricts the returned fonts to
|
||||
opentype fonts, which can be used with the Uniscribe backend. */
|
||||
@ -1455,22 +1542,34 @@ static int
|
||||
w32font_coverage_ok (FONTSIGNATURE * coverage, BYTE charset)
|
||||
{
|
||||
DWORD subrange1 = coverage->fsUsb[1];
|
||||
DWORD codepages0 = coverage->fsCsb[0];
|
||||
|
||||
#define SUBRANGE1_HAN_MASK 0x08000000
|
||||
#define SUBRANGE1_HANGEUL_MASK 0x01000000
|
||||
#define SUBRANGE1_JAPANESE_MASK (0x00060000 | SUBRANGE1_HAN_MASK)
|
||||
#define SUBRANGE1_SIP_MASK 0x02000000
|
||||
|
||||
/* We consider the coverage to be OK if either (a) subrange1 has the
|
||||
bits set that correspond to CHARSET, or (b) subrange1 indicates SIP
|
||||
support and codepages0 has one or more bits set corresponding to
|
||||
CHARSET. */
|
||||
if (charset == GB2312_CHARSET || charset == CHINESEBIG5_CHARSET)
|
||||
{
|
||||
return (subrange1 & SUBRANGE1_HAN_MASK) == SUBRANGE1_HAN_MASK;
|
||||
return ((subrange1 & SUBRANGE1_HAN_MASK) == SUBRANGE1_HAN_MASK
|
||||
|| ((subrange1 & SUBRANGE1_SIP_MASK) != 0
|
||||
&& (codepages0 & CSB_CHINESE) != 0));
|
||||
}
|
||||
else if (charset == SHIFTJIS_CHARSET)
|
||||
{
|
||||
return (subrange1 & SUBRANGE1_JAPANESE_MASK) == SUBRANGE1_JAPANESE_MASK;
|
||||
return ((subrange1 & SUBRANGE1_JAPANESE_MASK) == SUBRANGE1_JAPANESE_MASK
|
||||
|| ((subrange1 & SUBRANGE1_SIP_MASK) != 0
|
||||
&& (codepages0 & CSB_JAPANESE) != 0));
|
||||
}
|
||||
else if (charset == HANGEUL_CHARSET)
|
||||
{
|
||||
return (subrange1 & SUBRANGE1_HANGEUL_MASK) == SUBRANGE1_HANGEUL_MASK;
|
||||
return ((subrange1 & SUBRANGE1_HANGEUL_MASK) == SUBRANGE1_HANGEUL_MASK
|
||||
|| ((subrange1 & SUBRANGE1_SIP_MASK) != 0
|
||||
&& (codepages0 & CSB_KOREAN) != 0));
|
||||
}
|
||||
|
||||
return 1;
|
||||
@ -1620,11 +1719,18 @@ add_font_entity_to_list (ENUMLOGFONTEX *logical_font,
|
||||
}
|
||||
/* unicode-sip fonts must contain characters in Unicode plane 2,
|
||||
so look for bit 57 (surrogates) in the Unicode subranges, plus
|
||||
the bits for CJK ranges that include those characters. */
|
||||
the bits for CJK ranges that include those characters or CJK
|
||||
bits in code-page bit fields. */
|
||||
else if (EQ (spec_charset, Qunicode_sip))
|
||||
{
|
||||
if (!(physical_font->ntmFontSig.fsUsb[1] & 0x02000000)
|
||||
|| !(physical_font->ntmFontSig.fsUsb[1] & 0x28000000))
|
||||
if (!((physical_font->ntmFontSig.fsUsb[1] & 0x02000000)
|
||||
&& ((physical_font->ntmFontSig.fsUsb[1] & 0x28000000)
|
||||
/* Some CJK fonts with very good coverage of SIP
|
||||
characters have only the 0x02000000 bit in USB
|
||||
set, so we allow them if their code-page bits
|
||||
indicate support for CJK character sets. */
|
||||
|| (physical_font->ntmFontSig.fsCsb[0]
|
||||
& (CSB_CHINESE | CSB_JAPANESE | CSB_KOREAN)))))
|
||||
return 1;
|
||||
}
|
||||
|
||||
@ -2328,7 +2434,18 @@ font_supported_scripts (FONTSIGNATURE * sig)
|
||||
SUBRANGE (53, Qphags_pa);
|
||||
/* 54: Enclosed CJK letters and months, 55: CJK Compatibility. */
|
||||
SUBRANGE (56, Qhangul);
|
||||
/* 57: Surrogates. */
|
||||
/* 57: Non-BMP. Processed specially: Several fonts that support CJK
|
||||
Ideographs Extensions and other extensions, set just this bit and
|
||||
Latin, and nothing else. */
|
||||
if (subranges[57 / 32] & (1U << (57 % 32)))
|
||||
{
|
||||
if ((sig->fsCsb[0] & CSB_CHINESE))
|
||||
supported = Fcons (Qhan, supported);
|
||||
if ((sig->fsCsb[0] & CSB_JAPANESE))
|
||||
supported = Fcons (Qkana, supported);
|
||||
if ((sig->fsCsb[0] & CSB_KOREAN))
|
||||
supported = Fcons (Qhangul, supported);
|
||||
}
|
||||
SUBRANGE (58, Qphoenician);
|
||||
SUBRANGE (59, Qhan); /* There are others, but this is the main one. */
|
||||
SUBRANGE (59, Qideographic_description); /* Windows lumps this in. */
|
||||
@ -2385,7 +2502,7 @@ font_supported_scripts (FONTSIGNATURE * sig)
|
||||
SUBRANGE (97, Qglagolitic);
|
||||
SUBRANGE (98, Qtifinagh);
|
||||
/* 99: Yijing Hexagrams. */
|
||||
SUBRANGE (99, Qhan);
|
||||
SUBRANGE (99, Qcjk_misc);
|
||||
SUBRANGE (100, Qsyloti_nagri);
|
||||
SUBRANGE (101, Qlinear_b);
|
||||
SUBRANGE (101, Qaegean_number);
|
||||
|
@ -895,7 +895,7 @@ uniscribe_check_otf_1 (HDC context, Lisp_Object script, Lisp_Object lang,
|
||||
Lisp_Object features[2], int *retval)
|
||||
{
|
||||
SCRIPT_CACHE cache = NULL;
|
||||
OPENTYPE_TAG tags[32], script_tag, lang_tag;
|
||||
OPENTYPE_TAG tags[128], script_tag, lang_tag;
|
||||
int max_tags = ARRAYELTS (tags);
|
||||
int ntags, i, ret = 0;
|
||||
HRESULT rslt;
|
||||
|
Loading…
Reference in New Issue
Block a user