bool alternate_subst);
*/
+/* Notes about the way fonts are found on MS-Windows when we have a
+ character unsupported by the default font.
+
+ Since we don't use Fontconfig on MS-Windows, we cannot efficiently
+ search for fonts which support certain characters, because Windows
+ doesn't store this information anywhere, and we can only know whether
+ a font supports some character if we actually open the font, which is
+ expensive and slow. Instead, we rely on font information Windows
+ exposes to the API we use to enumerate available fonts,
+ EnumFontFamiliesEx. This information includes two bitmapped attributes:
+
+ USB (which stands for Unicode Subset Bitfields) -- this is an array
+ of 4 32-bit values, 128 bits in total, where each bit
+ corresponds to some block (sometimes several related blocks) of
+ Unicode codepoints which the font claims to support.
+ CSB (which stands for Codepage Bitfields) -- this is an array of 2
+ 32-bit values (64 bits), where each bit corresponds to some
+ codepage whose characters the font claims to support.
+
+ When Emacs needs to find a font for a character, it enumerates the
+ available fonts, filtering the fonts by examining these bitmaps and a
+ few other font attributes. The script of the character is converted
+ to the corresponding bits in USB, and a font that has any of these
+ bits set is deemed as a candidate; see font_supported_scripts, which
+ is called by font_matches_spec. The problem with this strategy is
+ twofold:
+
+ - Some Unicode blocks have no USB bits. For the scripts
+ corresponding to those blocks we use a small cache of fonts known
+ to support those scripts. This cache is calculated once, and need
+ not be recalculated as long as no fonts are installed or deleted
+ (it can be saved in your init file and reused for the following
+ sessions). See the function w32-find-non-USB-fonts. Note that
+ for that function to work well, 'script-representative-chars'
+ should include the important characters for each script which has
+ no USB bits defined.
+
+ - Some fonts claim support for a block, but don't support it well.
+ Other fonts support some blocks very well, but don't set the
+ corresponding USB bits for the blocks. For these we use some
+ heuristics:
+
+ . For a few fonts that claim coverage, but don't provide it, we
+ either recognize them by name and reject their false claims, or
+ let users set face-ignored-fonts to ignore those fonts.
+
+ . For fonts that support some blocks very well, but don't set
+ their USB bits, we examine the CSB bits instead. This is
+ particularly important for some CJK fonts with good support in
+ the SIP area: they only set the SIP bit (bit 57) in the USB. We
+ consider those as candidates for CJK scripts ('han', 'kana',
+ etc.) if the CSB bits are set for the corresponding CJK
+ codepages.
+
+ Eventually, some characters could still appear as "tofu" (a box with
+ the character's hex codepoint), even though a font might be available
+ on the system which supports the character. This is because the
+ above strategy, with all its heuristics and tricks, sometimes fails.
+ For example, it could fail if the system has several fonts installed
+ whose coverage of some blocks is incomplete -- Emacs could select
+ such a font based on its USB bits, and realize the font has no glyph
+ for a character only when it's too late. This happens because when
+ several fonts claim coverage of the same Unicode block, Emacs on
+ Windows has no way of preferring one over the other, if they all
+ support the same values of size, weight, and slant. So Emacs usually
+ selects the first such candidate, which could lack glyphs for the
+ characters Emacs needs to display. Since we avoid naming non-free
+ Windows fonts in Emacs's sources, this cannot be fixed in the
+ default fontset setup provided by Emacs: we cannot arrange for the
+ "good" fonts to be used in all such cases, because that would mean
+ naming those fonts. The solution for these issues is to customize the
+ default fontset using set-fontset-font, to force Emacs to use a font
+ known to support some characters.
+
+ One other Windows-specific issue is the fact that some Windows fonts
+ have hyphens in their names. Emacs generally follows the XLFD
+ specifications, where a hyphen is used as separator between segments
+ of a font spec. There are a few places in the code in font.c where
+ Emacs handles such font names specially, and it currently knows about
+ font names documented for Windows versions up to and including 11.
+ See this page for the latest update:
+
+ https://learn.microsoft.com/en-us/typography/fonts/windows_11_font_list
+
+ If more fonts are added to Windows that have hyphens in their names,
+ the code in font.c will need to be updated. */
+
/* Internal implementation of w32font_list.
Additional parameter opentype_only restricts the returned fonts to
opentype fonts, which can be used with the Uniscribe backend. */
w32font_coverage_ok (FONTSIGNATURE * coverage, BYTE charset)
{
DWORD subrange1 = coverage->fsUsb[1];
+ DWORD codepages0 = coverage->fsCsb[0];
#define SUBRANGE1_HAN_MASK 0x08000000
#define SUBRANGE1_HANGEUL_MASK 0x01000000
#define SUBRANGE1_JAPANESE_MASK (0x00060000 | SUBRANGE1_HAN_MASK)
+#define SUBRANGE1_SIP_MASK 0x02000000
+/* We consider the coverage to be OK if either (a) subrange1 has the
+ bits set that correspond to CHARSET, or (b) subrange1 indicates SIP
+ support and codepages0 has one or more bits set corresponding to
+ CHARSET. */
if (charset == GB2312_CHARSET || charset == CHINESEBIG5_CHARSET)
{
- return (subrange1 & SUBRANGE1_HAN_MASK) == SUBRANGE1_HAN_MASK;
+ return ((subrange1 & SUBRANGE1_HAN_MASK) == SUBRANGE1_HAN_MASK
+ || ((subrange1 & SUBRANGE1_SIP_MASK) != 0
+ && (codepages0 & CSB_CHINESE) != 0));
}
else if (charset == SHIFTJIS_CHARSET)
{
- return (subrange1 & SUBRANGE1_JAPANESE_MASK) == SUBRANGE1_JAPANESE_MASK;
+ return ((subrange1 & SUBRANGE1_JAPANESE_MASK) == SUBRANGE1_JAPANESE_MASK
+ || ((subrange1 & SUBRANGE1_SIP_MASK) != 0
+ && (codepages0 & CSB_JAPANESE) != 0));
}
else if (charset == HANGEUL_CHARSET)
{
- return (subrange1 & SUBRANGE1_HANGEUL_MASK) == SUBRANGE1_HANGEUL_MASK;
+ return ((subrange1 & SUBRANGE1_HANGEUL_MASK) == SUBRANGE1_HANGEUL_MASK
+ || ((subrange1 & SUBRANGE1_SIP_MASK) != 0
+ && (codepages0 & CSB_KOREAN) != 0));
}
return 1;
}
/* unicode-sip fonts must contain characters in Unicode plane 2.
so look for bit 57 (surrogates) in the Unicode subranges, plus
- the bits for CJK ranges that include those characters. */
+ the bits for CJK ranges that include those characters or CJK
+ bits in code-page bit fields. */
else if (EQ (spec_charset, Qunicode_sip))
{
- if (!(physical_font->ntmFontSig.fsUsb[1] & 0x02000000)
- || !(physical_font->ntmFontSig.fsUsb[1] & 0x28000000))
+ if (!((physical_font->ntmFontSig.fsUsb[1] & 0x02000000)
+ && ((physical_font->ntmFontSig.fsUsb[1] & 0x28000000)
+ /* Some CJK fonts with very good coverage of SIP
+ characters have only the 0x02000000 bit in USB
+ set, so we allow them if their code-page bits
+ indicate support for CJK character sets. */
+ || (physical_font->ntmFontSig.fsCsb[0]
+ & (CSB_CHINESE | CSB_JAPANESE | CSB_KOREAN)))))
return 1;
}
SUBRANGE (53, Qphags_pa);
/* 54: Enclosed CJK letters and months, 55: CJK Compatibility. */
SUBRANGE (56, Qhangul);
- /* 57: Surrogates. */
+ /* 57: Non-BMP. Processed specially: Several fonts that support CJK
+ Ideographs Extensions and other extensions set just this bit and
+ Latin, and nothing else. */
+ if (subranges[57 / 32] & (1U << (57 % 32)))
+ {
+ if ((sig->fsCsb[0] & CSB_CHINESE))
+ supported = Fcons (Qhan, supported);
+ if ((sig->fsCsb[0] & CSB_JAPANESE))
+ supported = Fcons (Qkana, supported);
+ if ((sig->fsCsb[0] & CSB_KOREAN))
+ supported = Fcons (Qhangul, supported);
+ }
SUBRANGE (58, Qphoenician);
SUBRANGE (59, Qhan); /* There are others, but this is the main one. */
SUBRANGE (59, Qideographic_description); /* Windows lumps this in. */
SUBRANGE (97, Qglagolitic);
SUBRANGE (98, Qtifinagh);
/* 99: Yijing Hexagrams. */
- SUBRANGE (99, Qhan);
+ SUBRANGE (99, Qcjk_misc);
SUBRANGE (100, Qsyloti_nagri);
SUBRANGE (101, Qlinear_b);
SUBRANGE (101, Qaegean_number);