From 8054935c6130c88152387f8a35d436704dbde780 Mon Sep 17 00:00:00 2001 From: Eli Zaretskii Date: Tue, 17 Sep 2019 14:19:54 +0300 Subject: [PATCH] Improve font lookup on MS-Windows * src/w32font.c (struct font_callback_data): New member 'known_fonts'. (w32font_list_internal, w32font_match_internal): Set up match_data.known_fonts if the font spec includes :script that names one of the non-USB scripts. (add_font_entity_to_list): If font_matches_spec returns zero for a font, and we have some fonts in match_data->known_fonts, consider the font to be a match if it is named in known_fonts. (font_supported_scripts): Update the Unicode Subranges. In particular, map bit 74 to 'burmese', as this is the name Emacs uses, not 'myanmar'. Add a list of scripts that have no USBs defined for them. (syms_of_w32font) : New symbols. * lisp/term/w32-win.el (w32-no-usb-subranges): New defconst. (w32--filter-USB-scripts, w32-find-non-USB-fonts): New functions. (w32-non-USB-fonts): New defvar. * lisp/international/fontset.el (setup-default-fontset): Add more scripts to automatic setup by representative characters. * doc/emacs/msdos.texi (Windows Fonts): Document 'w32-find-non-USB-fonts' and 'w32-non-USB-fonts'. * etc/NEWS: Mention 'w32-find-non-USB-fonts' and 'w32-non-USB-fonts'. --- doc/emacs/msdos.texi | 65 +++++++++++------ etc/NEWS | 8 +++ lisp/international/fontset.el | 9 +++ lisp/term/w32-win.el | 130 ++++++++++++++++++++++++++++++++++ src/w32font.c | 127 ++++++++++++++++++++++++++++----- 5 files changed, 300 insertions(+), 39 deletions(-) diff --git a/doc/emacs/msdos.texi b/doc/emacs/msdos.texi index 6933130d5bd..5377df91d10 100644 --- a/doc/emacs/msdos.texi +++ b/doc/emacs/msdos.texi @@ -1025,7 +1025,7 @@ symbols, as in @code{(uniscribe)} or @w{@code{(harfbuzz uniscribe gdi)}}. @cindex font properties (MS Windows) @noindent -Optional properties common to all font backends on MS-Windows are: +Optional font properties supported on MS-Windows are: @table @code @@ -1078,40 +1078,61 @@ Not used on Windows, but for informational purposes and to prevent problems with code that expects it to be set, is set internally to @code{raster} for bitmapped fonts, @code{outline} for scalable fonts, or @code{unknown} if the type cannot be determined as one of those. -@end table - -@cindex font properties (MS Windows gdi backend) -Options specific to @code{GDI} fonts: - -@table @code @cindex font scripts (MS Windows) @cindex font Unicode subranges (MS Windows) @item script Specifies a Unicode subrange the font should support. -The following scripts are recognized on Windows: @code{latin}, @code{greek}, -@code{coptic}, @code{cyrillic}, @code{armenian}, @code{hebrew}, @code{arabic}, -@code{syriac}, @code{nko}, @code{thaana}, @code{devanagari}, @code{bengali}, -@code{gurmukhi}, @code{gujarati}, @code{oriya}, @code{tamil}, @code{telugu}, -@code{kannada}, @code{malayam}, @code{sinhala}, @code{thai}, @code{lao}, -@code{tibetan}, @code{myanmar}, @code{georgian}, @code{hangul}, -@code{ethiopic}, @code{cherokee}, @code{canadian-aboriginal}, @code{ogham}, -@code{runic}, @code{khmer}, @code{mongolian}, @code{symbol}, @code{braille}, -@code{han}, @code{ideographic-description}, @code{cjk-misc}, @code{kana}, -@code{bopomofo}, @code{kanbun}, @code{yi}, @code{byzantine-musical-symbol}, -@code{musical-symbol}, and @code{mathematical}. +All the scripts known to Emacs (which generally means all the scripts +defined by the latest Unicode Standard) are recognized on MS-Windows. +However, @code{GDI} fonts support only a subset of the known scripts: +@code{greek}, @code{hangul}, @code{kana}, @code{kanbun}, +@code{bopomofo}, @code{tibetan}, @code{yi}, @code{mongolian}, +@code{hebrew}, @code{arabic}, and @code{thai}. @cindex font antialiasing (MS Windows) +@cindex Cleartype @item antialias Specifies the antialiasing method. The value @code{none} means no antialiasing, @code{standard} means use standard antialiasing, -@code{subpixel} means use subpixel antialiasing (known as Cleartype on -Windows), and @code{natural} means use subpixel antialiasing with -adjusted spacing between letters. If unspecified, the font will use -the system default antialiasing. +@code{subpixel} means use subpixel antialiasing (known as +@dfn{Cleartype} on Windows), and @code{natural} means use subpixel +antialiasing with adjusted spacing between letters. If unspecified, +the font will use the system default antialiasing. @end table +@cindex font lookup, MS-Windows +@findex w32-find-non-USB-fonts +The method used by Emacs on MS-Windows to look for fonts suitable for +displaying a given non-@sc{ascii} character might fail for some rare +scripts, specifically those added by Unicode relatively recently, even +if you have fonts installed on your system that support those scripts. +That is because these scripts have no Unicode Subrange Bits (USBs) +defined for them in the information used by Emacs on MS-Windows to +look for fonts. You can use the @code{w32-find-non-USB-fonts} +function to overcome these problems. It needs to be run once at the +beginning of the Emacs session, and again if you install new fonts. +You can add the following line to your init file to have this function +run every time you start Emacs: + +@lisp +(w32-find-non-USB-fonts) +@end lisp + +@noindent +@vindex w32-non-USB-fonts +Alternatively, you can run this function manually via @kbd{M-:} +(@pxref{Lisp Eval}) at any time. On a system that has many fonts +installed, running @code{w32-find-non-USB-fonts} might take a couple +of seconds; if you consider that to be too long to be run during +startup, and if you install new fonts only rarely, run this function +once via @kbd{M-:}, and then assign the value it returns, if +non-@code{nil}, to the variable @code{w32-non-USB-fonts} in your init +file. (If the function returns @code{nil}, you have no fonts +installed that can display characters from the scripts which need this +facility.) + @node Windows Misc @section Miscellaneous Windows-specific features diff --git a/etc/NEWS b/etc/NEWS index 2db5db3978a..693a690f17a 100644 --- a/etc/NEWS +++ b/etc/NEWS @@ -2703,6 +2703,14 @@ corresponding encoding, instead of using 'w32-ansi-code-page'. Experience shows that compacting font caches causes more trouble on MS-Windows than it helps. ++++ +** Font lookup on MS-Windows was improved to support rare scripts. +To activate the improvement, run the new function +'w32-find-non-USB-fonts' once per Emacs session, or assign to the new +variable 'w32-non-USB-fonts' the list of scripts and the corresponding +fonts. See the documentation of this function and variable in the +Emacs manual for more details. + +++ ** On NS the behaviour of drag and drop can now be modified by use of modifier keys in line with Apples guidelines. This makes the drag and diff --git a/lisp/international/fontset.el b/lisp/international/fontset.el index f3ab81633dc..1debec7f469 100644 --- a/lisp/international/fontset.el +++ b/lisp/international/fontset.el @@ -719,6 +719,7 @@ symbol braille yi + tai-viet aegean-number ancient-greek-number ancient-symbol @@ -731,18 +732,26 @@ deseret shavian osmanya + osage cypriot-syllabary phoenician lydian kharoshthi + manichaean + elymaic + makasar cuneiform-numbers-and-punctuation cuneiform egyptian + bassa-vah + pahawh-hmong + medefaidrin byzantine-musical-symbol musical-symbol ancient-greek-musical-notation tai-xuan-jing-symbol counting-rod-numeral + adlam mahjong-tile domino-tile)) (set-fontset-font "fontset-default" diff --git a/lisp/term/w32-win.el b/lisp/term/w32-win.el index 198182fca72..e2c019fc548 100644 --- a/lisp/term/w32-win.el +++ b/lisp/term/w32-win.el @@ -485,6 +485,136 @@ numbers, and the build number." That includes all Windows systems except for 9X/Me." (getenv "SystemRoot")) +;; The value of the following variable was calculated using the table in +;; https://docs.microsoft.com/windows/desktop/Intl/unicode-subset-bitfields, +;; by looking for Unicode subranges for which no USB bits are defined. +(defconst w32-no-usb-subranges + '((#x000800 . #x0008ff) + (#x0018b0 . #x0018ff) + (#x001a20 . #x001aff) + (#x001bc0 . #x001bff) + (#x001c80 . #x001cff) + (#x002fe0 . #x002fef) + (#x00a4d0 . #x00a4ff) + (#x00a6a0 . #x00a6ff) + (#x00a830 . #x00a83f) + (#x00a8e0 . #x00a8ff) + (#x00a960 . #x00a9ff) + (#x00aa60 . #x00abff) + (#x00d7b0 . #x00d7ff) + (#x010200 . #x01027f) + (#x0102e0 . #x0102ff) + (#x010350 . #x01037f) + (#x0103e0 . #x0103ff) + (#x0104b0 . #x0107ff) + (#x010840 . #x0108ff) + (#x010940 . #x0109ff) + (#x010a60 . #x011fff) + (#x012480 . #x01cfff) + (#x01d250 . #x01d2ff) + (#x01d380 . #x01d3ff) + (#x01d800 . #x01efff) + (#x01f0a0 . #x01ffff) + (#x02a6e0 . #x02f7ff) + (#x02fa20 . #x0dffff) + (#x0e0080 . #x0e00ff) + (#x0e01f0 . #x0fefff)) + "List of Unicode subranges whose support cannot be announced by a font. +The FONTSIGNATURE structure reported by MS-Windows for a font +includes 123 Unicode Subset bits (USBs) to identify subranges of +the Unicode codepoint space supported by the font. Since the +number of bits is fixed, not every Unicode block can have a +corresponding USB bit; fonts that support characters from blocks +that have no USBs cannot communicate their support to Emacs, +unless the font is opened and physically tested for glyphs for +characters from these blocks.") + +(defun w32--filter-USB-scripts () + "Filter USB scripts out of `script-representative-chars'." + (let (val) + (dolist (elt script-representative-chars) + (let ((subranges w32-no-usb-subranges) + (chars (cdr elt)) + ch found subrange) + (while (and (consp chars) (not found)) + (setq ch (car chars) + chars (cdr chars)) + (while (and (consp subranges) (not found)) + (setq subrange (car subranges) + subranges (cdr subranges)) + (when (and (>= ch (car subrange)) (<= ch (cdr subrange))) + (setq found t) + (push elt val)))))) + (nreverse val))) + +(defvar w32-non-USB-fonts nil + "Alist of script symbols and corresponding fonts. +Each element of the alist has the form (SCRIPT FONTS...), where +SCRIPT is a symbol of a script and FONTS are one or more fonts installed +on the system that can display SCRIPT's characters. FONTS are +specified as symbols. +Only scripts that have no corresponding Unicode Subset Bits (USBs) can +be found in this alist. +This alist is used by w32font.c when it looks for fonts that can display +characters from scripts for which no USBs are defined.") + +(defun w32-find-non-USB-fonts (&optional frame size) + "Compute the value of `w32-non-USB-fonts' for specified SIZE and FRAME. +FRAME defaults to the selected frame. +SIZE is the required font size and defaults to the nominal size of the +default font on FRAME, or its best approximation." + (let* ((inhibit-compacting-font-caches t) + (all-fonts + (delete-dups + (x-list-fonts "-*-*-medium-r-normal-*-*-*-*-*-*-iso10646-1" + 'default frame))) + val) + (mapc (function + (lambda (script-desc) + (let* ((script (car script-desc)) + (script-chars (vconcat (cdr script-desc))) + (nchars (length script-chars)) + (fntlist all-fonts) + (entry (list script)) + fspec ffont font-obj glyphs idx) + ;; For each font in FNTLIST, determine whether it + ;; supports the representative character(s) of any + ;; scripts that have no USBs defined for it. + (dolist (fnt fntlist) + (setq fspec (ignore-errors (font-spec :name fnt))) + (if fspec + (setq ffont (find-font fspec frame))) + (when ffont + (setq font-obj + (open-font ffont size frame)) + ;; Ignore fonts for which open-font returns nil: + ;; they are buggy fonts that we cannot use anyway. + (setq glyphs + (if font-obj + (font-get-glyphs font-obj + 0 nchars script-chars) + '[nil])) + ;; Does this font support ALL of the script's + ;; representative characters? + (setq idx 0) + (while (and (< idx nchars) (not (null (aref glyphs idx)))) + (setq idx (1+ idx))) + (if (= idx nchars) + ;; It does; add this font to the script's entry in alist. + (let ((font-family (font-get font-obj :family))) + ;; Unifont is an ugly font, and it is already + ;; present in the default fontset. + (unless (string= (downcase (symbol-name font-family)) + "unifont") + (push font-family entry)))))) + (if (> (length entry) 1) + (push (nreverse entry) val))))) + (w32--filter-USB-scripts)) + ;; We've opened a lot of fonts, so clear the font caches to free + ;; some memory. + (clear-font-cache) + (and val (setq w32-non-USB-fonts val)))) + (provide 'w32-win) (provide 'term/w32-win) diff --git a/src/w32font.c b/src/w32font.c index 14d49b24d9b..9a334717c12 100644 --- a/src/w32font.c +++ b/src/w32font.c @@ -90,6 +90,8 @@ struct font_callback_data Lisp_Object orig_font_spec; /* The frame the font is being loaded on. */ Lisp_Object frame; + /* Fonts known to support the font spec, or nil if none. */ + Lisp_Object known_fonts; /* The list to add matches to. */ Lisp_Object list; /* Whether to match only opentype fonts. */ @@ -841,6 +843,25 @@ w32font_list_internal (struct frame *f, Lisp_Object font_spec, match_data.opentype_only = opentype_only; if (opentype_only) match_data.pattern.lfOutPrecision = OUT_OUTLINE_PRECIS; + match_data.known_fonts = Qnil; + Lisp_Object vw32_non_USB_fonts = Fsymbol_value (Qw32_non_USB_fonts), val; + if (CONSP (vw32_non_USB_fonts)) + { + Lisp_Object extra; + for (extra = AREF (font_spec, FONT_EXTRA_INDEX); + CONSP (extra); extra = XCDR (extra)) + { + Lisp_Object tem = XCAR (extra); + if (CONSP (tem) + && EQ (XCAR (tem), QCscript) + && SYMBOLP (XCDR (tem)) + && !NILP (val = assq_no_quit (XCDR (tem), vw32_non_USB_fonts))) + { + match_data.known_fonts = XCDR (val); + break; + } + } + } if (match_data.pattern.lfFaceName[0] == '\0') { @@ -890,6 +911,26 @@ w32font_match_internal (struct frame *f, Lisp_Object font_spec, if (opentype_only) match_data.pattern.lfOutPrecision = OUT_OUTLINE_PRECIS; + match_data.known_fonts = Qnil; + Lisp_Object vw32_non_USB_fonts = Fsymbol_value (Qw32_non_USB_fonts), val; + if (CONSP (vw32_non_USB_fonts)) + { + Lisp_Object extra; + for (extra = AREF (font_spec, FONT_EXTRA_INDEX); + CONSP (extra); extra = XCDR (extra)) + { + Lisp_Object tem = XCAR (extra); + if (CONSP (tem) + && EQ (XCAR (tem), QCscript) + && SYMBOLP (XCDR (tem)) + && !NILP (val = assq_no_quit (XCDR (tem), vw32_non_USB_fonts))) + { + match_data.known_fonts = XCDR (val); + break; + } + } + } + /* Prevent quitting while EnumFontFamiliesEx runs and conses the list it will return. That's because get_frame_dc acquires the critical section, so we cannot quit before we release it in @@ -1511,9 +1552,13 @@ add_font_entity_to_list (ENUMLOGFONTEX *logical_font, /* Ensure a match. */ if (!logfonts_match (&logical_font->elfLogFont, &match_data->pattern) - || !font_matches_spec (font_type, physical_font, - match_data->orig_font_spec, backend, - &logical_font->elfLogFont) + || !(font_matches_spec (font_type, physical_font, + match_data->orig_font_spec, backend, + &logical_font->elfLogFont) + || (!NILP (match_data->known_fonts) + && memq_no_quit + (intern_font_name (logical_font->elfLogFont.lfFaceName), + match_data->known_fonts))) || !w32font_coverage_ok (&physical_font->ntmFontSig, match_data->pattern.lfCharSet)) return 1; @@ -2214,8 +2259,9 @@ font_supported_scripts (FONTSIGNATURE * sig) || (subranges[2] & (mask2)) || (subranges[3] & (mask3))) \ supported = Fcons ((sym), supported) - SUBRANGE (0, Qlatin); - /* 1: Latin-1 supplement, 2: Latin Extended A, 3: Latin Extended B. */ + /* 0: ASCII (a.k.a. "Basic Latin"), + 1: Latin-1 supplement, 2: Latin Extended A, 3: Latin Extended B, + 29: Latin Extended Additional. */ /* Most fonts that support Latin will have good coverage of the Extended blocks, so in practice marking them below is not really needed, or useful: if a font claims support for, say, Latin @@ -2224,12 +2270,11 @@ font_supported_scripts (FONTSIGNATURE * sig) fontset to display those few characters. But we mark these subranges here anyway, for the marginal use cases where they might make a difference. */ - SUBRANGE (1, Qlatin); - SUBRANGE (2, Qlatin); - SUBRANGE (3, Qlatin); + MASK_ANY (0x2000000F, 0, 0, 0, Qlatin); SUBRANGE (4, Qphonetic); /* 5: Spacing and tone modifiers, 6: Combining Diacritical Marks. */ - SUBRANGE (7, Qgreek); + /* 7: Greek and Coptic, 30: Greek Extended. */ + MASK_ANY (0x40000080, 0, 0, 0, Qgreek); SUBRANGE (8, Qcoptic); SUBRANGE (9, Qcyrillic); SUBRANGE (10, Qarmenian); @@ -2246,7 +2291,7 @@ font_supported_scripts (FONTSIGNATURE * sig) SUBRANGE (16, Qbengali); SUBRANGE (17, Qgurmukhi); SUBRANGE (18, Qgujarati); - SUBRANGE (19, Qoriya); + SUBRANGE (19, Qoriya); /* a.k.a. "Odia" */ SUBRANGE (20, Qtamil); SUBRANGE (21, Qtelugu); SUBRANGE (22, Qkannada); @@ -2259,8 +2304,7 @@ font_supported_scripts (FONTSIGNATURE * sig) /* 29: Latin Extended, 30: Greek Extended -- covered above. */ /* 31: Supplemental Punctuation -- most probably be masked by Courier New, so fontset customization is needed. */ - SUBRANGE (31, Qsymbol); - /* 32-47: Symbols (defined below). */ + /* 31-47: Symbols (defined below). */ SUBRANGE (48, Qcjk_misc); /* Match either 49: katakana or 50: hiragana for kana. */ MASK_ANY (0, 0x00060000, 0, 0, Qkana); @@ -2286,7 +2330,7 @@ font_supported_scripts (FONTSIGNATURE * sig) SUBRANGE (71, Qsyriac); SUBRANGE (72, Qthaana); SUBRANGE (73, Qsinhala); - SUBRANGE (74, Qmyanmar); + SUBRANGE (74, Qburmese); /* a.k.a. "Myanmar" */ SUBRANGE (75, Qethiopic); SUBRANGE (76, Qcherokee); SUBRANGE (77, Qcanadian_aboriginal); @@ -2329,6 +2373,7 @@ font_supported_scripts (FONTSIGNATURE * sig) SUBRANGE (99, Qhan); SUBRANGE (100, Qsyloti_nagri); SUBRANGE (101, Qlinear_b); + SUBRANGE (101, Qaegean_number); SUBRANGE (102, Qancient_greek_number); SUBRANGE (103, Qugaritic); SUBRANGE (104, Qold_persian); @@ -2338,6 +2383,7 @@ font_supported_scripts (FONTSIGNATURE * sig) SUBRANGE (108, Qkharoshthi); SUBRANGE (109, Qtai_xuan_jing_symbol); SUBRANGE (110, Qcuneiform); + SUBRANGE (111, Qcuneiform_numbers_and_punctuation); SUBRANGE (111, Qcounting_rod_numeral); SUBRANGE (112, Qsundanese); SUBRANGE (113, Qlepcha); @@ -2357,9 +2403,52 @@ font_supported_scripts (FONTSIGNATURE * sig) /* There isn't really a main symbol range, so include symbol if any relevant range is set. */ - MASK_ANY (0x8000000, 0x0000FFFF, 0, 0, Qsymbol); - - /* Missing: Tai Viet (U+AA80-U+AADF). */ + MASK_ANY (0x80000000, 0x0000FFFF, 0, 0, Qsymbol); + + /* Missing: + Tai Viet + Old Permic + Palmyrene + Nabatean + Manichean + Hanifi Rohingya + Sogdian + Elymaic + Mahajani + Khojki + Khudawadi + Grantha + Newa + Tirhuta + Siddham + Modi + Takri + Dogra + Warang Citi + Nandinagari + Zanabazar Square + Soyombo + Pau Cin Hau + Bhaiksuki + Marchen + Masaram Gondi + Makasar + Egyptian + Mro + Bassa-Vah + Pahawh Hmong + Medefaidrin + Tangut + Tangut Components + Nushu + Duployan Shorthand + Ancient Greek Musical Notation + Nyiakeng Puachue Hmong + Wancho + Mende Kikakui + Adlam + Indic Siyaq Number + Ottoman Siyaq Number. */ #undef SUBRANGE #undef MASK_ANY @@ -2698,7 +2787,7 @@ syms_of_w32font (void) DEFSYM (Qthai, "thai"); DEFSYM (Qlao, "lao"); DEFSYM (Qtibetan, "tibetan"); - DEFSYM (Qmyanmar, "myanmar"); + DEFSYM (Qburmese, "burmese"); DEFSYM (Qgeorgian, "georgian"); DEFSYM (Qhangul, "hangul"); DEFSYM (Qethiopic, "ethiopic"); @@ -2737,6 +2826,8 @@ syms_of_w32font (void) DEFSYM (Qbuginese, "buginese"); DEFSYM (Qbuhid, "buhid"); DEFSYM (Qcuneiform, "cuneiform"); + DEFSYM (Qcuneiform_numbers_and_punctuation, + "cuneiform-numbers-and-punctuation"); DEFSYM (Qcypriot, "cypriot"); DEFSYM (Qdeseret, "deseret"); DEFSYM (Qglagolitic, "glagolitic"); @@ -2745,6 +2836,7 @@ syms_of_w32font (void) DEFSYM (Qkharoshthi, "kharoshthi"); DEFSYM (Qlimbu, "limbu"); DEFSYM (Qlinear_b, "linear_b"); + DEFSYM (Qaegean_number, "aegean-number"); DEFSYM (Qold_italic, "old_italic"); DEFSYM (Qold_persian, "old_persian"); DEFSYM (Qosmanya, "osmanya"); @@ -2818,6 +2910,7 @@ versions of Windows) characters. */); DEFSYM (Qw32_charset_vietnamese, "w32-charset-vietnamese"); DEFSYM (Qw32_charset_thai, "w32-charset-thai"); DEFSYM (Qw32_charset_mac, "w32-charset-mac"); + DEFSYM (Qw32_non_USB_fonts, "w32-non-USB-fonts"); defsubr (&Sx_select_font); -- 2.39.5