From 9bd2f59db608def1b588b03eff846d3fe8a7fa00 Mon Sep 17 00:00:00 2001 From: Robert Pluim Date: Mon, 18 Oct 2021 11:51:10 +0200 Subject: [PATCH] Handle VS-16 correctly for non-emoji codepoints * admin/unidata/blocks.awk: Remove emoji overrides for codepoints with Emoji_Presentation = No, they're no longer necessary. * lisp/composite.el: Remove #xFE0F (VS-16) from the range handled by `compose-gstring-for-variation-glyph' so it can be handled by `font_range'. * src/composite.c (syms_of_composite): New variable `auto-composition-emoji-eligible-codepoints'. * admin/unidata/emoji-zwj.awk: Generate value for `auto-composition-emoji-eligible-codepoints'. Add `composition-function-table' entries for 'codepoint + U+FE0F' for them. * src/font.c (codepoint_is_emoji_eligible): New function to check if we should try to use the emoji font for a codepoint. (font_range): Use it. --- admin/unidata/blocks.awk | 26 ++--------------------- admin/unidata/emoji-zwj.awk | 41 +++++++++++++++++++++++++++++++++++-- lisp/composite.el | 9 +++++++- src/composite.c | 11 ++++++++++ src/font.c | 20 ++++++++++++++++-- 5 files changed, 78 insertions(+), 29 deletions(-) diff --git a/admin/unidata/blocks.awk b/admin/unidata/blocks.awk index 96b0413875d..314ac3e9394 100755 --- a/admin/unidata/blocks.awk +++ b/admin/unidata/blocks.awk @@ -221,31 +221,9 @@ FILENAME ~ "emoji-data.txt" && /^[0-9A-F].*; Emoji_Presentation / { } END { - ## These codepoints have Emoji_Presentation = No, but they are - ## used in emoji-sequences.txt and emoji-zwj-sequences.txt (with a - ## Variation Selector), so force them into the emoji script so - ## they will get composed correctly. 
FIXME: delete this when we - ## can change the font used for a codepoint based on whether it's - ## followed by a VS (usually VS-16) idx = 0 - override_start[idx] = "1F3CB" - override_end[idx] = "1F3CC" - idx++ - override_start[idx] = "1F3F3" - override_end[idx] = "1F3F4" - idx++ - override_start[idx] = "1F441" - override_end[idx] = "1F441" - idx++ - override_start[idx] = "1F574" - override_end[idx] = "1F575" - idx++ - override_start[idx] = "1F590" - override_end[idx] = "1F590" - - ## These are here so that font_range can choose Emoji presentation - ## for the preceding codepoint when it encounters a VS - idx++ + ## These are here so that font_range can choose Emoji presentation + ## for the preceding codepoint when it encounters a VS override_start[idx] = "FE00" override_end[idx] = "FE0F" diff --git a/admin/unidata/emoji-zwj.awk b/admin/unidata/emoji-zwj.awk index 5aca157cbd4..d4e2944ca34 100644 --- a/admin/unidata/emoji-zwj.awk +++ b/admin/unidata/emoji-zwj.awk @@ -64,6 +64,44 @@ END { print ";;; emoji-zwj.el --- emoji zwj character composition table -*- lexical-binding:t -*-" print ";;; Automatically generated from admin/unidata/emoji-{zwj-,}sequences.txt" print "(eval-when-compile (require 'regexp-opt))" + + # The following codepoints are not emoji, but they are part of + # emoji sequences. We have code in font.c:font_range that will + # try to display them with the emoji font anyway. 
+ + trigger_codepoints[1] = "261D" + trigger_codepoints[2] = "26F9" + trigger_codepoints[3] = "270C" + trigger_codepoints[4] = "270D" + trigger_codepoints[5] = "2764" + trigger_codepoints[6] = "1F3CB" + trigger_codepoints[7] = "1F3CC" + trigger_codepoints[8] = "1F3F3" + trigger_codepoints[9] = "1F3F4" + trigger_codepoints[10] = "1F441" + trigger_codepoints[11] = "1F574" + trigger_codepoints[12] = "1F575" + trigger_codepoints[13] = "1F590" + + printf "(setq auto-composition-emoji-eligible-codepoints\n" + printf "'(" + + for (trig in trigger_codepoints) + { + printf("\n?\\N{U+%s}", trigger_codepoints[trig]) + } + printf "\n))\n\n" + + # We add entries for 'codepoint U+FE0F' here to ensure that the + # code in font_range is triggered. + + for (trig in trigger_codepoints) + { + codepoint = trigger_codepoints[trig] + c = sprintf("\\N{U+%s}", codepoint) + vec[codepoint] = vec[codepoint] "\n\"" c "\\N{U+FE0F}\"" + } + print "(dolist (elt `(" for (elt in ch) @@ -98,6 +136,5 @@ END { print " 0" print " 'compose-gstring-for-graphic))))" - print "\n" - print "(provide 'emoji-zwj)" + printf "\n(provide 'emoji-zwj)" } diff --git a/lisp/composite.el b/lisp/composite.el index 859253ec7e2..99f528a0779 100644 --- a/lisp/composite.el +++ b/lisp/composite.el @@ -834,8 +834,15 @@ and the second is a glyph for a variation selector." (lgstring-set-glyph gstring 1 nil) (throw 'tag gstring))))))) +;; We explicitly don't handle #xFE0F (VS-16) here, because that's +;; taken care of by font_range in font.c, which will check for an +;; emoji font for codepoints used in compositions even if they're not +;; emoji themselves, and thus choose the Emoji presentation for them +;; when followed by VS-16. VS-15 *is* handled here, because if it's +;; handled in font_range, we end up choosing the Emoji presentation +;; rather than the Text presentation. (let ((elt '([".." 1 compose-gstring-for-variation-glyph]))) - (set-char-table-range composition-function-table '(#xFE00 . 
#xFE0F) elt) + (set-char-table-range composition-function-table '(#xFE00 . #xFE0E) elt) (set-char-table-range composition-function-table '(#xE0100 . #xE01EF) elt)) (defun auto-compose-chars (func from to font-object string direction) diff --git a/src/composite.c b/src/composite.c index f456e7a835d..c170805d9dd 100644 --- a/src/composite.c +++ b/src/composite.c @@ -2124,6 +2124,17 @@ GSTRING, or modify GSTRING itself and return it. See also the documentation of `auto-composition-mode'. */); Vcomposition_function_table = Fmake_char_table (Qnil, Qnil); + DEFVAR_LISP ("auto-composition-emoji-eligible-codepoints", Vauto_composition_emoji_eligible_codepoints, + doc: /* List of codepoints for which auto-composition will check for an emoji font. + +These are codepoints which have Emoji_Presentation = No, and thus by +default are not displayed as emoji. In certain circumstances, such as +when followed by U+FE0F (VS-16) the emoji font should be used for +them anyway. + +This list is auto-generated, you should not need to modify it. */); + Vauto_composition_emoji_eligible_codepoints = Qnil; + defsubr (&Scompose_region_internal); defsubr (&Scompose_string_internal); defsubr (&Sfind_composition_internal); diff --git a/src/font.c b/src/font.c index 83f0f8296ad..6cd4a6b5c11 100644 --- a/src/font.c +++ b/src/font.c @@ -3860,6 +3860,23 @@ font_at (int c, ptrdiff_t pos, struct face *face, struct window *w, #ifdef HAVE_WINDOW_SYSTEM +/* Check if CH is a codepoint for which we should attempt to use the + emoji font, even if the codepoint itself has Emoji_Presentation = + No. Vauto_composition_emoji_eligible_codepoints is filled in for + us by admin/unidata/emoji-zwj.awk. */ +static bool +codepoint_is_emoji_eligible (int ch) +{ + if (EQ (CHAR_TABLE_REF (Vchar_script_table, ch), Qemoji)) + return true; + + if (! 
NILP (Fmemq (make_fixnum (ch), + Vauto_composition_emoji_eligible_codepoints))) + return true; + + return false; +} + /* Check how many characters after character/byte position POS/POS_BYTE (at most to *LIMIT) can be displayed by the same font in the window W. FACE, if non-NULL, is the face selected for the character at POS. @@ -3907,8 +3924,7 @@ font_range (ptrdiff_t pos, ptrdiff_t pos_byte, ptrdiff_t *limit, /* If the composition was triggered by an emoji, use a character from 'script-representative-chars', rather than the first character in the string, to determine the font to use. */ - if (EQ (CHAR_TABLE_REF (Vchar_script_table, ch), - Qemoji)) + if (codepoint_is_emoji_eligible (ch)) { Lisp_Object val = assq_no_quit (Qemoji, Vscript_representative_chars); if (CONSP (val)) -- 2.39.2