From da02b9edadbc809b25ac83eccf64089f1cf3b160 Mon Sep 17 00:00:00 2001 From: Po Lu Date: Mon, 3 Oct 2022 20:18:22 +0800 Subject: [PATCH] Fix coding systems used for X input methods * doc/emacs/mule.texi (International): Refer to X Coding as well. (Communication Coding): Document that locale-coding-system is not always used on X to decode keyboard input. (X Coding): New node. * etc/NEWS: Announce change to input method coding resolution. * lisp/term/x-win.el (x-get-input-coding-system): New function. * src/coding.c (syms_of_coding): Update doc string of locale-coding-system. * src/xfns.c (struct x_xim_text_conversion_data) (x_xim_text_to_utf8_unix_1, x_xim_text_to_utf8_unix_2) (x_xim_text_to_utf8_unix): Accept dpyinfo. Use the coding system specified inside if possible. (xic_preedit_draw_callback): Pass dpyinfo. * src/xterm.c (handle_one_xevent): Use XIM coding system for IM input. (xim_open_dpy): Try to determine the input method coding system. (mark_xterm): Mark `xim_coding'. (syms_of_xterm): New variable `x-input-coding-system'. * src/xterm.h (struct x_display_info): New field `xim_coding'. (FRAME_X_XIM_CODING): New macro. --- doc/emacs/mule.texi | 40 ++++++++++++++++++++++++++++++---------- etc/NEWS | 13 +++++++++++++ lisp/term/x-win.el | 22 ++++++++++++++++++++++ src/coding.c | 6 +++--- src/xfns.c | 33 +++++++++++++++++++++++---------- src/xterm.c | 34 ++++++++++++++++++++++++++++++---- src/xterm.h | 9 +++++++++ 7 files changed, 130 insertions(+), 27 deletions(-) diff --git a/doc/emacs/mule.texi b/doc/emacs/mule.texi index 5f303418383..1bbd7440f3e 100644 --- a/doc/emacs/mule.texi +++ b/doc/emacs/mule.texi @@ -61,7 +61,7 @@ can also be input by using the @kbd{C-x 8} prefix, see @ref{Unibyte Mode}. With the X Window System, your locale should be set to an appropriate value to make sure Emacs interprets keyboard input correctly; see -@ref{Language Environments, locales}. +@ref{Language Environments, locales}, and @ref{X Coding}. 
@end itemize The rest of this chapter describes these issues in detail. @@ -79,6 +79,7 @@ value to make sure Emacs interprets keyboard input correctly; see * Text Coding:: Choosing conversion to use for file text. * Communication Coding:: Coding systems for interprocess communication. * File Name Coding:: Coding systems for file @emph{names}. +* X Coding:: Coding systems for X input methods. * Terminal Coding:: Specifying coding systems for converting terminal input and output. * Fontsets:: Fontsets are collections of fonts @@ -1241,15 +1242,14 @@ current language environment. The variable @code{locale-coding-system} specifies a coding system to use when encoding and decoding system strings such as system error messages and @code{format-time-string} formats and time stamps. That -coding system is also used for decoding non-@acronym{ASCII} keyboard -input on the X Window System and for encoding text sent to the -standard output and error streams when in batch mode. You should -choose a coding system that is compatible -with the underlying system's text representation, which is normally -specified by one of the environment variables @env{LC_ALL}, -@env{LC_CTYPE}, and @env{LANG}. (The first one, in the order -specified above, whose value is nonempty is the one that determines -the text representation.) +coding system might also be used for decoding non-@acronym{ASCII} +keyboard input on the X Window System and will also be used to encode +text sent to the standard output and error streams in batch mode. You +should choose a coding system that is compatible with the underlying +system's text representation, which is normally specified by one of +the environment variables @env{LC_ALL}, @env{LC_CTYPE}, and +@env{LANG}. (The first one, in the order specified above, whose value +is nonempty is the one that determines the text representation.) 
@node File Name Coding @section Coding Systems for File Names @@ -1311,6 +1311,26 @@ C-w} to specify a new file name for that buffer. system. This prompts for an existing file name, its old coding system, and the coding system to which you wish to convert. +@node X Coding +@section Coding Systems for X Keyboard Input +@cindex X input method coding systems + Input methods under the X Window System specify their own coding +systems that must be used to decode keyboard input. By default, Emacs +determines the coding system used for each input method automatically +upon establishing the connection to the input method server, and uses +that specific coding system to decode keyboard input. However, that +determination can sometimes fail; in that situation, the locale coding +system (@pxref{Communication Coding}) is used instead. + +@cindex X input method coding systems, overriding +@vindex x-input-coding-system + If the input method does not correctly announce the coding system it +uses to encode text, then the coding system used by Emacs to decode +text from input methods must be manually specified. The value of the +variable @code{x-input-coding-system}, when set to a symbol, is +unconditionally used as the coding system used to decode keyboard +input from input methods. + @node Terminal Coding @section Coding Systems for Terminal I/O diff --git a/etc/NEWS b/etc/NEWS index db7c675edb5..d7bc4b0e0c2 100644 --- a/etc/NEWS +++ b/etc/NEWS @@ -186,6 +186,19 @@ of 'user-emacs-directory'. * Incompatible changes in Emacs 29.1 ++++ +*** Emacs now picks the correct coding system for X input methods. +Previously, Emacs would use the locale coding system for input +methods, which could in some circumstances be incorrect, especially +when the input method chose to fall back to some other coding system. + +Now, Emacs automatically detects the coding system used by input +methods, and uses that to decode input in preference to the value of +'locale-coding-system'. 
This unfortunately means that users who have +changed 'locale-coding-system' in order to affect how X keyboard input +is decoded must now set the variable 'x-input-coding-system' +instead. + +++ *** Bookmarks no longer include context for encrypted files. If you're visiting an encrypted file, setting a bookmark no longer diff --git a/lisp/term/x-win.el b/lisp/term/x-win.el index 9d3e7803650..57c6b785e73 100644 --- a/lisp/term/x-win.el +++ b/lisp/term/x-win.el @@ -1613,6 +1613,28 @@ Users should not call this function; see `device-class' instead." (setq x-dnd-movement-function #'x-dnd-movement) (setq x-dnd-unsupported-drop-function #'x-dnd-handle-unsupported-drop) +(defvar x-input-coding-function) + +(defun x-get-input-coding-system (x-locale) + "Return a coding system for the locale X-LOCALE. +Return a coding system that is able to decode text sent with the +X input method locale X-LOCALE, or nil if no coding system was +found." + (if (equal x-locale "C") + ;; Treat the C locale specially, as it means "ascii" under X. + 'ascii + (let ((locale (downcase x-locale))) + (or (locale-name-match locale locale-preferred-coding-systems) + (when locale + (if (string-match "\\.\\([^@]+\\)" locale) + (locale-charset-to-coding-system + (match-string 1 locale)))) + (let ((language-name + (locale-name-match locale locale-language-names))) + (and (consp language-name) (cdr language-name))))))) + +(setq x-input-coding-function #'x-get-input-coding-system) + (provide 'x-win) (provide 'term/x-win) diff --git a/src/coding.c b/src/coding.c index 0ae8eb3282b..ab73bda8440 100644 --- a/src/coding.c +++ b/src/coding.c @@ -12014,9 +12014,9 @@ See also the function `find-operation-coding-system'. */); Vnetwork_coding_system_alist = Qnil; DEFVAR_LISP ("locale-coding-system", Vlocale_coding_system, - doc: /* Coding system to use with system messages. -Also used for decoding keyboard input on X Window system, and for -encoding standard output and error streams.
*/); + doc: /* Coding system to use with system messages. +Potentially also used for decoding keyboard input on X Windows, and is +used for encoding standard output and error streams. */); Vlocale_coding_system = Qnil; /* The eol mnemonics are reset in startup.el system-dependently. */ diff --git a/src/xfns.c b/src/xfns.c index bb75ca5ad1c..8cea93c6698 100644 --- a/src/xfns.c +++ b/src/xfns.c @@ -3354,22 +3354,30 @@ struct x_xim_text_conversion_data { struct coding_system *coding; char *source; + struct x_display_info *dpyinfo; }; static Lisp_Object -x_xim_text_to_utf8_unix_1 (ptrdiff_t nargs, - Lisp_Object *args) +x_xim_text_to_utf8_unix_1 (ptrdiff_t nargs, Lisp_Object *args) { struct x_xim_text_conversion_data *data; ptrdiff_t nbytes; + Lisp_Object coding_system; data = xmint_pointer (args[0]); + + if (!NILP (Vx_input_coding_system) && SYMBOLP (Vx_input_coding_system)) + coding_system = Vx_input_coding_system; + else if (!NILP (data->dpyinfo->xim_coding)) + coding_system = data->dpyinfo->xim_coding; + else + coding_system = Vlocale_coding_system; + nbytes = strlen (data->source); data->coding->destination = NULL; - setup_coding_system (Vlocale_coding_system, - data->coding); + setup_coding_system (coding_system, data->coding); data->coding->mode |= (CODING_MODE_LAST_BLOCK | CODING_MODE_SAFE_ENCODING); data->coding->source = (const unsigned char *) data->source; @@ -3382,8 +3390,7 @@ x_xim_text_to_utf8_unix_1 (ptrdiff_t nargs, } static Lisp_Object -x_xim_text_to_utf8_unix_2 (Lisp_Object val, - ptrdiff_t nargs, +x_xim_text_to_utf8_unix_2 (Lisp_Object val, ptrdiff_t nargs, Lisp_Object *args) { struct x_xim_text_conversion_data *data; @@ -3400,7 +3407,8 @@ x_xim_text_to_utf8_unix_2 (Lisp_Object val, /* The string returned is not null-terminated.
*/ static char * -x_xim_text_to_utf8_unix (XIMText *text, ptrdiff_t *length) +x_xim_text_to_utf8_unix (struct x_display_info *dpyinfo, + XIMText *text, ptrdiff_t *length) { unsigned char *wchar_buf; ptrdiff_t wchar_actual_length, i; @@ -3424,6 +3432,7 @@ x_xim_text_to_utf8_unix (XIMText *text, ptrdiff_t *length) data.coding = &coding; data.source = text->string.multi_byte; + data.dpyinfo = dpyinfo; was_waiting_for_input_p = waiting_for_input; /* Otherwise Fsignal will crash. */ @@ -3441,18 +3450,21 @@ static void xic_preedit_draw_callback (XIC xic, XPointer client_data, XIMPreeditDrawCallbackStruct *call_data) { - struct frame *f = x_xic_to_frame (xic); + struct frame *f; struct x_output *output; - ptrdiff_t text_length = 0; + ptrdiff_t text_length; ptrdiff_t charpos; ptrdiff_t original_size; char *text; char *chg_start, *chg_end; struct input_event ie; + + f = x_xic_to_frame (xic); EVENT_INIT (ie); if (f) { + text_length = 0; output = FRAME_X_OUTPUT (f); if (!output->preedit_active) @@ -3460,7 +3472,8 @@ xic_preedit_draw_callback (XIC xic, XPointer client_data, if (call_data->text) { - text = x_xim_text_to_utf8_unix (call_data->text, &text_length); + text = x_xim_text_to_utf8_unix (FRAME_DISPLAY_INFO (f), + call_data->text, &text_length); if (!text) /* Decoding the IM text failed. 
*/ diff --git a/src/xterm.c b/src/xterm.c index d83b03d1ca6..42335f0de0e 100644 --- a/src/xterm.c +++ b/src/xterm.c @@ -19139,7 +19139,7 @@ handle_one_xevent (struct x_display_info *dpyinfo, &xkey, (char *) copy_bufptr, copy_bufsiz, &keysym, &status_return); - coding = Qnil; + coding = FRAME_X_XIM_CODING (f); if (status_return == XBufferOverflow) { copy_bufsiz = nbytes + 1; @@ -22828,7 +22828,7 @@ handle_one_xevent (struct x_display_info *dpyinfo, &xkey, (char *) copy_bufptr, copy_bufsiz, &keysym, &status_return); - coding = Qnil; + coding = FRAME_X_XIM_CODING (f); if (status_return == XBufferOverflow) { @@ -25488,9 +25488,10 @@ xim_destroy_callback (XIM xim, XPointer client_data, XPointer call_data) static void xim_open_dpy (struct x_display_info *dpyinfo, char *resource_name) { +#ifdef HAVE_XIM XIM xim; + const char *locale; -#ifdef HAVE_XIM if (use_xim) { if (dpyinfo->xim) @@ -25513,6 +25514,14 @@ xim_open_dpy (struct x_display_info *dpyinfo, char *resource_name) destroy.client_data = (XPointer)dpyinfo; XSetIMValues (xim, XNDestroyCallback, &destroy, NULL); #endif + + locale = XLocaleOfIM (xim); + + /* Now try to determine the coding system that should be + used. locale is in Host Portable Character Encoding, and + as such can be passed to build_string as is. */ + dpyinfo->xim_coding = safe_call1 (Vx_input_coding_function, + build_string (locale)); } } @@ -29843,7 +29852,7 @@ mark_xterm (void) } #if defined HAVE_XINPUT2 || defined USE_TOOLKIT_SCROLL_BARS \ - || defined HAVE_XRANDR || defined USE_GTK + || defined HAVE_XRANDR || defined USE_GTK || defined HAVE_X_I18N for (dpyinfo = x_display_list; dpyinfo; dpyinfo = dpyinfo->next) { #ifdef HAVE_XINPUT2 @@ -29856,6 +29865,9 @@ mark_xterm (void) #endif #if defined HAVE_XRANDR || defined USE_GTK mark_object (dpyinfo->last_monitor_attributes_list); +#endif +#if defined HAVE_X_I18N + mark_object (dpyinfo->xim_coding); #endif } #endif @@ -30385,4 +30397,18 @@ on the same display. 
In addition, when this variable is a list, only preserve the selections whose names are contained within. */); Vx_auto_preserve_selections = list2 (QCLIPBOARD, QPRIMARY); + + DEFVAR_LISP ("x-input-coding-system", Vx_input_coding_system, + doc: /* Coding system used for input from X input methods. +If a symbol and non-nil, this is the coding system that will be used +to decode input from X input methods. It does not affect input from +GTK native input methods enabled through `x-gtk-use-native-input'. */); + Vx_input_coding_system = Qnil; + + DEFVAR_LISP ("x-input-coding-function", Vx_input_coding_function, + doc: /* Function used to determine the coding system used by input methods. +It should accept a single argument, a string describing the locale of +the input method, and return a coding system that can decode keyboard +input generated by said input method. */); + Vx_input_coding_function = Qnil; } diff --git a/src/xterm.h b/src/xterm.h index f3791aa8df9..b68a234faa5 100644 --- a/src/xterm.h +++ b/src/xterm.h @@ -580,6 +580,9 @@ struct x_display_info XIMStyles *xim_styles; struct xim_inst_t *xim_callback_data; XIMStyle preferred_xim_style; + + /* The named coding system to use for this input method. */ + Lisp_Object xim_coding; #endif /* A cache mapping color names to RGB values. */ @@ -1348,6 +1351,12 @@ extern void x_mark_frame_dirty (struct frame *f); #define FRAME_X_XIM_STYLES(f) (FRAME_DISPLAY_INFO (f)->xim_styles) #define FRAME_XIC_STYLE(f) ((f)->output_data.x->xic_style) #define FRAME_XIC_FONTSET(f) ((f)->output_data.x->xic_xfs) +#define FRAME_X_XIM_CODING(f) \ + ((!NILP (Vx_input_coding_system) && SYMBOLP (Vx_input_coding_system)) \ + ? Vx_input_coding_system \ + : (!NILP (FRAME_DISPLAY_INFO (f)->xim_coding) \ + ? FRAME_DISPLAY_INFO (f)->xim_coding \ + : Vlocale_coding_system)) /* X-specific scroll bar stuff. */ -- 2.39.2