From a9920473f68ce20fe95f67a4941ff3c1fb274f2a Mon Sep 17 00:00:00 2001 From: Eli Zaretskii Date: Thu, 10 Mar 2022 09:29:29 +0200 Subject: [PATCH] Improve documentation of 'map-charset-chars' * doc/lispref/nonascii.texi (Character Sets): * src/charset.c (Fmap_charset_chars): Clarify the codepoint issue in using 'map-charset-chars'. --- doc/lispref/nonascii.texi | 15 ++++++++++++--- src/charset.c | 25 +++++++++++++++---------- 2 files changed, 27 insertions(+), 13 deletions(-) diff --git a/doc/lispref/nonascii.texi b/doc/lispref/nonascii.texi index f495910fcd6..d7d25dc36af 100644 --- a/doc/lispref/nonascii.texi +++ b/doc/lispref/nonascii.texi @@ -855,15 +855,24 @@ function to all or part of the characters in a charset: Call @var{function} for characters in @var{charset}. @var{function} is called with two arguments. The first one is a cons cell @code{(@var{from} . @var{to})}, where @var{from} and @var{to} -indicate a range of characters contained in charset. The second -argument passed to @var{function} is @var{arg}. +indicate a range of characters contained in @var{charset}. The second +argument passed to @var{function} is @var{arg}, or @code{nil} if +@var{arg} is omitted. By default, the range of codepoints passed to @var{function} includes all the characters in @var{charset}, but optional arguments @var{from-code} and @var{to-code} limit that to the range of characters between these two codepoints of @var{charset}. If either of them is @code{nil}, it defaults to the first or last codepoint of -@var{charset}, respectively. +@var{charset}, respectively. Note that @var{from-code} and +@var{to-code} are @var{charset}'s codepoints, not the Emacs codes of +characters; by contrast, the values @var{from} and @var{to} in the +cons cell passed to @var{function} @emph{are} Emacs character codes. 
+Those Emacs character codes are either Unicode code points, or Emacs +internal code points that extend Unicode and are beyond the Unicode +range of characters @code{0..#x10FFFF} (@pxref{Text Representations}). +The latter happens rarely, with legacy CJK charsets for codepoints of +@var{charset} which specify characters not yet unified with Unicode. @end defun @node Scanning Charsets diff --git a/src/charset.c b/src/charset.c index dec9d56df2c..b9e1584083f 100644 --- a/src/charset.c +++ b/src/charset.c @@ -793,16 +793,21 @@ map_charset_chars (void (*c_function)(Lisp_Object, Lisp_Object), Lisp_Object fun DEFUN ("map-charset-chars", Fmap_charset_chars, Smap_charset_chars, 2, 5, 0, doc: /* Call FUNCTION for all characters in CHARSET. -FUNCTION is called with an argument RANGE and the optional 3rd -argument ARG. - -RANGE is a cons (FROM . TO), where FROM and TO indicate a range of -characters contained in CHARSET. - -The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the -range of code points (in CHARSET) of target characters. Note that -these are not character codes, but code points in CHARSET; for the -difference see `decode-char' and `list-charset-chars'. */) +Optional 3rd argument ARG is an additional argument to be passed +to FUNCTION, see below. +Optional 4th and 5th arguments FROM-CODE and TO-CODE specify the +range of code points (in CHARSET) of target characters on which to +map the FUNCTION. Note that these are not character codes, but code +points of CHARSET; for the difference see `decode-char' and +`list-charset-chars'. If FROM-CODE is nil or imitted, it stands for +the first code point of CHARSET; if TO-CODE is nil or omitted, it +stands for the last code point of CHARSET. + +FUNCTION will be called with two arguments: RANGE and ARG. +RANGE is a cons (FROM . TO), where FROM and TO specify a range of +characters that belong to CHARSET on which FUNCTION should do its +job. 
FROM and TO are Emacs character codes, unlike FROM-CODE and +TO-CODE, which are CHARSET code points. */) (Lisp_Object function, Lisp_Object charset, Lisp_Object arg, Lisp_Object from_code, Lisp_Object to_code) { struct charset *cs; -- 2.39.2