From a9920473f68ce20fe95f67a4941ff3c1fb274f2a Mon Sep 17 00:00:00 2001
From: Eli Zaretskii <eliz@gnu.org>
Date: Thu, 10 Mar 2022 09:29:29 +0200
Subject: [PATCH] Improve documentation of 'map-charset-chars'

* doc/lispref/nonascii.texi (Character Sets):
* src/charset.c (Fmap_charset_chars): Clarify the codepoint issue
in using 'map-charset-chars'.
---
 doc/lispref/nonascii.texi | 15 ++++++++++++---
 src/charset.c             | 25 +++++++++++++++----------
 2 files changed, 27 insertions(+), 13 deletions(-)

diff --git a/doc/lispref/nonascii.texi b/doc/lispref/nonascii.texi
index f495910fcd6..d7d25dc36af 100644
--- a/doc/lispref/nonascii.texi
+++ b/doc/lispref/nonascii.texi
@@ -855,15 +855,24 @@ function to all or part of the characters in a charset:
 Call @var{function} for characters in @var{charset}.  @var{function}
 is called with two arguments.  The first one is a cons cell
 @code{(@var{from} .  @var{to})}, where @var{from} and @var{to}
-indicate a range of characters contained in charset.  The second
-argument passed to @var{function} is @var{arg}.
+indicate a range of characters contained in @var{charset}.  The second
+argument passed to @var{function} is @var{arg}, or @code{nil} if
+@var{arg} is omitted.
 
 By default, the range of codepoints passed to @var{function} includes
 all the characters in @var{charset}, but optional arguments
 @var{from-code} and @var{to-code} limit that to the range of
 characters between these two codepoints of @var{charset}.  If either
 of them is @code{nil}, it defaults to the first or last codepoint of
-@var{charset}, respectively.
+@var{charset}, respectively.  Note that @var{from-code} and
+@var{to-code} are @var{charset}'s codepoints, not the Emacs codes of
+characters; by contrast, the values @var{from} and @var{to} in the
+cons cell passed to @var{function} @emph{are} Emacs character codes.
+Those Emacs character codes are either Unicode code points, or Emacs
+internal code points that extend Unicode and are beyond the Unicode
+range of characters @code{0..#x10FFFF} (@pxref{Text Representations}).
+The latter happens rarely, with legacy CJK charsets for codepoints of
+@var{charset} which specify characters not yet unified with Unicode.
 @end defun
 
 @node Scanning Charsets
diff --git a/src/charset.c b/src/charset.c
index dec9d56df2c..b9e1584083f 100644
--- a/src/charset.c
+++ b/src/charset.c
@@ -793,16 +793,21 @@ map_charset_chars (void (*c_function)(Lisp_Object, Lisp_Object), Lisp_Object fun
 
 DEFUN ("map-charset-chars", Fmap_charset_chars, Smap_charset_chars, 2, 5, 0,
        doc: /* Call FUNCTION for all characters in CHARSET.
-FUNCTION is called with an argument RANGE and the optional 3rd
-argument ARG.
-
-RANGE is a cons (FROM .  TO), where FROM and TO indicate a range of
-characters contained in CHARSET.
-
-The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
-range of code points (in CHARSET) of target characters.  Note that
-these are not character codes, but code points in CHARSET; for the
-difference see `decode-char' and `list-charset-chars'.  */)
+Optional 3rd argument ARG is an additional argument to be passed
+to FUNCTION, see below.
+Optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
+range of code points (in CHARSET) of target characters on which to
+map the FUNCTION.  Note that these are not character codes, but code
+points of CHARSET; for the difference see `decode-char' and
+`list-charset-chars'.  If FROM-CODE is nil or imitted, it stands for
+the first code point of CHARSET; if TO-CODE is nil or omitted, it
+stands for the last code point of CHARSET.
+
+FUNCTION will be called with two arguments: RANGE and ARG.
+RANGE is a cons (FROM .  TO), where FROM and TO specify a range of
+characters that belong to CHARSET on which FUNCTION should do its
+job.  FROM and TO are Emacs character codes, unlike FROM-CODE and
+TO-CODE, which are CHARSET code points.  */)
   (Lisp_Object function, Lisp_Object charset, Lisp_Object arg, Lisp_Object from_code, Lisp_Object to_code)
 {
   struct charset *cs;
-- 
2.39.5