From 20ebd91a734c3241ca3a9ce15ba81f7357401576 Mon Sep 17 00:00:00 2001 From: Eli Zaretskii Date: Sat, 30 Oct 2021 10:26:38 +0300 Subject: [PATCH] Improve documentation of string truncation APIs * doc/lispref/display.texi (Size of Displayed Text): * lisp/international/mule-util.el (truncate-string-to-width): Document caveats of using 'truncate-string-to-width' when character composition is involved. * lisp/emacs-lisp/subr-x.el (string-limit): * doc/lispref/strings.texi (Creating Strings): Improve the documentation of 'string-limit'. --- doc/lispref/display.texi | 20 +++++++++++++++----- doc/lispref/strings.texi | 18 ++++++++++++------ lisp/emacs-lisp/subr-x.el | 12 ++++++------ lisp/international/mule-util.el | 13 +++++++++---- 4 files changed, 42 insertions(+), 21 deletions(-) diff --git a/doc/lispref/display.texi b/doc/lispref/display.texi index e5ba85db9f1..386d51a91a5 100644 --- a/doc/lispref/display.texi +++ b/doc/lispref/display.texi @@ -1997,11 +1997,11 @@ If a multi-column character in @var{string} exceeds the goal result can sometimes fall short of @var{width}, but cannot go beyond it. -The optional argument @var{start-column} specifies the starting column. -If this is non-@code{nil}, then the first @var{start-column} columns of -the string are omitted from the result. If one multi-column character in -@var{string} extends across the column @var{start-column}, that -character is omitted. +The optional argument @var{start-column} specifies the starting +column; it defaults to zero. If this is non-@code{nil}, then the +first @var{start-column} columns of the string are omitted from the +result. If one multi-column character in @var{string} extends across +the column @var{start-column}, that character is omitted. 
The optional argument @var{padding}, if non-@code{nil}, is a padding character added at the beginning and end of the result string, to @@ -2026,12 +2026,22 @@ means hide the excess parts of @var{string} with a @code{display} text property (@pxref{Display Property}) showing the ellipsis, instead of actually truncating the string. +@group @example (truncate-string-to-width "\tab\t" 12 4) @result{} "ab" (truncate-string-to-width "\tab\t" 12 4 ?\s) @result{} " ab " @end example +@end group + +This function uses @code{string-width} and @code{char-width} to find +the suitable truncation point when @var{string} is too wide, so it +suffers from the same basic issues as @code{string-width} does. In +particular, when character composition happens within @var{string}, +the display width of a string could be smaller than the sum of widths +of the constituent characters, and this function might return +inaccurate results. @end defun @defun truncate-string-ellipsis diff --git a/doc/lispref/strings.texi b/doc/lispref/strings.texi index 9756e6331c9..7212677d832 100644 --- a/doc/lispref/strings.texi +++ b/doc/lispref/strings.texi @@ -414,18 +414,24 @@ will not be shortened. @end defun @defun string-limit string length &optional end coding-system -If @var{string} is shorter than @var{length}, @var{string} is returned -as is. Otherwise, return a substring of @var{string} consisting of -the first @var{length} characters. If the optional @var{end} -parameter is given, return a string of the @var{length} last +If @var{string} is shorter than @var{length} characters, @var{string} +is returned as is. Otherwise, return a substring of @var{string} +consisting of the first @var{length} characters. If the optional +@var{end} parameter is given, return a string of the @var{length} last characters instead. If @var{coding-system} is non-@code{nil}, @var{string} will be encoded before limiting, and the result will be a unibyte string that's -shorter than @code{length}. 
If @var{string} contains characters that -are encoded into several bytes (for instance, when using +shorter than @var{length} bytes. If @var{string} contains characters +that are encoded into several bytes (for instance, when using @code{utf-8}), the resulting unibyte string is never truncated in the middle of a character representation. + +This function measures the string length in characters or bytes, and +thus is generally inappropriate if you need to shorten strings for +display purposes; use @code{truncate-string-to-width} or +@code{window-text-pixel-size} instead (@pxref{Size of Displayed +Text}). @end defun @defun string-lines string &optional omit-nulls diff --git a/lisp/emacs-lisp/subr-x.el b/lisp/emacs-lisp/subr-x.el index 3de666682fa..788cd0f34bf 100644 --- a/lisp/emacs-lisp/subr-x.el +++ b/lisp/emacs-lisp/subr-x.el @@ -264,13 +264,13 @@ result will have lines that are longer than LENGTH." (buffer-string))) (defun string-limit (string length &optional end coding-system) - "Return (up to) a LENGTH substring of STRING. -If STRING is shorter than or equal to LENGTH, the entire string -is returned unchanged. + "Return a substring of STRING that is (up to) LENGTH characters long. +If STRING is shorter than or equal to LENGTH characters, return the +entire string unchanged. -If STRING is longer than LENGTH, return a substring consisting of -the first LENGTH characters of STRING. If END is non-nil, return -the last LENGTH characters instead. +If STRING is longer than LENGTH characters, return a substring +consisting of the first LENGTH characters of STRING. If END is +non-nil, return the last LENGTH characters instead. 
If CODING-SYSTEM is non-nil, STRING will be encoded before limiting, and LENGTH is interpreted as the number of bytes to diff --git a/lisp/international/mule-util.el b/lisp/international/mule-util.el index 38d29cb2385..c2f91e77e7c 100644 --- a/lisp/international/mule-util.el +++ b/lisp/international/mule-util.el @@ -67,10 +67,15 @@ decide whether the selected frame can display that Unicode character." ellipsis-text-property) "Truncate string STR to end at column END-COLUMN. The optional 3rd arg START-COLUMN, if non-nil, specifies the starting -column; that means to return the characters occupying columns -START-COLUMN ... END-COLUMN of STR. Both END-COLUMN and START-COLUMN -are specified in terms of character display width in the current -buffer; see also `char-width'. +column (default: zero); that means to return the characters occupying +columns START-COLUMN ... END-COLUMN of STR. Both END-COLUMN and +START-COLUMN are specified in terms of character display width in the +current buffer; see `char-width'. + +Since character composition on display can produce glyphs whose +width is smaller than the sum of `char-width' values of the +composed characters, this function can produce inaccurate results +when used in such cases. The optional 4th arg PADDING, if non-nil, specifies a padding character (which should have a display width of 1) to add at the end -- 2.39.2