From d1559ede54684513b79025ade2b4677447c7a487 Mon Sep 17 00:00:00 2001 From: Eli Zaretskii Date: Wed, 14 Apr 2021 11:47:55 +0300 Subject: [PATCH] Add two optional arguments to 'string-width' * src/character.c (Fstring_width, lisp_string_width): Accept two optional arguments FROM and TO, to indicate the substring to be considered. (Fstring_width): Add caveats in the doc string about display features ignored by the function. (Bug#47712) * src/character.h (lisp_string_width): Update prototype. * src/editfns.c (styled_format): Adjust call of lisp_string_width to its changed signature. * test/src/character-tests.el (character-test-string-width): New file with tests for 'string-width'. * doc/lispref/display.texi (Size of Displayed Text): Document caveats of using 'string-width'. * etc/NEWS: Announce the change. --- doc/lispref/display.texi | 11 +++++++- etc/NEWS | 5 ++++ src/character.c | 56 +++++++++++++++++++++++-------------- src/character.h | 4 +-- src/editfns.c | 9 +++--- test/src/character-tests.el | 45 +++++++++++++++++++++++++++++ 6 files changed, 101 insertions(+), 29 deletions(-) create mode 100644 test/src/character-tests.el diff --git a/doc/lispref/display.texi b/doc/lispref/display.texi index 68d7e827d26..82684cd2720 100644 --- a/doc/lispref/display.texi +++ b/doc/lispref/display.texi @@ -1965,9 +1965,18 @@ Tables}). The width of a tab character is usually @code{tab-width} (@pxref{Usual Display}). @end defun -@defun string-width string +@defun string-width string &optional from to This function returns the width in columns of the string @var{string}, if it were displayed in the current buffer and the selected window. +Optional arguments @var{from} and @var{to} specify the substring of +@var{string} to consider, and are interpreted as in @code{substring} +(@pxref{Creating Strings}). + +The return value is an approximation: it only considers the values +returned by @code{char-width} for the constituent characters, always +takes a tab character as taking @code{tab-width} columns, ignores +display properties and fonts, etc. For these reasons, we recommend to +use @code{window-text-pixel-size}, described below, instead. @end defun @defun truncate-string-to-width string width &optional start-column padding ellipsis ellipsis-text-property diff --git a/etc/NEWS b/etc/NEWS index 320827d881e..d4f942bafe3 100644 --- a/etc/NEWS +++ b/etc/NEWS @@ -2664,6 +2664,11 @@ Modes that use 'truncate-string-to-width' with non-nil, non-string argument ELLIPSIS, will now indicate truncation using '…' when the selected frame can display it, and using "..." otherwise. ++++ +** 'string-width' now accepts two optional arguments FROM and TO. +This allows to calculate the width of a substring without consing a +new string. + +++ ** New command 'make-directory-autoloads'. This does the same as the old command 'update-directory-autoloads', diff --git a/src/character.c b/src/character.c index a599a0355f4..41abb83a48b 100644 --- a/src/character.c +++ b/src/character.c @@ -321,28 +321,32 @@ strwidth (const char *str, ptrdiff_t len) return c_string_width ((const unsigned char *) str, len, -1, NULL, NULL); } -/* Return width of Lisp string STRING when displayed in the current - buffer. The width is measured by how many columns it occupies on - the screen while paying attention to compositions. If PRECISION > - 0, return the width of longest substring that doesn't exceed - PRECISION, and set number of characters and bytes of the substring - in *NCHARS and *NBYTES respectively. */ +/* Return width of a (substring of a) Lisp string STRING when + displayed in the current buffer. The width is measured by how many + columns it occupies on the screen while paying attention to + compositions. If PRECISION > 0, return the width of longest + substring that doesn't exceed PRECISION, and set number of + characters and bytes of the substring in *NCHARS and *NBYTES + respectively. FROM and TO are zero-based character indices + that define the substring of STRING to consider. */ ptrdiff_t -lisp_string_width (Lisp_Object string, ptrdiff_t precision, - ptrdiff_t *nchars, ptrdiff_t *nbytes) +lisp_string_width (Lisp_Object string, ptrdiff_t from, ptrdiff_t to, + ptrdiff_t precision, ptrdiff_t *nchars, ptrdiff_t *nbytes) { - ptrdiff_t len = SCHARS (string); /* This set multibyte to 0 even if STRING is multibyte when it contains only ascii and eight-bit-graphic, but that's intentional. */ - bool multibyte = len < SBYTES (string); + bool multibyte = SCHARS (string) < SBYTES (string); unsigned char *str = SDATA (string); - ptrdiff_t i = 0, i_byte = 0; + ptrdiff_t i = from, i_byte = from ? string_char_to_byte (string, from) : 0; + ptrdiff_t from_byte = i_byte; ptrdiff_t width = 0; struct Lisp_Char_Table *dp = buffer_display_table (); - while (i < len) + eassert (precision <= 0 || (nchars && nbytes)); + + while (i < to) { ptrdiff_t chars, bytes, thiswidth; Lisp_Object val; @@ -375,8 +379,8 @@ lisp_string_width (Lisp_Object string, ptrdiff_t precision, if (0 < precision && precision - width < thiswidth) { - *nchars = i; - *nbytes = i_byte; + *nchars = i - from; + *nbytes = i_byte - from_byte; return width; } if (INT_ADD_WRAPV (thiswidth, width, &width)) @@ -387,27 +391,37 @@ lisp_string_width (Lisp_Object string, ptrdiff_t precision, if (precision > 0) { - *nchars = i; - *nbytes = i_byte; + *nchars = i - from; + *nbytes = i_byte - from_byte; } return width; } -DEFUN ("string-width", Fstring_width, Sstring_width, 1, 1, 0, +DEFUN ("string-width", Fstring_width, Sstring_width, 1, 3, 0, doc: /* Return width of STRING when displayed in the current buffer. Width is measured by how many columns it occupies on the screen. +Optional arguments FROM and TO specify the substring of STRING to +consider, and are interpreted as in `substring'. + When calculating width of a multibyte character in STRING, only the base leading-code is considered; the validity of the following bytes is not checked. Tabs in STRING are always -taken to occupy `tab-width' columns. -usage: (string-width STRING) */) - (Lisp_Object str) +taken to occupy `tab-width' columns. The effect of faces and fonts +used for non-Latin and other unusual characters (such as emoji) is +ignored as well, as are display properties and invisible text. +For these reasons, the results are not generally reliable; +for accurate dimensions of text as it will be displayed, +use `window-text-pixel-size' instead. +usage: (string-width STRING &optional FROM TO) */) + (Lisp_Object str, Lisp_Object from, Lisp_Object to) { Lisp_Object val; + ptrdiff_t ifrom, ito; CHECK_STRING (str); - XSETFASTINT (val, lisp_string_width (str, -1, NULL, NULL)); + validate_subarray (str, from, to, SCHARS (str), &ifrom, &ito); + XSETFASTINT (val, lisp_string_width (str, ifrom, ito, -1, NULL, NULL)); return val; } diff --git a/src/character.h b/src/character.h index cbf43097ae2..d19e1e2604c 100644 --- a/src/character.h +++ b/src/character.h @@ -572,8 +572,8 @@ extern ptrdiff_t str_to_unibyte (const unsigned char *, unsigned char *, extern ptrdiff_t strwidth (const char *, ptrdiff_t); extern ptrdiff_t c_string_width (const unsigned char *, ptrdiff_t, int, ptrdiff_t *, ptrdiff_t *); -extern ptrdiff_t lisp_string_width (Lisp_Object, ptrdiff_t, - ptrdiff_t *, ptrdiff_t *); +extern ptrdiff_t lisp_string_width (Lisp_Object, ptrdiff_t, ptrdiff_t, + ptrdiff_t, ptrdiff_t *, ptrdiff_t *); extern Lisp_Object Vchar_unify_table; extern Lisp_Object string_escape_byte8 (Lisp_Object); diff --git a/src/editfns.c b/src/editfns.c index 87e743afc31..bc73c1e2c5b 100644 --- a/src/editfns.c +++ b/src/editfns.c @@ -3386,12 +3386,11 @@ styled_format (ptrdiff_t nargs, Lisp_Object *args, bool message) else { ptrdiff_t nch, nby; - width = lisp_string_width (arg, prec, &nch, &nby); + nchars_string = SCHARS (arg); + width = lisp_string_width (arg, 0, nchars_string, prec, + &nch, &nby); if (prec < 0) - { - nchars_string = SCHARS (arg); - nbytes = SBYTES (arg); - } + nbytes = SBYTES (arg); else { nchars_string = nch; diff --git a/test/src/character-tests.el b/test/src/character-tests.el new file mode 100644 index 00000000000..10fc4dbf353 --- /dev/null +++ b/test/src/character-tests.el @@ -0,0 +1,45 @@ +;;; character-tests.el -- tests for character.c -*- lexical-binding:t -*- + +;; Copyright (C) 2021 Free Software Foundation, Inc. + +;; This file is part of GNU Emacs. + +;; GNU Emacs is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. + +;; GNU Emacs is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GNU Emacs. If not, see . + +;;; Code: + +(require 'ert) + +(ert-deftest character-test-string-width () + "Test `string-width' with and without compositions." + (should (= (string-width "1234") 4)) + (should (= (string-width "12\t34") (+ 4 tab-width))) + (should (= (string-width "áëòç") 4)) + (should (= (string-width "áëòç") 4)) + (should (= (string-width "הַרְבֵּה אַהֲבָה") 9)) + (should (= (string-width "1234" 1 3) 2)) + (should (= (string-width "1234" nil -1) 3)) + (should (= (string-width "1234" 2) 2)) + (should-error (string-width "1234" nil 5)) + (should-error (string-width "1234" -5)) + (should (= (string-width "12\t34") (+ 4 tab-width))) + (should (= (string-width "1234\t56") (+ 6 tab-width))) + (should (= (string-width "áëòç") 4)) + (should (= (string-width "áëòç" nil 3) 3)) + (should (= (string-width "áëòç" 1 3) 2)) + (should (= (string-width "áëòç" nil 2) 1)) + (should (= (string-width "áëòç" nil 3) 2)) + (should (= (string-width "áëòç" nil 4) 2)) + (should (= (string-width "הַרְבֵּה אַהֲבָה") 9)) + (should (= (string-width "הַרְבֵּה אַהֲבָה" nil 8) 4))) -- 2.39.5