From af359de91772478587f768300ca61d64a693fedb Mon Sep 17 00:00:00 2001 From: Lars Ingebrigtsen Date: Fri, 25 Dec 2020 05:58:09 +0100 Subject: [PATCH] Allow `string-limit' to work on encoded strings * doc/lispref/strings.texi (Creating Strings): Document it. * lisp/emacs-lisp/subr-x.el (string-limit): Allow limiting on encoded strings. --- doc/lispref/strings.texi | 9 +++++++- lisp/emacs-lisp/shortdoc.el | 3 ++- lisp/emacs-lisp/subr-x.el | 34 ++++++++++++++++++++++++---- test/lisp/emacs-lisp/subr-x-tests.el | 20 ++++++++++++++++ 4 files changed, 59 insertions(+), 7 deletions(-) diff --git a/doc/lispref/strings.texi b/doc/lispref/strings.texi index 19b91471ed3..1e5f52ddfdd 100644 --- a/doc/lispref/strings.texi +++ b/doc/lispref/strings.texi @@ -394,12 +394,19 @@ there are individual words that are longer than @var{length}, these will not be shortened. @end defun -@defun string-limit string length &optional end +@defun string-limit string length &optional end coding-system If @var{string} is shorter than @var{length}, @var{string} is returned as is. Otherwise, return a substring of @var{string} consisting of the first @var{length} characters. If the optional @var{end} parameter is given, return a string of the @var{length} last characters instead. + +If @var{coding-system} is non-@code{nil}, @var{string} will be encoded +before limiting, and the result will be a unibyte string that's +shorter than @code{length}. If @var{string} contains characters that +are encoded into several bytes (for instance, when using +@code{utf-8}), the resulting unibyte string is never truncated in the +middle of a character representation. @end defun @defun string-lines string &optional omit-nulls diff --git a/lisp/emacs-lisp/shortdoc.el b/lisp/emacs-lisp/shortdoc.el index 618465513da..9d183e0d4e9 100644 --- a/lisp/emacs-lisp/shortdoc.el +++ b/lisp/emacs-lisp/shortdoc.el @@ -146,7 +146,8 @@ There can be any number of :example/:result elements." (string-limit :eval (string-limit "foobar" 3) :eval (string-limit "foobar" 3 t) - :eval (string-limit "foobar" 10)) + :eval (string-limit "foobar" 10) + :eval (string-limit "fo好" 3 nil 'utf-8)) (truncate-string-to-width :eval (truncate-string-to-width "foobar" 3) :eval (truncate-string-to-width "你好bar" 5)) diff --git a/lisp/emacs-lisp/subr-x.el b/lisp/emacs-lisp/subr-x.el index dc5840a0865..9fbb0351af4 100644 --- a/lisp/emacs-lisp/subr-x.el +++ b/lisp/emacs-lisp/subr-x.el @@ -286,7 +286,7 @@ result will have lines that are longer than LENGTH." (fill-region (point-min) (point-max))) (buffer-string))) -(defun string-limit (string length &optional end) +(defun string-limit (string length &optional end coding-system) "Return (up to) a LENGTH substring of STRING. If STRING is shorter than or equal to LENGTH, the entire string is returned unchanged. @@ -295,15 +295,39 @@ If STRING is longer than LENGTH, return a substring consisting of the first LENGTH characters of STRING. If END is non-nil, return the last LENGTH characters instead. +If CODING-SYSTEM is non-nil, STRING will be encoded before +limiting, and LENGTH is interpreted as the number of bytes to +limit the string to. The result will be a unibyte string that is +shorter than LENGTH, but will not contain \"partial\" characters, +even if CODING-SYSTEM encodes characters with several bytes per +character. + When shortening strings for display purposes, `truncate-string-to-width' is almost always a better alternative than this function." (unless (natnump length) (signal 'wrong-type-argument (list 'natnump length))) - (cond - ((<= (length string) length) string) - (end (substring string (- (length string) length))) - (t (substring string 0 length)))) + (if coding-system + (let ((result nil) + (result-length 0) + (index (if end (1- (length string)) 0))) + (while (let ((encoded (encode-coding-char + (aref string index) coding-system))) + (and (<= (+ (length encoded) result-length) length) + (progn + (push encoded result) + (cl-incf result-length (length encoded)) + (setq index (if end (1- index) + (1+ index)))) + (if end (> index -1) + (< index (length string))))) + ;; No body. + ) + (apply #'concat (if end result (nreverse result)))) + (cond + ((<= (length string) length) string) + (end (substring string (- (length string) length))) + (t (substring string 0 length))))) (defun string-lines (string &optional omit-nulls) "Split STRING into a list of lines. diff --git a/test/lisp/emacs-lisp/subr-x-tests.el b/test/lisp/emacs-lisp/subr-x-tests.el index 2ae492ecf15..b17185ab0d3 100644 --- a/test/lisp/emacs-lisp/subr-x-tests.el +++ b/test/lisp/emacs-lisp/subr-x-tests.el @@ -600,6 +600,26 @@ (should (equal (string-limit "foo" 0) "")) (should-error (string-limit "foo" -1))) +(ert-deftest subr-string-limit-coding () + (should (not (multibyte-string-p (string-limit "foó" 10 nil 'utf-8)))) + (should (equal (string-limit "foó" 10 nil 'utf-8) "fo\303\263")) + (should (equal (string-limit "foó" 3 nil 'utf-8) "fo")) + (should (equal (string-limit "foó" 4 nil 'utf-8) "fo\303\263")) + (should (equal (string-limit "foóa" 4 nil 'utf-8) "fo\303\263")) + (should (equal (string-limit "foóá" 4 nil 'utf-8) "fo\303\263")) + (should (equal (string-limit "foóa" 4 nil 'iso-8859-1) "fo\363a")) + (should (equal (string-limit "foóá" 4 nil 'iso-8859-1) "fo\363\341")) + (should (equal (string-limit "foóá" 4 nil 'utf-16) "\376\377\000f")) + + (should (equal (string-limit "foó" 10 t 'utf-8) "fo\303\263")) + (should (equal (string-limit "foó" 3 t 'utf-8) "o\303\263")) + (should (equal (string-limit "foó" 4 t 'utf-8) "fo\303\263")) + (should (equal (string-limit "foóa" 4 t 'utf-8) "o\303\263a")) + (should (equal (string-limit "foóá" 4 t 'utf-8) "\303\263\303\241")) + (should (equal (string-limit "foóa" 4 t 'iso-8859-1) "fo\363a")) + (should (equal (string-limit "foóá" 4 t 'iso-8859-1) "fo\363\341")) + (should (equal (string-limit "foóá" 4 t 'utf-16) "\376\377\000\341"))) + (ert-deftest subr-string-lines () (should (equal (string-lines "foo") '("foo"))) (should (equal (string-lines "foo \nbar") '("foo " "bar")))) -- 2.39.5