From 87e422f1044068a4d27e5e4bfdbc664d9e4bbc43 Mon Sep 17 00:00:00 2001 From: Lars Ingebrigtsen Date: Mon, 21 Dec 2020 18:53:32 +0100 Subject: [PATCH] Beef up the Emacs string utility set a bit * doc/lispref/strings.texi (Modifying Strings): Document them. * lisp/emacs-lisp/shortdoc.el (string): Add examples. * lisp/emacs-lisp/subr-x.el (string-clean-whitespace) (string-fill, string-limit, string-lines, slice-string): New functions. --- doc/lispref/strings.texi | 38 ++++++++++++++++++++ etc/NEWS | 5 +++ lisp/emacs-lisp/shortdoc.el | 15 ++++++++ lisp/emacs-lisp/subr-x.el | 53 ++++++++++++++++++++++++++++ test/lisp/emacs-lisp/subr-x-tests.el | 26 ++++++++++++++ 5 files changed, 137 insertions(+) diff --git a/doc/lispref/strings.texi b/doc/lispref/strings.texi index 0f157c39d63..e4ca2617512 100644 --- a/doc/lispref/strings.texi +++ b/doc/lispref/strings.texi @@ -381,6 +381,44 @@ The default value of @var{separators} for @code{split-string}. Its usual value is @w{@code{"[ \f\t\n\r\v]+"}}. @end defvar +@defun slice-string string regexp +Split @var{string} into a list of strings on @var{regexp} boundaries. +As opposed to @code{split-string}, the boundaries are included in the +result set: + +@example +(slice-string " two words " " +") + @result{} (" two" " words" " ") +@end example +@end defun + +@defun string-clean-whitespace string +Clean up the whitespace in @var{string} by collapsing stretches of +whitespace to a single space character, as well as removing all +whitespace from the start and the end of @var{string}. +@end defun + +@defun string-fill string length +Attempt to word-wrap @var{string} so that no lines are longer than +@var{length}. Filling is done on whitespace boundaries only. If +there are individual words that are longer than @var{length}, these +will not be shortened. +@end defun + +@defun string-limit string length +Return a string that's no longer than @var{length}. If @var{string} is +no longer than @var{length}, @var{string} is returned as is. 
If +@var{length} is positive, return a substring of @var{string} +consisting of the first @var{length} characters. If @var{length} is +negative, return a string of the @var{-length} last characters +instead. +@end defun + +@defun string-lines string &optional omit-nulls +Split @var{string} into a list of strings on newline boundaries. If +@var{omit-nulls}, remove empty lines from the results. +@end defun + @node Modifying Strings @section Modifying Strings @cindex modifying strings diff --git a/etc/NEWS b/etc/NEWS index 7411295e1b5..17c6ce61f94 100644 --- a/etc/NEWS +++ b/etc/NEWS @@ -1440,6 +1440,11 @@ that makes it a valid button. ** Miscellaneous ++++ +*** A number of new string manipulation functions have been added. +'string-clean-whitespace', 'string-fill', 'string-limit', +'string-lines' and 'slice-string'. + +++ *** New variable 'current-minibuffer-command'. This is like 'this-command', but it is bound recursively when entering diff --git a/lisp/emacs-lisp/shortdoc.el b/lisp/emacs-lisp/shortdoc.el index 37d6170fee5..8b11b57ff7f 100644 --- a/lisp/emacs-lisp/shortdoc.el +++ b/lisp/emacs-lisp/shortdoc.el @@ -139,10 +139,20 @@ There can be any number of :example/:result elements." (substring :eval (substring "foobar" 0 3) :eval (substring "foobar" 3)) + (string-limit + :eval (string-limit "foobar" 3) + :eval (string-limit "foobar" -3) + :eval (string-limit "foobar" 10)) (split-string :eval (split-string "foo bar") :eval (split-string "|foo|bar|" "|") :eval (split-string "|foo|bar|" "|" t)) + (slice-string + :eval (slice-string "foo-bar" "-") + :eval (slice-string "foo-bar--zot-" "-+")) + (string-lines + :eval (string-lines "foo\n\nbar") + :eval (string-lines "foo\n\nbar" t)) (string-replace :eval (string-replace "foo" "bar" "foozot")) (replace-regexp-in-string @@ -167,6 +177,11 @@ There can be any number of :example/:result elements." 
(string-remove-prefix :no-manual t :eval (string-remove-prefix "foo" "foobar")) + (string-clean-whitespace + :eval (string-clean-whitespace " foo bar ")) + (string-fill + :eval (string-fill "Three short words" 12) + :eval (string-fill "Long-word" 3)) (reverse :eval (reverse "foo")) (substring-no-properties diff --git a/lisp/emacs-lisp/subr-x.el b/lisp/emacs-lisp/subr-x.el index e6abb39ddc6..41a20795378 100644 --- a/lisp/emacs-lisp/subr-x.el +++ b/lisp/emacs-lisp/subr-x.el @@ -264,6 +264,59 @@ carriage return." (substring string 0 (- (length string) (length suffix))) string)) +(defun string-clean-whitespace (string) + "Clean up whitespace in STRING. +All sequences of whitespace in STRING are collapsed into a +single space character, and leading/trailing whitespace is +removed." + (string-trim (replace-regexp-in-string "[ \t\n\r]+" " " string))) + +(defun string-fill (string length) + "Try to word-wrap STRING so that no lines are longer than LENGTH. +Wrapping is done where there is whitespace. If there are +individual words in STRING that are longer than LENGTH, the +result will have lines that are longer than LENGTH." + (with-temp-buffer + (insert string) + (goto-char (point-min)) + (let ((fill-column length) + (adaptive-fill-mode nil)) + (fill-region (point-min) (point-max))) + (buffer-string))) + +(defun string-limit (string length) + "Return (up to) a LENGTH substring of STRING. +If STRING is shorter or equal to LENGTH, the entire string is +returned unchanged. If STRING is longer than LENGTH, and LENGTH +is a positive number, return a substring consisting of the +first LENGTH characters of STRING. If LENGTH is negative, return +a substring consisting of the last LENGTH characters of STRING." + (cond + ((<= (length string) (abs length)) string) + ((>= length 0) (substring string 0 length)) + (t (substring string (+ (length string) length))))) + +(defun string-lines (string &optional omit-nulls) + "Split STRING into a list of lines. 
+If OMIT-NULLS, empty lines will be removed from the results." + (split-string string "\n" omit-nulls)) + +(defun slice-string (string regexp) + "Split STRING at REGEXP boundaries and return a list of slices. +The boundaries that match REGEXP are not omitted from the results." + (let ((start-substring 0) + (start-search 0) + (result nil)) + (save-match-data + (while (and (< start-search (length string)) (string-match regexp string start-search)) + (if (zerop (match-beginning 0)) + (setq start-search (max 1 (match-end 0))) + (push (substring string start-substring (match-beginning 0)) result) + (setq start-substring (match-beginning 0) + start-search (max (1+ (match-beginning 0)) (match-end 0))))) + (push (substring string start-substring) result) + (nreverse result)))) + (defun replace-region-contents (beg end replace-fn &optional max-secs max-costs) "Replace the region between BEG and END using REPLACE-FN. diff --git a/test/lisp/emacs-lisp/subr-x-tests.el b/test/lisp/emacs-lisp/subr-x-tests.el index 9d14a5ab7ec..949bbb163eb 100644 --- a/test/lisp/emacs-lisp/subr-x-tests.el +++ b/test/lisp/emacs-lisp/subr-x-tests.el @@ -582,5 +582,31 @@ (should (equal (string-remove-suffix "a" "aa") "a")) (should (equal (string-remove-suffix "a" "ba") "b"))) +(ert-deftest subr-clean-whitespace () + (should (equal (string-clean-whitespace " foo ") "foo")) + (should (equal (string-clean-whitespace " foo \n\t Bar") "foo Bar"))) + +(ert-deftest subr-string-fill () + (should (equal (string-fill "foo" 10) "foo")) + (should (equal (string-fill "foobar" 5) "foobar")) + (should (equal (string-fill "foo bar zot" 5) "foo\nbar\nzot")) + (should (equal (string-fill "foo bar zot" 7) "foo bar\nzot"))) + +(ert-deftest subr-string-limit () + (should (equal (string-limit "foo" 10) "foo")) + (should (equal (string-limit "foo" 2) "fo")) + (should (equal (string-limit "foo" -2) "oo")) + (should (equal (string-limit "foo" 0) ""))) + +(ert-deftest subr-string-lines () + (should (equal (string-lines "foo") '("foo"))) + (should (equal (string-lines "foo \nbar") '("foo " "bar")))) + 
+(ert-deftest subr-slice-string () + (should (equal (slice-string "foo-bar" "-") '("foo" "-bar"))) + (should (equal (slice-string "foo-bar-" "-") '("foo" "-bar" "-"))) + (should (equal (slice-string "-foo-bar-" "-") '("-foo" "-bar" "-"))) + (should (equal (slice-string "ooo" "lala") '("ooo")))) + (provide 'subr-x-tests) ;;; subr-x-tests.el ends here -- 2.39.5