From: Lars Ingebrigtsen Date: Sat, 30 Oct 2021 13:22:36 +0000 (+0200) Subject: Add new function string-glyph-split X-Git-Tag: emacs-29.0.90~3671^2~345 X-Git-Url: http://git.eshelyaron.com/gitweb/?a=commitdiff_plain;h=c23cb2861eab646498d2d7d28a8d46f4de91ebb3;p=emacs.git Add new function string-glyph-split * doc/lispref/strings.texi (Creating Strings): Document it. * lisp/emacs-lisp/shortdoc.el (string): Mention it. * lisp/emacs-lisp/subr-x.el (string-glyph-split): New function. --- diff --git a/doc/lispref/strings.texi b/doc/lispref/strings.texi index 7212677d832..a9e1105c824 100644 --- a/doc/lispref/strings.texi +++ b/doc/lispref/strings.texi @@ -248,6 +248,24 @@ equivalent to 0. Thus, @w{@code{(substring-no-properties properties removed. @end defun +@defun string-glyph-split string +Special care has to be taken when handling strings that are meant to +be displayed. @code{substring} and friends work on individual +characters (i.e., code points), but things like emojis are often +represented by @dfn{grapheme clusters}, which are basically a bunch of +code points ``glued together'' in various ways. This function splits +up strings like that into a list of strings, where each of these +resulting strings represents a glyph that should be displayed as a +unit. + +For instance, if you want to display a string without the first glyph, +you can say: + +@example +(apply #'insert (cdr (string-glyph-split string))) +@end example +@end defun + @defun concat &rest sequences @cindex copying strings @cindex concatenating strings diff --git a/etc/NEWS b/etc/NEWS index da9e803e9e9..7f1fa8b8f4d 100644 --- a/etc/NEWS +++ b/etc/NEWS @@ -427,6 +427,12 @@ This returns the width of a string in pixels. This can be useful when dealing with variable pitch fonts and glyphs that have widths that aren't integer multiples of the default font. ++++ +** New function 'string-glyph-split'. +This function splits a string into a list of strings representing +separate glyphs. 
This takes into account combining characters and +grapheme clusters. + --- ** 'lookup-key' is more allowing when searching for extended menu items. In Emacs 28.1, the behavior of 'lookup-key' was changed: when looking diff --git a/lisp/emacs-lisp/shortdoc.el b/lisp/emacs-lisp/shortdoc.el index 817dfa6b71e..daf362dd88b 100644 --- a/lisp/emacs-lisp/shortdoc.el +++ b/lisp/emacs-lisp/shortdoc.el @@ -159,6 +159,8 @@ There can be any number of :example/:result elements." :eval (split-string-and-unquote "foo \"bar zot\"")) (split-string-shell-command :eval (split-string-shell-command "ls /tmp/'foo bar'")) + (string-glyph-split + :eval (string-glyph-split "Hello, 👼🏻🧑🏼‍🤝‍🧑🏻")) (string-lines :eval (string-lines "foo\n\nbar") :eval (string-lines "foo\n\nbar" t)) diff --git a/lisp/emacs-lisp/subr-x.el b/lisp/emacs-lisp/subr-x.el index 9a82fe2449d..e3caf88c2f5 100644 --- a/lisp/emacs-lisp/subr-x.el +++ b/lisp/emacs-lisp/subr-x.el @@ -449,6 +449,22 @@ is inserted before adjusting the number of empty lines." (car (window-text-pixel-size (current-buffer) (point-min) (point))))) +;;;###autoload +(defun string-glyph-split (string) + "Split STRING into a list of strings representing separate glyphs. +This takes into account combining characters and grapheme clusters." + (let ((result nil) + (start 0) + comp) + (while (< start (length string)) + (if (setq comp (find-composition-internal start nil string nil)) + (progn + (push (substring string (car comp) (cadr comp)) result) + (setq start (cadr comp))) + (push (substring string start (1+ start)) result) + (setq start (1+ start)))) + (nreverse result))) + (provide 'subr-x) ;;; subr-x.el ends here