From 9ccaaa4be77d1698784cceb983ef987cc80212c2 Mon Sep 17 00:00:00 2001
From: Chong Yidong
Date: Fri, 12 Aug 2011 11:43:30 -0400
Subject: [PATCH] Fix behavior of string-mark-left-to-right.

* lisp/subr.el (string-mark-left-to-right): Search the entire string
for RTL script, not just the terminating character. Doc fix.
---
Reviewer notes (this area is ignored by git-am and is not part of the
commit message):
  - Before this change, only the last character of STR was tested, and
    only against bidi-class R.
  - After this change, characters are examined from the start of STR
    and the scan stops at the first one whose bidi-class is R, AL or
    RLO.
  - NOTE(review): RLE is not among the classes tested -- confirm
    whether that omission is intentional.
  - In both versions the appended LRM (U+200E) carries the `invisible'
    text property set to t, and a non-string STR signals
    `wrong-type-argument'.

 etc/NEWS       | 14 +++++++++-----
 lisp/ChangeLog |  5 +++++
 lisp/subr.el   | 31 +++++++++++++++++++++----------
 3 files changed, 35 insertions(+), 15 deletions(-)

diff --git a/etc/NEWS b/etc/NEWS
index 6f8c125f7f9..1a788e7f6f9 100644
--- a/etc/NEWS
+++ b/etc/NEWS
@@ -1038,11 +1038,15 @@ of function value which looks like (closure ENV ARGS &rest BODY).
 *** New function `special-variable-p' to check whether a variable is
 declared as dynamically bound.
 
-** New function `string-mark-left-to-right' appends a Unicode LRM
-(left-to-right mark) character to a string if it terminates in
-right-to-left script. This is useful when the buffer has overall
-left-to-right paragraph direction and you need to insert a string
-whose contents (and directionality) are not known in advance.
+** New function `string-mark-left-to-right'.
+Given a string containing right-to-left (RTL) script, this function
+returns another string with a terminating LRM (left-to-right mark)
+character. If this string is inserted into a buffer, Emacs treats the
+LRM as the end of an RTL segment and displays following text as LTR.
+
+This is useful when the buffer has overall left-to-right (LTR)
+paragraph direction and you need to insert a string whose contents
+(and hence directionality) are not known in advance.
 
 ** pre/post-command-hook are not reset to nil upon error.
 Instead, the offending function is removed.
diff --git a/lisp/ChangeLog b/lisp/ChangeLog
index bde9d54cfa2..b9cf5367bf0 100644
--- a/lisp/ChangeLog
+++ b/lisp/ChangeLog
@@ -1,3 +1,8 @@
+2011-08-12 Chong Yidong
+
+	* subr.el (string-mark-left-to-right): Search the entire string
+	for RTL script, not just the terminating character. Doc fix.
+
 2011-08-12 Stefan Monnier
 
 	* progmodes/js.el (js-syntax-propertize, js-syntax-propertize-regexp):
diff --git a/lisp/subr.el b/lisp/subr.el
index a897da1d9ba..a4251b6fee6 100644
--- a/lisp/subr.el
+++ b/lisp/subr.el
@@ -3540,18 +3540,29 @@ to case differences."
               str2 0 (length str1) ignore-case)))
 
 (defun string-mark-left-to-right (str)
-  "Return a string that can be safely embedded in left-to-right text.
-If STR ends in right-to-left (RTL) script, return a string
-consisting of STR followed by an invisible left-to-right
-mark (LRM) character. Otherwise, return STR."
+  "Return a string that can be safely inserted in left-to-right text.
+If STR contains right-to-left (RTL) script, return a string
+consisting of STR followed by a terminating invisible
+left-to-right mark (LRM) character.
+
+The LRM character marks the end of an RTL segment, and resets the
+display direction of any subsequent text to left-to-right.
+\(Otherwise, some of that text might be displayed as part of the
+RTL segment, based on the bidirectional display algorithm.)
+
+If STR contains no RTL characters, return STR."
   (unless (stringp str)
     (signal 'wrong-type-argument (list 'stringp str)))
-  (if (and (> (length str) 0)
-           (eq (get-char-code-property (aref str (1- (length str)))
-                                       'bidi-class)
-               'R))
-      (concat str (propertize (string ?\x200e) 'invisible t))
-    str))
+  (let ((len (length str))
+        (n 0)
+        rtl-found)
+    (while (and (not rtl-found) (< n len))
+      (setq rtl-found (memq (get-char-code-property
+                             (aref str n) 'bidi-class) '(R AL RLO))
+            n (1+ n)))
+    (if rtl-found
+        (concat str (propertize (string ?\x200e) 'invisible t))
+      str)))
 
 ;;;; invisibility specs
 
-- 
2.39.2