From f1827846d715cfef05afe52ad2a9df2289df6952 Mon Sep 17 00:00:00 2001 From: Eli Zaretskii Date: Thu, 4 Dec 2014 11:31:33 +0200 Subject: [PATCH] Implement copying of a buffer portion while preserving visual order. See http://lists.gnu.org/archive/html/emacs-devel/2014-11/msg02203.html and http://lists.gnu.org/archive/html/emacs-devel/2014-12/msg00063.html for the rationale. lisp/simple.el (bidi-directional-controls-chars) (bidi-directional-non-controls-chars): New variables. (squeeze-bidi-context-1, squeeze-bidi-context) (line-substring-with-bidi-context) (buffer-substring-with-bidi-context): New functions. doc/lispref/display.texi (Bidirectional Display): Document 'buffer-substring-with-bidi-context'. doc/lispref/text.texi (Buffer Contents): Mention 'buffer-substring-with-bidi-context' with a cross-reference. etc/NEWS: Mention 'buffer-substring-with-bidi-context'. --- doc/lispref/ChangeLog | 8 +++ doc/lispref/display.texi | 24 +++++++ doc/lispref/text.texi | 6 ++ etc/ChangeLog | 4 ++ etc/NEWS | 7 ++ lisp/ChangeLog | 7 ++ lisp/simple.el | 138 +++++++++++++++++++++++++++++++++++++++ 7 files changed, 194 insertions(+) diff --git a/doc/lispref/ChangeLog b/doc/lispref/ChangeLog index f98e457566e..d8215be6b15 100644 --- a/doc/lispref/ChangeLog +++ b/doc/lispref/ChangeLog @@ -1,3 +1,11 @@ +2014-12-04 Eli Zaretskii + + * display.texi (Bidirectional Display): Document + 'buffer-substring-with-bidi-context'. + + * text.texi (Buffer Contents): Mention + 'buffer-substring-with-bidi-context' with a cross-reference. + 2014-12-02 Eli Zaretskii * display.texi (Bidirectional Display): Document diff --git a/doc/lispref/display.texi b/doc/lispref/display.texi index 59f73223a1f..90aa9797a59 100644 --- a/doc/lispref/display.texi +++ b/doc/lispref/display.texi @@ -6854,3 +6854,27 @@ allows it to correctly account for window-specific overlays, which might change the result of the function if some text in the buffer is covered by overlays. 
@end defun + +@cindex copying bidirectional text, preserve visual order +@cindex visual order, preserve when copying bidirectional text + When text that includes mixed right-to-left and left-to-right +characters and bidirectional controls is copied into a different +location, it can change its visual appearance, and also can affect the +visual appearance of the surrounding text at destination. This is +because reordering of bidirectional text specified by the +@acronym{UBA} has non-trivial context-dependent effects both on the +copied text and on the text at copy destination that will surround it. + + Sometimes, a Lisp program may need to preserve the exact visual +appearance of the copied text at destination, and of the text that +surrounds the copy. Lisp programs can use the following function to +achieve that effect. + +@defun buffer-substring-with-bidi-context start end &optional no-properties +This function works similarly to @code{buffer-substring} (@pxref{Buffer +Contents}), but it prepends and appends to the copied text bidi +directional control characters necessary to preserve the visual +appearance of the text when it is inserted at another place. Optional +argument @var{no-properties}, if non-@code{nil}, means remove the text +properties from the copy of the text. +@end defun diff --git a/doc/lispref/text.texi b/doc/lispref/text.texi index 9c878a00c94..720343cb17d 100644 --- a/doc/lispref/text.texi +++ b/doc/lispref/text.texi @@ -219,6 +219,12 @@ This function returns the contents of the entire accessible portion of the current buffer, as a string. @end defun + If you need to make sure the resulting string, when copied to a +different location, will not change its visual appearance due to +reordering of bidirectional text, use the +@code{buffer-substring-with-bidi-context} function +(@pxref{Bidirectional Display, buffer-substring-with-bidi-context}).
+ @defun filter-buffer-substring start end &optional delete This function filters the buffer text between @var{start} and @var{end} using a function specified by the variable diff --git a/etc/ChangeLog b/etc/ChangeLog index 4f672dfce5e..84a1c483b47 100644 --- a/etc/ChangeLog +++ b/etc/ChangeLog @@ -1,3 +1,7 @@ +2014-12-04 Eli Zaretskii + + * NEWS: Mention 'buffer-substring-with-bidi-context'. + 2014-12-02 Eli Zaretskii * NEWS: Mention 'bidi-find-overridden-directionality'. diff --git a/etc/NEWS b/etc/NEWS index f3890a51aad..ae92fa957b9 100644 --- a/etc/NEWS +++ b/etc/NEWS @@ -105,6 +105,13 @@ overridden by directional override control characters. Lisp programs can use this to detect potential phishing of URLs and other links that exploits bidirectional display reordering. ++++ +** The new function `buffer-substring-with-bidi-context' allows you to +copy a portion of a buffer into a different location while preserving +the visual appearance both of the copied text and the text at +destination, even when the copied text includes mixed bidirectional +text and directional control characters. + *** The ls-lisp package uses `string-collate-lessp' to sort file names. If you want the old, locale-independent sorting, customize the new option `ls-lisp-use-string-collate' to a nil value. diff --git a/lisp/ChangeLog b/lisp/ChangeLog index 9c729bc936f..157b2b45f98 100644 --- a/lisp/ChangeLog +++ b/lisp/ChangeLog @@ -1,5 +1,12 @@ 2014-12-04 Eli Zaretskii + Implement copying of a buffer portion while preserving visual order. + * simple.el (bidi-directional-controls-chars) + (bidi-directional-non-controls-chars): New variables. + (squeeze-bidi-context-1, squeeze-bidi-context) + (line-substring-with-bidi-context) + (buffer-substring-with-bidi-context): New functions. + * files.el (file-tree-walk): Doc fix.
2014-12-04 Rupert Swarbrick (tiny change)
diff --git a/lisp/simple.el b/lisp/simple.el
index 16db05a2158..46b346a8fd2 100644
--- a/lisp/simple.el
+++ b/lisp/simple.el
@@ -4126,6 +4126,144 @@ The argument is used for internal purposes; do not supply one."
         (setq this-command 'kill-region)
         (message "If the next command is a kill, it will append"))
     (setq last-command 'kill-region)))
+
+(defvar bidi-directional-controls-chars "\x202a-\x202e\x2066-\x2069"
+  "Character set that matches bidirectional formatting control characters.")
+
+(defvar bidi-directional-non-controls-chars "^\x202a-\x202e\x2066-\x2069"
+  "Character set that matches any character except bidirectional controls.")
+
+(defun squeeze-bidi-context-1 (from to category replacement)
+  "A subroutine of `squeeze-bidi-context'.
+FROM and TO should be markers, CATEGORY and REPLACEMENT should be strings.
+
+Replace each run of characters matching the regexp CATEGORY between
+FROM and TO with REPLACEMENT.  Bidirectional control characters are
+never replaced; the text on either side of them is processed separately."
+  (let ((limit (copy-marker to))
+        (run-end (make-marker)))
+    (goto-char from)
+    (while (< (point) limit)
+      ;; L and R categories include embedding and override controls,
+      ;; but we don't want to replace them, because that might change
+      ;; the visual order.  Likewise with PDF and isolate controls.
+      ;; So bound the search by the end of the run of non-controls at
+      ;; point, kept in a marker of its own, not an alias of LIMIT.
+      (save-excursion
+        (skip-chars-forward bidi-directional-non-controls-chars limit)
+        (set-marker run-end (point)))
+      ;; Replace every run of CATEGORY characters by REPLACEMENT.
+      (while (re-search-forward category run-end t)
+        (replace-match replacement nil t))
+      (goto-char run-end)
+      ;; Skip directional controls, if any.
+      (skip-chars-forward bidi-directional-controls-chars limit))))
+
+(defun squeeze-bidi-context (from to)
+  "Replace characters between FROM and TO while keeping bidi context.
+
+This function replaces the region of text with as few characters
+as possible, while preserving the effect that region will have on
+bidirectional display before and after the region.
+
+FROM, if zero or negative, is an offset from `point-max'.  TO, if
+nil, stands for the end of the accessible portion of the buffer."
+  (let ((start (set-marker (make-marker)
+                           (if (> from 0) from (+ (point-max) from))))
+        (end (set-marker (make-marker) to))
+        ;; This is for when they copy text with read-only text
+        ;; properties.
+        (inhibit-read-only t))
+    (if (null (marker-position end))
+        (setq end (point-max-marker)))
+    ;; Replace each run of non-RTL characters with a single LRM.
+    (squeeze-bidi-context-1 start end "\\CR+" "\x200e")
+    ;; Replace each run of non-LTR characters with a single RLM.  Note
+    ;; that the \cR category includes both the Arabic Letter (AL) and
+    ;; R characters; here we ignore the distinction between them,
+    ;; because that distinction only affects Arabic Number (AN)
+    ;; characters, which are weak and don't affect the reordering.
+    (squeeze-bidi-context-1 start end "\\CL+" "\x200f")))
+
+(defun line-substring-with-bidi-context (start end &optional no-properties)
+  "Return buffer text between START and END with its bidi context.
+
+START and END are assumed to belong to the same physical line
+of buffer text.  This function prepends and appends to the text
+between START and END bidi control characters that preserve the
+visual order of that text when it is inserted at some other place.
+
+Optional argument NO-PROPERTIES, if non-nil, means return the text
+without its text properties."
+  (if (or (< start (point-min))
+          (> end (point-max)))
+      (signal 'args-out-of-range (list (current-buffer) start end)))
+  (let ((buf (current-buffer))
+        substr para-dir from to)
+    (save-excursion
+      (goto-char start)
+      (setq para-dir (current-bidi-paragraph-direction))
+      (setq from (line-beginning-position)
+            to (line-end-position))
+      (goto-char from)
+      ;; If we don't have any mixed directional characters in the
+      ;; entire line, we can just copy the substring without adding
+      ;; any context.
+      (if (or (looking-at-p "\\CR*$")
+              (looking-at-p "\\CL*$"))
+          (setq substr (if no-properties
+                           (buffer-substring-no-properties start end)
+                         (buffer-substring start end)))
+        (setq substr
+              (with-temp-buffer
+                (if no-properties
+                    (insert-buffer-substring-no-properties buf from to)
+                  (insert-buffer-substring buf from to))
+                (squeeze-bidi-context 1 (1+ (- start from)))
+                (squeeze-bidi-context (- end to) nil)
+                (buffer-substring 1 (point-max)))))
+
+      ;; Wrap the string in LRI/RLI..PDI pair to achieve 2 effects:
+      ;; (1) force the string to have the same base embedding
+      ;; direction as the paragraph direction at the source, no matter
+      ;; what is the paragraph direction at destination; and (2) avoid
+      ;; affecting the visual order of the surrounding text at
+      ;; destination if there are characters of different
+      ;; directionality there.
+      (concat (if (eq para-dir 'left-to-right) "\x2066" "\x2067")
+              substr "\x2069"))))
+
+(defun buffer-substring-with-bidi-context (start end &optional no-properties)
+  "Return portion of current buffer between START and END with bidi context.
+
+This function works similarly to `buffer-substring', but it prepends and
+appends to the text bidi directional control characters necessary to
+preserve the visual appearance of the text if it is inserted at another
+place.  This is useful when the buffer substring includes bidirectional
+text and control characters that cause non-trivial reordering on display.
+If copied verbatim, such text can have a very different visual appearance,
+and can also change the visual appearance of the surrounding text at the
+destination of the copy.
+
+START and END can be in either order; an empty region yields \"\".
+Optional argument NO-PROPERTIES, if non-nil, means copy the text without
+the text properties."
+  (if (> start end)
+      (setq start (prog1 end (setq end start))))
+  (if (or (< start (point-min))
+          (> end (point-max)))
+      (signal 'args-out-of-range (list (current-buffer) start end)))
+  (let (pieces line-end)
+    (save-excursion
+      (goto-char start)
+      (while (< start end)
+        (setq line-end (min end (line-end-position)))
+        (push (line-substring-with-bidi-context start line-end no-properties)
+              pieces)
+        (forward-line 1)
+        (setq start (point))))
+    ;; Join once at the end; with no pieces this gives "", not nil.
+    (mapconcat #'identity (nreverse pieces) "\n")))
 
 ;; Yanking.
 
-- 
2.39.2