might change the result of the function if some text in the buffer is
covered by overlays.
@end defun
+
+@cindex copying bidirectional text, preserve visual order
+@cindex visual order, preserve when copying bidirectional text
+ When text that includes mixed right-to-left and left-to-right
+characters and bidirectional controls is copied into a different
+location, its visual appearance can change, and it can also affect the
+visual appearance of the surrounding text at the destination. This is
+because the reordering of bidirectional text specified by the
+@acronym{UBA} has non-trivial context-dependent effects both on the
+copied text and on the text that will surround it at the destination.
+
+ Sometimes, a Lisp program may need to preserve the exact visual
+appearance of the copied text at the destination, and of the text that
+surrounds the copy. Lisp programs can use the following function to
+achieve that effect.
+
+@defun buffer-substring-with-bidi-context start end &optional no-properties
+This function works similarly to @code{buffer-substring} (@pxref{Buffer
+Contents}), but it prepends and appends to the copied text the bidi
+directional control characters necessary to preserve the visual
+appearance of the text when it is inserted at another place. Optional
+argument @var{no-properties}, if non-@code{nil}, means remove the text
+properties from the copy of the text.
+@end defun
(setq this-command 'kill-region)
(message "If the next command is a kill, it will append"))
(setq last-command 'kill-region)))
+
+(defvar bidi-directional-controls-chars "\x202a-\x202e\x2066-\x2069" ; U+202A..U+202E (LRE..RLO), U+2066..U+2069 (LRI..PDI)
+ "Character set for `skip-chars-forward' matching bidi formatting control characters.")
+
+(defvar bidi-directional-non-controls-chars "^\x202a-\x202e\x2066-\x2069" ; the leading ^ complements the set
+ "Character set for `skip-chars-forward' matching any character except bidi controls.")
+
+(defun squeeze-bidi-context-1 (from to category replacement)
+ "Replace matches of regexp CATEGORY between FROM and TO with REPLACEMENT.
+FROM and TO should be markers; this is a subroutine of `squeeze-bidi-context'."
+ (let ((pt (copy-marker from))
+ (limit (copy-marker to))
+ (old-pt 0)
+ lim1)
+ (setq lim1 limit) ; NOTE(review): same marker object as LIMIT, not a copy -- confirm intended
+ (goto-char pt)
+ (while (< pt limit)
+ (if (> pt old-pt)
+ (move-marker lim1
+ (save-excursion
+ ;; L and R categories include embedding and
+ ;; override controls, but we don't want to
+ ;; replace them, because that might change
+ ;; the visual order. Likewise with PDF and
+ ;; isolate controls.
+ (+ pt (skip-chars-forward
+ bidi-directional-non-controls-chars
+ limit)))))
+ ;; Replace the next match of CATEGORY before LIM1 with REPLACEMENT.
+ (if (null (re-search-forward category lim1 t))
+ ;; No more characters of CATEGORY, we are done.
+ (setq pt limit)
+ (replace-match replacement nil t) ; final t: REPLACEMENT is inserted literally
+ (move-marker pt (point)))
+ (setq old-pt pt) ; NOTE(review): OLD-PT now aliases PT's marker -- confirm intended
+ ;; Skip directional controls, if any.
+ (move-marker
+ pt (+ pt (skip-chars-forward bidi-directional-controls-chars limit))))))
+
+(defun squeeze-bidi-context (from to)
+ "Replace characters between FROM and TO while keeping bidi context.
+
+Replace the region of text with as few characters as possible, while
+preserving the effect it will have on bidirectional display before and after
+it. A non-positive FROM counts back from `point-max'; nil TO means `point-max'."
+ (let ((start (set-marker (make-marker)
+ (if (> from 0) from (+ (point-max) from))))
+ (end (set-marker (make-marker) to))
+ ;; This is for when they copy text with read-only text
+ ;; properties.
+ (inhibit-read-only t))
+ (if (null (marker-position end))
+ (setq end (point-max-marker)))
+ ;; Replace each run of non-RTL characters with a single LRM (U+200E).
+ (squeeze-bidi-context-1 start end "\\CR+" "\x200e")
+ ;; Replace each run of non-LTR characters with a single RLM (U+200F).
+ ;; Note that the \cR category includes both the Arabic Letter (AL) and
+ ;; R characters; here we ignore the distinction between them,
+ ;; because that distinction only affects Arabic Number (AN)
+ ;; characters, which are weak and don't affect the reordering.
+ (squeeze-bidi-context-1 start end "\\CL+" "\x200f")))
+
+(defun line-substring-with-bidi-context (start end &optional no-properties)
+ "Return buffer text between START and END with its bidi context.
+
+START and END are assumed to belong to the same physical line. The text is
+returned with bidi control characters prepended and appended that preserve
+its visual order when inserted elsewhere. Non-nil NO-PROPERTIES means omit
+text properties. Signal `args-out-of-range' if START or END is out of range."
+ (if (or (< start (point-min))
+ (> end (point-max)))
+ (signal 'args-out-of-range (list (current-buffer) start end)))
+ (let ((buf (current-buffer))
+ substr para-dir from to)
+ (save-excursion
+ (goto-char start)
+ (setq para-dir (current-bidi-paragraph-direction))
+ (setq from (line-beginning-position)
+ to (line-end-position))
+ (goto-char from)
+ ;; If we don't have any mixed directional characters in the
+ ;; entire line, we can just copy the substring without squeezed
+ ;; context; the isolate wrapper below is still added.
+ (if (or (looking-at-p "\\CR*$")
+ (looking-at-p "\\CL*$"))
+ (setq substr (if no-properties
+ (buffer-substring-no-properties start end)
+ (buffer-substring start end)))
+ (setq substr
+ (with-temp-buffer
+ (if no-properties
+ (insert-buffer-substring-no-properties buf from to)
+ (insert-buffer-substring buf from to))
+ (squeeze-bidi-context 1 (1+ (- start from))) ; squeeze the line text that precedes START
+ (squeeze-bidi-context (- end to) nil) ; text after END; offset is relative to `point-max'
+ (buffer-substring 1 (point-max)))))
+
+ ;; Wrap the string in LRI/RLI..PDI pair to achieve 2 effects:
+ ;; (1) force the string to have the same base embedding
+ ;; direction as the paragraph direction at the source, no matter
+ ;; what is the paragraph direction at destination; and (2) avoid
+ ;; affecting the visual order of the surrounding text at
+ ;; destination if there are characters of different
+ ;; directionality there.
+ (concat (if (eq para-dir 'left-to-right) "\x2066" "\x2067")
+ substr "\x2069"))))
+
+(defun buffer-substring-with-bidi-context (start end &optional no-properties)
+ "Return portion of current buffer between START and END with bidi context.
+
+This function works like `buffer-substring', but it prepends and appends
+to the text the bidi directional control characters necessary to preserve
+its visual appearance if it is inserted at another place. This is useful
+when the text includes bidirectional text and control characters that cause
+non-trivial reordering on display: copied verbatim, such text can look very
+different, and can also change the visual appearance of the surrounding
+text at the destination of the copy. Each line of the region is wrapped
+separately; an empty region yields the empty string.
+
+Optional argument NO-PROPERTIES, if non-nil, means copy the text without
+the text properties."
+ (let (line-end substr)
+ (if (or (< start (point-min))
+ (> end (point-max)))
+ (signal 'args-out-of-range (list (current-buffer) start end)))
+ (save-excursion
+ (goto-char start)
+ (setq line-end (min end (line-end-position))) ; clamp to END on the region's last line
+ (while (< start end)
+ (setq substr
+ (concat substr
+ (if substr "\n" "") ; rejoin lines with the newline stepped over below
+ (line-substring-with-bidi-context start line-end
+ no-properties)))
+ (forward-line 1)
+ (setq start (point))
+ (setq line-end (min end (line-end-position))))
+ (or substr "")))) ; always return a string, as `buffer-substring' does
\f
;; Yanking.