From d7f7fef1c1cdef206860a7075873de7d6c521d8d Mon Sep 17 00:00:00 2001 From: Eli Zaretskii Date: Mon, 17 Jul 2017 17:50:37 +0300 Subject: [PATCH] Allow user control on what starts and ends a paragraph for bidi * src/buffer.h (struct buffer): New members bidi_paragraph_separate_re_ and bidi_paragraph_start_re_. * src/buffer.c (bset_bidi_paragraph_start_re) (bset_bidi_paragraph_separate_re): New setters. (Fbuffer_swap_text): Swap the values of bidi-paragraph-start-re and bidi-paragraph-separate-re. (init_buffer_once): Init the values of bidi-paragraph-start-re and bidi-paragraph-separate-re. (syms_of_buffer): New per-buffer variables. * src/bidi.c (bidi_at_paragraph_end, bidi_find_paragraph_start): Support bidi-paragraph-start-re and bidi-paragraph-separate-re. (bidi_move_to_visually_next): Handle correctly the case when the separator matches an empty string. (Bug#27526) * doc/emacs/mule.texi (Bidirectional Editing): * doc/lispref/display.texi (Bidirectional Display): Document bidi-paragraph-start-re and bidi-paragraph-separate-re. * etc/NEWS: Mention bidi-paragraph-start-re and bidi-paragraph-separate-re. --- doc/emacs/mule.texi | 40 +++++++++++++++------------ doc/lispref/display.texi | 29 ++++++++++++++++++++ etc/NEWS | 6 ++++ src/bidi.c | 29 +++++++++++++++----- src/buffer.c | 59 ++++++++++++++++++++++++++++++++++++++++ src/buffer.h | 6 ++++ 6 files changed, 145 insertions(+), 24 deletions(-) diff --git a/doc/emacs/mule.texi b/doc/emacs/mule.texi index 8edf2640cfe..2f27b9aa0eb 100644 --- a/doc/emacs/mule.texi +++ b/doc/emacs/mule.texi @@ -1774,15 +1774,15 @@ Chars}). @cindex bidirectional editing @cindex right-to-left text - Emacs supports editing text written in scripts, such as Arabic and -Hebrew, whose natural ordering of horizontal text for display is from -right to left. However, digits and Latin text embedded in these -scripts are still displayed left to right. 
It is also not uncommon to -have small portions of text in Arabic or Hebrew embedded in an otherwise -Latin document; e.g., as comments and strings in a program source -file. For these reasons, text that uses these scripts is actually -@dfn{bidirectional}: a mixture of runs of left-to-right and -right-to-left characters. + Emacs supports editing text written in scripts, such as Arabic, +Farsi, and Hebrew, whose natural ordering of horizontal text for +display is from right to left. However, digits and Latin text +embedded in these scripts are still displayed left to right. It is +also not uncommon to have small portions of text in Arabic or Hebrew +embedded in an otherwise Latin document; e.g., as comments and strings +in a program source file. For these reasons, text that uses these +scripts is actually @dfn{bidirectional}: a mixture of runs of +left-to-right and right-to-left characters. This section describes the facilities and options provided by Emacs for editing bidirectional text. @@ -1811,15 +1811,21 @@ directionality when they are displayed. The default value is @cindex base direction of paragraphs @cindex paragraph, base direction +@vindex bidi-paragraph-start-re +@vindex bidi-paragraph-separate-re Each paragraph of bidirectional text can have its own @dfn{base -direction}, either right-to-left or left-to-right. (Paragraph -@c paragraph-separate etc have no influence on this? -boundaries are empty lines, i.e., lines consisting entirely of -whitespace characters.) Text in left-to-right paragraphs begins on -the screen at the left margin of the window and is truncated or -continued when it reaches the right margin. By contrast, text in -right-to-left paragraphs is displayed starting at the right margin and -is continued or truncated at the left margin. +direction}, either right-to-left or left-to-right. Text in +left-to-right paragraphs begins on the screen at the left margin of +the window and is truncated or continued when it reaches the right +margin. 
By contrast, text in right-to-left paragraphs is displayed +starting at the right margin and is continued or truncated at the left +margin. By default, paragraph boundaries are empty lines, i.e., lines +consisting entirely of whitespace characters. To change that, you can +customize the two variables @code{bidi-paragraph-start-re} and +@code{bidi-paragraph-separate-re}, whose values should be regular +expressions (strings); e.g., to have a single newline start a new +paragraph, set both of these variables to @code{"^"}. These two +variables are buffer-local (@pxref{Locals}). @vindex bidi-paragraph-direction Emacs determines the base direction of each paragraph dynamically, diff --git a/doc/lispref/display.texi b/doc/lispref/display.texi index 98940cbc996..5b8f58c1fd5 100644 --- a/doc/lispref/display.texi +++ b/doc/lispref/display.texi @@ -7456,6 +7456,35 @@ truncated or continued when the text reaches the right margin. Right-to-left paragraphs are displayed beginning at the right margin, and are continued or truncated at the left margin. +@cindex paragraph-start, and bidirectional display +@cindex paragraph-separate, and bidirectional display + Where exactly paragraphs start and end, for the purpose of the Emacs +@acronym{UBA} implementation, is determined by the following two +buffer-local variables (note that @code{paragraph-start} and +@code{paragraph-separate} have no influence on this). By default both +of these variables are @code{nil}, and paragraphs are bounded by empty +lines, i.e., lines that consist entirely of zero or more whitespace +characters followed by a newline. + +@defvar bidi-paragraph-start-re +If non-@code{nil}, this variable's value should be a regular +expression matching a line that starts or separates two paragraphs. +The regular expression is always matched after a newline, so it is +best to anchor it, i.e., begin it with a @code{"^"}. 
+@end defvar + +@defvar bidi-paragraph-separate-re +If non-@code{nil}, this variable's value should be a regular +expression matching a line that separates two paragraphs. The regular +expression is always matched after a newline, so it is best to anchor +it, i.e., begin it with a @code{"^"}. +@end defvar + + If you modify either of these two variables, you should normally modify +both, to make sure they describe paragraphs consistently. For +example, to have each new line start a new paragraph for +bidi-reordering purposes, set both variables to @code{"^"}. + By default, Emacs determines the base direction of each paragraph by looking at the text at its beginning. The precise method of determining the base direction is specified by the @acronym{UBA}; in a diff --git a/etc/NEWS b/etc/NEWS index dca562cb3b9..0c2db0c398b 100644 --- a/etc/NEWS +++ b/etc/NEWS @@ -561,6 +561,12 @@ current buffer with the contents of the accessible portion of a different buffer while keeping point, mark, markers, and text properties as intact as possible. ++++ +** More user control of reordering bidirectional text for display. +The two new variables, 'bidi-paragraph-start-re' and +'bidi-paragraph-separate-re', allow customization of what exactly are +paragraphs, for the purposes of bidirectional display. 
+ * Changes in Specialized Modes and Packages in Emacs 26.1 diff --git a/src/bidi.c b/src/bidi.c index e34da778ba0..763797488b0 100644 --- a/src/bidi.c +++ b/src/bidi.c @@ -1448,8 +1448,14 @@ bidi_at_paragraph_end (ptrdiff_t charpos, ptrdiff_t bytepos) Lisp_Object start_re; ptrdiff_t val; - sep_re = paragraph_separate_re; - start_re = paragraph_start_re; + if (STRINGP (BVAR (current_buffer, bidi_paragraph_separate_re))) + sep_re = BVAR (current_buffer, bidi_paragraph_separate_re); + else + sep_re = paragraph_separate_re; + if (STRINGP (BVAR (current_buffer, bidi_paragraph_start_re))) + start_re = BVAR (current_buffer, bidi_paragraph_start_re); + else + start_re = paragraph_start_re; val = fast_looking_at (sep_re, charpos, bytepos, ZV, ZV_BYTE, Qnil); if (val < 0) @@ -1523,7 +1529,10 @@ bidi_paragraph_cache_on_off (void) static ptrdiff_t bidi_find_paragraph_start (ptrdiff_t pos, ptrdiff_t pos_byte) { - Lisp_Object re = paragraph_start_re; + Lisp_Object re = + STRINGP (BVAR (current_buffer, bidi_paragraph_start_re)) + ? BVAR (current_buffer, bidi_paragraph_start_re) + : paragraph_start_re; ptrdiff_t limit = ZV, limit_byte = ZV_BYTE; struct region_cache *bpc = bidi_paragraph_cache_on_off (); ptrdiff_t n = 0, oldpos = pos, next; @@ -3498,10 +3507,16 @@ bidi_move_to_visually_next (struct bidi_it *bidi_it) if (sep_len >= 0) { bidi_it->new_paragraph = 1; - /* Record the buffer position of the last character of the - paragraph separator. */ - bidi_it->separator_limit - = bidi_it->charpos + bidi_it->nchars + sep_len; + /* Record the buffer position of the last character of + the paragraph separator. If the paragraph separator + is an empty string (e.g., the regex is "^"), the + newline that precedes the end of the paragraph is + that last character. 
*/ + if (sep_len > 0) + bidi_it->separator_limit + = bidi_it->charpos + bidi_it->nchars + sep_len; + else + bidi_it->separator_limit = bidi_it->charpos; } } } diff --git a/src/buffer.c b/src/buffer.c index e0972aac33c..649ddbe1839 100644 --- a/src/buffer.c +++ b/src/buffer.c @@ -173,6 +173,16 @@ bset_bidi_display_reordering (struct buffer *b, Lisp_Object val) b->bidi_display_reordering_ = val; } static void +bset_bidi_paragraph_start_re (struct buffer *b, Lisp_Object val) +{ + b->bidi_paragraph_start_re_ = val; +} +static void +bset_bidi_paragraph_separate_re (struct buffer *b, Lisp_Object val) +{ + b->bidi_paragraph_separate_re_ = val; +} +static void bset_buffer_file_coding_system (struct buffer *b, Lisp_Object val) { b->buffer_file_coding_system_ = val; @@ -2322,6 +2332,8 @@ results, see Info node `(elisp)Swapping Text'. */) swapfield_ (enable_multibyte_characters, Lisp_Object); swapfield_ (bidi_display_reordering, Lisp_Object); swapfield_ (bidi_paragraph_direction, Lisp_Object); + swapfield_ (bidi_paragraph_separate_re, Lisp_Object); + swapfield_ (bidi_paragraph_start_re, Lisp_Object); /* FIXME: Not sure what we should do with these *_marker fields. Hopefully they're just nil anyway. */ swapfield_ (pt_marker, Lisp_Object); @@ -5121,6 +5133,8 @@ init_buffer_once (void) XSETFASTINT (BVAR (&buffer_local_flags, category_table), idx); ++idx; XSETFASTINT (BVAR (&buffer_local_flags, bidi_display_reordering), idx); ++idx; XSETFASTINT (BVAR (&buffer_local_flags, bidi_paragraph_direction), idx); ++idx; + XSETFASTINT (BVAR (&buffer_local_flags, bidi_paragraph_separate_re), idx); ++idx; + XSETFASTINT (BVAR (&buffer_local_flags, bidi_paragraph_start_re), idx); ++idx; XSETFASTINT (BVAR (&buffer_local_flags, buffer_file_coding_system), idx); /* Make this one a permanent local. 
*/ buffer_permanent_local_flags[idx++] = 1; @@ -5202,6 +5216,8 @@ init_buffer_once (void) bset_ctl_arrow (&buffer_defaults, Qt); bset_bidi_display_reordering (&buffer_defaults, Qt); bset_bidi_paragraph_direction (&buffer_defaults, Qnil); + bset_bidi_paragraph_start_re (&buffer_defaults, Qnil); + bset_bidi_paragraph_separate_re (&buffer_defaults, Qnil); bset_cursor_type (&buffer_defaults, Qt); bset_extra_line_spacing (&buffer_defaults, Qnil); bset_cursor_in_non_selected_windows (&buffer_defaults, Qt); @@ -5616,6 +5632,49 @@ This variable is never applied to a way of decoding a file while reading it. */ &BVAR (current_buffer, bidi_display_reordering), Qnil, doc: /* Non-nil means reorder bidirectional text for display in the visual order. */); + DEFVAR_PER_BUFFER ("bidi-paragraph-start-re", + &BVAR (current_buffer, bidi_paragraph_start_re), Qnil, + doc: /* If non-nil, a regexp matching a line that starts OR separates paragraphs. + +The value of nil means to use empty lines as lines that start and +separate paragraphs. + +When Emacs displays bidirectional text, it by default computes +the base paragraph direction separately for each paragraph. +Setting this variable changes the places where paragraph base +direction is recomputed. + +The regexp is always matched after a newline, so it is best to +anchor it by beginning it with a "^". + +If you change the value of this variable, be sure to change +the value of `bidi-paragraph-separate-re' accordingly. For +example, to have a single newline behave as a paragraph separator, +set both these variables to "^". + +See also `bidi-paragraph-direction'. */); + + DEFVAR_PER_BUFFER ("bidi-paragraph-separate-re", + &BVAR (current_buffer, bidi_paragraph_separate_re), Qnil, + doc: /* If non-nil, a regexp matching a line that separates paragraphs. + +The value of nil means to use empty lines as paragraph separators. 
+ +When Emacs displays bidirectional text, it by default computes +the base paragraph direction separately for each paragraph. +Setting this variable changes the places where paragraph base +direction is recomputed. + +The regexp is always matched after a newline, so it is best to +anchor it by beginning it with a "^". + +If you change the value of this variable, be sure to change +the value of `bidi-paragraph-start-re' accordingly. For +example, to have a single newline behave as a paragraph separator, +set both these variables to "^". + +See also `bidi-paragraph-direction'. */); + DEFVAR_PER_BUFFER ("bidi-paragraph-direction", &BVAR (current_buffer, bidi_paragraph_direction), Qnil, doc: /* If non-nil, forces directionality of text paragraphs in the buffer. diff --git a/src/buffer.h b/src/buffer.h index be270fe4823..46ca6aa7384 100644 --- a/src/buffer.h +++ b/src/buffer.h @@ -611,6 +611,12 @@ struct buffer direction dynamically for each paragraph. */ Lisp_Object bidi_paragraph_direction_; + /* If non-nil, a regular expression for bidi paragraph separator. */ + Lisp_Object bidi_paragraph_separate_re_; + + /* If non-nil, a regular expression for bidi paragraph start. */ + Lisp_Object bidi_paragraph_start_re_; + /* Non-nil means do selective display; see doc string in syms_of_buffer (buffer.c) for details. */ Lisp_Object selective_display_; -- 2.39.2