From 4787455fb7e0a33fe5420b4414a3b6e87685e45d Mon Sep 17 00:00:00 2001 From: Eli Zaretskii Date: Tue, 18 Oct 2011 18:56:09 +0200 Subject: [PATCH] Fix bug #9771 with slow redisplay in long lines full of control characters. src/bidi.c (bidi_line_init): Initialize next_en_pos to zero, not -1. (bidi_resolve_neutral): Don't enter the expensive loop looking for non-neutral characters if the current character is a paragraph separator (a.k.a. Newline). This avoids running the same expensive loop twice, once when we consume the preceding newline and the other time when the line actually needs to be displayed. Avoid the loop when we see neutrals on the base embedding level following a character whose directionality is the same as the paragraph's. This avoids running the expensive loop when a line ends in a long sequence of neutrals, like control characters. Add assertion against STRONG_AL type. Slightly rearrange code that determines the type of a neutral given the first non-neutral that follows it. (bidi_level_of_next_char): Set next_en_pos to zero when invalidating its info. --- src/ChangeLog | 19 +++++++++++++++++ src/bidi.c | 59 ++++++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 68 insertions(+), 10 deletions(-) diff --git a/src/ChangeLog b/src/ChangeLog index 07b0418b399..d21e6383764 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,22 @@ +2011-10-18 Eli Zaretskii + + Fix part 3 of bug#9771. + * bidi.c (bidi_line_init): Initialize next_en_pos to zero, not -1. + (bidi_resolve_neutral): Don't enter the expensive loop looking for + non-neutral characters if the current character is a paragraph + separator (a.k.a. Newline). This avoids running the same + expensive loop twice, once when we consume the preceding newline + and the other time when the line actually needs to be displayed. + Avoid the loop when we see neutrals on the base embedding level + following a character whose directionality is the same as the + paragraph's. 
This avoids running the expensive loop when a line + ends in a long sequence of neutrals, like control characters. + Add assertion against STRONG_AL type. Slightly rearrange code + that determines the type of a neutral given the first non-neutral + that follows it. + (bidi_level_of_next_char): Set next_en_pos to zero when + invalidating its info. + 2011-10-17 Eli Zaretskii * xdisp.c (push_display_prop): Determine whether to record string diff --git a/src/bidi.c b/src/bidi.c index f06c146ca84..29e3c817318 100644 --- a/src/bidi.c +++ b/src/bidi.c @@ -846,7 +846,9 @@ bidi_line_init (struct bidi_it *bidi_it) bidi_it->level_stack[0].override = NEUTRAL_DIR; /* X1 */ bidi_it->invalid_levels = 0; bidi_it->invalid_rl_levels = -1; - bidi_it->next_en_pos = -1; + /* Setting this to zero will force its recomputation the first time + we need it for W5. */ + bidi_it->next_en_pos = 0; bidi_it->next_for_ws.type = UNKNOWN_BT; bidi_set_sor_type (bidi_it, (bidi_it->paragraph_dir == R2L ? 1 : 0), @@ -1732,7 +1734,7 @@ bidi_resolve_weak (struct bidi_it *bidi_it) if (bidi_it->prev.type_after_w1 == WEAK_EN /* ET/BN w/EN before it */ || bidi_it->next_en_pos > bidi_it->charpos) type = WEAK_EN; - else /* W5: ET/BN with EN after it. */ + else if (bidi_it->next_en_pos >=0) /* W5: ET/BN with EN after it. */ { EMACS_INT en_pos = bidi_it->charpos + bidi_it->nchars; const unsigned char *s = (STRINGP (bidi_it->string.lstring) @@ -1775,6 +1777,11 @@ bidi_resolve_weak (struct bidi_it *bidi_it) else if (type == WEAK_BN) type = NEUTRAL_ON; /* W6/Retaining */ } + else if (type_of_next == NEUTRAL_B) + /* Record the fact that there are no more ENs from + here to the end of paragraph, to avoid entering the + loop above ever again in this paragraph. 
*/ + bidi_it->next_en_pos = -1; } } } @@ -1843,13 +1850,45 @@ bidi_resolve_neutral (struct bidi_it *bidi_it) || type == NEUTRAL_ON)) abort (); - if (bidi_get_category (type) == NEUTRAL + if ((type != NEUTRAL_B /* Don't risk entering the long loop below if + we are already at paragraph end. */ + && bidi_get_category (type) == NEUTRAL) || (type == WEAK_BN && prev_level == current_level)) { if (bidi_it->next_for_neutral.type != UNKNOWN_BT) type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type, bidi_it->next_for_neutral.type, current_level); + /* The next two "else if" clauses are shortcuts for the + important special case when we have a long sequence of + neutral or WEAK_BN characters, such as whitespace or nulls or + other control characters, on the base embedding level of the + paragraph, and that sequence goes all the way to the end of + the paragraph and follows a character whose resolved + directionality is identical to the base embedding level. + (This is what happens in a buffer with plain L2R text that + happens to include long sequences of control characters.) By + virtue of N1, the result of examining this long sequence will + always be either STRONG_L or STRONG_R, depending on the base + embedding level. So we use this fact directly instead of + entering the expensive loop in the "else" clause. 
*/ + else if (current_level == 0 + && bidi_it->prev_for_neutral.type == STRONG_L + && !bidi_explicit_dir_char (bidi_it->ch)) + type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type, + STRONG_L, current_level); + else if (/* current level is 1 */ + current_level == 1 + /* base embedding level is also 1 */ + && bidi_it->level_stack[0].level == 1 + /* previous character is one of those considered R for + the purposes of N1 */ + && (bidi_it->prev_for_neutral.type == STRONG_R + || bidi_it->prev_for_neutral.type == WEAK_EN + || bidi_it->prev_for_neutral.type == WEAK_AN) + && !bidi_explicit_dir_char (bidi_it->ch)) + type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type, + STRONG_R, current_level); else { /* Arrrgh!! The UAX#9 algorithm is too deeply entrenched in @@ -1900,6 +1939,9 @@ bidi_resolve_neutral (struct bidi_it *bidi_it) case STRONG_L: case STRONG_R: case STRONG_AL: + /* Actually, STRONG_AL cannot happen here, because + bidi_resolve_weak converts it to STRONG_R, per W3. */ + xassert (type != STRONG_AL); next_type = type; break; case WEAK_EN: @@ -1907,7 +1949,6 @@ bidi_resolve_neutral (struct bidi_it *bidi_it) /* N1: ``European and Arabic numbers are treated as though they were R.'' */ next_type = STRONG_R; - saved_it.next_for_neutral.type = STRONG_R; break; case WEAK_BN: if (!bidi_explicit_dir_char (bidi_it->ch)) @@ -1920,11 +1961,7 @@ bidi_resolve_neutral (struct bidi_it *bidi_it) member. */ if (saved_it.type != WEAK_BN || bidi_get_category (bidi_it->prev.type_after_w1) == NEUTRAL) - { - next_type = bidi_it->prev_for_neutral.type; - saved_it.next_for_neutral.type = next_type; - bidi_check_type (next_type); - } + next_type = bidi_it->prev_for_neutral.type; else { /* This is a BN which does not adjoin neutrals.
@@ -1938,7 +1975,9 @@ bidi_resolve_neutral (struct bidi_it *bidi_it) } type = bidi_resolve_neutral_1 (saved_it.prev_for_neutral.type, next_type, current_level); + saved_it.next_for_neutral.type = next_type; saved_it.type = type; + bidi_check_type (next_type); bidi_check_type (type); bidi_copy_it (bidi_it, &saved_it); } @@ -2014,7 +2053,7 @@ bidi_level_of_next_char (struct bidi_it *bidi_it) bidi_it->next_for_neutral.type = UNKNOWN_BT; if (bidi_it->next_en_pos >= 0 && bidi_it->charpos >= bidi_it->next_en_pos) - bidi_it->next_en_pos = -1; + bidi_it->next_en_pos = 0; if (bidi_it->next_for_ws.type != UNKNOWN_BT && bidi_it->charpos >= bidi_it->next_for_ws.charpos) bidi_it->next_for_ws.type = UNKNOWN_BT; -- 2.39.2