From f3e16cbb5258fcbe2969eb48b332b2c629cfb2a6 Mon Sep 17 00:00:00 2001 From: Eli Zaretskii Date: Wed, 10 Dec 2014 19:39:37 +0200 Subject: [PATCH] Fix out-of-memory condition in display of long bracketed lines (bug#19322) src/bidi.c (BIDI_CACHE_MAX_ELTS_PER_SLOT): New macro. (bidi_cache_max_elts): New global variable. (bidi_shelve_header_size): Add the sizeof bidi_cache_max_elts. (bidi_cache_shrink, bidi_initialize): Reset bidi_cache_max_elts to its initial value. (bidi_cache_search): Handle overflown cache. Improve commentary. (bidi_cache_ensure_space): Limit allocations to the current value of bidi_cache_max_elts. Force xpalloc not to over-allocate. If less than a full BIDI_CACHE_CHUNK is left to the limit, decrease the increment to not exceed the limit. (bidi_cache_iterator_state): Now returns non-zero if succeeded to cache, zero otherwise (meaning the cache overflowed). In the latter case, set bidi_cache_last_idx to -1. (bidi_peek_at_next_level): Handle overflown cache. (bidi_push_it): Increase the cache limit for iterating the new object. (bidi_pop_it): Decrease the cache limit back to previous value. (bidi_shelve_cache): Shelve the current value of the cache limit. (bidi_unshelve_cache): Restore the value of cache limit. (bidi_find_bracket_pairs): If the cache overflows while looking for the paired bracket, give up and let bidi_resolve_neutrals process the bracket as a simple neutral. (bidi_find_other_level_edge): If the cache overflows, fall back on Plan B, which effectively stops the reordering and restarts it on the next character (after resetting the cache). (bidi_move_to_visually_next): When the cache overflows, reset it after processing the last cached character. --- src/ChangeLog | 30 ++++++++ src/bidi.c | 191 ++++++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 190 insertions(+), 31 deletions(-) diff --git a/src/ChangeLog b/src/ChangeLog index 09268d1b6cd..2a6e2373fcd 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,33 @@ +2014-12-10 Eli Zaretskii + + * bidi.c (BIDI_CACHE_MAX_ELTS_PER_SLOT): New macro. + (bidi_cache_max_elts): New global variable. + (bidi_shelve_header_size): Add the sizeof bidi_cache_max_elts. + (bidi_cache_shrink, bidi_initialize): Reset bidi_cache_max_elts to + its initial value. + (bidi_cache_search): Handle overflown cache. Improve commentary. + (bidi_cache_ensure_space): Limit allocations to the current value + of bidi_cache_max_elts. Force xpalloc not to over-allocate. If + less than a full BIDI_CACHE_CHUNK is left to the limit, decrease + the increment to not exceed the limit. + (bidi_cache_iterator_state): Now returns non-zero if succeeded to + cache, zero otherwise (meaning the cache overflowed). In the + latter case, set bidi_cache_last_idx to -1. + (bidi_peek_at_next_level): Handle overflown cache. + (bidi_push_it): Increase the cache limit for iterating the new + object. + (bidi_pop_it): Decrease the cache limit back to previous value. + (bidi_shelve_cache): Shelve the current value of the cache limit. + (bidi_unshelve_cache): Restore the value of cache limit. + (bidi_find_bracket_pairs): If the cache overflows while looking + for the paired bracket, give up and let bidi_resolve_neutrals + process the bracket as a simple neutral. (Bug#19322) + (bidi_find_other_level_edge): If the cache overflows, fall back on + Plan B, which effectively stops the reordering and restarts it on + the next character (after resetting the cache). + (bidi_move_to_visually_next): When the cache overflows, reset it + after processing the last cached character. + 2014-12-10 Paul Eggert Fix glitches in gnutls.c, mostly memory-related diff --git a/src/bidi.c b/src/bidi.c index cc70d08f01e..0d291fcb033 100644 --- a/src/bidi.c +++ b/src/bidi.c @@ -546,6 +546,30 @@ bidi_copy_it (struct bidi_it *to, struct bidi_it *from) characters). 200 was chosen as an upper limit for reasonably-long lines in a text file/buffer. */ #define BIDI_CACHE_CHUNK 200 +/* Maximum size we allow the cache to become, per iterator stack slot, + in units of struct bidi_it size. If we allow unlimited growth, we + could run out of memory for pathologically long bracketed text or + very long text lines that need to be reordered. This is aggravated + when word-wrap is in effect, since then functions display_line and + move_it_in_display_line_to need to keep up to 4 copies of the + cache. + + This limitation means there can be no more than that amount of + contiguous RTL text on any single physical line in a LTR paragraph, + and similarly with contiguous LTR + numeric text in a RTL + paragraph. (LTR text in a LTR paragraph and RTL text in a RTL + paragraph are not reordered, and so don't need the cache, and + cannot hit this limit.) More importantly, no single line can have + text longer than this inside paired brackets (because bracket pairs + resolution uses the cache). If the limit is exceeded, the fallback + code will produce visual order that will be incorrect if there are + RTL characters in the offending line of text. */ +/* Do we need to allow customization of this limit? */ +#define BIDI_CACHE_MAX_ELTS_PER_SLOT 50000 +#if BIDI_CACHE_CHUNK >= BIDI_CACHE_MAX_ELTS_PER_SLOT +# error BIDI_CACHE_CHUNK must be less than BIDI_CACHE_MAX_ELTS_PER_SLOT +#endif +static ptrdiff_t bidi_cache_max_elts = BIDI_CACHE_MAX_ELTS_PER_SLOT; static struct bidi_it *bidi_cache; static ptrdiff_t bidi_cache_size = 0; enum { elsz = sizeof (struct bidi_it) }; @@ -566,7 +590,7 @@ enum bidi_shelve_header_size = (sizeof (bidi_cache_idx) + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp) + sizeof (bidi_cache_start) - + sizeof (bidi_cache_last_idx)) + + sizeof (bidi_cache_last_idx) + sizeof (bidi_cache_max_elts)) }; /* Effectively remove the cached states beyond the Nth state from the @@ -604,6 +628,7 @@ bidi_cache_shrink (void) bidi_cache_size = BIDI_CACHE_CHUNK; } bidi_cache_reset (); + bidi_cache_max_elts = BIDI_CACHE_MAX_ELTS_PER_SLOT; } static void @@ -622,7 +647,9 @@ bidi_cache_fetch_state (ptrdiff_t idx, struct bidi_it *bidi_it) /* Find a cached state with a given CHARPOS and resolved embedding level less or equal to LEVEL. If LEVEL is -1, disregard the resolved levels in cached states. DIR, if non-zero, means search - in that direction from the last cache hit. */ + in that direction from the last cache hit. + + Value is the index of the cached state, or -1 if not found. */ static ptrdiff_t bidi_cache_search (ptrdiff_t charpos, int level, int dir) { @@ -696,7 +723,8 @@ bidi_cache_find_level_change (int level, int dir, bool before) ptrdiff_t i = dir ? bidi_cache_last_idx : bidi_cache_idx - 1; int incr = before ? 1 : 0; - eassert (!dir || bidi_cache_last_idx >= 0); + if (i < 0) /* cache overflowed? */ + i = 0; if (!dir) dir = -1; @@ -734,23 +762,37 @@ bidi_cache_ensure_space (ptrdiff_t idx) /* Enlarge the cache as needed. */ if (idx >= bidi_cache_size) { - /* The bidi cache cannot be larger than the largest Lisp string - or buffer. */ - ptrdiff_t string_or_buffer_bound - = max (BUF_BYTES_MAX, STRING_BYTES_BOUND); + ptrdiff_t chunk_size = BIDI_CACHE_CHUNK; - /* Also, it cannot be larger than what C can represent. */ - ptrdiff_t c_bound - = (min (PTRDIFF_MAX, SIZE_MAX) - bidi_shelve_header_size) / elsz; + if (bidi_cache_size > bidi_cache_max_elts - chunk_size) + chunk_size = bidi_cache_max_elts - bidi_cache_size; - bidi_cache - = xpalloc (bidi_cache, &bidi_cache_size, - max (BIDI_CACHE_CHUNK, idx - bidi_cache_size + 1), - min (string_or_buffer_bound, c_bound), elsz); + if (max (idx + 1, + bidi_cache_size + chunk_size) <= bidi_cache_max_elts) + { + /* The bidi cache cannot be larger than the largest Lisp + string or buffer. */ + ptrdiff_t string_or_buffer_bound + = max (BUF_BYTES_MAX, STRING_BYTES_BOUND); + + /* Also, it cannot be larger than what C can represent. */ + ptrdiff_t c_bound + = (min (PTRDIFF_MAX, SIZE_MAX) - bidi_shelve_header_size) / elsz; + ptrdiff_t max_elts = bidi_cache_max_elts; + + max_elts = min (max_elts, min (string_or_buffer_bound, c_bound)); + + /* Force xpalloc not to over-allocate by passing it MAX_ELTS + as its 4th argument. */ + bidi_cache = xpalloc (bidi_cache, &bidi_cache_size, + max (chunk_size, idx - bidi_cache_size + 1), + max_elts, elsz); + eassert (bidi_cache_size > idx); + } } } -static void +static int bidi_cache_iterator_state (struct bidi_it *bidi_it, bool resolved, bool update_only) { @@ -762,7 +804,7 @@ bidi_cache_iterator_state (struct bidi_it *bidi_it, bool resolved, idx = bidi_cache_search (bidi_it->charpos, -1, 1); if (idx < 0 && update_only) - return; + return 0; if (idx < 0) { @@ -771,19 +813,23 @@ bidi_cache_iterator_state (struct bidi_it *bidi_it, bool resolved, /* Character positions should correspond to cache positions 1:1. If we are outside the range of cached positions, the cache is useless and must be reset. */ - if (idx > bidi_cache_start && - (bidi_it->charpos > (bidi_cache[idx - 1].charpos - + bidi_cache[idx - 1].nchars) - || bidi_it->charpos < bidi_cache[bidi_cache_start].charpos)) + if (bidi_cache_start < idx && idx < bidi_cache_size + && (bidi_it->charpos > (bidi_cache[idx - 1].charpos + + bidi_cache[idx - 1].nchars) + || bidi_it->charpos < bidi_cache[bidi_cache_start].charpos)) { bidi_cache_reset (); idx = bidi_cache_start; } if (bidi_it->nchars <= 0) emacs_abort (); - bidi_copy_it (&bidi_cache[idx], bidi_it); - if (!resolved) - bidi_cache[idx].resolved_level = -1; + /* Don't cache if no available space in the cache. */ + if (bidi_cache_size > idx) + { + bidi_copy_it (&bidi_cache[idx], bidi_it); + if (!resolved) + bidi_cache[idx].resolved_level = -1; + } } else { @@ -806,9 +852,19 @@ bidi_cache_iterator_state (struct bidi_it *bidi_it, bool resolved, bidi_cache[idx].bracket_enclosed_type = bidi_it->bracket_enclosed_type; } - bidi_cache_last_idx = idx; - if (idx >= bidi_cache_idx) - bidi_cache_idx = idx + 1; + if (bidi_cache_size > idx) + { + bidi_cache_last_idx = idx; + if (idx >= bidi_cache_idx) + bidi_cache_idx = idx + 1; + return 1; + } + else + { + /* The cache overflowed. */ + bidi_cache_last_idx = -1; + return 0; + } } /* Look for a cached iterator state that corresponds to CHARPOS. If @@ -846,8 +902,13 @@ bidi_cache_find (ptrdiff_t charpos, bool resolved_only, struct bidi_it *bidi_it) static int bidi_peek_at_next_level (struct bidi_it *bidi_it) { - if (bidi_cache_idx == bidi_cache_start || bidi_cache_last_idx == -1) + if (bidi_cache_idx == bidi_cache_start) emacs_abort (); + /* If the cache overflowed, return the level of the last cached + character. */ + if (bidi_cache_last_idx == -1 + || (bidi_cache_last_idx >= bidi_cache_idx - 1 && bidi_it->scan_dir > 0)) + return bidi_cache[bidi_cache_idx - 1].resolved_level; return bidi_cache[bidi_cache_last_idx + bidi_it->scan_dir].resolved_level; } @@ -864,6 +925,8 @@ bidi_peek_at_next_level (struct bidi_it *bidi_it) void bidi_push_it (struct bidi_it *bidi_it) { + /* Give this stack slot its cache room. */ + bidi_cache_max_elts += BIDI_CACHE_MAX_ELTS_PER_SLOT; /* Save the current iterator state in its entirety after the last used cache slot. */ bidi_cache_ensure_space (bidi_cache_idx); @@ -900,6 +963,9 @@ bidi_pop_it (struct bidi_it *bidi_it) /* Invalidate the last-used cache slot data. */ bidi_cache_last_idx = -1; + + bidi_cache_max_elts -= BIDI_CACHE_MAX_ELTS_PER_SLOT; + eassert (bidi_cache_max_elts > 0); } static ptrdiff_t bidi_cache_total_alloc; @@ -939,6 +1005,11 @@ bidi_shelve_cache (void) + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp) + sizeof (bidi_cache_start), &bidi_cache_last_idx, sizeof (bidi_cache_last_idx)); + memcpy (databuf + sizeof (bidi_cache_idx) + + bidi_cache_idx * sizeof (struct bidi_it) + + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp) + + sizeof (bidi_cache_start) + sizeof (bidi_cache_last_idx), + &bidi_cache_max_elts, sizeof (bidi_cache_max_elts)); return databuf; } @@ -960,6 +1031,7 @@ bidi_unshelve_cache (void *databuf, bool just_free) /* A NULL pointer means an empty cache. */ bidi_cache_start = 0; bidi_cache_sp = 0; + bidi_cache_max_elts = BIDI_CACHE_MAX_ELTS_PER_SLOT; bidi_cache_reset (); } } @@ -999,6 +1071,12 @@ bidi_unshelve_cache (void *databuf, bool just_free) + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp) + sizeof (bidi_cache_start), sizeof (bidi_cache_last_idx)); + memcpy (&bidi_cache_max_elts, + p + sizeof (bidi_cache_idx) + + bidi_cache_idx * sizeof (struct bidi_it) + + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp) + + sizeof (bidi_cache_start) + sizeof (bidi_cache_last_idx), + sizeof (bidi_cache_max_elts)); bidi_cache_total_alloc -= (bidi_shelve_header_size + bidi_cache_idx * sizeof (struct bidi_it)); @@ -1045,6 +1123,7 @@ bidi_initialize (void) bidi_cache_sp = 0; bidi_cache_total_alloc = 0; + bidi_cache_max_elts = BIDI_CACHE_MAX_ELTS_PER_SLOT; bidi_initialized = 1; } @@ -2459,6 +2538,7 @@ bidi_find_bracket_pairs (struct bidi_it *bidi_it) struct bidi_it tem_it; bool l2r_seen = false, r2l_seen = false; ptrdiff_t pairing_pos; + int idx_at_entry = bidi_cache_idx; eassert (MAX_BPA_STACK >= 100); bidi_copy_it (&saved_it, bidi_it); @@ -2483,7 +2563,15 @@ bidi_find_bracket_pairs (struct bidi_it *bidi_it) levels below). */ if (btype == BIDI_BRACKET_OPEN && bidi_it->bracket_pairing_pos == -1) bidi_it->bracket_pairing_pos = bidi_it->charpos; - bidi_cache_iterator_state (bidi_it, type == NEUTRAL_B, 0); + if (!bidi_cache_iterator_state (bidi_it, type == NEUTRAL_B, 0)) + { + /* No more space in cache -- give up and let the opening + bracket that started this be processed as a + NEUTRAL_ON. */ + bidi_cache_reset_to (idx_at_entry - bidi_cache_start); + bidi_copy_it (bidi_it, &saved_it); + goto give_up; + } if (btype == BIDI_BRACKET_OPEN) PUSH_BPA_STACK; else if (btype == BIDI_BRACKET_CLOSE) @@ -2572,7 +2660,16 @@ bidi_find_bracket_pairs (struct bidi_it *bidi_it) { if (maxlevel < bidi_it->level_stack[bidi_it->stack_idx].level) maxlevel = bidi_it->level_stack[bidi_it->stack_idx].level; - bidi_cache_iterator_state (bidi_it, type == NEUTRAL_B, 0); + if (!bidi_cache_iterator_state (bidi_it, + type == NEUTRAL_B, 0)) + { + /* No more space in cache -- give up and let the + opening bracket that started this be + processed as any other NEUTRAL_ON. */ + bidi_cache_reset_to (idx_at_entry - bidi_cache_start); + bidi_copy_it (bidi_it, &saved_it); + goto give_up; + } type = bidi_resolve_weak (bidi_it); } } @@ -2648,6 +2745,7 @@ bidi_find_bracket_pairs (struct bidi_it *bidi_it) } } + give_up: return retval; } @@ -3210,10 +3308,35 @@ bidi_find_other_level_edge (struct bidi_it *bidi_it, int level, bool end_flag) if (end_flag) emacs_abort (); - bidi_cache_iterator_state (bidi_it, 1, 0); + if (!bidi_cache_iterator_state (bidi_it, 1, 0)) + { + /* Can't happen: if the cache needs to grow, it means we + were at base embedding level, so the cache should have + been either empty or already large enough to cover this + character position. */ + emacs_abort (); + } do { new_level = bidi_level_of_next_char (bidi_it); - bidi_cache_iterator_state (bidi_it, 1, 0); + /* If the cache is full, perform an emergency return by + pretending that the level ended. */ + if (!bidi_cache_iterator_state (bidi_it, 1, 0)) + { + new_level = level - 1; + /* Since the cache should only grow when we are scanning + forward looking for the edge of the level that is one + above the base embedding level, we can only have this + contingency when LEVEL - 1 is the base embedding + level. */ + eassert (new_level == bidi_it->level_stack[0].level); + /* Plan B, for when the cache overflows: Back up to the + previous character by fetching the last cached state, + and force the resolved level of that character be the + base embedding level. */ + bidi_cache_fetch_state (bidi_cache_idx - 1, bidi_it); + bidi_it->resolved_level = new_level; + bidi_cache_iterator_state (bidi_it, 1, 1); + } } while (new_level >= level); } } @@ -3367,6 +3490,12 @@ bidi_move_to_visually_next (struct bidi_it *bidi_it) && bidi_it->charpos > (bidi_cache[bidi_cache_idx - 1].charpos + bidi_cache[bidi_cache_idx - 1].nchars - 1)) bidi_cache_reset (); + /* Also reset the cache if it overflowed and we have just + emergency-exited using Plan B. */ + else if (bidi_it->resolved_level == bidi_it->level_stack[0].level + && bidi_cache_idx >= bidi_cache_size + && bidi_it->charpos == bidi_cache[bidi_cache_idx - 1].charpos) + bidi_cache_reset (); /* But as long as we are caching during forward scan, we must cache each state, or else the cache integrity will be compromised: it assumes cached states correspond to buffer -- 2.39.2