characters). 200 was chosen as an upper limit for reasonably-long
lines in a text file/buffer. */
#define BIDI_CACHE_CHUNK 200
+/* Maximum size we allow the cache to become, per iterator stack slot,
+ in units of struct bidi_it size. If we allow unlimited growth, we
+ could run out of memory for pathologically long bracketed text or
+ very long text lines that need to be reordered. This is aggravated
+ when word-wrap is in effect, since then functions display_line and
+ move_it_in_display_line_to need to keep up to 4 copies of the
+ cache.
+
+ This limitation means there can be no more than that amount of
+ contiguous RTL text on any single physical line in a LTR paragraph,
+ and similarly with contiguous LTR + numeric text in a RTL
+ paragraph. (LTR text in a LTR paragraph and RTL text in a RTL
+ paragraph are not reordered, and so don't need the cache, and
+ cannot hit this limit.) More importantly, no single line can have
+ text longer than this inside paired brackets (because bracket pairs
+ resolution uses the cache). If the limit is exceeded, the fallback
+ code will produce visual order that will be incorrect if there are
+ RTL characters in the offending line of text. */
+/* Do we need to allow customization of this limit? */
+#define BIDI_CACHE_MAX_ELTS_PER_SLOT 50000
+#if BIDI_CACHE_CHUNK >= BIDI_CACHE_MAX_ELTS_PER_SLOT
+# error BIDI_CACHE_CHUNK must be less than BIDI_CACHE_MAX_ELTS_PER_SLOT
+#endif
+static ptrdiff_t bidi_cache_max_elts = BIDI_CACHE_MAX_ELTS_PER_SLOT;
static struct bidi_it *bidi_cache;
static ptrdiff_t bidi_cache_size = 0;
enum { elsz = sizeof (struct bidi_it) };
bidi_shelve_header_size
= (sizeof (bidi_cache_idx) + sizeof (bidi_cache_start_stack)
+ sizeof (bidi_cache_sp) + sizeof (bidi_cache_start)
- + sizeof (bidi_cache_last_idx))
+ + sizeof (bidi_cache_last_idx) + sizeof (bidi_cache_max_elts))
};
/* Effectively remove the cached states beyond the Nth state from the
bidi_cache_size = BIDI_CACHE_CHUNK;
}
bidi_cache_reset ();
+ bidi_cache_max_elts = BIDI_CACHE_MAX_ELTS_PER_SLOT;
}
static void
/* Find a cached state with a given CHARPOS and resolved embedding
level less or equal to LEVEL. If LEVEL is -1, disregard the
resolved levels in cached states. DIR, if non-zero, means search
- in that direction from the last cache hit. */
+ in that direction from the last cache hit.
+
+ Value is the index of the cached state, or -1 if not found. */
static ptrdiff_t
bidi_cache_search (ptrdiff_t charpos, int level, int dir)
{
ptrdiff_t i = dir ? bidi_cache_last_idx : bidi_cache_idx - 1;
int incr = before ? 1 : 0;
- eassert (!dir || bidi_cache_last_idx >= 0);
+ if (i < 0) /* cache overflowed? */
+ i = 0;
if (!dir)
dir = -1;
/* Enlarge the cache as needed. */
if (idx >= bidi_cache_size)
{
- /* The bidi cache cannot be larger than the largest Lisp string
- or buffer. */
- ptrdiff_t string_or_buffer_bound
- = max (BUF_BYTES_MAX, STRING_BYTES_BOUND);
+ ptrdiff_t chunk_size = BIDI_CACHE_CHUNK;
- /* Also, it cannot be larger than what C can represent. */
- ptrdiff_t c_bound
- = (min (PTRDIFF_MAX, SIZE_MAX) - bidi_shelve_header_size) / elsz;
+ if (bidi_cache_size > bidi_cache_max_elts - chunk_size)
+ chunk_size = bidi_cache_max_elts - bidi_cache_size;
- bidi_cache
- = xpalloc (bidi_cache, &bidi_cache_size,
- max (BIDI_CACHE_CHUNK, idx - bidi_cache_size + 1),
- min (string_or_buffer_bound, c_bound), elsz);
+ if (max (idx + 1,
+ bidi_cache_size + chunk_size) <= bidi_cache_max_elts)
+ {
+ /* The bidi cache cannot be larger than the largest Lisp
+ string or buffer. */
+ ptrdiff_t string_or_buffer_bound
+ = max (BUF_BYTES_MAX, STRING_BYTES_BOUND);
+
+ /* Also, it cannot be larger than what C can represent. */
+ ptrdiff_t c_bound
+ = (min (PTRDIFF_MAX, SIZE_MAX) - bidi_shelve_header_size) / elsz;
+ ptrdiff_t max_elts = bidi_cache_max_elts;
+
+ max_elts = min (max_elts, min (string_or_buffer_bound, c_bound));
+
+ /* Force xpalloc not to over-allocate by passing it MAX_ELTS
+ as its 4th argument. */
+ bidi_cache = xpalloc (bidi_cache, &bidi_cache_size,
+ max (chunk_size, idx - bidi_cache_size + 1),
+ max_elts, elsz);
+ eassert (bidi_cache_size > idx);
+ }
}
}
-static void
+static int
bidi_cache_iterator_state (struct bidi_it *bidi_it, bool resolved,
bool update_only)
{
idx = bidi_cache_search (bidi_it->charpos, -1, 1);
if (idx < 0 && update_only)
- return;
+ return 0;
if (idx < 0)
{
/* Character positions should correspond to cache positions 1:1.
If we are outside the range of cached positions, the cache is
useless and must be reset. */
- if (idx > bidi_cache_start &&
- (bidi_it->charpos > (bidi_cache[idx - 1].charpos
- + bidi_cache[idx - 1].nchars)
- || bidi_it->charpos < bidi_cache[bidi_cache_start].charpos))
+ if (bidi_cache_start < idx && idx < bidi_cache_size
+ && (bidi_it->charpos > (bidi_cache[idx - 1].charpos
+ + bidi_cache[idx - 1].nchars)
+ || bidi_it->charpos < bidi_cache[bidi_cache_start].charpos))
{
bidi_cache_reset ();
idx = bidi_cache_start;
}
if (bidi_it->nchars <= 0)
emacs_abort ();
- bidi_copy_it (&bidi_cache[idx], bidi_it);
- if (!resolved)
- bidi_cache[idx].resolved_level = -1;
+ /* Don't cache if no available space in the cache. */
+ if (bidi_cache_size > idx)
+ {
+ bidi_copy_it (&bidi_cache[idx], bidi_it);
+ if (!resolved)
+ bidi_cache[idx].resolved_level = -1;
+ }
}
else
{
bidi_cache[idx].bracket_enclosed_type = bidi_it->bracket_enclosed_type;
}
- bidi_cache_last_idx = idx;
- if (idx >= bidi_cache_idx)
- bidi_cache_idx = idx + 1;
+ if (bidi_cache_size > idx)
+ {
+ bidi_cache_last_idx = idx;
+ if (idx >= bidi_cache_idx)
+ bidi_cache_idx = idx + 1;
+ return 1;
+ }
+ else
+ {
+ /* The cache overflowed. */
+ bidi_cache_last_idx = -1;
+ return 0;
+ }
}
/* Look for a cached iterator state that corresponds to CHARPOS. If
static int
bidi_peek_at_next_level (struct bidi_it *bidi_it)
{
- if (bidi_cache_idx == bidi_cache_start || bidi_cache_last_idx == -1)
+ if (bidi_cache_idx == bidi_cache_start)
emacs_abort ();
+ /* If the cache overflowed, return the level of the last cached
+ character. */
+ if (bidi_cache_last_idx == -1
+ || (bidi_cache_last_idx >= bidi_cache_idx - 1 && bidi_it->scan_dir > 0))
+ return bidi_cache[bidi_cache_idx - 1].resolved_level;
return bidi_cache[bidi_cache_last_idx + bidi_it->scan_dir].resolved_level;
}
void
bidi_push_it (struct bidi_it *bidi_it)
{
+ /* Give this stack slot its cache room. */
+ bidi_cache_max_elts += BIDI_CACHE_MAX_ELTS_PER_SLOT;
/* Save the current iterator state in its entirety after the last
used cache slot. */
bidi_cache_ensure_space (bidi_cache_idx);
/* Invalidate the last-used cache slot data. */
bidi_cache_last_idx = -1;
+
+ bidi_cache_max_elts -= BIDI_CACHE_MAX_ELTS_PER_SLOT;
+ eassert (bidi_cache_max_elts > 0);
}
static ptrdiff_t bidi_cache_total_alloc;
+ sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp)
+ sizeof (bidi_cache_start),
&bidi_cache_last_idx, sizeof (bidi_cache_last_idx));
+ memcpy (databuf + sizeof (bidi_cache_idx)
+ + bidi_cache_idx * sizeof (struct bidi_it)
+ + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp)
+ + sizeof (bidi_cache_start) + sizeof (bidi_cache_last_idx),
+ &bidi_cache_max_elts, sizeof (bidi_cache_max_elts));
return databuf;
}
/* A NULL pointer means an empty cache. */
bidi_cache_start = 0;
bidi_cache_sp = 0;
+ bidi_cache_max_elts = BIDI_CACHE_MAX_ELTS_PER_SLOT;
bidi_cache_reset ();
}
}
+ sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp)
+ sizeof (bidi_cache_start),
sizeof (bidi_cache_last_idx));
+ memcpy (&bidi_cache_max_elts,
+ p + sizeof (bidi_cache_idx)
+ + bidi_cache_idx * sizeof (struct bidi_it)
+ + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp)
+ + sizeof (bidi_cache_start) + sizeof (bidi_cache_last_idx),
+ sizeof (bidi_cache_max_elts));
bidi_cache_total_alloc
-= (bidi_shelve_header_size
+ bidi_cache_idx * sizeof (struct bidi_it));
bidi_cache_sp = 0;
bidi_cache_total_alloc = 0;
+ bidi_cache_max_elts = BIDI_CACHE_MAX_ELTS_PER_SLOT;
bidi_initialized = 1;
}
struct bidi_it tem_it;
bool l2r_seen = false, r2l_seen = false;
ptrdiff_t pairing_pos;
+ int idx_at_entry = bidi_cache_idx;
eassert (MAX_BPA_STACK >= 100);
bidi_copy_it (&saved_it, bidi_it);
levels below). */
if (btype == BIDI_BRACKET_OPEN && bidi_it->bracket_pairing_pos == -1)
bidi_it->bracket_pairing_pos = bidi_it->charpos;
- bidi_cache_iterator_state (bidi_it, type == NEUTRAL_B, 0);
+ if (!bidi_cache_iterator_state (bidi_it, type == NEUTRAL_B, 0))
+ {
+ /* No more space in cache -- give up and let the opening
+ bracket that started this be processed as a
+ NEUTRAL_ON. */
+ bidi_cache_reset_to (idx_at_entry - bidi_cache_start);
+ bidi_copy_it (bidi_it, &saved_it);
+ goto give_up;
+ }
if (btype == BIDI_BRACKET_OPEN)
PUSH_BPA_STACK;
else if (btype == BIDI_BRACKET_CLOSE)
{
if (maxlevel < bidi_it->level_stack[bidi_it->stack_idx].level)
maxlevel = bidi_it->level_stack[bidi_it->stack_idx].level;
- bidi_cache_iterator_state (bidi_it, type == NEUTRAL_B, 0);
+ if (!bidi_cache_iterator_state (bidi_it,
+ type == NEUTRAL_B, 0))
+ {
+ /* No more space in cache -- give up and let the
+ opening bracket that started this be
+ processed as any other NEUTRAL_ON. */
+ bidi_cache_reset_to (idx_at_entry - bidi_cache_start);
+ bidi_copy_it (bidi_it, &saved_it);
+ goto give_up;
+ }
type = bidi_resolve_weak (bidi_it);
}
}
}
}
+ give_up:
return retval;
}
if (end_flag)
emacs_abort ();
- bidi_cache_iterator_state (bidi_it, 1, 0);
+ if (!bidi_cache_iterator_state (bidi_it, 1, 0))
+ {
+ /* Can't happen: if the cache needs to grow, it means we
+ were at base embedding level, so the cache should have
+ been either empty or already large enough to cover this
+ character position. */
+ emacs_abort ();
+ }
do {
new_level = bidi_level_of_next_char (bidi_it);
- bidi_cache_iterator_state (bidi_it, 1, 0);
+ /* If the cache is full, perform an emergency return by
+ pretending that the level ended. */
+ if (!bidi_cache_iterator_state (bidi_it, 1, 0))
+ {
+ new_level = level - 1;
+ /* Since the cache should only grow when we are scanning
+ forward looking for the edge of the level that is one
+ above the base embedding level, we can only have this
+ contingency when LEVEL - 1 is the base embedding
+ level. */
+ eassert (new_level == bidi_it->level_stack[0].level);
+ /* Plan B, for when the cache overflows: Back up to the
+ previous character by fetching the last cached state,
+ and force the resolved level of that character be the
+ base embedding level. */
+ bidi_cache_fetch_state (bidi_cache_idx - 1, bidi_it);
+ bidi_it->resolved_level = new_level;
+ bidi_cache_iterator_state (bidi_it, 1, 1);
+ }
} while (new_level >= level);
}
}
&& bidi_it->charpos > (bidi_cache[bidi_cache_idx - 1].charpos
+ bidi_cache[bidi_cache_idx - 1].nchars - 1))
bidi_cache_reset ();
+ /* Also reset the cache if it overflowed and we have just
+ emergency-exited using Plan B. */
+ else if (bidi_it->resolved_level == bidi_it->level_stack[0].level
+ && bidi_cache_idx >= bidi_cache_size
+ && bidi_it->charpos == bidi_cache[bidi_cache_idx - 1].charpos)
+ bidi_cache_reset ();
/* But as long as we are caching during forward scan, we must
cache each state, or else the cache integrity will be
compromised: it assumes cached states correspond to buffer