From 182ce2d254ed316239b8deab8adac05c3dbe0149 Mon Sep 17 00:00:00 2001
From: Eli Zaretskii <eliz@gnu.org>
Date: Tue, 10 May 2011 19:12:16 +0300
Subject: [PATCH] Started work on reordering display strings. Refactor
 FETCH_CHAR. Only compiled, not tested.

 src/xdisp.c (compute_display_string_pos): New function.
 (reseat_1): Initialize bidi_it.disp_pos.
 src/bidi.c (bidi_copy_it): Use offsetof.
 (bidi_fetch_char, bidi_fetch_char_advance): New functions.
 (bidi_cache_search, bidi_cache_iterator_state)
 (bidi_paragraph_init, bidi_resolve_explicit, bidi_resolve_weak)
 (bidi_level_of_next_char, bidi_move_to_visually_next): Support
 character positions inside a run of characters covered by a
 display string.
 (bidi_paragraph_init, bidi_resolve_explicit_1)
 (bidi_level_of_next_char): Call bidi_fetch_char and
 bidi_fetch_char_advance instead of FETCH_CHAR and
 FETCH_CHAR_ADVANCE.
 (bidi_init_it): Initialize new members.
 (LRE_CHAR, RLE_CHAR, PDF_CHAR, LRO_CHAR, RLO_CHAR): Remove macro
 definitions.
 (bidi_explicit_dir_char): Lookup character type in bidi_type_table,
 instead of using explicit *_CHAR codes.
 (bidi_resolve_explicit, bidi_resolve_weak): Use
 FETCH_MULTIBYTE_CHAR instead of FETCH_CHAR, as reordering of
 bidirectional text is supported only in multibyte buffers.
 src/dispextern.h (struct bidi_it): New members nchars and disp_pos.
 ch_len is now EMACS_INT.
 (compute_display_string_pos): Declare prototype.
---
 src/ChangeLog    |  29 ++++++
 src/bidi.c       | 246 ++++++++++++++++++++++++++++++++++-------------
 src/dispextern.h |  12 ++-
 src/xdisp.c      |  15 +++
 4 files changed, 232 insertions(+), 70 deletions(-)

diff --git a/src/ChangeLog b/src/ChangeLog
index 6f70908caae..03fe0029e70 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,32 @@
+2011-05-10  Eli Zaretskii  <eliz@gnu.org>
+
+	* xdisp.c (compute_display_string_pos): New function.
+	(reseat_1): Initialize bidi_it.disp_pos.
+
+	* bidi.c (bidi_copy_it): Use offsetof.
+	(bidi_fetch_char, bidi_fetch_char_advance): New functions.
+	(bidi_cache_search, bidi_cache_iterator_state)
+	(bidi_paragraph_init, bidi_resolve_explicit, bidi_resolve_weak)
+	(bidi_level_of_next_char, bidi_move_to_visually_next): Support
+	character positions inside a run of characters covered by a
+	display string.
+	(bidi_paragraph_init, bidi_resolve_explicit_1)
+	(bidi_level_of_next_char): Call bidi_fetch_char and
+	bidi_fetch_char_advance instead of FETCH_CHAR and
+	FETCH_CHAR_ADVANCE.
+	(bidi_init_it): Initialize new members.
+	(LRE_CHAR, RLE_CHAR, PDF_CHAR, LRO_CHAR, RLO_CHAR): Remove macro
+	definitions.
+	(bidi_explicit_dir_char): Lookup character type in bidi_type_table,
+	instead of using explicit *_CHAR codes.
+	(bidi_resolve_explicit, bidi_resolve_weak): Use
+	FETCH_MULTIBYTE_CHAR instead of FETCH_CHAR, as reordering of
+	bidirectional text is supported only in multibyte buffers.
+
+	* dispextern.h (struct bidi_it): New members nchars and disp_pos.
+	ch_len is now EMACS_INT.
+	(compute_display_string_pos): Declare prototype.
+
 2011-05-09  Andreas Schwab  <schwab@linux-m68k.org>
 
 	* w32menu.c (set_frame_menubar): Fix submenu allocation.
diff --git a/src/bidi.c b/src/bidi.c
index 88c45e24a14..0a7c92cfea6 100644
--- a/src/bidi.c
+++ b/src/bidi.c
@@ -62,15 +62,8 @@ static int bidi_initialized = 0;
 
 static Lisp_Object bidi_type_table, bidi_mirror_table;
 
-/* FIXME: Remove these when bidi_explicit_dir_char uses a lookup table.  */
 #define LRM_CHAR   0x200E
 #define RLM_CHAR   0x200F
-#define LRE_CHAR   0x202A
-#define RLE_CHAR   0x202B
-#define PDF_CHAR   0x202C
-#define LRO_CHAR   0x202D
-#define RLO_CHAR   0x202E
-
 #define BIDI_EOB   -1
 
 /* Local data structures.  (Look in dispextern.h for the rest.)  */
@@ -258,7 +251,7 @@ bidi_copy_it (struct bidi_it *to, struct bidi_it *from)
   int i;
 
   /* Copy everything except the level stack and beyond.  */
-  memcpy (to, from, ((size_t)&((struct bidi_it *)0)->level_stack[0]));
+  memcpy (to, from, offsetof (struct bidi_it, level_stack[0]));
 
   /* Copy the active part of the level stack.  */
   to->level_stack[0] = from->level_stack[0]; /* level zero is always in use */
@@ -319,10 +312,17 @@ bidi_cache_search (EMACS_INT charpos, int level, int dir)
   if (bidi_cache_idx)
     {
       if (charpos < bidi_cache[bidi_cache_last_idx].charpos)
-	dir = -1;
-      else if (charpos > bidi_cache[bidi_cache_last_idx].charpos)
-	dir = 1;
-      if (dir)
+	{
+	  dir = -1;
+	  i_start = bidi_cache_last_idx - 1;
+	}
+      else if (charpos > (bidi_cache[bidi_cache_last_idx].charpos
+			  + bidi_cache[bidi_cache_last_idx].nchars - 1))
+	{
+	  dir = 1;
+	  i_start = bidi_cache_last_idx + 1;
+	}
+      else if (dir)
 	i_start = bidi_cache_last_idx;
       else
 	{
@@ -334,14 +334,16 @@ bidi_cache_search (EMACS_INT charpos, int level, int dir)
 	{
 	  /* Linear search for now; FIXME!  */
 	  for (i = i_start; i >= 0; i--)
-	    if (bidi_cache[i].charpos == charpos
+	    if (bidi_cache[i].charpos <= charpos
+		&& charpos < bidi_cache[i].charpos + bidi_cache[i].nchars
 		&& (level == -1 || bidi_cache[i].resolved_level <= level))
 	      return i;
 	}
       else
 	{
 	  for (i = i_start; i < bidi_cache_idx; i++)
-	    if (bidi_cache[i].charpos == charpos
+	    if (bidi_cache[i].charpos <= charpos
+		&& charpos < bidi_cache[i].charpos + bidi_cache[i].nchars
 		&& (level == -1 || bidi_cache[i].resolved_level <= level))
 	      return i;
 	}
@@ -426,7 +428,8 @@ bidi_cache_iterator_state (struct bidi_it *bidi_it, int resolved)
 	 If we are outside the range of cached positions, the cache is
 	 useless and must be reset.  */
       if (idx > 0 &&
-	  (bidi_it->charpos > bidi_cache[idx - 1].charpos + 1
+	  (bidi_it->charpos > (bidi_cache[idx - 1].charpos
+			       + bidi_cache[idx - 1].nchars)
 	   || bidi_it->charpos < bidi_cache[0].charpos))
 	{
 	  bidi_cache_reset ();
@@ -548,6 +551,7 @@ bidi_set_sor_type (struct bidi_it *bidi_it, int level_before, int level_after)
   bidi_it->ignore_bn_limit = 0; /* meaning it's unknown */
 }
 
+/* Perform initializations for reordering a new line of bidi text.  */
 static void
 bidi_line_init (struct bidi_it *bidi_it)
 {
@@ -565,6 +569,77 @@ bidi_line_init (struct bidi_it *bidi_it)
   bidi_cache_reset ();
 }
 
+/* Fetch and return the character at BYTEPOS, or BIDI_EOB if BYTEPOS
+   is at or beyond ZV_BYTE.  A run of characters covered by a display
+   string is meant to be treated as a single character u+FFFC (not
+   implemented yet).  Store the length in bytes in CH_LEN and the
+   length in characters in NCHARS.  DISP_POS is the byte position of
+   the next display string, or -1 if not yet computed; it is updated
+   once BYTEPOS, or the byte after the fetched character, is past it.  */
+static INLINE int
+bidi_fetch_char (EMACS_INT bytepos, EMACS_INT *ch_len,
+		 EMACS_INT *nchars, EMACS_INT *disp_pos)
+{
+  int ch;
+
+  /* FIXME: Support strings in addition to buffers.  */
+  /* If we got past the last known position of display string, compute
+     the position of the next one.  That position could be at BYTEPOS.  */
+  if (bytepos < ZV_BYTE && bytepos > *disp_pos)
+    *disp_pos = compute_display_string_pos (bytepos);
+  if (bytepos >= ZV_BYTE)
+    {
+      ch = BIDI_EOB;
+      *ch_len = 1;
+      *nchars = 1;
+    }
+#if 0
+  else if (bytepos >= *disp_pos)
+    {
+      /* FIXME: Support characters covered by a display string.  */
+      ch = 0xFFFC;	/* Unicode Object Replacement Character */
+    }
+#endif
+  else
+    {
+      ch = FETCH_MULTIBYTE_CHAR (bytepos);
+      *ch_len = CHAR_BYTES (ch);
+      *nchars = 1;
+    }
+
+  /* If we just entered a run of characters covered by a display
+     string, compute the position of the next display string.  */
+  if (bytepos + *ch_len <= ZV_BYTE && bytepos + *ch_len > *disp_pos)
+    *disp_pos = compute_display_string_pos (bytepos + *ch_len);
+
+  return ch;
+}
+
+/* Looks like we won't need this one, so it is compiled out for now.  */
+#if 0
+/* Fetch character at CHARPOS/BYTEPOS.  Return the character, and
+   advance CHARPOS and BYTEPOS to the next character in logical
+   order.  */
+static INLINE int
+bidi_fetch_char_advance (EMACS_INT *charpos, EMACS_INT *bytepos)
+{
+  int ch;
+
+  /* FIXME: Support strings in addition to buffers.  */
+  FETCH_CHAR_ADVANCE_NO_CHECK (ch, charpos, bytepos);
+
+#if 0
+  if (...)
+    {
+      /* FIXME: Support characters covered by display strings.  */
+      ch = 0xFFFC;	/* Unicode Object Replacement Character */
+    }
+#endif
+
+  return ch;
+}
+#endif
+
 /* Find the beginning of this paragraph by looking back in the buffer.
    Value is the byte position of the paragraph's beginning.  */
 static EMACS_INT
@@ -576,6 +651,10 @@ bidi_find_paragraph_start (EMACS_INT pos, EMACS_INT pos_byte)
   while (pos_byte > BEGV_BYTE
 	 && fast_looking_at (re, pos, pos_byte, limit, limit_byte, Qnil) < 0)
     {
+      /* FIXME: What if the paragraph beginning is covered by a
+	 display string?  And what if a display string covering some
+	 of the text over which we scan back includes
+	 paragraph_start_re?  */
       pos = find_next_newline_no_quit (pos - 1, -1);
       pos_byte = CHAR_TO_BYTE (pos);
     }
@@ -587,7 +666,7 @@ bidi_find_paragraph_start (EMACS_INT pos, EMACS_INT pos_byte)
    R2L, just use that.  Otherwise, determine the paragraph direction
    from the first strong directional character of the paragraph.
 
-   NO_DEFAULT_P non-nil means don't default to L2R if the paragraph
+   NO_DEFAULT_P non-zero means don't default to L2R if the paragraph
    has no strong directional characters and both DIR and
    bidi_it->paragraph_dir are NEUTRAL_DIR.  In that case, search back
    in the buffer until a paragraph is found with a strong character,
@@ -622,8 +701,9 @@ bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, int no_default_p)
     }
   else if (dir == NEUTRAL_DIR)	/* P2 */
     {
-      int ch, ch_len;
-      EMACS_INT pos;
+      int ch;
+      EMACS_INT ch_len, nchars;
+      EMACS_INT pos, disp_pos = -1;
       bidi_type_t type;
 
       if (!bidi_initialized)
@@ -658,12 +738,11 @@ bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, int no_default_p)
 	 is non-zero.  */
       do {
 	bytepos = pstartbyte;
-	ch = FETCH_CHAR (bytepos);
-	ch_len = CHAR_BYTES (ch);
+	ch = bidi_fetch_char (bytepos, &ch_len, &nchars, &disp_pos);
 	pos = BYTE_TO_CHAR (bytepos);
 	type = bidi_get_type (ch, NEUTRAL_DIR);
 
-	for (pos++, bytepos += ch_len;
+	for (pos += nchars, bytepos += ch_len;
 	     /* NOTE: UAX#9 says to search only for L, AL, or R types
 		of characters, and ignore RLE, RLO, LRE, and LRO.
 		However, I'm not sure it makes sense to omit those 4;
@@ -683,7 +762,9 @@ bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, int no_default_p)
 		type = NEUTRAL_B;
 		break;
 	      }
-	    FETCH_CHAR_ADVANCE (ch, pos, bytepos);
+	    ch = bidi_fetch_char (bytepos, &ch_len, &nchars, &disp_pos);
+	    pos += nchars;
+	    bytepos += ch_len;
 	  }
 	if (type == STRONG_R || type == STRONG_AL) /* P3 */
 	  bidi_it->paragraph_dir = R2L;
@@ -702,6 +783,9 @@ bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, int no_default_p)
 		/* Find the beginning of the previous paragraph, if any.  */
 		while (pbyte > BEGV_BYTE && prevpbyte >= pstartbyte)
 		  {
+		    /* FIXME: What if p is covered by a display
+		       string?  See also a FIXME inside
+		       bidi_find_paragraph_start.  */
 		    p--;
 		    pbyte = CHAR_TO_BYTE (p);
 		    prevpbyte = bidi_find_paragraph_start (p, pbyte);
@@ -738,7 +822,7 @@ bidi_set_paragraph_end (struct bidi_it *bidi_it)
   bidi_it->resolved_level = bidi_it->level_stack[0].level;
 }
 
-/* Initialize the bidi iterator from buffer position CHARPOS.  */
+/* Initialize the bidi iterator from buffer/string position CHARPOS.  */
 void
 bidi_init_it (EMACS_INT charpos, EMACS_INT bytepos, struct bidi_it *bidi_it)
 {
@@ -746,6 +830,7 @@ bidi_init_it (EMACS_INT charpos, EMACS_INT bytepos, struct bidi_it *bidi_it)
     bidi_initialize ();
   bidi_it->charpos = charpos;
   bidi_it->bytepos = bytepos;
+  bidi_it->nchars = -1;	/* to be computed in bidi_resolve_explicit_1 */
   bidi_it->first_elt = 1;
   bidi_set_paragraph_end (bidi_it);
   bidi_it->new_paragraph = 1;
@@ -767,6 +852,7 @@ bidi_init_it (EMACS_INT charpos, EMACS_INT bytepos, struct bidi_it *bidi_it)
     bidi_it->prev_for_neutral.type_after_w1 =
     bidi_it->prev_for_neutral.orig_type = UNKNOWN_BT;
   bidi_it->sor = L2R;	 /* FIXME: should it be user-selectable? */
+  bidi_it->disp_pos = -1;	/* invalid/unknown */
   bidi_cache_shrink ();
 }
 
@@ -829,12 +915,16 @@ bidi_resolve_neutral_1 (bidi_type_t prev_type, bidi_type_t next_type, int lev)
 }
 
 static INLINE int
-bidi_explicit_dir_char (int c)
+bidi_explicit_dir_char (int ch)
 {
-  /* FIXME: this should be replaced with a lookup table with suitable
-     bits set, like standard C ctype macros do.  */
-  return (c == LRE_CHAR || c == LRO_CHAR
-	  || c == RLE_CHAR || c == RLO_CHAR || c == PDF_CHAR);
+  bidi_type_t ch_type;	/* CH's entry in bidi_type_table */
+
+  if (!bidi_initialized)
+    abort ();
+  ch_type = (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch));
+  return (ch_type == LRE || ch_type == LRO
+	  || ch_type == RLE || ch_type == RLO
+	  || ch_type == PDF);	/* non-zero for explicit directional codes */
 }
 
 /* A helper function for bidi_resolve_explicit.  It advances to the
@@ -850,7 +940,10 @@ bidi_resolve_explicit_1 (struct bidi_it *bidi_it)
   int new_level;
   bidi_dir_t override;
 
-  if (bidi_it->bytepos < BEGV_BYTE	/* after reseat to BEGV? */
+  /* If reseat()'ed, don't advance, so as to start iteration from the
+     position where we were reseated.  bidi_it->bytepos can be less
+     than BEGV_BYTE after reseat to BEGV.  */
+  if (bidi_it->bytepos < BEGV_BYTE
       || bidi_it->first_elt)
     {
       bidi_it->first_elt = 0;
@@ -860,7 +953,9 @@ bidi_resolve_explicit_1 (struct bidi_it *bidi_it)
     }
   else if (bidi_it->bytepos < ZV_BYTE)	/* don't move at ZV */
     {
-      bidi_it->charpos++;
+      /* Advance to the next character, skipping characters covered by
+	 display strings (nchars > 1).  */
+      bidi_it->charpos += bidi_it->nchars;
       if (bidi_it->ch_len == 0)
 	abort ();
       bidi_it->bytepos += bidi_it->ch_len;
@@ -870,17 +965,20 @@ bidi_resolve_explicit_1 (struct bidi_it *bidi_it)
   override = bidi_it->level_stack[bidi_it->stack_idx].override;
   new_level = current_level;
 
-  /* in case it is a unibyte character (not yet implemented) */
-  /* _fetch_multibyte_char_len = 1; */
   if (bidi_it->bytepos >= ZV_BYTE)
     {
       curchar = BIDI_EOB;
       bidi_it->ch_len = 1;
+      bidi_it->nchars = 1;
+      bidi_it->disp_pos = ZV_BYTE;
     }
   else
     {
-      curchar = FETCH_CHAR (bidi_it->bytepos);
-      bidi_it->ch_len = CHAR_BYTES (curchar);
+      /* Fetch the character at BYTEPOS.  If it is covered by a
+	 display string, treat the entire run of covered characters as
+	 a single character u+FFFC.  */
+      curchar = bidi_fetch_char (bidi_it->bytepos, &bidi_it->ch_len,
+				 &bidi_it->nchars, &bidi_it->disp_pos);
     }
   bidi_it->ch = curchar;
 
@@ -1006,10 +1104,10 @@ bidi_resolve_explicit_1 (struct bidi_it *bidi_it)
 }
 
 /* Given an iterator state in BIDI_IT, advance one character position
-   in the buffer to the next character (in the logical order), resolve
-   any explicit embeddings and directional overrides, and return the
-   embedding level of the character after resolving explicit
-   directives and ignoring empty embeddings.  */
+   in the buffer/string to the next character (in the logical order),
+   resolve any explicit embeddings and directional overrides, and
+   return the embedding level of the character after resolving
+   explicit directives and ignoring empty embeddings.  */
 static int
 bidi_resolve_explicit (struct bidi_it *bidi_it)
 {
@@ -1020,8 +1118,8 @@ bidi_resolve_explicit (struct bidi_it *bidi_it)
       && bidi_it->type == WEAK_BN
       && bidi_it->ignore_bn_limit == 0 /* only if not already known */
       && bidi_it->bytepos < ZV_BYTE    /* not already at EOB */
-      && bidi_explicit_dir_char (FETCH_CHAR (bidi_it->bytepos
-					     + bidi_it->ch_len)))
+      && bidi_explicit_dir_char (FETCH_MULTIBYTE_CHAR (bidi_it->bytepos
+						       + bidi_it->ch_len)))
     {
       /* Avoid pushing and popping embedding levels if the level run
 	 is empty, as this breaks level runs where it shouldn't.
@@ -1033,14 +1131,16 @@ bidi_resolve_explicit (struct bidi_it *bidi_it)
 
       bidi_copy_it (&saved_it, bidi_it);
 
-      while (bidi_explicit_dir_char (FETCH_CHAR (bidi_it->bytepos
-						 + bidi_it->ch_len)))
+      while (bidi_explicit_dir_char (FETCH_MULTIBYTE_CHAR (bidi_it->bytepos
+							   + bidi_it->ch_len)))
 	{
+	  /* This advances to the next character, skipping any
+	     characters covered by display strings.  */
 	  level = bidi_resolve_explicit_1 (bidi_it);
 	}
 
       if (level == prev_level)	/* empty embedding */
-	saved_it.ignore_bn_limit = bidi_it->charpos + 1;
+	saved_it.ignore_bn_limit = bidi_it->charpos + bidi_it->nchars;
       else			/* this embedding is non-empty */
 	saved_it.ignore_bn_limit = -1;
 
@@ -1076,8 +1176,8 @@ bidi_resolve_explicit (struct bidi_it *bidi_it)
   return new_level;
 }
 
-/* Advance in the buffer, resolve weak types and return the type of
-   the next character after weak type resolution.  */
+/* Advance in the buffer/string, resolve weak types and return the
+   type of the next character after weak type resolution.  */
 static bidi_type_t
 bidi_resolve_weak (struct bidi_it *bidi_it)
 {
@@ -1156,7 +1256,8 @@ bidi_resolve_weak (struct bidi_it *bidi_it)
 	{
 	  next_char =
 	    bidi_it->bytepos + bidi_it->ch_len >= ZV_BYTE
-	    ? BIDI_EOB : FETCH_CHAR (bidi_it->bytepos + bidi_it->ch_len);
+	    ? BIDI_EOB : FETCH_MULTIBYTE_CHAR (bidi_it->bytepos
+					       + bidi_it->ch_len);
 	  type_of_next = bidi_get_type (next_char, override);
 
 	  if (type_of_next == WEAK_BN
@@ -1204,11 +1305,12 @@ bidi_resolve_weak (struct bidi_it *bidi_it)
 	    type = WEAK_EN;
 	  else			/* W5: ET/BN with EN after it.  */
 	    {
-	      EMACS_INT en_pos = bidi_it->charpos + 1;
+	      EMACS_INT en_pos = bidi_it->charpos + bidi_it->nchars;
 
 	      next_char =
 		bidi_it->bytepos + bidi_it->ch_len >= ZV_BYTE
-		? BIDI_EOB : FETCH_CHAR (bidi_it->bytepos + bidi_it->ch_len);
+		? BIDI_EOB : FETCH_MULTIBYTE_CHAR (bidi_it->bytepos
+						   + bidi_it->ch_len);
 	      type_of_next = bidi_get_type (next_char, override);
 
 	      if (type_of_next == WEAK_ET
@@ -1299,8 +1401,8 @@ bidi_resolve_neutral (struct bidi_it *bidi_it)
 	  /* Arrrgh!!  The UAX#9 algorithm is too deeply entrenched in
 	     the assumption of batch-style processing; see clauses W4,
 	     W5, and especially N1, which require to look far forward
-	     (as well as back) in the buffer.  May the fleas of a
-	     thousand camels infest the armpits of those who design
+	     (as well as back) in the buffer/string.  May the fleas of
+	     a thousand camels infest the armpits of those who design
 	     supposedly general-purpose algorithms by looking at their
 	     own implementations, and fail to consider other possible
 	     implementations!  */
@@ -1391,8 +1493,9 @@ bidi_resolve_neutral (struct bidi_it *bidi_it)
 }
 
 /* Given an iterator state in BIDI_IT, advance one character position
-   in the buffer to the next character (in the logical order), resolve
-   the bidi type of that next character, and return that type.  */
+   in the buffer/string to the next character (in the logical order),
+   resolve the bidi type of that next character, and return that
+   type.  */
 static bidi_type_t
 bidi_type_of_next_char (struct bidi_it *bidi_it)
 {
@@ -1416,15 +1519,16 @@ bidi_type_of_next_char (struct bidi_it *bidi_it)
 }
 
 /* Given an iterator state BIDI_IT, advance one character position in
-   the buffer to the next character (in the logical order), resolve
-   the embedding and implicit levels of that next character, and
-   return the resulting level.  */
+   the buffer/string to the next character (in the current scan
+   direction), resolve the embedding and implicit levels of that next
+   character, and return the resulting level.  */
 static int
 bidi_level_of_next_char (struct bidi_it *bidi_it)
 {
   bidi_type_t type;
   int level, prev_level = -1;
   struct bidi_saved_info next_for_neutral;
+  EMACS_INT next_char_pos;
 
   if (bidi_it->scan_dir == 1)
     {
@@ -1466,8 +1570,14 @@ bidi_level_of_next_char (struct bidi_it *bidi_it)
     }
   next_for_neutral = bidi_it->next_for_neutral;
 
-  /* Perhaps it is already cached.  */
-  type = bidi_cache_find (bidi_it->charpos + bidi_it->scan_dir, -1, bidi_it);
+  /* Perhaps the character we want is already cached.  If it is, the
+     call to bidi_cache_find below will return a type other than
+     UNKNOWN_BT.  */
+  if (bidi_it->scan_dir > 0)
+    next_char_pos = bidi_it->charpos + bidi_it->nchars;
+  else
+    next_char_pos = bidi_it->charpos - 1;
+  type = bidi_cache_find (next_char_pos, -1, bidi_it);
   if (type != UNKNOWN_BT)
     {
       /* Don't lose the information for resolving neutrals!  The
@@ -1529,14 +1639,13 @@ bidi_level_of_next_char (struct bidi_it *bidi_it)
       int clen = bidi_it->ch_len;
       EMACS_INT bpos = bidi_it->bytepos;
       EMACS_INT cpos = bidi_it->charpos;
+      EMACS_INT disp_pos = bidi_it->disp_pos;
+      EMACS_INT nc;
       bidi_type_t chtype;
 
       do {
-	/*_fetch_multibyte_char_len = 1;*/
-	ch = bpos + clen >= ZV_BYTE ? BIDI_EOB : FETCH_CHAR (bpos + clen);
-	bpos += clen;
-	cpos++;
-	clen = (ch == BIDI_EOB ? 1 : CHAR_BYTES (ch));
+	ch = bidi_fetch_char (bpos += clen, &clen, &nc, &disp_pos);
+	cpos += nc;
 	if (ch == '\n' || ch == BIDI_EOB /* || ch == LINESEP_CHAR */)
 	  chtype = NEUTRAL_B;
 	else
@@ -1615,8 +1724,8 @@ bidi_level_of_next_char (struct bidi_it *bidi_it)
 
    If this level's other edge is cached, we simply jump to it, filling
    the iterator structure with the iterator state on the other edge.
-   Otherwise, we walk the buffer until we come back to the same level
-   as LEVEL.
+   Otherwise, we walk the buffer or string until we come back to the
+   same level as LEVEL.
 
    Note: we are not talking here about a ``level run'' in the UAX#9
    sense of the term, but rather about a ``level'' which includes
@@ -1680,6 +1789,7 @@ bidi_move_to_visually_next (struct bidi_it *bidi_it)
 	  sentinel.bytepos--;
 	  sentinel.ch = '\n';	/* doesn't matter, but why not? */
 	  sentinel.ch_len = 1;
+	  sentinel.nchars = 1;
 	}
       bidi_cache_iterator_state (&sentinel, 1);
     }
@@ -1750,14 +1860,15 @@ bidi_move_to_visually_next (struct bidi_it *bidi_it)
       && bidi_it->bytepos < ZV_BYTE)
     {
       EMACS_INT sep_len =
-	bidi_at_paragraph_end (bidi_it->charpos + 1,
+	bidi_at_paragraph_end (bidi_it->charpos + bidi_it->nchars,
 			       bidi_it->bytepos + bidi_it->ch_len);
       if (sep_len >= 0)
 	{
 	  bidi_it->new_paragraph = 1;
 	  /* Record the buffer position of the last character of the
 	     paragraph separator.  */
-	  bidi_it->separator_limit = bidi_it->charpos + 1 + sep_len;
+	  bidi_it->separator_limit =
+	    bidi_it->charpos + bidi_it->nchars + sep_len;
 	}
     }
 
@@ -1767,7 +1878,8 @@ bidi_move_to_visually_next (struct bidi_it *bidi_it)
 	 last cached position, the cache's job is done and we can
 	 discard it.  */
       if (bidi_it->resolved_level == bidi_it->level_stack[0].level
-	  && bidi_it->charpos > bidi_cache[bidi_cache_idx - 1].charpos)
+	  && bidi_it->charpos > (bidi_cache[bidi_cache_idx - 1].charpos
+				 + bidi_cache[bidi_cache_idx - 1].nchars - 1))
 	bidi_cache_reset ();
 	/* But as long as we are caching during forward scan, we must
 	   cache each state, or else the cache integrity will be
diff --git a/src/dispextern.h b/src/dispextern.h
index 72e23e6642a..f94723099f9 100644
--- a/src/dispextern.h
+++ b/src/dispextern.h
@@ -1812,12 +1812,16 @@ struct bidi_stack {
   bidi_dir_t override;
 };
 
-/* Data type for iterating over bidi text.  */
+/* Data type for reordering bidirectional text.  */
 struct bidi_it {
   EMACS_INT bytepos;		/* iterator's position in buffer */
   EMACS_INT charpos;
-  int ch;			/* character itself */
-  int ch_len;			/* length of its multibyte sequence */
+  int ch;			/* character at that position, or u+FFFC
+				   ("object replacement character") for a run
+				   of characters covered by a display string */
+  EMACS_INT nchars;		/* its "length", usually 1; it's > 1 for a run
+				   of characters covered by a display string */
+  EMACS_INT ch_len;		/* its length in bytes */
   bidi_type_t type;		/* bidi type of this character, after
 				   resolving weak and neutral types */
   bidi_type_t type_after_w1;	/* original type, after overrides and W1 */
@@ -1844,6 +1848,7 @@ struct bidi_it {
   bidi_dir_t paragraph_dir;	/* current paragraph direction */
   int new_paragraph;		/* if non-zero, we expect a new paragraph */
   EMACS_INT separator_limit;	/* where paragraph separator should end */
+  EMACS_INT disp_pos;		/* byte position of display string after ch */
 };
 
 /* Value is non-zero when the bidi iterator is at base paragraph
@@ -3001,6 +3006,7 @@ extern void reseat_at_previous_visible_line_start (struct it *);
 extern Lisp_Object lookup_glyphless_char_display (int, struct it *);
 extern int calc_pixel_width_or_height (double *, struct it *, Lisp_Object,
                                        struct font *, int, int *);
+extern EMACS_INT compute_display_string_pos (EMACS_INT);
 
 #ifdef HAVE_WINDOW_SYSTEM
 
diff --git a/src/xdisp.c b/src/xdisp.c
index 88353e37925..10f69b4cd38 100644
--- a/src/xdisp.c
+++ b/src/xdisp.c
@@ -3085,6 +3085,20 @@ next_overlay_change (EMACS_INT pos)
   return endpos;
 }
 
+/* Return the byte position of a display string at or after BYTEPOS.
+   If no display string exists at or after BYTEPOS, return ZV_BYTE.  A
+   display string is either an overlay with `display' property whose
+   value is a string or a `display' text property whose value is a
+   string.  */
+EMACS_INT
+compute_display_string_pos (EMACS_INT bytepos)
+{
+  if (bytepos >= ZV_BYTE)
+    return ZV_BYTE;
+  /* FIXME: not implemented yet; report no display strings for now.  */
+  return ZV_BYTE;
+}
+
 
 
 /***********************************************************************
@@ -5382,6 +5396,7 @@ reseat_1 (struct it *it, struct text_pos pos, int set_stop_p)
     {
       it->bidi_it.first_elt = 1;
       it->bidi_it.paragraph_dir = NEUTRAL_DIR;
+      it->bidi_it.disp_pos = -1;
     }
 
   if (set_stop_p)
-- 
2.39.5