Fix bug #7038 with cursor motion in paragraphs w/o strong characters.

author Eli Zaretskii <eliz@gnu.org>

Sat, 18 Sep 2010 11:59:53 +0000 (13:59 +0200)

committer Eli Zaretskii <eliz@gnu.org>

Sat, 18 Sep 2010 11:59:53 +0000 (13:59 +0200)
author Eli Zaretskii <eliz@gnu.org>
Sat, 18 Sep 2010 11:59:53 +0000 (13:59 +0200)
committer Eli Zaretskii <eliz@gnu.org>
Sat, 18 Sep 2010 11:59:53 +0000 (13:59 +0200)
diff --git a/src/ChangeLog b/src/ChangeLog

index 66735cd5f4e25e024166426bb56b2a78c9872a26..88030b7b4cef1802ed3a3082d10d03b6bb5bdcc9 100644 (file)
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,15 @@
+2010-09-18  Eli Zaretskii  <eliz@gnu.org>
+
+       * xdisp.c (Fcurrent_bidi_paragraph_direction): Call
+       bidi_paragraph_init with NO_DEFAULT_P non-zero.  (Bug#7038)
+
+       * bidi.c (bidi_paragraph_init): Accept an additional argument
+       NO_DEFAULT_P; all callers changed.  If NO_DEFAULT_P is non-zero,
+       search back until a paragraph with a strong directional character
+       is found, and use that to determine paragraph's base direction.
+
+       * dispextern.h (bidi_paragraph_init): Update prototype.
+
  2010-09-17  Eli Zaretskii  <eliz@gnu.org>
  
         * w32.c (_PROCESS_MEMORY_COUNTERS_EX): Don't define with versions
diff --git a/src/bidi.c b/src/bidi.c

index a6d4d1b250665d311b8ab567ebb114d90c394faa..224ed552a6d3399066262a554f9f0cfd6ea56f03 100644 (file)
--- a/src/bidi.c
+++ b/src/bidi.c
@@ -583,18 +583,26 @@ bidi_find_paragraph_start (EMACS_INT pos, EMACS_INT pos_byte)
    return pos_byte;
  }
  
-/* Determine the direction, a.k.a. base embedding level, of the
+/* Determine the base direction, a.k.a. base embedding level, of the
     paragraph we are about to iterate through.  If DIR is either L2R or
     R2L, just use that.  Otherwise, determine the paragraph direction
-   from the first strong character of the paragraph.
-
-   Note that this gives the paragraph separator the same direction as
-   the preceding paragraph, even though Emacs generally views the
-   separartor as not belonging to any paragraph.  */
+   from the first strong directional character of the paragraph.
+
+   NO_DEFAULT_P non-zero means don't default to L2R if the paragraph
+   has no strong directional characters and both DIR and
+   bidi_it->paragraph_dir are NEUTRAL_DIR.  In that case, search back
+   in the buffer until a paragraph is found with a strong character,
+   or until hitting BEGV.  In the latter case, fall back to L2R.  This
+   flag is used in current-bidi-paragraph-direction.
+
+   Note that this function gives the paragraph separator the same
+   direction as the preceding paragraph, even though Emacs generally
+   views the separator as not belonging to any paragraph.  */
  void
-bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it)
+bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, int no_default_p)
  {
    EMACS_INT bytepos = bidi_it->bytepos;
+  EMACS_INT pstartbyte;
  
    /* Special case for an empty buffer. */
    if (bytepos == BEGV_BYTE && bytepos == ZV_BYTE)
@@ -643,49 +651,75 @@ bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it)
  
        /* We are either at the beginning of a paragraph or in the
          middle of it.  Find where this paragraph starts.  */
-      bytepos = bidi_find_paragraph_start (pos, bytepos);
-
+      pstartbyte = bidi_find_paragraph_start (pos, bytepos);
        bidi_it->separator_limit = -1;
        bidi_it->new_paragraph = 0;
-      ch = FETCH_CHAR (bytepos);
-      ch_len = CHAR_BYTES (ch);
-      pos = BYTE_TO_CHAR (bytepos);
-      type = bidi_get_type (ch, NEUTRAL_DIR);
-
-      for (pos++, bytepos += ch_len;
-          /* NOTE: UAX#9 says to search only for L, AL, or R types of
-             characters, and ignore RLE, RLO, LRE, and LRO.  However,
-             I'm not sure it makes sense to omit those 4; should try
-             with and without that to see the effect.  */
-          (bidi_get_category (type) != STRONG)
-            || (bidi_ignore_explicit_marks_for_paragraph_level
-                && (type == RLE || type == RLO
-                    || type == LRE || type == LRO));
-          type = bidi_get_type (ch, NEUTRAL_DIR))
-       {
-         if (type == NEUTRAL_B && bidi_at_paragraph_end (pos, bytepos) >= -1)
-           break;
-         if (bytepos >= ZV_BYTE)
-           {
-             /* Pretend there's a paragraph separator at end of buffer.  */
-             type = NEUTRAL_B;
+
+      /* The following loop is run more than once only if NO_DEFAULT_P
+        is non-zero.  */
+      do {
+       bytepos = pstartbyte;
+       ch = FETCH_CHAR (bytepos);
+       ch_len = CHAR_BYTES (ch);
+       pos = BYTE_TO_CHAR (bytepos);
+       type = bidi_get_type (ch, NEUTRAL_DIR);
+
+       for (pos++, bytepos += ch_len;
+            /* NOTE: UAX#9 says to search only for L, AL, or R types
+               of characters, and ignore RLE, RLO, LRE, and LRO.
+               However, I'm not sure it makes sense to omit those 4;
+               should try with and without that to see the effect.  */
+            (bidi_get_category (type) != STRONG)
+              || (bidi_ignore_explicit_marks_for_paragraph_level
+                  && (type == RLE || type == RLO
+                      || type == LRE || type == LRO));
+            type = bidi_get_type (ch, NEUTRAL_DIR))
+         {
+           if (type == NEUTRAL_B && bidi_at_paragraph_end (pos, bytepos) >= -1)
               break;
-           }
-         FETCH_CHAR_ADVANCE (ch, pos, bytepos);
-       }
-      if (type == STRONG_R || type == STRONG_AL) /* P3 */
-       bidi_it->paragraph_dir = R2L;
-      else if (type == STRONG_L)
-       bidi_it->paragraph_dir = L2R;
+           if (bytepos >= ZV_BYTE)
+             {
+               /* Pretend there's a paragraph separator at end of
+                  buffer.  */
+               type = NEUTRAL_B;
+               break;
+             }
+           FETCH_CHAR_ADVANCE (ch, pos, bytepos);
+         }
+       if (type == STRONG_R || type == STRONG_AL) /* P3 */
+         bidi_it->paragraph_dir = R2L;
+       else if (type == STRONG_L)
+         bidi_it->paragraph_dir = L2R;
+       if (no_default_p && bidi_it->paragraph_dir == NEUTRAL_DIR)
+         {
+           /* If this paragraph is at BEGV, default to L2R.  */
+           if (pstartbyte == BEGV_BYTE)
+             bidi_it->paragraph_dir = L2R; /* P3 and HL1 */
+           else
+             {
+               EMACS_INT prevpbyte = pstartbyte;
+               EMACS_INT p = BYTE_TO_CHAR (pstartbyte), pbyte = pstartbyte;
+
+               /* Find the beginning of the previous paragraph, if any.  */
+               while (pbyte > BEGV_BYTE && prevpbyte >= pstartbyte)
+                 {
+                   p--;
+                   pbyte = CHAR_TO_BYTE (p);
+                   prevpbyte = bidi_find_paragraph_start (p, pbyte);
+                 }
+               pstartbyte = prevpbyte;
+             }
+         }
+      } while (no_default_p && bidi_it->paragraph_dir == NEUTRAL_DIR);
      }
    else
      abort ();
  
    /* Contrary to UAX#9 clause P3, we only default the paragraph
       direction to L2R if we have no previous usable paragraph
-     direction.  */
+     direction.  This is allowed by the HL1 clause.  */
    if (bidi_it->paragraph_dir != L2R && bidi_it->paragraph_dir != R2L)
-    bidi_it->paragraph_dir = L2R; /* P3 and ``higher protocols'' */
+    bidi_it->paragraph_dir = L2R; /* P3 and HL1 ``higher-level protocols'' */
    if (bidi_it->paragraph_dir == R2L)
      bidi_it->level_stack[0].level = 1;
    else
diff --git a/src/dispextern.h b/src/dispextern.h

index 5138958b6db8a8adb3b08862d15bfc16038570a1..6fd92ba940d54890daa93e0ada04ec12698e58c5 100644 (file)
--- a/src/dispextern.h
+++ b/src/dispextern.h
@@ -2896,7 +2896,7 @@ extern EMACS_INT tool_bar_button_relief;
  
  extern void bidi_init_it (EMACS_INT, EMACS_INT, struct bidi_it *);
  extern void bidi_move_to_visually_next (struct bidi_it *);
-extern void bidi_paragraph_init (bidi_dir_t, struct bidi_it *);
+extern void bidi_paragraph_init (bidi_dir_t, struct bidi_it *, int);
  extern int  bidi_mirror_char (int);
  
  /* Defined in xdisp.c */
diff --git a/src/xdisp.c b/src/xdisp.c

index 2ec271cdb6b8cbb23a0e38c494dcf864e8ac60d3..7b49eed4b2cbb701a9d641ce684fb09669e14d9f 100644 (file)
--- a/src/xdisp.c
+++ b/src/xdisp.c
@@ -3821,7 +3821,8 @@ handle_invisible_prop (struct it *it)
                      not have a chance to do it, if we are going to
                      skip any text at the beginning, which resets the
                      FIRST_ELT flag.  */
-                 bidi_paragraph_init (it->paragraph_embedding, &it->bidi_it);
+                 bidi_paragraph_init (it->paragraph_embedding,
+                                      &it->bidi_it, 0);
                 }
               do
                 {
@@ -5143,7 +5144,7 @@ iterate_out_of_display_property (struct it *it)
       of a new paragraph, next_element_from_buffer may not have a
       chance to do that.  */
    if (it->bidi_it.first_elt && it->bidi_it.charpos < ZV)
-    bidi_paragraph_init (it->paragraph_embedding, &it->bidi_it);
+    bidi_paragraph_init (it->paragraph_embedding, &it->bidi_it, 0);
    /* prev_stop can be zero, so check against BEGV as well.  */
    while (it->bidi_it.charpos >= BEGV
          && it->prev_stop <= it->bidi_it.charpos
@@ -6201,7 +6202,7 @@ set_iterator_to_next (struct it *it, int reseat_p)
               /* If this is a new paragraph, determine its base
                  direction (a.k.a. its base embedding level).  */
               if (it->bidi_it.new_paragraph)
-               bidi_paragraph_init (it->paragraph_embedding, &it->bidi_it);
+               bidi_paragraph_init (it->paragraph_embedding, &it->bidi_it, 0);
               bidi_move_to_visually_next (&it->bidi_it);
               IT_BYTEPOS (*it) = it->bidi_it.bytepos;
               IT_CHARPOS (*it) = it->bidi_it.charpos;
@@ -6673,7 +6674,7 @@ next_element_from_buffer (struct it *it)
         {
           /* If we are at the beginning of a line, we can produce the
              next element right away.  */
-         bidi_paragraph_init (it->paragraph_embedding, &it->bidi_it);
+         bidi_paragraph_init (it->paragraph_embedding, &it->bidi_it, 0);
           bidi_move_to_visually_next (&it->bidi_it);
         }
        else
@@ -6687,7 +6688,7 @@ next_element_from_buffer (struct it *it)
           IT_BYTEPOS (*it) = CHAR_TO_BYTE (IT_CHARPOS (*it));
           it->bidi_it.charpos = IT_CHARPOS (*it);
           it->bidi_it.bytepos = IT_BYTEPOS (*it);
-         bidi_paragraph_init (it->paragraph_embedding, &it->bidi_it);
+         bidi_paragraph_init (it->paragraph_embedding, &it->bidi_it, 0);
           do
             {
               /* Now return to buffer position where we were asked to
@@ -6910,7 +6911,7 @@ next_element_from_composition (struct it *it)
           if (it->bidi_p)
             {
               if (it->bidi_it.new_paragraph)
-               bidi_paragraph_init (it->paragraph_embedding, &it->bidi_it);
+               bidi_paragraph_init (it->paragraph_embedding, &it->bidi_it, 0);
               /* Resync the bidi iterator with IT's new position.
                  FIXME: this doesn't support bidirectional text.  */
               while (it->bidi_it.charpos < IT_CHARPOS (*it))
@@ -17992,8 +17993,9 @@ See also `bidi-paragraph-direction'.  */)
        itb.bytepos = bytepos;
        itb.first_elt = 1;
        itb.separator_limit = -1;
+      itb.paragraph_dir = NEUTRAL_DIR;
  
-      bidi_paragraph_init (NEUTRAL_DIR, &itb);
+      bidi_paragraph_init (NEUTRAL_DIR, &itb, 1);
        if (buf != current_buffer)
         set_buffer_temp (old);
        switch (itb.paragraph_dir)
author	Eli Zaretskii <eliz@gnu.org>
	Sat, 18 Sep 2010 11:59:53 +0000 (13:59 +0200)
committer	Eli Zaretskii <eliz@gnu.org>
	Sat, 18 Sep 2010 11:59:53 +0000 (13:59 +0200)
src/ChangeLog		patch \| blob \| history
src/bidi.c		patch \| blob \| history
src/dispextern.h		patch \| blob \| history
src/xdisp.c		patch \| blob \| history