Retrospective commit from 2009-10-03.

author Eli Zaretskii <eliz@gnu.org>

Fri, 1 Jan 2010 11:06:48 +0000 (06:06 -0500)

committer Eli Zaretskii <eliz@gnu.org>

Fri, 1 Jan 2010 11:06:48 +0000 (06:06 -0500)
author Eli Zaretskii <eliz@gnu.org>
Fri, 1 Jan 2010 11:06:48 +0000 (06:06 -0500)
committer Eli Zaretskii <eliz@gnu.org>
Fri, 1 Jan 2010 11:06:48 +0000 (06:06 -0500)
diff --git a/src/ChangeLog.bidi b/src/ChangeLog.bidi

index 97f300fc8b36957ee2de348388be2777cb784b6e..a2bcb3bf026e88840bd3c5b2487ee5b80aee0879 100644 (file)
--- a/src/ChangeLog.bidi
+++ b/src/ChangeLog.bidi
@@ -1,3 +1,35 @@
+2009-10-03  Eli Zaretskii  <eliz@gnu.org>
+
+       * bidi.c (bidi_set_paragraph_end): Don't set the new_paragraph
+       flag in the iterator.
+       (bidi_init_it): Set the new_paragraph flag.
+       (bidi_at_paragraph_end): Arguments are now character and byte
+       position of the next character.  All callers changed.
+       (bidi_resolve_explicit): Don't call bidi_at_paragraph_end, and
+       don't behave as if at paragraph end if it returns true.
+       (bidi_get_next_char_visually): Don't call bidi_paragraph_init if
+       new_paragraph flag is set.  Set new_paragraph flag when at end of
+       a paragraph.
+       <fallback_paragraph_start_re, fallback_paragraph_separate_re>: New
+       variables.
+       <Qparagraph_start, Qparagraph_separate>: New variables.
+       (bidi_initialize): Initialize and staticpro them.
+
+       * dispextern.h <struct bidi_it>: New element paragraph_dir.  Make
+       positional elements EMACS_INT.
+
+       * bidi.c <bidi_overriding_paragraph_direction>: Delete.
+
+2009-09-28  Eli Zaretskii  <eliz@gnu.org>
+
+       * bidi.c (bidi_init_it): Initialize charpos, bytepos, and
+       first_elt before calling bidi_set_paragraph_end.
+       (bidi_resolve_explicit): Don't call bidi_set_paragraph_end at
+       EOB.
+       (bidi_at_paragraph_end): Don't set new_paragraph flag at EOB.
+       (bidi_get_type): Accept an additional argument OVERRIDE, per UAX#9
+       "Explicit Overrides".  All callers changed.
+
  2009-09-27  Eli Zaretskii  <eliz@gnu.org>
  
         * xdisp.c (next_element_from_buffer): If called not at line
@@ -129,8 +161,7 @@
  
  2004-03-04  Kenichi Handa  <handa@m17n.org>
  
-       The following changes are to support for bi-directional text
-       display.
+       The following changes are to support bidirectional text display.
  
         * Makefile.in (obj): Include bidi.o.
         (bidi.o): New target.
diff --git a/src/bidi.c b/src/bidi.c

index 3ec0d2c10353ea84aaf7c828f08b04ebb764073c..2798b20aaebca9f8aa85b3527738ed09313e5050 100644 (file)
--- a/src/bidi.c
+++ b/src/bidi.c
@@ -96,8 +96,8 @@ typedef enum {
  
  int bidi_ignore_explicit_marks_for_paragraph_level = 1;
  
-/* FIXME: Should be user-definable.  */
-bidi_dir_t bidi_overriding_paragraph_direction = L2R;
+static Lisp_Object fallback_paragraph_start_re, fallback_paragraph_separate_re;
+static Lisp_Object Qparagraph_start, Qparagraph_separate;
  
  static void
  bidi_initialize ()
@@ -392,18 +392,67 @@ bidi_initialize ()
      char_table_set_range (bidi_type_table, bidi_type[i].from,
                           bidi_type[i].to ? bidi_type[i].to : bidi_type[i].from,
                           make_number (bidi_type[i].type));
+
+  fallback_paragraph_start_re =
+    XSYMBOL (Fintern_soft (build_string ("paragraph-start"), Qnil))->value;
+  if (!STRINGP (fallback_paragraph_start_re))
+    fallback_paragraph_start_re = build_string ("\f\\|[ \t]*$");
+  staticpro (&fallback_paragraph_start_re);
+  Qparagraph_start = intern ("paragraph-start");
+  staticpro (&Qparagraph_start);
+  fallback_paragraph_separate_re =
+    XSYMBOL (Fintern_soft (build_string ("paragraph-separate"), Qnil))->value;
+  if (!STRINGP (fallback_paragraph_separate_re))
+    fallback_paragraph_separate_re = build_string ("[ \t\f]*$");
+  staticpro (&fallback_paragraph_separate_re);
+  Qparagraph_separate = intern ("paragraph-separate");
+  staticpro (&Qparagraph_separate);
    bidi_initialized = 1;
  }
  
-/* Return the bidi type of a character CH.  */
+/* Return the bidi type of a character CH, subject to the current
+   directional OVERRIDE.  */
  bidi_type_t
-bidi_get_type (int ch)
+bidi_get_type (int ch, bidi_dir_t override)
  {
+  bidi_type_t default_type;
+
    if (ch == BIDI_EOB)
      return NEUTRAL_B;
    if (ch < 0 || ch > MAX_CHAR)
      abort ();
-  return (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch));
+
+  default_type = (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch));
+
+  if (override == NEUTRAL_DIR)
+    return default_type;
+
+  switch (default_type)
+    {
+      /* UAX#9 doesn't say so, but it makes no sense to override
+        NEUTRAL_B, explicit formatting codes, or LRM/RLM.  */
+      case NEUTRAL_B:
+      case LRE:
+      case LRO:
+      case RLE:
+      case RLO:
+      case PDF:
+       return default_type;
+      default:
+       switch (ch)
+         {
+           case LRM_CHAR:
+           case RLM_CHAR:
+             return default_type;
+           default:
+             if (override == L2R) /* X6 */
+               return STRONG_L;
+             else if (override == R2L)
+               return STRONG_R;
+             else
+               abort ();       /* can't happen: handled above */
+         }
+    }
  }
  
  void
@@ -684,21 +733,17 @@ bidi_peek_at_next_level (struct bidi_it *bidi_it)
    return bidi_cache[bidi_cache_last_idx + bidi_it->scan_dir].resolved_level;
  }
  
-/* Return non-zero if buffer's byte position POS is the last character
-   of a paragraph.  THIS_CH is the character preceding the one at POS in
-   the buffer.  */
+/* Return non-zero if the buffer position given by CHARPOS and
+   BYTEPOS is the end of a paragraph.  */
  int
-bidi_at_paragraph_end (int this_ch, int pos)
+bidi_at_paragraph_end (EMACS_INT charpos, EMACS_INT bytepos)
  {
-  int next_ch;
+  Lisp_Object re = XSYMBOL (Qparagraph_separate)->value;
  
-  if (pos >= ZV_BYTE)
-    return 1;
+  if (!STRINGP (re))
+    re = fallback_paragraph_separate_re;
  
-  next_ch = FETCH_CHAR (pos);
-  /* FIXME: This should support all Unicode characters that can end a
-     paragraph.  */
-  return (this_ch == '\n' && next_ch == '\n');
+  return fast_looking_at (re, charpos, bytepos, ZV, ZV_BYTE, Qnil) > 0;
  }
  
  /* Determine the start-of-run (sor) directional type given the two
@@ -734,30 +779,58 @@ bidi_set_sor_type (struct bidi_it *bidi_it, int level_before, int level_after)
    bidi_it->ignore_bn_limit = 0; /* meaning it's unknown */
  }
  
+/* Find the beginning of this paragraph by looking back in the
+   buffer.  */
+static void
+bidi_find_paragraph_start (struct bidi_it *bidi_it)
+{
+  Lisp_Object re = XSYMBOL (Qparagraph_start)->value;
+  EMACS_INT pos = bidi_it->charpos;
+  EMACS_INT pos_byte = bidi_it->bytepos;
+  EMACS_INT limit = ZV, limit_byte = ZV_BYTE;
+
+  if (!STRINGP (re))
+    re = fallback_paragraph_start_re;
+  while (pos_byte > BEGV_BYTE
+        && fast_looking_at (re, pos, pos_byte, limit, limit_byte, Qnil) < 0)
+    {
+      find_next_newline_no_quit (pos, -1);
+    }
+}
+
  void
  bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it)
  {
-  int bytepos = bidi_it->bytepos;
+  EMACS_INT bytepos = bidi_it->bytepos;
  
    /* We should never be called at EOB or before BEGV.  */
    if (bytepos >= ZV_BYTE || bytepos < BEGV_BYTE)
      abort ();
  
-  /* We should always be called at the beginning of a new
-     paragraph.  */
-  if (!(bytepos == BEGV_BYTE
-       || FETCH_CHAR (bytepos) == '\n'
-       || FETCH_CHAR (bytepos - 1) == '\n'))
-    abort ();
-
    bidi_it->level_stack[0].level = 0; /* default for L2R */
+  bidi_it->paragraph_dir = L2R;
    if (dir == R2L)
      bidi_it->level_stack[0].level = 1;
    else if (dir == NEUTRAL_DIR) /* P2 */
      {
-      int ch = FETCH_CHAR (bytepos), ch_len = CHAR_BYTES (ch);
-      int pos = bidi_it->charpos;
-      bidi_type_t type = bidi_get_type (ch);
+      int ch, ch_len;
+      EMACS_INT pos;
+      bidi_type_t type;
+
+      /* Search back to where this paragraph starts.  */
+      bidi_find_paragraph_start (bidi_it);
+
+      /* We should always be at the beginning of a new line at this
+        point.  */
+      if (!(bytepos == BEGV_BYTE
+           || FETCH_CHAR (bytepos) == '\n'
+           || FETCH_CHAR (bytepos - 1) == '\n'))
+       abort ();
+
+      ch = FETCH_CHAR (bytepos);
+      ch_len = CHAR_BYTES (ch);
+      pos = bidi_it->charpos;
+      type = bidi_get_type (ch, NEUTRAL_DIR);
  
        for (pos++, bytepos += ch_len;
            /* NOTE: UAX#9 says to search only for L, AL, or R types of
@@ -768,15 +841,17 @@ bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it)
              || (bidi_ignore_explicit_marks_for_paragraph_level
                  && (type == RLE || type == RLO
                      || type == LRE || type == LRO));
-          type = bidi_get_type (ch))
+          type = bidi_get_type (ch, NEUTRAL_DIR))
         {
-         if (type == NEUTRAL_B || bidi_at_paragraph_end (ch, bytepos))
+         if (type == NEUTRAL_B || bidi_at_paragraph_end (pos, bytepos))
             break;
           FETCH_CHAR_ADVANCE (ch, pos, bytepos);
         }
        if (type == STRONG_R || type == STRONG_AL) /* P3 */
         bidi_it->level_stack[0].level = 1;
      }
+  if (bidi_it->level_stack[0].level == 1)
+    bidi_it->paragraph_dir = R2L;
    bidi_it->scan_dir = 1; /* FIXME: do we need to have control on this? */
    bidi_it->resolved_level = bidi_it->level_stack[0].level;
    bidi_it->level_stack[0].override = NEUTRAL_DIR; /* X1 */
@@ -785,14 +860,14 @@ bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it)
    bidi_it->new_paragraph = 0;
    bidi_it->next_en_pos = -1;
    bidi_it->next_for_ws.type = UNKNOWN_BT;
-  bidi_set_sor_type (bidi_it, bidi_overriding_paragraph_direction,
+  bidi_set_sor_type (bidi_it, bidi_it->paragraph_dir,
                      bidi_it->level_stack[0].level); /* X10 */
  
    bidi_cache_reset ();
  }
  
-/* Do whatever UAX#9 clause X8 says should be done at paragraph's end,
-   and set the new paragraph flag in the iterator.  */
+/* Do whatever UAX#9 clause X8 says should be done at paragraph's
+   end.  */
  static inline void
  bidi_set_paragraph_end (struct bidi_it *bidi_it)
  {
@@ -800,19 +875,19 @@ bidi_set_paragraph_end (struct bidi_it *bidi_it)
    bidi_it->invalid_rl_levels = -1;
    bidi_it->stack_idx = 0;
    bidi_it->resolved_level = bidi_it->level_stack[0].level;
-  bidi_it->new_paragraph = 1;
  }
  
  /* Initialize the bidi iterator from buffer position CHARPOS.  */
  void
-bidi_init_it (int charpos, int bytepos, struct bidi_it *bidi_it)
+bidi_init_it (EMACS_INT charpos, EMACS_INT bytepos, struct bidi_it *bidi_it)
  {
    if (! bidi_initialized)
      bidi_initialize ();
-  bidi_set_paragraph_end (bidi_it);
-  bidi_it->first_elt = 1;
    bidi_it->charpos = charpos;
    bidi_it->bytepos = bytepos;
+  bidi_it->first_elt = 1;
+  bidi_set_paragraph_end (bidi_it);
+  bidi_it->new_paragraph = 1;
    bidi_it->type = NEUTRAL_B;
    bidi_it->type_after_w1 = UNKNOWN_BT;
    bidi_it->orig_type = UNKNOWN_BT;
@@ -945,7 +1020,11 @@ bidi_resolve_explicit_1 (struct bidi_it *bidi_it)
      }
    bidi_it->ch = curchar;
  
-  type = bidi_get_type (curchar);
+  /* Don't apply directional override here, as all the types we handle
+     below will not be affected by the override anyway, and we need
+     the original type unaltered.  The override will be applied in
+     bidi_resolve_weak.  */
+  type = bidi_get_type (curchar, NEUTRAL_DIR);
    bidi_it->orig_type = type;
    bidi_check_type (bidi_it->orig_type);
  
@@ -1122,17 +1201,15 @@ bidi_resolve_explicit (struct bidi_it *bidi_it)
         }
      }
  
-  /* For when the paragraph end is defined by anything other than a
-     special Unicode character (a.k.a. ``higher protocols'').  */
-  if (bidi_it->type != NEUTRAL_B)
-    if (bidi_at_paragraph_end (bidi_it->ch,
-                              bidi_it->bytepos + bidi_it->ch_len))
-      bidi_it->type = NEUTRAL_B;
-
    if (bidi_it->type == NEUTRAL_B)      /* X8 */
      {
-      bidi_set_paragraph_end (bidi_it);
-      bidi_it->type_after_w1 = bidi_it->type; /* needed below and in L1 */
+      /* End of buffer does _not_ indicate a new paragraph is coming.
+        Otherwise, each character inserted at EOB will be processed
+        as starting a new paragraph.  */
+      if (bidi_it->bytepos < ZV_BYTE)
+       bidi_set_paragraph_end (bidi_it);
+      /* This is needed by bidi_resolve_weak below, and in L1.  */
+      bidi_it->type_after_w1 = bidi_it->type;
        bidi_check_type (bidi_it->type_after_w1);
      }
  
@@ -1219,7 +1296,7 @@ bidi_resolve_weak (struct bidi_it *bidi_it)
        next_char =
         bidi_it->bytepos + bidi_it->ch_len >= ZV_BYTE
         ? BIDI_EOB : FETCH_CHAR (bidi_it->bytepos + bidi_it->ch_len);
-      type_of_next = bidi_get_type (next_char);
+      type_of_next = bidi_get_type (next_char, override);
  
        if (type_of_next == WEAK_BN
           || bidi_explicit_dir_char (next_char))
@@ -1267,12 +1344,12 @@ bidi_resolve_weak (struct bidi_it *bidi_it)
        /* W5: ET with EN after it.  */
        else
         {
-         int en_pos = bidi_it->charpos + 1;
+         EMACS_INT en_pos = bidi_it->charpos + 1;
  
           next_char =
             bidi_it->bytepos + bidi_it->ch_len >= ZV_BYTE
             ? BIDI_EOB : FETCH_CHAR (bidi_it->bytepos + bidi_it->ch_len);
-         type_of_next = bidi_get_type (next_char);
+         type_of_next = bidi_get_type (next_char, override);
  
           if (type_of_next == WEAK_ET
               || type_of_next == WEAK_BN
@@ -1588,8 +1665,8 @@ bidi_level_of_next_char (struct bidi_it *bidi_it)
      {
        int ch;
        int clen = bidi_it->ch_len;
-      int bpos = bidi_it->bytepos;
-      int cpos = bidi_it->charpos;
+      EMACS_INT bpos = bidi_it->bytepos;
+      EMACS_INT cpos = bidi_it->charpos;
        bidi_type_t chtype;
  
        do {
@@ -1601,7 +1678,7 @@ bidi_level_of_next_char (struct bidi_it *bidi_it)
         if (ch == '\n' || ch == BIDI_EOB /* || ch == LINESEP_CHAR */)
           chtype = NEUTRAL_B;
         else
-         chtype = bidi_get_type (ch);
+         chtype = bidi_get_type (ch, NEUTRAL_DIR);
        } while (chtype == NEUTRAL_WS || chtype == WEAK_BN
                || bidi_explicit_dir_char (ch)); /* L1/Retaining */
        bidi_it->next_for_ws.type = chtype;
@@ -1725,8 +1802,6 @@ bidi_get_next_char_visually (struct bidi_it *bidi_it)
        bidi_it->scan_dir = 1;   /* default to logical order */
      }
  
-  if (bidi_it->new_paragraph)
-    bidi_paragraph_init (bidi_overriding_paragraph_direction, bidi_it);
    /* Prepare the sentinel iterator state.  */
    if (bidi_cache_idx == 0)
      {
@@ -1799,6 +1874,16 @@ bidi_get_next_char_visually (struct bidi_it *bidi_it)
        next_level = bidi_level_of_next_char (bidi_it);
      }
  
+  /* Take note when we are at the end of the paragraph.  The next time
+     we are about to be called, next_element_from_buffer will
+     automatically reinit the paragraph direction, if needed.  */
+  if (bidi_it->scan_dir == 1
+      && bidi_it->type == NEUTRAL_B
+      && bidi_it->bytepos < ZV_BYTE
+      && bidi_at_paragraph_end (bidi_it->charpos + 1,
+                               bidi_it->bytepos + bidi_it->ch_len))
+    bidi_it->new_paragraph = 1;
+
    if (bidi_it->scan_dir == 1 && bidi_cache_idx)
      {
        /* If we are at paragraph's base embedding level and beyond the
diff --git a/src/dispextern.h b/src/dispextern.h

index 0e765aa667c7ab622a3cb5b7a21d45874d6a79e7..b5b6dc7f61834bfb84d66be9f76593e7b4b9bfaa 100644 (file)
--- a/src/dispextern.h
+++ b/src/dispextern.h
@@ -1761,8 +1761,8 @@ struct bidi_stack {
  /* Data type for iterating over bidi text.  */
  struct bidi_it {
    int first_elt;               /* if non-zero, examine current char first */
-  int bytepos;                 /* iterator's position in buffer */
-  int charpos;
+  EMACS_INT bytepos;           /* iterator's position in buffer */
+  EMACS_INT charpos;
    int ch;                      /* character itself */
    int ch_len;                  /* length of its multibyte sequence */
    bidi_type_t type;            /* bidi type of this character, after
@@ -1773,14 +1773,15 @@ struct bidi_it {
    int invalid_levels;          /* how many PDFs to ignore */
    int invalid_rl_levels;       /* how many PDFs from RLE/RLO to ignore */
    int new_paragraph;           /* if non-zero, a new paragraph begins here */
+  bidi_dir_t paragraph_dir;    /* current paragraph direction */
    int prev_was_pdf;            /* if non-zero, previous char was PDF */
    struct bidi_saved_info prev; /* info about previous character */
    struct bidi_saved_info last_strong; /* last-seen strong directional char */
    struct bidi_saved_info next_for_neutral; /* surrounding characters for... */
    struct bidi_saved_info prev_for_neutral; /* ...resolving neutrals */
    struct bidi_saved_info next_for_ws; /* character after sequence of ws */
-  int next_en_pos;             /* position of next EN char for ET */
-  int ignore_bn_limit;         /* position until which to ignore BNs */
+  EMACS_INT next_en_pos;       /* position of next EN char for ET */
+  EMACS_INT ignore_bn_limit;   /* position until which to ignore BNs */
    bidi_dir_t sor;              /* direction of start-of-run in effect */
    int scan_dir;                        /* direction of text scan */
    int stack_idx;               /* index of current data on the stack */
@@ -2798,7 +2799,7 @@ extern EMACS_INT tool_bar_button_relief;
  
  /* Defined in bidi.c */
  
-extern void bidi_init_it P_ ((int, int, struct bidi_it *));
+extern void bidi_init_it P_ ((EMACS_INT, EMACS_INT, struct bidi_it *));
  extern void bidi_get_next_char_visually P_ ((struct bidi_it *));
  extern void bidi_paragraph_init P_ ((bidi_dir_t, struct bidi_it *));
  extern int  bidi_mirror_char P_ ((int));
diff --git a/src/window.h b/src/window.h

index 8b7d945fd1341d3867dcf896bae9b2d0d459ae97..80e7bfb73b278ad66e5662d7d31ddf8b3b1a9d87 100644 (file)
--- a/src/window.h
+++ b/src/window.h
@@ -117,7 +117,10 @@ struct window
      /* The buffer displayed in this window */
      /* Of the fields vchild, hchild and buffer, only one is non-nil.  */
      Lisp_Object buffer;
-    /* A marker pointing to where in the text to start displaying */
+    /* A marker pointing to where in the text to start displaying.
+       BIDI Note: This is the _logical-order_ start, i.e. the smallest
+       buffer position visible in the window, not necessarily the
+       character displayed in the top left corner of the window.  */
      Lisp_Object start;
      /* A marker pointing to where in the text point is in this window,
         used only when the window is not selected.
author	Eli Zaretskii <eliz@gnu.org>
	Fri, 1 Jan 2010 11:06:48 +0000 (06:06 -0500)
committer	Eli Zaretskii <eliz@gnu.org>
	Fri, 1 Jan 2010 11:06:48 +0000 (06:06 -0500)
src/ChangeLog.bidi		patch \| blob \| history
src/bidi.c		patch \| blob \| history
src/dispextern.h		patch \| blob \| history
src/window.h		patch \| blob \| history