From: Eli Zaretskii Date: Fri, 1 Jan 2010 11:01:34 +0000 (-0500) Subject: Retrospective commit from 2009-09-27. X-Git-Tag: emacs-pretest-24.0.90~104^2~275^2~438^2~635^2~54 X-Git-Url: http://git.eshelyaron.com/gitweb/?a=commitdiff_plain;h=e342a24d6251c0cef99572722eb816d860352390;p=emacs.git Retrospective commit from 2009-09-27. Support character mirroring. Support iteration that starts in the middle of a line. Misc cleanups. xdisp.c (next_element_from_buffer): If called not at line beginning, start bidi iteration from line beginning. bidi.c (bidi_paragraph_init): Use bidi_overriding_paragraph_direction instead of a literal zero. (bidi_initialize): Fix some character types, per Unicode 5.x. (bidi_get_type): Abort if called with invalid character code. dispextern.h: Add prototype of bidi_mirror_char. xdisp.c (get_next_display_element): Mirror characters whose resolved type is STRONG_R. --- diff --git a/src/ChangeLog.bidi b/src/ChangeLog.bidi index bc7a473af49..97f300fc8b3 100644 --- a/src/ChangeLog.bidi +++ b/src/ChangeLog.bidi @@ -1,3 +1,18 @@ +2009-09-27 Eli Zaretskii + + * xdisp.c (next_element_from_buffer): If called not at line + beginning, start bidi iteration from line beginning. + + * bidi.c (bidi_paragraph_init): Use + bidi_overriding_paragraph_direction instead of a literal zero. + (bidi_initialize): Fix some character types, per Unicode 5.x. + (bidi_get_type): Abort if called with invalid character code. + + * dispextern.h: Add prototype of bidi_mirror_char. + + * xdisp.c (get_next_display_element): Mirror characters whose + resolved type is STRONG_R. + 2009-09-26 Eli Zaretskii * bidi.c (bidi_paragraph_init): Don't set bidi_it->ch_len. Abort diff --git a/src/bidi.c b/src/bidi.c index bcbbb485e1a..3ec0d2c1035 100644 --- a/src/bidi.c +++ b/src/bidi.c @@ -99,14 +99,6 @@ int bidi_ignore_explicit_marks_for_paragraph_level = 1; /* FIXME: Should be user-definable. */ bidi_dir_t bidi_overriding_paragraph_direction = L2R; -/* FIXME: Unused? */ -#define ASCII_BIDI_TYPE_SET(STR, TYPE) \ - do { \ - unsigned char *p; \ - for (p = (STR); *p; p++) \ - CHAR_TABLE_SET (bidi_type_table, *p, (TYPE)); \ - } while (0) - static void bidi_initialize () { @@ -128,11 +120,10 @@ bidi_initialize () { 0x0021, 0x0022, NEUTRAL_ON }, { 0x0023, 0x0025, WEAK_ET }, { 0x0026, 0x002A, NEUTRAL_ON }, - { 0x002B, 0x0000, WEAK_ET }, + { 0x002B, 0x0000, WEAK_ES }, { 0x002C, 0x0000, WEAK_CS }, - { 0x002D, 0x0000, WEAK_ET }, - { 0x002E, 0x0000, WEAK_CS }, - { 0x002F, 0x0000, WEAK_ES }, + { 0x002D, 0x0000, WEAK_ES }, + { 0x002E, 0x002F, WEAK_CS }, { 0x0030, 0x0039, WEAK_EN }, { 0x003A, 0x0000, WEAK_CS }, { 0x003B, 0x0040, NEUTRAL_ON }, @@ -145,7 +136,9 @@ bidi_initialize () { 0x00A1, 0x0000, NEUTRAL_ON }, { 0x00A2, 0x00A5, WEAK_ET }, { 0x00A6, 0x00A9, NEUTRAL_ON }, - { 0x00AB, 0x00AF, NEUTRAL_ON }, + { 0x00AB, 0x00AC, NEUTRAL_ON }, + { 0x00AD, 0x0000, WEAK_BN }, + { 0x00AE, 0x00Af, NEUTRAL_ON }, { 0x00B0, 0x00B1, WEAK_ET }, { 0x00B2, 0x00B3, WEAK_EN }, { 0x00B4, 0x0000, NEUTRAL_ON }, @@ -171,7 +164,9 @@ bidi_initialize () { 0x05C0, 0x0000, STRONG_R }, { 0x05C1, 0x05C2, WEAK_NSM }, { 0x05C3, 0x0000, STRONG_R }, - { 0x05C4, 0x0000, WEAK_NSM }, + { 0x05C4, 0x05C5, WEAK_NSM }, + { 0x05C6, 0x0000, STRONG_R }, + { 0x05C7, 0x0000, WEAK_NSM }, { 0x05D0, 0x05F4, STRONG_R }, { 0x060C, 0x0000, WEAK_CS }, { 0x061B, 0x064A, STRONG_AL }, @@ -400,18 +395,14 @@ bidi_initialize () bidi_initialized = 1; } -static int -bidi_is_arabic_number (int ch) -{ - return 0; /* FIXME! */ -} - /* Return the bidi type of a character CH. */ bidi_type_t bidi_get_type (int ch) { if (ch == BIDI_EOB) return NEUTRAL_B; + if (ch < 0 || ch > MAX_CHAR) + abort (); return (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch)); } @@ -457,6 +448,10 @@ bidi_get_category (bidi_type_t type) } } +/* Return the mirrored character of C, if any. + + Note: The conditions in UAX#9 clause L4 must be tested by the + caller. */ /* FIXME: exceedingly temporary! Should consult the Unicode database of character properties. */ int @@ -722,7 +717,7 @@ bidi_set_sor_type (struct bidi_it *bidi_it, int level_before, int level_after) that we find on the two sides of the level boundary (see UAX#9, clause X10), and so we don't need to know the final embedding level to which we descend after processing all the PDFs. */ - if (level_before < level_after || !bidi_it->prev_was_pdf) + if (!bidi_it->prev_was_pdf || level_before < level_after) /* FIXME: should the default sor direction be user selectable? */ bidi_it->sor = (higher_level & 1) != 0 ? R2L : L2R; if (level_before > level_after) @@ -742,8 +737,7 @@ bidi_set_sor_type (struct bidi_it *bidi_it, int level_before, int level_after) void bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it) { - int pos = bidi_it->charpos, bytepos = bidi_it->bytepos; - int ch, ch_len; + int bytepos = bidi_it->bytepos; /* We should never be called at EOB or before BEGV. */ if (bytepos >= ZV_BYTE || bytepos < BEGV_BYTE) @@ -756,20 +750,16 @@ bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it) || FETCH_CHAR (bytepos - 1) == '\n')) abort (); - ch = FETCH_CHAR (bytepos); - ch_len = CHAR_BYTES (ch); bidi_it->level_stack[0].level = 0; /* default for L2R */ if (dir == R2L) bidi_it->level_stack[0].level = 1; else if (dir == NEUTRAL_DIR) /* P2 */ { - bidi_type_t type; - - /* FIXME: should actually go to where the paragraph begins and - start the loop below from there, since UAX#9 says to find the - first strong directional character in the paragraph. */ + int ch = FETCH_CHAR (bytepos), ch_len = CHAR_BYTES (ch); + int pos = bidi_it->charpos; + bidi_type_t type = bidi_get_type (ch); - for (type = bidi_get_type (ch), pos++, bytepos += ch_len; + for (pos++, bytepos += ch_len; /* NOTE: UAX#9 says to search only for L, AL, or R types of characters, and ignore RLE, RLO, LRE, and LRO. However, I'm not sure it makes sense to omit those 4; should try @@ -795,7 +785,8 @@ bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it) bidi_it->new_paragraph = 0; bidi_it->next_en_pos = -1; bidi_it->next_for_ws.type = UNKNOWN_BT; - bidi_set_sor_type (bidi_it, bidi_it->level_stack[0].level, 0); /* X10 */ + bidi_set_sor_type (bidi_it, bidi_overriding_paragraph_direction, + bidi_it->level_stack[0].level); /* X10 */ bidi_cache_reset (); } diff --git a/src/dispextern.h b/src/dispextern.h index 389d0acc23c..0e765aa667c 100644 --- a/src/dispextern.h +++ b/src/dispextern.h @@ -2801,6 +2801,7 @@ extern EMACS_INT tool_bar_button_relief; extern void bidi_init_it P_ ((int, int, struct bidi_it *)); extern void bidi_get_next_char_visually P_ ((struct bidi_it *)); extern void bidi_paragraph_init P_ ((bidi_dir_t, struct bidi_it *)); +extern int bidi_mirror_char P_ ((int)); /* Defined in xdisp.c */ diff --git a/src/xdisp.c b/src/xdisp.c index ae4a0305034..e77a197006d 100644 --- a/src/xdisp.c +++ b/src/xdisp.c @@ -5682,6 +5682,13 @@ get_next_display_element (it) if (it->what == IT_CHARACTER) { + /* UAX#9, L4: "A character is depicted by a mirrored glyph if + and only if (a) the resolved directionality of that character + is R..." */ + /* FIXME: Do we need an exception for characters from display + tables? */ + if (it->bidi_p && it->bidi_it.type == STRONG_R) + it->c = bidi_mirror_char (it->c); /* Map via display table or translate control characters. IT->c, IT->len etc. have been set to the next character by the function call above. If we have a display table, and it @@ -6505,17 +6512,46 @@ next_element_from_buffer (it) the character at IT_CHARPOS. */ if (it->bidi_p && it->bidi_it.first_elt) { - /* FIXME: L2R below is just for easyness of testing, as we - currently support only left-to-right paragraphs. The value - should be user-definable and/or come from some ``higher - protocol''. In the absence of any other guidance, the default - for this initialization should be NEUTRAL_DIR. */ it->bidi_it.charpos = IT_CHARPOS (*it); it->bidi_it.bytepos = IT_BYTEPOS (*it); - bidi_paragraph_init (L2R, &it->bidi_it); - bidi_get_next_char_visually (&it->bidi_it); - it->bidi_it.first_elt = 0; - /* Adjust IT's position information to where we moved. */ + /* If we are at the beginning of a line, we can produce the next + element right away. */ + if (it->bidi_it.bytepos == BEGV_BYTE + /* FIXME: Should support all Unicode line separators. */ + || FETCH_CHAR (it->bidi_it.bytepos - 1) == '\n' + || FETCH_CHAR (it->bidi_it.bytepos) == '\n') + { + /* FIXME: L2R below is just for easyness of testing, as we + currently support only left-to-right paragraphs. The + value should be user-definable and/or come from some + ``higher protocol''. In the absence of any other + guidance, the default for this initialization should be + NEUTRAL_DIR. */ + bidi_paragraph_init (L2R, &it->bidi_it); + bidi_get_next_char_visually (&it->bidi_it); + } + else + { + int orig_bytepos = IT_BYTEPOS (*it); + + /* We need to prime the bidi iterator starting at the line's + beginning, before we will be able to produce the next + element. */ + IT_CHARPOS (*it) = find_next_newline_no_quit (IT_CHARPOS (*it), -1); + IT_BYTEPOS (*it) = CHAR_TO_BYTE (IT_CHARPOS (*it)); + it->bidi_it.charpos = IT_CHARPOS (*it); + it->bidi_it.bytepos = IT_BYTEPOS (*it); + bidi_paragraph_init (L2R, &it->bidi_it); + do { + /* Now return to buffer position where we were asked to + get the next display element, and produce that. */ + bidi_get_next_char_visually (&it->bidi_it); + } while (it->bidi_it.bytepos != orig_bytepos + && it->bidi_it.bytepos < ZV_BYTE); + } + + it->bidi_it.first_elt = 0; /* paranoia: bidi.c does this */ + /* Adjust IT's position information to where we ended up. */ IT_CHARPOS (*it) = it->bidi_it.charpos; IT_BYTEPOS (*it) = it->bidi_it.bytepos; SET_TEXT_POS (it->position, IT_CHARPOS (*it), IT_BYTEPOS (*it));