src/bidi.c (bidi_find_first_overridden): New function.
src/xdisp.c (Fbidi_find_overridden_directionality): New function.
(syms_of_xdisp): Defsubr it.
src/dispextern.h (bidi_find_first_overridden): Add prototype.
doc/lispref/display.texi (Bidirectional Display): Document
'bidi-find-overridden-directionality'.
etc/NEWS: Mention 'bidi-find-overridden-directionality'.
+2014-12-02 Eli Zaretskii <eliz@gnu.org>
+
+ * display.texi (Bidirectional Display): Document
+ 'bidi-find-overridden-directionality'.
+
2014-11-29 Paul Eggert <eggert@cs.ucla.edu>
Lessen focus on ChangeLog files, as opposed to change log entries.
appropriate mirrored character in the reordered text. Lisp programs
can affect the mirrored display by changing this property. Again, any
such changes affect all of Emacs display.
+
+@cindex overriding bidirectional properties
+@cindex directional overrides
+@cindex LRO
+@cindex RLO
+ The bidirectional properties of characters can be overridden by
+inserting into the text special directional control characters,
+LEFT-TO-RIGHT OVERRIDE (@acronym{LRO}) and RIGHT-TO-LEFT OVERRIDE
+(@acronym{RLO}). Any characters between a @acronym{RLO} and the
+following newline or POP DIRECTIONAL FORMATTING (@acronym{PDF})
+control character, whichever comes first, will be displayed as if they
+were strong right-to-left characters, i.e.@: they will be reversed on
+display. Similarly, any characters between @acronym{LRO} and
+@acronym{PDF} or newline will display as if they were strong
+left-to-right, and will @emph{not} be reversed even if they are strong
+right-to-left characters.
+
+@cindex phishing using directional overrides
+@cindex malicious use of directional overrides
+ These overrides are useful when you want to make some text
+unaffected by the reordering algorithm, and instead directly control
+the display order. But they can also be used for malicious purposes,
+known as @dfn{phishing}. Specifically, a URL on a Web page or a link
+in an email message can be manipulated to make its visual appearance
+unrecognizable, or similar to some popular benign location, while the
+real location, interpreted by a browser in the logical order, is very
+different.
+
+ Emacs provides a primitive that applications can use to detect
+instances of text whose bidirectional properties were overridden so as
+to make a left-to-right character display as if it were a
+right-to-left character, or vice versa.
+
+@defun bidi-find-overridden-directionality from to &optional object
+This function looks at the text of the specified @var{object} between
+positions @var{from} (inclusive) and @var{to} (exclusive), and returns
+the first position where it finds a strong left-to-right character
+whose directional properties were forced to display the character as
+right-to-left, or a strong right-to-left character that was forced
+to display as left-to-right. If it finds no such characters in the
+specified region of text, it returns @code{nil}.
+
+The optional argument @var{object} specifies which text to search, and
+defaults to the current buffer. If @var{object} is non-@code{nil}, it
+can be some other buffer, or it can be a string or a window. If it is
+a string, the function searches that string. If it is a window, the
+function searches the buffer displayed in that window. If a buffer
+whose text you want to examine is displayed in some window, we
+recommend specifying it by that window, rather than passing the buffer
+to the function. This is because telling the function about the window
+allows it to correctly account for window-specific overlays, which
+might change the result of the function if some text in the buffer is
+covered by overlays.
+@end defun
+2014-12-02 Eli Zaretskii <eliz@gnu.org>
+
+ * NEWS: Mention 'bidi-find-overridden-directionality'.
+
2014-11-29 Paul Eggert <eggert@cs.ucla.edu>
Lessen focus on ChangeLog files, as opposed to change log entries.
systems and for MS-Windows, for other systems they fall back to their
counterparts `string-lessp' and `string-equal'.
++++
+** The new function `bidi-find-overridden-directionality' lets you
+find characters whose directionality was, perhaps maliciously,
+overridden by directional override control characters. Lisp programs
+can use this to detect potential phishing of URLs and other links that
+exploits bidirectional display reordering.
+
*** The ls-lisp package uses `string-collate-lessp' to sort file names.
If you want the old, locale-independent sorting, customize the new
option `ls-lisp-use-string-collate' to a nil value.
+2014-12-02 Eli Zaretskii <eliz@gnu.org>
+
+ * bidi.c (bidi_find_first_overridden): New function.
+
+ * xdisp.c (Fbidi_find_overridden_directionality): New function.
+ (syms_of_xdisp): Defsubr it.
+
+ * dispextern.h (bidi_find_first_overridden): Add prototype.
+
2014-12-02 Jan Djärv <jan.h.d@swipnet.se>
* nsimage.m (initFromSkipXBM:width:height:flip:length:): Set bmRep
UNGCPRO;
}
+/* Utility function for looking for strong directional characters
+   whose bidi type was overridden by a directional override.
+
+   Starting from the current state of BIDI_IT, call bidi_resolve_weak
+   repeatedly and stop at the first character whose resolved type is
+   strong R while its original type is strong L, or whose resolved
+   type is strong L while its original type is strong R or AL.
+   Return the character position of that character.
+
+   The scan also stops, without a match, once the iterator's character
+   position reaches the end of the text, or the current character is
+   BIDI_EOB or a newline.  In that case the value is the end-of-text
+   position: the number of characters in the string when BIDI_IT
+   iterates over a string, and ZV otherwise.  */
+ptrdiff_t
+bidi_find_first_overridden (struct bidi_it *bidi_it)
+{
+  /* ZV describes the current buffer, so it is not a valid limit or
+     "not found" value when the iterator walks a string: it could stop
+     the scan too early, or be mistaken for a real position inside a
+     string that is longer than the buffer.  */
+  ptrdiff_t eob
+    = ((bidi_it->string.s || STRINGP (bidi_it->string.lstring))
+       ? bidi_it->string.schars : ZV);
+  ptrdiff_t found_pos = eob;
+
+  do
+    {
+      /* Need to call bidi_resolve_weak, not bidi_resolve_explicit,
+         because the directional overrides are applied by the
+         former.  */
+      bidi_type_t type = bidi_resolve_weak (bidi_it);
+
+      if ((type == STRONG_R && bidi_it->orig_type == STRONG_L)
+          || (type == STRONG_L
+              && (bidi_it->orig_type == STRONG_R
+                  || bidi_it->orig_type == STRONG_AL)))
+        found_pos = bidi_it->charpos;
+    } while (found_pos == eob
+             && bidi_it->charpos < eob
+             && bidi_it->ch != BIDI_EOB
+             && bidi_it->ch != '\n');
+
+  return found_pos;
+}
+
/* This is meant to be called from within the debugger, whenever you
wish to examine the cache contents. */
void bidi_dump_cached_states (void) EXTERNALLY_VISIBLE;
extern void bidi_pop_it (struct bidi_it *);
extern void *bidi_shelve_cache (void);
extern void bidi_unshelve_cache (void *, bool);
+extern ptrdiff_t bidi_find_first_overridden (struct bidi_it *);
/* Defined in xdisp.c */
}
}
+DEFUN ("bidi-find-overridden-directionality",
+       Fbidi_find_overridden_directionality,
+       Sbidi_find_overridden_directionality, 2, 3, 0,
+       doc: /* Return position between FROM and TO where directionality was overridden.
+
+This function returns the first character position in the specified
+region of OBJECT where there is a character whose `bidi-class' property
+is `L', but which was forced to display as `R' by a directional
+override, and likewise with characters whose `bidi-class' is `R'
+or `AL' that were forced to display as `L'.
+
+If no such character is found, the function returns nil.
+
+OBJECT is a Lisp string or buffer to search for overridden
+directionality, and defaults to the current buffer if nil or omitted.
+OBJECT can also be a window, in which case the function will search
+the buffer displayed in that window. Passing the window instead of
+a buffer is preferable when the buffer is displayed in some window,
+because this function will then be able to correctly account for
+window-specific overlays, which can affect the results.
+
+Strong directional characters `L', `R', and `AL' can have their
+intrinsic directionality overridden by directional override
+control characters RLO \(u+202e) and LRO \(u+202d). See the
+function `get-char-code-property' for a way to inquire about
+the `bidi-class' property of a character. */)
+  (Lisp_Object from, Lisp_Object to, Lisp_Object object)
+{
+  struct buffer *buf = current_buffer;
+  struct buffer *old = buf;
+  struct window *w = NULL;
+  bool frame_window_p = FRAME_WINDOW_P (SELECTED_FRAME ());
+  struct bidi_it itb;
+  ptrdiff_t from_pos, to_pos, from_bpos;
+  void *itb_data;
+
+  /* Decode OBJECT: nil keeps the current buffer, a buffer or a live
+     window selects the buffer to scan, anything else must be a
+     string.  */
+  if (!NILP (object))
+    {
+      if (BUFFERP (object))
+        buf = XBUFFER (object);
+      else if (WINDOWP (object))
+        {
+          w = decode_live_window (object);
+          buf = XBUFFER (w->contents);
+          frame_window_p = FRAME_WINDOW_P (XFRAME (w->frame));
+        }
+      else
+        CHECK_STRING (object);
+    }
+
+  if (STRINGP (object))
+    {
+      /* Characters in unibyte strings are always treated by bidi.c as
+         strong LTR.  */
+      if (!STRING_MULTIBYTE (object)
+          /* When we are loading loadup.el, the character property
+             tables needed for bidi iteration are not yet
+             available.  */
+          || !NILP (Vpurify_flag))
+        return Qnil;
+
+      validate_subarray (object, from, to, SCHARS (object), &from_pos, &to_pos);
+      if (from_pos >= SCHARS (object))
+        return Qnil;
+
+      /* Set up the bidi iterator.  */
+      itb_data = bidi_shelve_cache ();
+      itb.paragraph_dir = NEUTRAL_DIR;
+      itb.string.lstring = object;
+      itb.string.s = NULL;
+      itb.string.schars = SCHARS (object);
+      itb.string.bufpos = 0;
+      itb.string.from_disp_str = 0;
+      itb.string.unibyte = 0;
+      itb.w = w;
+      bidi_init_it (0, 0, frame_window_p, &itb);
+    }
+  else
+    {
+      /* Nothing this fancy can happen in unibyte buffers, or in a
+         buffer that disabled reordering, or if FROM is at EOB.  */
+      if (NILP (BVAR (buf, bidi_display_reordering))
+          || NILP (BVAR (buf, enable_multibyte_characters))
+          /* When we are loading loadup.el, the character property
+             tables needed for bidi iteration are not yet
+             available.  */
+          || !NILP (Vpurify_flag))
+        return Qnil;
+
+      /* NOTE(review): if validate_region can signal an error, BUF
+         would stay current after the non-local exit -- confirm
+         whether an unwind handler is needed here.  */
+      set_buffer_temp (buf);
+      validate_region (&from, &to);
+      from_pos = XINT (from);
+      to_pos = XINT (to);
+      if (from_pos >= ZV)
+        {
+          /* BUF was made current above; switch back before bailing
+             out, so the caller's current buffer is not changed.  */
+          set_buffer_temp (old);
+          return Qnil;
+        }
+
+      /* Set up the bidi iterator.  Start the iteration at FROM if it
+         is at BEGV or right after a newline; otherwise back up with
+         find_newline_no_quit and start from the position it
+         returns.  */
+      itb_data = bidi_shelve_cache ();
+      from_bpos = CHAR_TO_BYTE (from_pos);
+      if (from_pos == BEGV)
+        {
+          itb.charpos = BEGV;
+          itb.bytepos = BEGV_BYTE;
+        }
+      else if (FETCH_CHAR (from_bpos - 1) == '\n')
+        {
+          itb.charpos = from_pos;
+          itb.bytepos = from_bpos;
+        }
+      else
+        itb.charpos = find_newline_no_quit (from_pos, from_bpos,
+                                            -1, &itb.bytepos);
+      itb.paragraph_dir = NEUTRAL_DIR;
+      itb.string.s = NULL;
+      itb.string.lstring = Qnil;
+      itb.string.bufpos = 0;
+      itb.string.from_disp_str = 0;
+      itb.string.unibyte = 0;
+      itb.w = w;
+      bidi_init_it (itb.charpos, itb.bytepos, frame_window_p, &itb);
+    }
+
+  ptrdiff_t found;
+  do {
+    /* For the purposes of this function, the actual base direction of
+       the paragraph doesn't matter, so just set it to L2R.  */
+    bidi_paragraph_init (L2R, &itb, 0);
+    /* The iteration may have started before FROM; skip any hits that
+       precede the requested region.  */
+    while ((found = bidi_find_first_overridden (&itb)) < from_pos)
+      ;
+    /* Keep going with the next paragraph as long as the scan stopped
+       at a newline before TO without finding anything.  */
+  } while (found == ZV && itb.ch == '\n' && itb.charpos < to_pos);
+
+  bidi_unshelve_cache (itb_data, 0);
+  set_buffer_temp (old);
+
+  return (from_pos <= found && found < to_pos) ? make_number (found) : Qnil;
+}
+
DEFUN ("move-point-visually", Fmove_point_visually,
Smove_point_visually, 1, 1, 0,
doc: /* Move point in the visual order in the specified DIRECTION.
defsubr (&Scurrent_bidi_paragraph_direction);
defsubr (&Swindow_text_pixel_size);
defsubr (&Smove_point_visually);
+ defsubr (&Sbidi_find_overridden_directionality);
DEFSYM (Qmenu_bar_update_hook, "menu-bar-update-hook");
DEFSYM (Qoverriding_terminal_local_map, "overriding-terminal-local-map");