From: Eli Zaretskii Date: Thu, 4 Nov 2021 18:33:26 +0000 (+0200) Subject: Better detection of potentially malicious bidi text X-Git-Tag: emacs-29.0.90~3671^2~246 X-Git-Url: http://git.eshelyaron.com/gitweb/?a=commitdiff_plain;h=b96855310efed13e0db1403759b686b9bc3e7490;p=emacs.git Better detection of potentially malicious bidi text * src/bidi.c (bidi_find_first_overridden): Extend to detect more subtle effects of directional formatting controls, to include embeddings and isolates. * src/xdisp.c (Fbidi_find_overridden_directionality): Accept an additional argument BASE-DIR to specify the base direction of the paragraphs. * lisp/international/mule-cmds.el (confusingly-reordered): New face. (reorder-starters, reorder-enders): New variables. (highlight-confusing-reorderings): New command to detect and highlight suspiciously reordered text. * test/src/xdisp-tests.el (xdisp-tests--find-directional-overrides): New test. * etc/NEWS: Announce the new and improved features. * etc/tutorials/TUTORIAL.he: Fix embeddings with incorrect directions. --- diff --git a/etc/NEWS b/etc/NEWS index bd5dbf16e9a..30df529c687 100644 --- a/etc/NEWS +++ b/etc/NEWS @@ -93,6 +93,14 @@ Image specifiers can now use ':type webp'. For example, an alist entry as '(window-width . (body-columns . 40))' will make the body of the chosen window 40 columns wide. +** Better detection of text suspiciously reordered on display. +The function 'bidi-find-overridden-directionality' has been extended +to detect reordering effects produced by embeddings and isolates +(started by directional formatting control characters such as RLO and +LRI). The new command 'highlight-confusing-reorderings' finds and +highlights segments of buffer text whose reordering for display is +suspicious and could be malicious. 
+ * Editing Changes in Emacs 29.1 diff --git a/etc/tutorials/TUTORIAL.he b/etc/tutorials/TUTORIAL.he index 2ee4f74c324..465768aa87c 100644 --- a/etc/tutorials/TUTORIAL.he +++ b/etc/tutorials/TUTORIAL.he @@ -1,4 +1,4 @@ -שיעור ראשון בשימוש ב־‫Emacs‬. זכויות שימוש ראה בסוף המסמך. +שיעור ראשון בשימוש ב־‪Emacs‬. זכויות שימוש ראה בסוף המסמך. פקודות רבות של Emacs משתמשות במקש CONTROL (בדרך־כלל מסומן ב־CTRL) או במקש META (בדרך־כלל מסומן ALT). במקום לציין את כל השמות האפשריים @@ -24,7 +24,7 @@ שימו לב לחפיפה של שתי שורות כאשר אתם עוברים ממסך למסך, מה שמבטיח רציפות מסוימת בעת קריאת הטקסט. -הטקסט שלפניכם הינו עותק של שיעור בשימוש ב־‫Emacs‬ שהותאם קלות עבורכם. +הטקסט שלפניכם הינו עותק של שיעור בשימוש ב־‪Emacs‬ שהותאם קלות עבורכם. בהמשך תקבלו הוראות לנסות פקודות שונות כדי לבצע שינויים בטקסט הזה. אם במקרה תשנו את הטקסט לפני שנבקש, אל דאגה: זוהי "עריכה" שהיא יעודו של Emacs. diff --git a/lisp/international/mule-cmds.el b/lisp/international/mule-cmds.el index be4a4eb0cb2..8a64d6195cc 100644 --- a/lisp/international/mule-cmds.el +++ b/lisp/international/mule-cmds.el @@ -3259,4 +3259,51 @@ as names, not numbers." (define-obsolete-function-alias 'ucs-insert 'insert-char "24.3") (define-key ctl-x-map "8\r" 'insert-char) +(defface confusingly-reordered + '((t :inherit underline :underline (:style wave :color "Red1"))) + "Face for highlighting text that was bidi-reordered in confusing ways." + :version "29.1") + +(defvar reorder-starters "[\u202A\u202B\u202D\u202E\u2066-\u2068]+" + "Regular expression for characters that start forced-reordered text.") +(defvar reorder-enders "[\u202C\u2069]+\\|\n" + "Regular expression for characters that end forced-reordered text.") + +(defun highlight-confusing-reorderings (beg end) + "Highlight text in region that might be bidi-reordered in suspicious ways. 
+This command finds and highlights segments of buffer text that could have +been reordered on display by using directional control characters, such +as RLO and LRI, in a way that their display is deliberately meant to +confuse the reader. These techniques can be used for obfuscating +malicious source code. The suspicious stretches of buffer text are +highlighted using the `confusingly-reordered' face. + +If the region is active, check the text inside the region. Otherwise +check the entire buffer. When called from Lisp, pass BEG and END to +specify the portion of the buffer to check." + (interactive + (if (use-region-p) + (list (region-beginning) (region-end)) + (list (point-min) (point-max)))) + (save-excursion + (let (next) + (goto-char beg) + (while (setq next + (bidi-find-overridden-directionality + (point) end nil + (current-bidi-paragraph-direction))) + (goto-char next) + (let ((start + (save-excursion + (re-search-backward reorder-starters nil t))) + (finish + (save-excursion + (re-search-forward reorder-enders nil t)))) + (with-silent-modifications + (add-text-properties start (1- finish) + '(font-lock-face + 'confusingly-reordered + face 'confusingly-reordered))) + (goto-char finish)))))) + ;;; mule-cmds.el ends here diff --git a/src/bidi.c b/src/bidi.c index 1413ba6b888..3cdcd7da1b5 100644 --- a/src/bidi.c +++ b/src/bidi.c @@ -3564,11 +3564,17 @@ bidi_move_to_visually_next (struct bidi_it *bidi_it) } /* Utility function for looking for strong directional characters - whose bidi type was overridden by a directional override. */ + whose bidi type was overridden by directional override or embedding + or isolate control characters. */ ptrdiff_t bidi_find_first_overridden (struct bidi_it *bidi_it) { ptrdiff_t found_pos = ZV; + /* Maximum bidi levels we allow for L2R and R2L characters. Note + that these are levels after resolving explicit embeddings, + overrides, and isolates, i.e. before resolving implicit levels. 
*/ + int max_l2r = bidi_it->paragraph_dir == L2R ? 0 : 2; + int max_r2l = 1; do { @@ -3577,10 +3583,20 @@ bidi_find_first_overridden (struct bidi_it *bidi_it) former. */ bidi_type_t type = bidi_resolve_weak (bidi_it); + /* Detect strong L or R types that have been overridden by + explicit overrides. */ if ((type == STRONG_R && bidi_it->orig_type == STRONG_L) || (type == STRONG_L && (bidi_it->orig_type == STRONG_R - || bidi_it->orig_type == STRONG_AL))) + || bidi_it->orig_type == STRONG_AL)) + /* Detect strong L or R types that were pushed into higher + embedding levels (and will thus reorder) by explicit + embeddings and isolates. */ + || (bidi_it->orig_type == STRONG_L + && bidi_it->level_stack[bidi_it->stack_idx].level > max_l2r) + || ((bidi_it->orig_type == STRONG_R + || bidi_it->orig_type == STRONG_AL) + && bidi_it->level_stack[bidi_it->stack_idx].level > max_r2l)) found_pos = bidi_it->charpos; } while (found_pos == ZV && bidi_it->charpos < ZV diff --git a/src/xdisp.c b/src/xdisp.c index 39ede3c0952..646beed6f02 100644 --- a/src/xdisp.c +++ b/src/xdisp.c @@ -24511,7 +24511,7 @@ See also `bidi-paragraph-direction'. */) DEFUN ("bidi-find-overridden-directionality", Fbidi_find_overridden_directionality, - Sbidi_find_overridden_directionality, 2, 3, 0, + Sbidi_find_overridden_directionality, 3, 4, 0, doc: /* Return position between FROM and TO where directionality was overridden. This function returns the first character position in the specified @@ -24530,12 +24530,18 @@ a buffer is preferable when the buffer is displayed in some window, because this function will then be able to correctly account for window-specific overlays, which can affect the results. +Optional argument BASE-DIR specifies the base paragraph direction +of the text. It should be a symbol, either `left-to-right' +or `right-to-left', and defaults to `left-to-right'. 
+ Strong directional characters `L', `R', and `AL' can have their intrinsic directionality overridden by directional override -control characters RLO (u+202e) and LRO (u+202d). See the -function `get-char-code-property' for a way to inquire about +control characters RLO (u+202E) and LRO (u+202D). They can also +have their directionality affected by other formatting control +characters: LRE (u+202A), RLE (u+202B), LRI (u+2066), and RLI (u+2067). +See the function `get-char-code-property' for a way to inquire about the `bidi-class' property of a character. */) - (Lisp_Object from, Lisp_Object to, Lisp_Object object) + (Lisp_Object from, Lisp_Object to, Lisp_Object object, Lisp_Object base_dir) { struct buffer *buf = current_buffer; struct buffer *old = buf; @@ -24632,10 +24638,9 @@ the `bidi-class' property of a character. */) } ptrdiff_t found; + bidi_dir_t bdir = EQ (base_dir, Qright_to_left) ? R2L : L2R; do { - /* For the purposes of this function, the actual base direction of - the paragraph doesn't matter, so just set it to L2R. */ - bidi_paragraph_init (L2R, &itb, false); + bidi_paragraph_init (bdir, &itb, false); while ((found = bidi_find_first_overridden (&itb)) < from_pos) ; } while (found == ZV && itb.ch == '\n' && itb.charpos < to_pos); diff --git a/test/src/xdisp-tests.el b/test/src/xdisp-tests.el index 4e7d2ad8ab2..a385ded8e1d 100644 --- a/test/src/xdisp-tests.el +++ b/test/src/xdisp-tests.el @@ -99,4 +99,19 @@ (width-in-chars (/ (car size) char-width))) (should (equal width-in-chars 3))))) +(ert-deftest xdisp-tests--find-directional-overrides () + (with-temp-buffer + (insert "\ +int main() { + bool isAdmin = false; + /*‮ }⁦if (isAdmin)⁩ ⁦ begin admins only */ + printf(\"You are an admin.\\n\"); + /* end admins only ‮ { ⁦*/ + return 0; +}") + (goto-char (point-min)) + (should (eq (bidi-find-overridden-directionality (point-min) (point-max) + nil) + 46)))) + ;;; xdisp-tests.el ends here