From d8abff398bc45a791783c5c463838ba6fa3f030e Mon Sep 17 00:00:00 2001
From: lWarne
Date: Sat, 6 Aug 2022 15:01:38 +0200
Subject: [PATCH] Fontify python escape sequences in literals

* lisp/progmodes/python.el (python-rx): Add regular expressions
matching escape codes in string and byte literals
(python--string-bytes-literal-matcher): new function
(python--not-raw-bytes-literal-start-regexp): new constant
(python--not-raw-string-literal-start-regexp): new constant
* test/lisp/progmodes/python-tests.el: Add tests for new
fontification (bug#57004).
---
 lisp/progmodes/python.el            | 75 +++++++++++++++++++++-
 test/lisp/progmodes/python-tests.el | 95 +++++++++++++++++++++++++++++
 2 files changed, 168 insertions(+), 2 deletions(-)

diff --git a/lisp/progmodes/python.el b/lisp/progmodes/python.el
index b8fc7d4c546..27bdbae3113 100644
--- a/lisp/progmodes/python.el
+++ b/lisp/progmodes/python.el
@@ -427,7 +427,23 @@ This variant of `rx' supports common Python named REGEXPS."
                               (: "vim:" (* space) "set" (+ space)
                                  "fileencoding" (* space) ?= (* space)
                                  (group-n 1 (+ (or word ?-)))
-                                 (* space) ":")))))
+                                 (* space) ":"))))
+         ;; Each escape pattern also matches the character before the
+         ;; backslash, which must not itself be a backslash; the escape
+         ;; sequence proper is group 1.
+         (bytes-escape-sequence
+          (seq (not "\\")
+               (group (or "\\\\" "\\'" "\\\"" "\\a" "\\b" "\\f"
+                          "\\n" "\\r" "\\t" "\\v"
+                          ;; Octal escapes have one to three digits.
+                          (seq "\\" (** 1 3 (in "0-7")))
+                          (seq "\\x" hex hex)))))
+         (string-escape-sequence
+          (or bytes-escape-sequence
+              (seq (not "\\")
+                   (or (group-n 1 "\\u" (= 4 hex))
+                       (group-n 1 "\\U" (= 8 hex))
+                       (group-n 1 "\\N{" (*? anychar) "}"))))))
      (rx ,@regexps)))
 
 
@@ -539,6 +555,44 @@ the {...} holes that appear within f-strings."
         (goto-char (min limit (1+ send)))
         (setq ppss (syntax-ppss))))))
 
+(defconst python--not-raw-bytes-literal-start-regexp
+  (rx (or bos (not alnum)) (or "b" "B") (or "\"" "\"\"\"" "'" "'''") eos)
+  "A regular expression matching the start of a not-raw bytes literal.")
+
+(defconst python--not-raw-string-literal-start-regexp
+  (rx (or bos (not alnum)) (? (or "u" "U" "F" "f")) (or "\"" "\"\"\"" "'" "'''") eos)
+  "A regular expression matching the start of a not-raw string literal.")
+
+(defun python--string-bytes-literal-matcher (regexp start-regexp)
+  "Return a font-lock matcher for REGEXP within certain literals.
+The returned function takes a search bound LIMIT and searches forward
+for REGEXP.  A match is accepted only when it ends inside a string,
+bytes literal or comment whose first character, together with up to
+five characters before it, matches START-REGEXP.  The function returns
+non-nil for the first accepted match before LIMIT, and nil otherwise."
+  (lambda (limit)
+    (cl-loop for result = (re-search-forward regexp limit t)
+             for result-valid = (and
+                                 result
+                                 ;; (nth 8 ppss) is nil outside strings
+                                 ;; and comments: reject such matches
+                                 ;; instead of doing arithmetic on nil.
+                                 (let ((pos (nth 8 (syntax-ppss))))
+                                   (when pos
+                                     ;; REGEXP consumes the character
+                                     ;; before the backslash; step back
+                                     ;; so the end of this escape can
+                                     ;; play that role for an adjacent
+                                     ;; escape sequence.
+                                     (backward-char)
+                                     (string-match-p
+                                      start-regexp
+                                      (buffer-substring-no-properties
+                                       (max (- pos 5) (point-min))
+                                       (min (+ pos 1) (point-max)))))))
+             until (or (not result) result-valid)
+             finally return (and result-valid result))))
+
 (defvar python-font-lock-keywords-level-1
   `((,(python-rx symbol-start "def" (1+ space) (group symbol-name))
      (1 font-lock-function-name-face))
@@ -716,7 +770,24 @@ sign in chained assignment."
                  grouped-assignment-target (* space)
                  (or ")" "]") (* space)
                  assignment-operator))
-     (1 font-lock-variable-name-face)))
+     (1 font-lock-variable-name-face))
+    ;; escape sequences within bytes literals
+    ;;   "\\" "\'" "\"" "\a" "\b" "\f" "\n" "\r" "\t" "\v"
+    ;;   "\ooo" character with octal value ooo (one to three digits)
+    ;;   "\xhh" character with hex value hh
+    (,(python--string-bytes-literal-matcher
+       (python-rx bytes-escape-sequence)
+       python--not-raw-bytes-literal-start-regexp)
+     (1 font-lock-constant-face t))
+    ;; escape sequences within string literals, the same as appear in bytes
+    ;; literals in addition to:
+    ;;   "\uxxxx" Character with 16-bit hex value xxxx
+    ;;   "\Uxxxxxxxx" Character with 32-bit hex value xxxxxxxx
+    ;;   "\N{name}" Character named name in the Unicode database
+    (,(python--string-bytes-literal-matcher
+       (python-rx string-escape-sequence)
+       python--not-raw-string-literal-start-regexp)
+     (1 font-lock-constant-face t)))
   "Font lock keywords to use in `python-mode' for maximum decoration.
 
 This decoration level includes everything in
diff --git a/test/lisp/progmodes/python-tests.el b/test/lisp/progmodes/python-tests.el
index 6f2ad87f81a..07f2c4f09a3 100644
--- a/test/lisp/progmodes/python-tests.el
+++ b/test/lisp/progmodes/python-tests.el
@@ -380,6 +380,101 @@ def f(x: CustomInt) -> CustomInt:
      (128 . font-lock-builtin-face) (131)
      (144 . font-lock-keyword-face) (150))))
 
+(ert-deftest python-font-lock-escape-sequence-string-newline ()
+  (python-tests-assert-faces
+   "'\\n'
+\"\\n\"
+f'\\n'
+f\"\\n\"
+u'\\n'
+u\"\\n\""
+   '((1 . font-lock-doc-face)
+     (2 . font-lock-constant-face)
+     (4 . font-lock-doc-face) (5)
+     (6 . font-lock-doc-face)
+     (7 . font-lock-constant-face)
+     (9 . font-lock-doc-face) (10)
+     (12 . font-lock-string-face)
+     (13 . font-lock-constant-face)
+     (15 . font-lock-string-face) (16)
+     (18 . font-lock-string-face)
+     (19 . font-lock-constant-face)
+     (21 . font-lock-string-face) (22)
+     (24 . font-lock-string-face)
+     (25 . font-lock-constant-face)
+     (27 . font-lock-string-face) (28)
+     (30 . font-lock-string-face)
+     (31 . font-lock-constant-face)
+     (33 . font-lock-string-face))))
+
+(ert-deftest python-font-lock-escape-sequence-bytes-newline ()
+  (python-tests-assert-faces
+   "b'\\n'
+b\"\\n\""
+   '((1)
+     (2 . font-lock-doc-face)
+     (3 . font-lock-constant-face)
+     (5 . font-lock-doc-face) (6)
+     (8 . font-lock-doc-face)
+     (9 . font-lock-constant-face)
+     (11 . font-lock-doc-face))))
+
+(ert-deftest python-font-lock-escape-sequence-hex-octal ()
+  (python-tests-assert-faces
+   "b'\\x12 \\777'
+'\\x12 \\777'"
+   '((1)
+     (2 . font-lock-doc-face)
+     (3 . font-lock-constant-face)
+     (7 . font-lock-doc-face)
+     (8 . font-lock-constant-face)
+     (12 . font-lock-doc-face) (13)
+     (14 . font-lock-doc-face)
+     (15 . font-lock-constant-face)
+     (19 . font-lock-doc-face)
+     (20 . font-lock-constant-face)
+     (24 . font-lock-doc-face))))
+
+(ert-deftest python-font-lock-escape-sequence-unicode ()
+  (python-tests-assert-faces
+   "b'\\u1234 \\U00010348 \\N{Plus-Minus Sign}'
+'\\u1234 \\U00010348 \\N{Plus-Minus Sign}'"
+   '((1)
+     (2 . font-lock-doc-face) (41)
+     (42 . font-lock-doc-face)
+     (43 . font-lock-constant-face)
+     (49 . font-lock-doc-face)
+     (50 . font-lock-constant-face)
+     (60 . font-lock-doc-face)
+     (61 . font-lock-constant-face)
+     (80 . font-lock-doc-face))))
+
+(ert-deftest python-font-lock-raw-escape-sequence ()
+  (python-tests-assert-faces
+   "rb'\\x12 \123 \\n'
+r'\\x12 \123 \\n \\u1234 \\U00010348 \\N{Plus-Minus Sign}'"
+   '((1)
+     (3 . font-lock-doc-face) (14)
+     (16 . font-lock-doc-face))))
+
+(ert-deftest python-font-lock-escape-sequence-adjacent ()
+  "Escape sequences that directly follow one another are all fontified."
+  (python-tests-assert-faces
+   "'\\n\\t'"
+   '((1 . font-lock-doc-face)
+     (2 . font-lock-constant-face)
+     (6 . font-lock-doc-face))))
+
+(ert-deftest python-font-lock-escape-sequence-short-octal ()
+  "Octal escape sequences with fewer than three digits are fontified."
+  (python-tests-assert-faces
+   "'\\0 \\12'"
+   '((1 . font-lock-doc-face)
+     (2 . font-lock-constant-face)
+     (4 . font-lock-doc-face)
+     (5 . font-lock-constant-face)
+     (8 . font-lock-doc-face))))
+
 
 ;;; Indentation
 
-- 
2.39.2