From: Stefan Monnier
Date: Thu, 29 Oct 2020 17:38:59 +0000 (-0400)
Subject: * lisp/progmodes/tcl.el: Better match Tcl string formation rules (bug#39277)
X-Git-Tag: emacs-28.0.90~5354
X-Git-Url: http://git.eshelyaron.com/gitweb/?a=commitdiff_plain;h=7b02d9ba480cdd5710b0cdfefc11facd96b44444;p=emacs.git

* lisp/progmodes/tcl.el: Better match Tcl string formation rules (bug#39277)

(tcl--word-delimiters): New const.
(tcl--syntax-of-quote): New function.
(tcl-syntax-propertize-function): Use them.
(tcl-mode): Enable `syntax-propertize-multiline`.

* test/manual/indent/tcl.tcl: New file.
---

diff --git a/lisp/progmodes/tcl.el b/lisp/progmodes/tcl.el
index 717008a0a22..d4d51e8b50c 100644
--- a/lisp/progmodes/tcl.el
+++ b/lisp/progmodes/tcl.el
@@ -407,10 +407,67 @@ This variable is generally set from `tcl-proc-regexp',
 `tcl-typeword-list', and `tcl-keyword-list' by the function
 `tcl-set-font-lock-keywords'.")
 
+(eval-and-compile
+  (defconst tcl--word-delimiters "[;{ \t\n"))
+
+(defun tcl--syntax-of-quote (pos)
+  "Return the syntax to give the double quote at POS, or nil.
+A nil value leaves the syntax of the quote untouched.
+Punctuation syntax is returned when the quote does not open a
+string: inside a bare word, or before an unbalanced closing brace."
+  ;; This is pretty tricky, because strings can be written as "..."
+  ;; or as {...} or without any quoting at all for some simple and not so
+  ;; simple cases (e.g. `abc' but also `a"b').  To make things more
+  ;; interesting, code is represented as strings, so the content of
+  ;; strings can be later re-lexed to find nested strings.
+  (save-excursion
+    (let ((ppss (syntax-ppss pos)))
+      (cond
+       ((nth 8 ppss) nil) ;; Within a string or a comment.
+       ((not (memq (char-before pos)
+                   (cons nil
+                         (eval-when-compile
+                           (mapcar #'identity tcl--word-delimiters)))))
+        ;; The double quote appears within some other lexical entity.
+        ;; FIXME: Similar treatment should be used for `{' which can appear
+        ;; within non-delimited strings (but only at top-level, so
+        ;; maybe it's not worth worrying about).
+        (string-to-syntax "."))
+       ((zerop (nth 0 ppss))
+        ;; Not within a { ... }, so can't be truncated by a }.
+        ;; FIXME: The syntax-table also considers () and [] as paren
+        ;; delimiters just like {}, even though Tcl treats them differently.
+        ;; Tho I'm not sure it's worth worrying about, either.
+        nil)
+       (t
+        ;; A double quote within a {...}: leave it as a normal string
+        ;; delimiter only if we don't find a closing } before we
+        ;; find a closing ".
+        (let ((type nil)
+              (depth 0))
+          (forward-char 1)
+          (while (and (not type)
+                      (re-search-forward "[\"{}\\]" nil t))
+            (pcase (char-after (match-beginning 0))
+              (?\\ (forward-char 1))
+              (?\" (setq type 'matched))
+              (?\{ (cl-incf depth))
+              (?\} (if (zerop depth) (setq type 'unmatched)
+                     (cl-decf depth)))))
+          (when (> (line-beginning-position) pos)
+            ;; The quote is not on the same line as the deciding
+            ;; factor, so make sure we revisit this choice later.
+            (put-text-property pos (point) 'syntax-multiline t))
+          (when (eq type 'unmatched)
+            ;; The quote has no matching close because a } closes the
+            ;; surrounding string before, so it doesn't really "open a string".
+            (string-to-syntax ".")))))))
+
 (defconst tcl-syntax-propertize-function
   (syntax-propertize-rules
    ;; Mark the few `#' that are not comment-markers.
-   ("[^;[{ \t\n][ \t]*\\(#\\)" (1 ".")))
+   ((concat "[^" tcl--word-delimiters "][ \t]*\\(#\\)") (1 "."))
+   ("\"" (0 (tcl--syntax-of-quote (match-beginning 0)))))
   "Syntactic keywords for `tcl-mode'.")
 
 ;; FIXME need some way to recognize variables because array refs look
@@ -593,6 +650,8 @@ already exist."
        '(tcl-font-lock-keywords nil nil nil beginning-of-defun))
   (set (make-local-variable 'syntax-propertize-function)
        tcl-syntax-propertize-function)
+  (add-hook 'syntax-propertize-extend-region-functions
+            #'syntax-propertize-multiline 'append 'local)
 
   (set (make-local-variable 'imenu-generic-expression)
        tcl-imenu-generic-expression)
diff --git a/test/manual/indent/tcl.tcl b/test/manual/indent/tcl.tcl
new file mode 100644
index 00000000000..c3781533ca4
--- /dev/null
+++ b/test/manual/indent/tcl.tcl
@@ -0,0 +1,22 @@
+# Some sample code that tries to exercise the font-lock
+# of various forms of writing strings.
+
+puts "hello}"; # Top-level strings can contain unescaped closing braces!
+
+puts a"b; # Non-delimited strings can contain quotes!
+puts a""b; # Even several of them!
+
+proc foo1 {} {
+    puts "hello"; # Normal case!
+    puts "hello\}; # This will signal an error when `foo1` is called!
+}
+
+proc foo2 {} {
+    puts "hello; # This will also signal an error when `foo2` is called!
+}
+
+proc foo3 {} {
+    puts a"b; # This will not signal an error!
+    puts a""b"; # And that won't either!
+    puts "a""b"; # But this will!
+}