From: Alan Mackenzie Date: Sun, 20 May 2018 13:28:14 +0000 (+0000) Subject: Enhance CC Mode's fontification, etc., of unterminated strings. X-Git-Tag: emacs-27.0.90~5018 X-Git-Url: http://git.eshelyaron.com/gitweb/?a=commitdiff_plain;h=bb591f139f0602af292c772f974dcc14dabb1deb;p=emacs.git Enhance CC Mode's fontification, etc., of unterminated strings. String delimiters, including escaped new lines, of correctly terminated strings are left in font-lock-string-face. All others get font-lock-warning-face. The latter get syntax-table text properties on the opening string delim and the "terminating EOL". Correct two miscellaneous bugs: the handling of text properties on Java Mode's generic delimiters; the handling of c-just-done-before-change. * lisp/progmodes/cc-defs.el (c-point): New position 'eoll "end of logical line". (c-characterp): New macro. * lisp/progmodes/cc-fonts.el (c-font-lock-invalid-string): Removed. (c-basic-matchers-before): Use a simple matcher in place of the form around c-font-lock-invalid-string. * lisp/progmodes/cc-langs.el (c-get-state-before-change-functions): Add c-before-change-check-unbalanced-strings to the value for all modes except AWK Mode. Also add c-before-change-check-<>-operators to Java Mode, correcting an error in that mode's handling of generic delimiters. (c-before-font-lock-functions): Add c-after-change-re-mark-unbalanced-strings to the value for all modes except AWK Mode. (c-single-quotes-quote-strings, c-string-delims): New lang variables for future enhancements. (c-string-innards-re-alist): New lang variable. * lisp/progmodes/cc-mode.el (c-just-done-before-change): Do not set this variable when a change is the alteration of text properties. (c-basic-common-init): Set parse-sexp-lookup-properties (and the XEmacs equivalent) also for Pike Mode. (c-neutralize-CPP-line): No longer neutralize unbalanced quotes here. (c-unescaped-nls-in-string-p, c-multiline-string-start-is-being-detached) (c-pps-to-string-delim, c-before-change-check-unbalanced-strings) (c-after-change-re-mark-unbalanced-strings): New functions. (c-after-change): Fix a bug with the handling of c-just-done-before-change. --- diff --git a/lisp/progmodes/cc-defs.el b/lisp/progmodes/cc-defs.el index 7ec57e03b38..53d665477c1 100644 --- a/lisp/progmodes/cc-defs.el +++ b/lisp/progmodes/cc-defs.el @@ -219,6 +219,7 @@ one of the following symbols: `bol' -- beginning of line `eol' -- end of line +`eoll' -- end of logical line (i.e. without escaped NL) `bod' -- beginning of defun `eod' -- end of defun `boi' -- beginning of indentation @@ -254,6 +255,16 @@ to it is returned. This function does not modify the point or the mark." (end-of-line) (point)))) + ((eq position 'eoll) + `(save-excursion + ,@(if point `((goto-char ,point))) + (while (progn + (end-of-line) + (prog1 (eq (logand 1 (skip-chars-backward "\\\\")) 1))) + (beginning-of-line 2)) + (end-of-line) + (point))) + ((eq position 'boi) `(save-excursion ,@(if point `((goto-char ,point))) @@ -453,6 +464,13 @@ to it is returned. This function does not modify the point or the mark." `(int-to-char ,integer) integer)) +(defmacro c-characterp (arg) + ;; Return t when ARG is a character (XEmacs) or integer (Emacs), otherwise + ;; return nil. + (if (integerp ?c) + `(integerp ,arg) + `(characterp ,arg))) + (defmacro c-last-command-char () ;; The last character just typed. Note that `last-command-event' exists in ;; both Emacs and XEmacs, but with confusingly different meanings. diff --git a/lisp/progmodes/cc-fonts.el b/lisp/progmodes/cc-fonts.el index 7cac55e057f..9d2517f2524 100644 --- a/lisp/progmodes/cc-fonts.el +++ b/lisp/progmodes/cc-fonts.el @@ -682,33 +682,6 @@ stuff. Used on level 1 and higher." ''c-nonbreakable-space-face))) )) -(defun c-font-lock-invalid-string () - ;; Assuming the point is after the opening character of a string, - ;; fontify that char with `font-lock-warning-face' if the string - ;; decidedly isn't terminated properly. - ;; - ;; This function does hidden buffer changes. - (let ((start (1- (point)))) - (save-excursion - (and (eq (elt (parse-partial-sexp start (c-point 'eol)) 8) start) - (if (if (eval-when-compile (integerp ?c)) - ;; Emacs - (integerp c-multiline-string-start-char) - ;; XEmacs - (characterp c-multiline-string-start-char)) - ;; There's no multiline string start char before the - ;; string, so newlines aren't allowed. - (not (eq (char-before start) c-multiline-string-start-char)) - ;; Multiline strings are allowed anywhere if - ;; c-multiline-string-start-char is t. - (not c-multiline-string-start-char)) - (if c-string-escaped-newlines - ;; There's no \ before the newline. - (not (eq (char-before (point)) ?\\)) - ;; Escaped newlines aren't supported. - t) - (c-put-font-lock-face start (1+ start) 'font-lock-warning-face))))) - (defun c-font-lock-invalid-single-quotes (limit) ;; This function will be called from font-lock for a region bounded by POINT ;; and LIMIT, as though it were to identify a keyword for @@ -749,16 +722,12 @@ casts and declarations are fontified. Used on level 2 and higher." ;; `c-recognize-<>-arglists' is set. t `(;; Put a warning face on the opener of unclosed strings that - ;; can't span lines. Later font + ;; can't span lines and on the "terminating" newlines. Later font ;; lock packages have a `font-lock-syntactic-face-function' for ;; this, but it doesn't give the control we want since any ;; fontification done inside the function will be ;; unconditionally overridden. - ,(c-make-font-lock-search-function - ;; Match a char before the string starter to make - ;; `c-skip-comments-and-strings' work correctly. - (concat ".\\(" c-string-limit-regexp "\\)") - '((c-font-lock-invalid-string))) + ("\\s|" 0 font-lock-warning-face t nil) ;; Invalid single quotes. c-font-lock-invalid-single-quotes diff --git a/lisp/progmodes/cc-langs.el b/lisp/progmodes/cc-langs.el index 4a7c79a6dfa..53cb7f7f725 100644 --- a/lisp/progmodes/cc-langs.el +++ b/lisp/progmodes/cc-langs.el @@ -472,21 +472,24 @@ so that all identifiers are recognized as words.") (c-lang-defconst c-get-state-before-change-functions ;; For documentation see the following c-lang-defvar of the same name. ;; The value here may be a list of functions or a single function. - t nil + t 'c-before-change-check-unbalanced-strings c++ '(c-extend-region-for-CPP c-before-change-check-raw-strings c-before-change-check-<>-operators c-depropertize-CPP c-invalidate-macro-cache c-truncate-bs-cache + c-before-change-check-unbalanced-strings c-parse-quotes-before-change) (c objc) '(c-extend-region-for-CPP c-depropertize-CPP c-invalidate-macro-cache c-truncate-bs-cache + c-before-change-check-unbalanced-strings c-parse-quotes-before-change) - java 'c-parse-quotes-before-change - ;; 'c-before-change-check-<>-operators + java '(c-parse-quotes-before-change + c-before-change-check-unbalanced-strings + c-before-change-check-<>-operators) awk 'c-awk-record-region-clear-NL) (c-lang-defvar c-get-state-before-change-functions (let ((fs (c-lang-const c-get-state-before-change-functions))) @@ -514,14 +517,17 @@ parameters \(point-min) and \(point-max).") ;; For documentation see the following c-lang-defvar of the same name. ;; The value here may be a list of functions or a single function. t '(c-depropertize-new-text + c-after-change-re-mark-unbalanced-strings c-change-expand-fl-region) (c objc) '(c-depropertize-new-text c-parse-quotes-after-change + c-after-change-re-mark-unbalanced-strings c-extend-font-lock-region-for-macros c-neutralize-syntax-in-CPP c-change-expand-fl-region) c++ '(c-depropertize-new-text c-parse-quotes-after-change + c-after-change-re-mark-unbalanced-strings c-extend-font-lock-region-for-macros c-after-change-re-mark-raw-strings c-neutralize-syntax-in-CPP @@ -529,6 +535,7 @@ parameters \(point-min) and \(point-max).") c-change-expand-fl-region) java '(c-depropertize-new-text c-parse-quotes-after-change + c-after-change-re-mark-unbalanced-strings c-restore-<>-properties c-change-expand-fl-region) awk '(c-depropertize-new-text @@ -611,6 +618,19 @@ EOL terminated statements." (c c++ objc) t) (c-lang-defvar c-has-bitfields (c-lang-const c-has-bitfields)) +(c-lang-defconst c-single-quotes-quote-strings + "Whether the language uses single quotes for multi-char strings." + t nil) +(c-lang-defvar c-single-quotes-quote-strings + (c-lang-const c-single-quotes-quote-strings)) + +(c-lang-defconst c-string-delims + "A list of characters which can delimit arbitrary length strings" + t (if (c-lang-const c-single-quotes-quote-strings) + '(?\" ?\') + '(?\"))) +(c-lang-defvar c-string-delims (c-lang-const c-string-delims)) + (c-lang-defconst c-has-quoted-numbers "Whether the language has numbers quoted like 4'294'967'295." t nil @@ -856,6 +876,28 @@ literal are multiline." (c-lang-defvar c-multiline-string-start-char (c-lang-const c-multiline-string-start-char)) +(c-lang-defconst c-string-innards-re-alist + ;; An alist of regexps matching the innards of a string, the key being the + ;; string's delimiter. + ;; + ;; The regexps' matches extend up to, but not including, the closing string + ;; delimiter or an unescaped NL. An EOL is part of the string only if it is + ;; escaped. + t (mapcar (lambda (delim) + (cons + delim + (concat "\\(\\\\\\(.\\|\n\\|\r\\)\\|[^\\\n\r" + (string delim) + "]\\)*"))) + (and + (or (null (c-lang-const c-multiline-string-start-char)) + (c-characterp (c-lang-const c-multiline-string-start-char))) + (if (c-lang-const c-single-quotes-quote-strings) + '(?\" ?\') + '(?\"))))) +(c-lang-defvar c-string-innards-re-alist + (c-lang-const c-string-innards-re-alist)) + (c-lang-defconst c-opt-cpp-symbol "The symbol which starts preprocessor constructs when in the margin." t "#" diff --git a/lisp/progmodes/cc-mode.el b/lisp/progmodes/cc-mode.el index f42510932e5..2427191eae0 100644 --- a/lisp/progmodes/cc-mode.el +++ b/lisp/progmodes/cc-mode.el @@ -499,9 +499,10 @@ preferably use the `c-mode-menu' language constant directly." ;; `basic-save-buffer' does (insert ?\n) when `require-final-newline' is ;; non-nil; (ii) to detect when Emacs fails to invoke ;; `before-change-functions'. This can happen when reverting a buffer - see -;; bug #24094. It seems these failures happen only in GNU Emacs; XEmacs -;; seems to maintain the strict alternation of calls to -;; `before-change-functions' and `after-change-functions'. +;; bug #24094. It seems these failures happen only in GNU Emacs; XEmacs seems +;; to maintain the strict alternation of calls to `before-change-functions' +;; and `after-change-functions'. Note that this variable is not set when +;; `c-before-change' is invoked by a change to text properties. (defun c-basic-common-init (mode default-style) "Do the necessary initialization for the syntax handling routines @@ -563,7 +564,7 @@ that requires a literal mode spec at compile time." (when (or c-recognize-<>-arglists (c-major-mode-is 'awk-mode) - (c-major-mode-is '(java-mode c-mode c++-mode objc-mode))) + (c-major-mode-is '(java-mode c-mode c++-mode objc-mode pike-mode))) ;; We'll use the syntax-table text property to change the syntax ;; of some chars for this language, so do the necessary setup for ;; that. @@ -996,9 +997,9 @@ Note that the style variables are always made local to the buffer." ;; characters, ones which would interact syntactically with stuff outside ;; this region. ;; - ;; These are unmatched string delimiters, or unmatched - ;; parens/brackets/braces. An unclosed comment is regarded as valid, NOT - ;; obtrusive. + ;; These are unmatched parens/brackets/braces. An unclosed comment is + ;; regarded as valid, NOT obtrusive. Unbalanced strings are handled + ;; elsewhere. (save-excursion (let (s) (while @@ -1008,9 +1009,11 @@ Note that the style variables are always made local to the buffer." ((< (nth 0 s) 0) ; found an unmated ),},] (c-put-char-property (1- (point)) 'syntax-table '(1)) t) - ((nth 3 s) ; In a string - (c-put-char-property (nth 8 s) 'syntax-table '(1)) - t) + ;; Unbalanced strings are now handled by + ;; `c-before-change-check-unbalanced-strings', etc. + ;; ((nth 3 s) ; In a string + ;; (c-put-char-property (nth 8 s) 'syntax-table '(1)) + ;; t) ((> (nth 0 s) 0) ; In a (,{,[ (c-put-char-property (nth 1 s) 'syntax-table '(1)) t) @@ -1070,6 +1073,205 @@ Note that the style variables are always made local to the buffer." (forward-line)) ; no infinite loop with, e.g., "#//" ))))) +(defun c-unescaped-nls-in-string-p (&optional quote-pos) + ;; Return whether unescaped newlines can be inside strings. + ;; + ;; QUOTE-POS, if present, is the position of the opening quote of a string. + ;; Depending on the language, there might be a special character before it + ;; signifying the validity of such NLs. + (cond + ((null c-multiline-string-start-char) nil) + ((c-characterp c-multiline-string-start-char) + (and quote-pos + (eq (char-before quote-pos) c-multiline-string-start-char))) + (t t))) + +(defun c-multiline-string-start-is-being-detached (end) + ;; If (e.g.), the # character in Pike is being detached from the string + ;; opener it applies to, return t. Else return nil. END is the argument + ;; supplied to every before-change function. + (and (memq (char-after end) c-string-delims) + (c-characterp c-multiline-string-start-char) + (eq (char-before end) c-multiline-string-start-char))) + +(defun c-pps-to-string-delim (end) + ;; parse-partial-sexp forward to the next string quote, which is deemed to + ;; be a closing quote. Return nil. + ;; + ;; We remove string-fence syntax-table text properties from characters we + ;; pass over. + (let* ((start (point)) + (no-st-s `(0 nil nil ?\" nil nil 0 nil ,start nil nil)) + (st-s `(0 nil nil t nil nil 0 nil ,start nil nil)) + no-st-pos st-pos + ) + (parse-partial-sexp start end nil nil no-st-s 'syntax-table) + (setq no-st-pos (point)) + (goto-char start) + (while (progn + (parse-partial-sexp (point) end nil nil st-s 'syntax-table) + (c-clear-char-property (1- (point)) 'syntax-table) + (setq st-pos (point)) + (and (< (point) end) + (not (eq (char-before) ?\"))))) + (goto-char (min no-st-pos st-pos)) + nil)) + +(defun c-before-change-check-unbalanced-strings (beg end) + ;; If BEG or END is inside an unbalanced string, remove the syntax-table + ;; text property from respectively the start or end of the string. Also + ;; extend the region (c-new-BEG c-new-END) as necessary to cope with the + ;; change being the insertion of an odd number of quotes. + ;; + ;; POINT is undefined both at entry to and exit from this function, the + ;; buffer will have been widened, and match data will have been saved. + ;; + ;; This function is called exclusively as a before-change function via + ;; `c-get-state-before-change-functions'. + (c-save-buffer-state + ((end-limits + (progn + (goto-char (if (c-multiline-string-start-is-being-detached end) + (1+ end) + end)) + (c-literal-limits))) + (end-literal-type (and end-limits + (c-literal-type end-limits))) + (beg-limits + (progn + (goto-char beg) + (c-literal-limits))) + (beg-literal-type (and beg-limits + (c-literal-type beg-limits)))) + + (when (eq end-literal-type 'string) + (setq c-new-END (max c-new-END (cdr end-limits)))) + ;; It is possible the buffer change will include inserting a string quote. + ;; This could have the effect of flipping the meaning of any following + ;; quotes up until the next unescaped EOL. Also guard against the change + ;; being the insertion of \ before an EOL, escaping it. + (cond + ((c-characterp c-multiline-string-start-char) + ;; The text about to be inserted might contain a multiline string + ;; opener. Set c-new-END after anything which might be affected. + ;; Go to the end of the putative multiline string. + (goto-char end) + (c-pps-to-string-delim (point-max)) + (when (< (point) (point-max)) + (while + (and + (progn + (while + (and + (c-syntactic-re-search-forward + "\"\\|\\s|" (point-max) t t) + (progn + (c-clear-char-property (1- (point)) 'syntax-table) + (not (eq (char-before) ?\"))))) + (eq (char-before) ?\")) + (if (eq (char-before (1- (point))) + c-multiline-string-start-char) + (progn + (c-pps-to-string-delim (point-max)) + (< (point) (point-max))) + (c-pps-to-string-delim (c-point 'eoll)) + (< (point) (c-point 'eoll)))))) + (setq c-new-END (max (point) c-new-END))) + + ((< c-new-END (point-max)) + (goto-char (1+ c-new-END)) ; might be a newline. + ;; In the following regexp, the initial \n caters for a newline getting + ;; joined to a preceding \ by the removal of what comes between. + (re-search-forward "\n?\\(\\\\\\(.\\|\n\\|\r\\)\\|[^\\\n\r]\\)*" nil t) + ;; We're at an EOLL or point-max. + (setq c-new-END (min (1+ (point)) (point-max))) + ;; FIXME!!! Write a clever comment here. + (goto-char c-new-END) + (when (equal (c-get-char-property (1- (point)) 'syntax-table) '(15)) + (backward-sexp) + (c-clear-char-property (1- c-new-END) 'syntax-table) + (c-clear-char-property (point) 'syntax-table))) + + (t (if (memq (char-before c-new-END) c-string-delims) + (c-clear-char-property (1- c-new-END) 'syntax-table)))) + + (when (eq end-literal-type 'string) + (c-clear-char-property (1- (cdr end-limits)) 'syntax-table)) + + (when (eq beg-literal-type 'string) + (setq c-new-BEG (min c-new-BEG (car beg-limits))) + (c-clear-char-property (car beg-limits) 'syntax-table)))) + +(defun c-after-change-re-mark-unbalanced-strings (beg _end _old-len) + ;; Mark any unbalanced strings in the region (c-new-BEG c-new-END) with + ;; string fence syntax-table text properties. + ;; + ;; POINT is undefined both at entry to and exit from this function, the + ;; buffer will have been widened, and match data will have been saved. + ;; + ;; This function is called exclusively as an after-change function via + ;; `c-before-font-lock-functions'. + (c-save-buffer-state + ((cll (progn (goto-char c-new-BEG) + (c-literal-limits))) + (beg-literal-type (and cll (c-literal-type cll))) + (beg-limits + (cond + ((and (eq beg-literal-type 'string) + (c-unescaped-nls-in-string-p (car cll))) + (cons + (car cll) + (progn + (goto-char (1+ (car cll))) + (search-forward-regexp + (cdr (assq (char-after (car cll)) c-string-innards-re-alist)) + nil t) + (min (1+ (point)) (point-max))))) + ((and (null beg-literal-type) + (goto-char beg) + (eq (char-before) c-multiline-string-start-char) + (memq (char-after) c-string-delims)) + (cons (point) + (progn + (forward-char) + (search-forward-regexp + (cdr (assq (char-before) c-string-innards-re-alist)) nil t) + (1+ (point))))) + (cll))) + s) + (goto-char + (cond ((null beg-literal-type) + c-new-BEG) + ((eq beg-literal-type 'string) + (car beg-limits)) + (t ; comment + (cdr beg-limits)))) + (while + (and + (< (point) c-new-END) + (progn + ;; Skip over any comments before the next string. + (while (progn + (setq s (parse-partial-sexp (point) c-new-END nil + nil s 'syntax-table)) + (and (not (nth 3 s)) + (< (point) c-new-END) + (not (memq (char-before) c-string-delims))))) + ;; We're at the start of a string. + (memq (char-before) c-string-delims))) + (if (c-unescaped-nls-in-string-p (1- (point))) + (looking-at "[^\"]*") + (looking-at (cdr (assq (char-before) c-string-innards-re-alist)))) + (cond + ((memq (char-after (match-end 0)) '(?\n ?\r)) + (c-put-char-property (1- (point)) 'syntax-table '(15)) + (c-put-char-property (match-end 0) 'syntax-table '(15))) + ((or (eq (match-end 0) (point-max)) + (eq (char-after (match-end 0)) ?\\)) ; \ at EOB + (c-put-char-property (1- (point)) 'syntax-table '(15)))) + (goto-char (min (1+ (match-end 0)) (point-max))) + (setq s nil)))) + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Parsing of quotes. ;; @@ -1418,7 +1620,8 @@ Note that this is a strict tail, so won't match, e.g. \"0x....\".") ;; without an intervening call to `before-change-functions' when reverting ;; the buffer (see bug #24094). Whatever the cause, assume that the entire ;; buffer has changed. - (when (not c-just-done-before-change) + (when (and (not c-just-done-before-change) + (not (c-called-from-text-property-change-p))) (save-restriction (widen) (c-before-change (point-min) (point-max))