From ef7df187eb0b631a6909bdc02f82b3dfef0ad689 Mon Sep 17 00:00:00 2001 From: Alan Mackenzie Date: Thu, 30 Mar 2017 20:24:39 +0000 Subject: [PATCH] Fix C++ fontification problems 500 bytes after typing a space, and other bugs Also implement the "asymmetric space" rule for fontifying otherwise ambiguous declarations/expressions. * lisp/progmodes/cc-engine.el (c-before-change-check-<>-operators): Don't set c-new-BEG or c-new-END when there is no need. (c-forward-decl-or-cast-1): Add "CASE 17.5" to implement the "asymmetric space" rule. * lisp/progmodes/cc-fonts.el (c-get-fontification-context): New function, extracted from c-font-lock-declarations. Add to this function processing to make `context' 'decl for lines contained within parens when these are also declarations. (c-font-lock-declarations): Call the newly extracted function above in place of inline code. * lisp/progmodes/cc-mode.el (c-fl-decl-start): Set point before calling c-literal-start. * lisp/progmodes/cc-vars.el (c-asymmetry-fontification-flag): New user option. * doc/misc/cc-mode.texi (Misc Font Locking): New node documenting the new "asymmetric fontification" rule, including the variable c-asymmetry-fontification-flag. --- doc/misc/cc-mode.texi | 60 +++++++++- lisp/progmodes/cc-engine.el | 32 +++++- lisp/progmodes/cc-fonts.el | 216 +++++++++++++++++++++--------------- lisp/progmodes/cc-mode.el | 1 + lisp/progmodes/cc-vars.el | 12 ++ 5 files changed, 225 insertions(+), 96 deletions(-) diff --git a/doc/misc/cc-mode.texi b/doc/misc/cc-mode.texi index a29873b03b3..91e20fa7247 100644 --- a/doc/misc/cc-mode.texi +++ b/doc/misc/cc-mode.texi @@ -274,6 +274,7 @@ Font Locking * Font Locking Preliminaries:: * Faces:: * Doc Comments:: +* Misc Font Locking:: * AWK Mode Font Locking:: Configuration Basics @@ -1821,6 +1822,7 @@ sections apply to the other languages. 
* Font Locking Preliminaries:: * Faces:: * Doc Comments:: +* Misc Font Locking:: * AWK Mode Font Locking:: @end menu @@ -2023,7 +2025,7 @@ since those aren't syntactic errors in themselves. @comment !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -@node Doc Comments, AWK Mode Font Locking, Faces, Font Locking +@node Doc Comments, Misc Font Locking, Faces, Font Locking @comment node-name, next, previous, up @section Documentation Comments @cindex documentation comments @@ -2099,9 +2101,63 @@ initialization and the result is prepended. For an example, see If you add support for another doc comment style, please consider contributing it: send a note to @email{bug-cc-mode@@gnu.org}. +@comment !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +@node Misc Font Locking, AWK Mode Font Locking, Doc Comments, Font Locking +@comment node-name, next, previous, up +@section Miscellaneous Font Locking +@comment !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + +In some languages, particularly in C++, there are constructs which are +syntactically ambiguous---they could be either declarations or +expressions, and @ccmode{} cannot tell for sure which. Often such a +construct is one of the operators @samp{*} or @samp{&} surrounded by +two identifiers. + +Experience shows that very often when such a construct is a +declaration it will be written with the operator touching exactly one +of the identifiers, like: + +@example +foo *bar +@end example +or +@example +foo& bar +@end example + +. Whether such code is fontified as a declaration depends on the +setting of @code{c-asymmetry-fontification-flag}. + +@defvar c-asymmetry-fontification-flag +@vindex asymmetry-fontification-flag (c-) +When @code{c-asymmetry-fontification-flag} is non-@code{nil} (which it is by +default), code like the above, with white space either before or after +the operator, but not both, is fontified as a declaration. When the +variable is @code{nil}, such a construct gets the default face. 
+@end defvar + +When the construct is an expression there will often be white space +both before and after the operator or there will be no white space +around it at all, like: + +@example +foo * bar +@end example +or +@example +foo&bar +@end example +. + +Such code is not fontified as a declaration. (Typically, the +identifiers don't get a non-default face.) + +For clarity's sake, we emphasize that the ``asymmetry'' rule in this +section only applies when CC Mode cannot disambiguate a construct in +any other way. @comment !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -@node AWK Mode Font Locking, , Doc Comments, Font Locking +@node AWK Mode Font Locking, , Misc Font Locking, Font Locking @comment node-name, next, previous, up @section AWK Mode Font Locking @comment !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! diff --git a/lisp/progmodes/cc-engine.el b/lisp/progmodes/cc-engine.el index bdc77dc5028..de15d1d82fc 100644 --- a/lisp/progmodes/cc-engine.el +++ b/lisp/progmodes/cc-engine.el @@ -6243,9 +6243,9 @@ comment at the start of cc-engine.el for more info." (eq (char-before) ?<)) (c-backward-token-2) (when (eq (char-after) ?<) - (c-clear-<-pair-props-if-match-after beg))) + (c-clear-<-pair-props-if-match-after beg) + (setq new-beg (point)))) (c-forward-syntactic-ws) - (setq new-beg (point)) ;; ...Then the ones with < before end and > after end. (goto-char (if end-lit-limits (cdr end-lit-limits) end)) @@ -6254,9 +6254,9 @@ comment at the start of cc-engine.el for more info." (eq (char-before) ?>)) (c-end-of-current-token) (when (eq (char-before) ?>) - (c-clear->-pair-props-if-match-before end (1- (point))))) + (c-clear->-pair-props-if-match-before end (1- (point))) + (setq new-end (point)))) (c-backward-syntactic-ws) - (setq new-end (point)) ;; Extend the fontification region, if needed. (and new-beg @@ -8863,7 +8863,29 @@ comment at the start of cc-engine.el for more info." 
;; it as a declaration if "a" has been used as a type ;; somewhere else (if it's a known type we won't get here). (setq maybe-expression t) - (throw 'at-decl-or-cast t))) + (throw 'at-decl-or-cast t)) + + ;; CASE 17.5 + (when (and c-asymmetry-fontification-flag + got-prefix-before-parens + at-type + (or (not got-suffix) + at-decl-start)) + (let ((space-before-id + (save-excursion + (goto-char name-start) + (or (bolp) (memq (char-before) '(?\ ?\t))))) + (space-after-type + (save-excursion + (goto-char type-start) + (and (c-forward-type) + (progn (c-backward-syntactic-ws) t) + (or (eolp) + (memq (char-after) '(?\ ?\t))))))) + (when (not (eq (not space-before-id) + (not space-after-type))) + (setq maybe-expression t) + (throw 'at-decl-or-cast t))))) ;; CASE 18 (when (and (not (memq context '(nil top))) diff --git a/lisp/progmodes/cc-fonts.el b/lisp/progmodes/cc-fonts.el index f623b9f3332..923f077b411 100644 --- a/lisp/progmodes/cc-fonts.el +++ b/lisp/progmodes/cc-fonts.el @@ -1117,6 +1117,124 @@ casts and declarations are fontified. Used on level 2 and higher." (setq pos (point)))))) ; acts to make the `while' form continue. nil) +(defun c-get-fontification-context (match-pos not-front-decl &optional toplev) + ;; Return a cons (CONTEXT . RESTRICTED-<>-ARGLISTS) for MATCH-POS. + ;; NOT-FRONT-DECL is non-nil when a declaration later in the buffer than + ;; MATCH-POS has already been parsed. TOPLEV is non-nil when MATCH-POS is + ;; known to be at "top level", i.e. outside any braces, or directly inside a + ;; namespace, class, etc. + ;; + ;; CONTEXT is the fontification context of MATCH-POS, and is one of the + ;; following: + ;; 'decl In a comma-separated declaration context (typically + ;; inside a function declaration arglist). + ;; '<> In an angle bracket arglist. + ;; 'arglist Some other type of arglist. + ;; 'top Some other context and MATCH-POS is at the top-level (either + ;; outside any braces or directly inside a class or namespace, + ;; etc.) 
+ ;; nil Some other context or unknown context. Includes + ;; within the parens of an if, for, ... construct. + ;; 'not-decl Definitely not in a declaration. + ;; + ;; RESTRICTED-<>-ARGLISTS is non-nil when a scan of template/generic + ;; argument lists (i.e. lists enclosed by <...>) is more strict about what + ;; characters it allows within the list. + (let ((type (and (> match-pos (point-min)) + (c-get-char-property (1- match-pos) 'c-type)))) + (cond ((not (memq (char-before match-pos) '(?\( ?, ?\[ ?< ?{))) + (cons (and toplev 'top) nil)) + ;; A control flow expression or a decltype + ((and (eq (char-before match-pos) ?\() + (save-excursion + (goto-char match-pos) + (backward-char) + (c-backward-token-2) + (or (looking-at c-block-stmt-2-key) + (looking-at c-block-stmt-1-2-key) + (looking-at c-typeof-key)))) + (cons nil t)) + ;; Near BOB. + ((<= match-pos (point-min)) + (cons 'arglist t)) + ;; Got a cached hit in a declaration arglist. + ((eq type 'c-decl-arg-start) + (cons 'decl nil)) + ;; We're inside (probably) a brace list. + ((eq type 'c-not-decl) + (cons 'not-decl nil)) + ;; Inside a C++11 lambda function arglist. + ((and (c-major-mode-is 'c++-mode) + (eq (char-before match-pos) ?\() + (save-excursion + (goto-char match-pos) + (c-backward-token-2) + (and + (c-safe (goto-char (scan-sexps (point) -1))) + (c-looking-at-c++-lambda-capture-list)))) + (c-put-char-property (1- match-pos) 'c-type + 'c-decl-arg-start) + (cons 'decl nil)) + ;; We're inside a brace list. + ((and (eq (char-before match-pos) ?{) + (save-excursion + (goto-char (1- match-pos)) + (consp + (c-looking-at-or-maybe-in-bracelist)))) + (c-put-char-property (1- match-pos) 'c-type + 'c-not-decl) + (cons 'not-decl nil)) + ;; We're inside an "ordinary" open brace. + ((eq (char-before match-pos) ?{) + (cons (and toplev 'top) nil)) + ;; Inside an angle bracket arglist. 
+ ((or (eq type 'c-<>-arg-sep) + (eq (char-before match-pos) ?<)) + (cons '<> nil)) + ;; Got a cached hit in some other type of arglist. + (type + (cons 'arglist t)) + (not-front-decl + ;; MATCH-POS is within the range of a previously + ;; encountered type decl expression, so the arglist + ;; is probably one that contains declarations. + ;; However, if `c-recognize-paren-inits' is set it + ;; might also be an initializer arglist. + ;; + ;; The result of this check is cached with a char + ;; property on the match token, so that we can look + ;; it up again when refontifying single lines in a + ;; multiline declaration. + (c-put-char-property (1- match-pos) + 'c-type 'c-decl-arg-start) + (cons 'decl nil)) + ;; Got an open paren preceded by an arith operator. + ((and (eq (char-before match-pos) ?\() + (save-excursion + (and (zerop (c-backward-token-2 2)) + (looking-at c-arithmetic-op-regexp)))) + (cons nil nil)) + ;; At start of a declaration inside a declaration paren. + ((save-excursion + (and (memq (char-before match-pos) '(?\( ?\,)) + (c-go-up-list-backward match-pos) + (eq (char-after) ?\() + (let ((type (c-get-char-property (point) 'c-type))) + (or (memq type '(c-decl-arg-start c-decl-type-start)) + (and + (progn (c-backward-syntactic-ws) t) + (c-back-over-compound-identifier) + (progn + (c-backward-syntactic-ws) + (or (bobp) + (progn + (setq type (c-get-char-property (1- (point)) + 'c-type)) + (memq type '(c-decl-arg-start + c-decl-type-start)))))))))) + (cons 'decl nil)) + (t (cons 'arglist t))))) + (defun c-font-lock-declarations (limit) ;; Fontify all the declarations, casts and labels from the point to LIMIT. ;; Assumes that strings and comments have been fontified already. @@ -1231,95 +1349,15 @@ casts and declarations are fontified. Used on level 2 and higher." ;; "<" for the sake of C++-style template arglists. ;; Ignore "(" when it's part of a control flow construct ;; (e.g. "for ("). 
- (let ((type (and (> match-pos (point-min)) - (c-get-char-property (1- match-pos) 'c-type)))) - (cond ((not (memq (char-before match-pos) '(?\( ?, ?\[ ?< ?{))) - (setq context (and toplev 'top) - c-restricted-<>-arglists nil)) - ;; A control flow expression or a decltype - ((and (eq (char-before match-pos) ?\() - (save-excursion - (goto-char match-pos) - (backward-char) - (c-backward-token-2) - (or (looking-at c-block-stmt-2-key) - (looking-at c-block-stmt-1-2-key) - (looking-at c-typeof-key)))) - (setq context nil - c-restricted-<>-arglists t)) - ;; Near BOB. - ((<= match-pos (point-min)) - (setq context 'arglist - c-restricted-<>-arglists t)) - ;; Got a cached hit in a declaration arglist. - ((eq type 'c-decl-arg-start) - (setq context 'decl - c-restricted-<>-arglists nil)) - ;; We're inside (probably) a brace list. - ((eq type 'c-not-decl) - (setq context 'not-decl - c-restricted-<>-arglists nil)) - ;; Inside a C++11 lambda function arglist. - ((and (c-major-mode-is 'c++-mode) - (eq (char-before match-pos) ?\() - (save-excursion - (goto-char match-pos) - (c-backward-token-2) - (and - (c-safe (goto-char (scan-sexps (point) -1))) - (c-looking-at-c++-lambda-capture-list)))) - (setq context 'decl - c-restricted-<>-arglists nil) - (c-put-char-property (1- match-pos) 'c-type - 'c-decl-arg-start)) - ;; We're inside a brace list. - ((and (eq (char-before match-pos) ?{) - (save-excursion - (goto-char (1- match-pos)) - (consp - (c-looking-at-or-maybe-in-bracelist)))) - (setq context 'not-decl - c-restricted-<>-arglists nil) - (c-put-char-property (1- match-pos) 'c-type - 'c-not-decl)) - ;; We're inside an "ordinary" open brace. - ((eq (char-before match-pos) ?{) - (setq context (and toplev 'top) - c-restricted-<>-arglists nil)) - ;; Inside an angle bracket arglist. - ((or (eq type 'c-<>-arg-sep) - (eq (char-before match-pos) ?<)) - (setq context '<> - c-restricted-<>-arglists nil)) - ;; Got a cached hit in some other type of arglist. 
- (type - (setq context 'arglist - c-restricted-<>-arglists t)) - ((if inside-macro - (< match-pos max-type-decl-end-before-token) - (< match-pos max-type-decl-end)) - ;; The point is within the range of a previously - ;; encountered type decl expression, so the arglist - ;; is probably one that contains declarations. - ;; However, if `c-recognize-paren-inits' is set it - ;; might also be an initializer arglist. - (setq context 'decl - c-restricted-<>-arglists nil) - ;; The result of this check is cached with a char - ;; property on the match token, so that we can look - ;; it up again when refontifying single lines in a - ;; multiline declaration. - (c-put-char-property (1- match-pos) - 'c-type 'c-decl-arg-start)) - ;; Got an open paren preceded by an arith operator. - ((and (eq (char-before match-pos) ?\() - (save-excursion - (and (zerop (c-backward-token-2 2)) - (looking-at c-arithmetic-op-regexp)))) - (setq context nil - c-restricted-<>-arglists nil)) - (t (setq context 'arglist - c-restricted-<>-arglists t)))) + (let ((got-context + (c-get-fontification-context + match-pos + (< match-pos (if inside-macro + max-type-decl-end-before-token + max-type-decl-end)) + toplev))) + (setq context (car got-context) + c-restricted-<>-arglists (cdr got-context))) ;; Check we haven't missed a preceding "typedef". (when (not (looking-at c-typedef-key)) diff --git a/lisp/progmodes/cc-mode.el b/lisp/progmodes/cc-mode.el index 8326e6a6f29..20c63d4dbe2 100644 --- a/lisp/progmodes/cc-mode.el +++ b/lisp/progmodes/cc-mode.el @@ -1363,6 +1363,7 @@ Note that the style variables are always made local to the buffer." ;; This function is called indirectly from font locking stuff - either from ;; c-after-change (to prepare for after-change font-locking) or from font ;; lock context (etc.) fontification. 
+ (goto-char pos) (let ((lit-start (c-literal-start)) (new-pos pos) capture-opener diff --git a/lisp/progmodes/cc-vars.el b/lisp/progmodes/cc-vars.el index 1114b21381d..ccd4fd29940 100644 --- a/lisp/progmodes/cc-vars.el +++ b/lisp/progmodes/cc-vars.el @@ -1634,6 +1634,18 @@ names).")) :type 'c-extra-types-widget :group 'c) +(defcustom c-asymmetry-fontification-flag t + "Whether to fontify certain ambiguous constructs by white space asymmetry. + +In the fontification engine, it is sometimes impossible to determine +whether a construct is a declaration or an expression. This happens +particularly in C++, due to ambiguities in the language. When such a +construct is like \"foo * bar\" or \"foo &bar\", and this variable is non-nil +\(the default), the construct will be fontified as a declaration if there is +white space either before or after the operator, but not both." + :type 'boolean + :group 'c) + (defvar c-noise-macro-with-parens-name-re "\\<\\>") (defvar c-noise-macro-name-re "\\<\\>") -- 2.39.2