From 9b44824620fd500b9e7094bd1a8ca23608cb2e5b Mon Sep 17 00:00:00 2001 From: Alan Mackenzie Date: Sat, 18 Jun 2022 16:41:57 +0000 Subject: [PATCH] CC Mode: Add accurate handling for backslash in C line and block comments This is needed to handle the idiosyncratic meaning of backslash in comments in the C and C++ standards. * lisp/progmodes/cc-engine.el: Correct a spelling error. * lisp/progmodes/cc-mode.el (c-before-change-fix-comment-escapes) (c-after-change-fix-comment-escapes): New functions. * lisp/progmodes/cc-langs.el (c-get-state-before-change-functions): Add c-before-change-fix-comment-escapes to the C/Objc and C++ values. (c-before-font-lock-functions): Add c-after-change-fix-comment-escapes to the C/Objc and C++ values. --- lisp/progmodes/cc-engine.el | 2 +- lisp/progmodes/cc-langs.el | 8 +++- lisp/progmodes/cc-mode.el | 82 ++++++++++++++++++++++++++++++++++++- 3 files changed, 88 insertions(+), 4 deletions(-) diff --git a/lisp/progmodes/cc-engine.el b/lisp/progmodes/cc-engine.el index f9c33f51495..cfbb668baeb 100644 --- a/lisp/progmodes/cc-engine.el +++ b/lisp/progmodes/cc-engine.el @@ -6837,7 +6837,7 @@ comment at the start of cc-engine.el for more info." (let ((type (c-syntactic-content from to c-recognize-<>-arglists))) (unless (gethash type c-found-types) (puthash type t c-found-types) - (when (and (not c-record-found-types) ; Only call `c-fontify-new-fount-type' + (when (and (not c-record-found-types) ; Only call `c-fontify-new-found-type' ; when we haven't "bound" c-found-types ; to itself in c-forward-<>-arglist. (eq (string-match c-symbol-key type) 0) diff --git a/lisp/progmodes/cc-langs.el b/lisp/progmodes/cc-langs.el index 68070cd0581..c5964165c8d 100644 --- a/lisp/progmodes/cc-langs.el +++ b/lisp/progmodes/cc-langs.el @@ -458,12 +458,14 @@ so that all identifiers are recognized as words.") c-before-change-check-<>-operators c-truncate-bs-cache c-before-change-check-unbalanced-strings - c-parse-quotes-before-change) + c-parse-quotes-before-change + c-before-change-fix-comment-escapes) (c objc) '(c-extend-region-for-CPP c-depropertize-CPP c-truncate-bs-cache c-before-change-check-unbalanced-strings - c-parse-quotes-before-change) + c-parse-quotes-before-change + c-before-change-fix-comment-escapes) java '(c-parse-quotes-before-change c-before-change-check-unbalanced-strings c-before-change-check-<>-operators) @@ -500,6 +502,7 @@ parameters \(point-min) and \(point-max).") c-after-change-mark-abnormal-strings c-change-expand-fl-region) (c objc) '(c-depropertize-new-text + c-after-change-fix-comment-escapes c-after-change-escape-NL-in-string c-parse-quotes-after-change c-after-change-mark-abnormal-strings @@ -507,6 +510,7 @@ parameters \(point-min) and \(point-max).") c-neutralize-syntax-in-CPP c-change-expand-fl-region) c++ '(c-depropertize-new-text + c-after-change-fix-comment-escapes c-after-change-escape-NL-in-string c-after-change-unmark-ml-strings c-parse-quotes-after-change diff --git a/lisp/progmodes/cc-mode.el b/lisp/progmodes/cc-mode.el index ae96cdbd2fe..70fc1cb73a9 100644 --- a/lisp/progmodes/cc-mode.el +++ b/lisp/progmodes/cc-mode.el @@ -1979,6 +1979,87 @@ Note that this is a strict tail, so won't match, e.g. \"0x....\".") (defvar c-new-id-is-type nil) (make-variable-buffer-local 'c-new-id-is-type) +(defun c-before-change-fix-comment-escapes (beg end) + "Remove punctuation syntax-table text properties from C/C++ comment markers. +This is to handle the rare case of two or more backslashes at an +end of line in a // comment or the equally rare case of a +backslash preceding the terminator of a /* comment, as \\*/. + +This function is used solely as a member of +`c-get-state-before-change-functions', where it should appear +late in that variable, and it must be used only together with +`c-after-change-fix-comment-escapes'. + +Note that the function currently only handles comments beginning +with // and /*, not more generic line and block comments." + (c-save-buffer-state (end-state) + (setq end-state (c-full-pp-to-literal end)) + (when (memq (cadr end-state) '(c c++)) + (goto-char (max (- beg 2) (point-min))) + (if (eq (cadr end-state) 'c) + (when (search-forward "\\*/" + (or (cdr (caddr end-state)) (point-max)) t) + (c-clear-char-property (match-beginning 0) 'syntax-table) + (c-truncate-lit-pos-cache (match-beginning 0))) + (while (search-forward "\\\\\n" + (or (cdr (caddr end-state)) (point-max)) t) + (c-clear-char-property (match-beginning 0) 'syntax-table) + (c-truncate-lit-pos-cache (match-beginning 0))))))) + +(defun c-after-change-fix-comment-escapes (beg end _old-len) + "Apply punctuation syntax-table text properties to C/C++ comment markers. +This is to handle the rare case of two or more backslashes at an +end of line in a // comment or the equally rare case of a +backslash preceding the terminator of a /* comment, as \\*/. + +This function is used solely as a member of +`c-before-font-lock-functions', where it should appear early in +that variable, and it must be used only together with +`c-before-change-fix-comment-escapes'. + +Note that the function currently only handles comments beginning +with // and /*, not more generic line and block comments." + (c-save-buffer-state (state) + ;; We cannot use `c-full-pp-to-literal' in this function, since the + ;; `syntax-table' text properties after point are not yet in a consistent + ;; state. + (setq state (c-semi-pp-to-literal beg)) + (goto-char (if (memq (cadr state) '(c c++)) + (caddr state) + (max (- beg 2) (point-min)))) + (while + (re-search-forward "\\\\\\(\\(\\\\\n\\)\\|\\(\\*/\\)\\)" + (min (+ end 2) (point-max)) t) + (setq state (c-semi-pp-to-literal (match-beginning 0))) + (when (cond + ((eq (cadr state) 'c) + (match-beginning 3)) + ((eq (cadr state) 'c++) + (match-beginning 2))) + (c-put-char-property (match-beginning 0) 'syntax-table '(1)) + (c-truncate-lit-pos-cache (match-beginning 0)))) + + (goto-char end) + (setq state (c-semi-pp-to-literal (point))) + (cond + ((eq (cadr state) 'c) + (when (search-forward "*/" nil t) + (when (eq (char-before (match-beginning 0)) ?\\) + (c-put-char-property (1- (match-beginning 0)) 'syntax-table '(1)) + (c-truncate-lit-pos-cache (1- (match-beginning 0)))))) + ((eq (cadr state) 'c++) + (while + (progn + (end-of-line) + (and (eq (char-before) ?\\) + (progn + (when (eq (char-before (1- (point))) ?\\) + (c-put-char-property (- (point) 2) 'syntax-table '(1)) + (c-truncate-lit-pos-cache (1- (point)))) + t) + (not (eobp)))) + (forward-char)))))) + (defun c-update-new-id (end) ;; Note the bounds of any identifier that END is in or just after, in ;; `c-new-id-start' and `c-new-id-end'. Otherwise set these variables to @@ -1990,7 +2071,6 @@ Note that this is a strict tail, so won't match, e.g. \"0x....\".") c-new-id-end (and id-beg (progn (c-end-of-current-token) (point))))))) - (defun c-post-command () ;; If point was inside of a new identifier and no longer is, record that ;; fact. -- 2.39.2