From a057d41c7577c0e7089dd259fd8edf75f636c7a1 Mon Sep 17 00:00:00 2001 From: Alan Mackenzie Date: Thu, 15 Sep 2022 19:54:22 +0000 Subject: [PATCH] CC Mode: Handle C++20 modules * lisp/progmodes/cc-engine.el (c-before-after-change-check-c++-modules): New function. (c-forward-<>-arglist): Add special handling for "import <...>". * lisp/progmodes/cc-fonts.el (c-preprocessor-face-name): Add extra "fallback" face after font-lock-reference-face, namely font-lock-constant-face. (c-cpp-matchers): Don't fontify the <> delimiters for XEmacs in #include <..>. (c-basic-matchers-before): Add c-font-lock-c++-modules to the C++ value. (c-forward-c++-module-name, c-forward-c++-module-partition-name) (c-font-lock-c++-modules): New functions. * lisp/progmodes/cc-langs.el (c-get-state-before-change-functions) (c-before-font-lock-functions): Include c-before-after-change-check-c++-modules in the C++ value of these variables. (c-module-name-re): New c-lang-const/var. (c-other-decl-kwds): Add a C++ value "export". (c-<>-sexp-kwds): Add a new component c-import-<>-kwds. (c-import-<>-kwds, c-module-kwds): New c-lang-consts. (c-module-key): New c-lang-const/var. --- lisp/progmodes/cc-engine.el | 87 ++++++++++++++---- lisp/progmodes/cc-fonts.el | 173 +++++++++++++++++++++++++++++++++++- lisp/progmodes/cc-langs.el | 35 +++++++- 3 files changed, 271 insertions(+), 24 deletions(-) diff --git a/lisp/progmodes/cc-engine.el b/lisp/progmodes/cc-engine.el index 94225d6e3e9..1127ffe2498 100644 --- a/lisp/progmodes/cc-engine.el +++ b/lisp/progmodes/cc-engine.el @@ -8155,6 +8155,40 @@ multi-line strings (but not C++, for example)." (c-clear-char-property c-neutralize-pos 'syntax-table)) (c-truncate-lit-pos-cache c-neutralize-pos))) + +(defun c-before-after-change-check-c++-modules (beg end &optional _old_len) + ;; Extend the region (c-new-BEG c-new-END) as needed to enclose complete + ;; C++20 module statements.
This function is called solely from + ;; `c-get-state-before-change-functions' and `c-before-font-lock-functions' + ;; as part of the before-change and after-change processing for C++. + ;; + ;; Point is undefined both on entry and exit, and the return value has no + ;; significance. + (c-save-buffer-state (res bos lit-start) + (goto-char end) + (if (setq lit-start (c-literal-start)) + (goto-char lit-start)) + (when (>= (point) beg) + (setq res (c-beginning-of-statement-1 nil t)) ; t is IGNORE-LABELS + (setq bos (point)) + (when (and (memq res '(same previous)) + (looking-at c-module-key)) + (setq c-new-BEG (min c-new-BEG (point))) + (if (c-syntactic-re-search-forward + ";" (min (+ (point) 500) (point-max)) t) + (setq c-new-END (max c-new-END (point)))))) + (when (or (not bos) (< beg bos)) + (goto-char beg) + (when (not (c-literal-start)) + (setq res (c-beginning-of-statement-1 nil t)) + (setq bos (point)) + (when (and (memq res '(same previous)) + (looking-at c-module-key)) + (setq c-new-BEG (min c-new-BEG (point))) + (if (c-syntactic-re-search-forward + ";" (min (+ (point) 500) (point-max)) t) + (setq c-new-END (max c-new-END (point))))))))) + ;; Handling of small scale constructs like types and names. @@ -8474,25 +8508,40 @@ multi-line strings (but not C++, for example)." ;; recording of any found types that constitute an argument in ;; the arglist. (c-record-found-types (if c-record-type-identifiers t))) - (if (catch 'angle-bracket-arglist-escape - (setq c-record-found-types - (c-forward-<>-arglist-recur all-types))) - (progn - (when (consp c-record-found-types) - (let ((cur c-record-found-types)) - (while (consp (car-safe cur)) - (c-fontify-new-found-type - (buffer-substring-no-properties (caar cur) (cdar cur))) - (setq cur (cdr cur)))) - (setq c-record-type-identifiers - ;; `nconc' doesn't mind that the tail of - ;; `c-record-found-types' is t.
- (nconc c-record-found-types c-record-type-identifiers))) - t) - - (setq c-found-types old-found-types) - (goto-char start) - nil))) + ;; Special handling for C++20's "import <...>" operator. + (if (and (c-major-mode-is 'c++-mode) + (save-excursion + (and (zerop (c-backward-token-2)) + (looking-at "import\\>\\(?:[^_$]\\|$\\)")))) + (when (looking-at "<\\(?:\\\\.\\|[^\\\n\r\t>]\\)*\\(>\\)?") + (if (match-beginning 1) ; A terminated <..> + (progn + (when c-parse-and-markup-<>-arglists + (c-mark-<-as-paren (point)) + (c-mark->-as-paren (match-beginning 1)) + (c-truncate-lit-pos-cache (point))) + (goto-char (match-end 1)) + t) + nil)) + (if (catch 'angle-bracket-arglist-escape + (setq c-record-found-types + (c-forward-<>-arglist-recur all-types))) + (progn + (when (consp c-record-found-types) + (let ((cur c-record-found-types)) + (while (consp (car-safe cur)) + (c-fontify-new-found-type + (buffer-substring-no-properties (caar cur) (cdar cur))) + (setq cur (cdr cur)))) + (setq c-record-type-identifiers + ;; `nconc' doesn't mind that the tail of + ;; `c-record-found-types' is t. + (nconc c-record-found-types c-record-type-identifiers))) + t) + + (setq c-found-types old-found-types) + (goto-char start) + nil)))) (defun c-forward-<>-arglist-recur (all-types) ;; Recursive part of `c-forward-<>-arglist'. diff --git a/lisp/progmodes/cc-fonts.el b/lisp/progmodes/cc-fonts.el index f34f7f177db..c52f4a8416b 100644 --- a/lisp/progmodes/cc-fonts.el +++ b/lisp/progmodes/cc-fonts.el @@ -112,8 +112,10 @@ ;; In Emacs font-lock-builtin-face has traditionally been ;; used for preprocessor directives. 'font-lock-builtin-face) - (t - 'font-lock-reference-face))) + ((and (c-face-name-p 'font-lock-reference-face) + (eq font-lock-reference-face 'font-lock-reference-face)) + 'font-lock-reference-face) + (t 'font-lock-constant-face))) (cc-bytecomp-defvar font-lock-constant-face) @@ -558,8 +560,10 @@ stuff. Used on level 1 and higher."
(c-lang-const c-opt-cpp-prefix) re (c-lang-const c-syntactic-ws) - "\\(<[^>\n\r]*>?\\)") - `(,(+ ncle-depth re-depth sws-depth 1) + "\\(<\\([^>\n\r]*\\)>?\\)") + `(,(+ ncle-depth re-depth sws-depth + (if (featurep 'xemacs) 2 1) + ) font-lock-string-face t) `((let ((beg (match-beginning ,(+ ncle-depth re-depth sws-depth 1))) @@ -878,6 +882,10 @@ casts and declarations are fontified. Used on level 2 and higher." c-reference-face-name)) (goto-char (match-end 1)))))))))) + ;; Module declarations (e.g. in C++20). + ,@(when (c-major-mode-is 'c++-mode) + '(c-font-lock-c++-modules)) + ;; Fontify the special declarations in Objective-C. ,@(when (c-major-mode-is 'objc-mode) `(;; Fontify class names in the beginning of message expressions. @@ -1909,6 +1917,163 @@ casts and declarations are fontified. Used on level 2 and higher." (forward-char))))) ; over the terminating "]" or other close paren. nil) +(defun c-forward-c++-module-name (limit) + ;; Is there a C++20 module name at point? If so, return a cons of the start + ;; and end of that name, in which case point will be moved over the name and + ;; following whitespace. Otherwise nil will be returned and point will be + ;; unmoved. This function doesn't regard a partition as part of the name. + ;; The entire construct must end not after LIMIT. + (when (and + (looking-at c-module-name-re) + (<= (match-end 0) limit) + (not (looking-at c-keywords-regexp))) + (goto-char (match-end 0)) + (prog1 (cons (match-beginning 0) (match-end 0)) + (c-forward-syntactic-ws limit)))) + +(defun c-forward-c++-module-partition-name (limit) + ;; Is there a C++20 module partition name (starting with its colon) at + ;; point? If so return a cons of the start and end of the name, not + ;; including the colon, in which case point will be moved to after the name + ;; and following whitespace. Otherwise nil will be returned and point not + ;; moved. The entire construct must end not after LIMIT.
+ (when (and + (eq (char-after) ?:) + (progn + (forward-char) + (c-forward-syntactic-ws limit) + (looking-at c-module-name-re)) + (<= (match-end 0) limit) + (not (looking-at c-keywords-regexp))) + (goto-char (match-end 0)) + (prog1 (cons (match-beginning 0) (match-end 0)) + (c-forward-syntactic-ws limit)))) + +(defun c-font-lock-c++-modules (limit) + ;; Fontify the C++20 module stanzas, characterised by the keywords `module', + ;; `export' and `import'. Note that this has to be done by a function (as + ;; opposed to regexps) due to the presence of optional C++ attributes. + ;; + ;; This function will be called from font-lock for a region bounded by POINT + ;; and LIMIT, as though it were to identify a keyword for + ;; font-lock-keyword-face. It always returns NIL to inhibit this and + ;; prevent a repeat invocation. See elisp/lispref page "Search-based + ;; Fontification". + (while (and (< (point) limit) + (re-search-forward + "\\<\\(module\\|export\\|import\\)\\>\\(?:[^_$]\\|$\\)" + limit t)) + (goto-char (match-end 1)) + (let (name-bounds pos beg end + module-names) ; A list of conses of start and end + ; of pertinent module names + (unless (c-skip-comments-and-strings limit) + (when + (cond + ;; module foo...; Note we don't handle module; or module + ;; :private; here, since they don't really need handling. + ((save-excursion + (when (equal (match-string-no-properties 1) "export") + (c-forward-syntactic-ws limit) + (re-search-forward "\\=\\(module\\)\\>\\(?:[^_$]\\|$\\)" + limit t)) + (and (equal (match-string-no-properties 1) "module") + (< (point) limit) + (progn (c-forward-syntactic-ws limit) + (setq name-bounds (c-forward-c++-module-name + limit))) + (setq pos (point)))) + (push name-bounds module-names) + (goto-char pos) + ;; Is there a partition name?
+ (when (setq name-bounds (c-forward-c++-module-partition-name + limit)) + (push name-bounds module-names)) + t) + + ;; import + ((save-excursion + (when (equal (match-string-no-properties 1) "export") + (c-forward-syntactic-ws limit) + (re-search-forward "\\=\\(import\\)\\>\\(?:[^_$]\\|$\\)" + limit t)) + (and (equal (match-string-no-properties 1) "import") + (< (point) limit) + (progn (c-forward-syntactic-ws limit) + (setq pos (point))))) + (goto-char pos) + (cond + ;; import foo; + ((setq name-bounds (c-forward-c++-module-name limit)) + (push name-bounds module-names) + t) + ;; import :foo; + ((setq name-bounds (c-forward-c++-module-partition-name limit)) + (push name-bounds module-names) + t) + ;; import "foo"; + ((and (eq (char-after) ?\") + (setq pos (point)) + (c-safe (c-forward-sexp) t)) ; Should already have string face. + (when (eq (char-before) ?\") + (setq beg pos + end (point))) + (c-forward-syntactic-ws limit) + t) + ;; import <foo>; + ((and (looking-at "<\\(?:\\\\.\\|[^\\\n\r\t>]\\)*\\(>\\)?") + (< (match-end 0) limit)) + (setq beg (point)) + (goto-char (match-end 0)) + (when (match-end 1) + (setq end (point))) + (if (featurep 'xemacs) + (c-put-font-lock-face + (1+ beg) (if end (1- end) (point)) font-lock-string-face) + (c-put-font-lock-face + beg (or end (point)) font-lock-string-face)) + (c-forward-syntactic-ws limit) + t) + (t nil))) + + ;; export + ;; There is no fontification to be done here, but we need to + ;; skip over the declaration or declaration sequence. + ((save-excursion + (when (equal (match-string-no-properties 1) "export") + (c-forward-syntactic-ws limit) + (setq pos (point)))) + (goto-char pos) ; POS is the first token after "export". + (if (eq (char-after) ?{) + ;; Declaration sequence. + (unless (and (c-go-list-forward nil limit) + (eq (char-before) ?})) + (goto-char limit) + nil) + ;; Single declaration + (unless (c-end-of-decl-1) + (goto-char limit) + nil)))) ; Nothing more to do, here. + + ;; Optional attributes?
+ (while (and (c-looking-at-c++-attribute) + (< (match-end 0) limit)) + (goto-char (match-end 0)) + (c-forward-syntactic-ws limit)) + ;; Finally, there must be a semicolon. + (if (and (< (point) limit) + (eq (char-after) ?\;)) + (progn + (forward-char) + ;; Fontify any module names we've encountered. + (dolist (name module-names) + (c-put-font-lock-face (car name) (cdr name) + c-reference-face-name))) + ;; No semicolon, so put warning faces on any delimiters. + (when beg + (c-put-font-lock-face beg (1+ beg) font-lock-warning-face)) + (when end + (c-put-font-lock-face (1- end) end font-lock-warning-face)))))))) (c-lang-defconst c-simple-decl-matchers "Simple font lock matchers for types and declarations. These are used diff --git a/lisp/progmodes/cc-langs.el b/lisp/progmodes/cc-langs.el index bf7eee22834..d33ed4bcda5 100644 --- a/lisp/progmodes/cc-langs.el +++ b/lisp/progmodes/cc-langs.el @@ -456,6 +456,7 @@ so that all identifiers are recognized as words.") c-depropertize-CPP c-before-change-check-ml-strings c-before-change-check-<>-operators + c-before-after-change-check-c++-modules c-truncate-bs-cache c-before-change-check-unbalanced-strings c-parse-quotes-before-change @@ -516,6 +517,7 @@ parameters \(point-min) and \(point-max).") c-parse-quotes-after-change c-after-change-mark-abnormal-strings c-extend-font-lock-region-for-macros + c-before-after-change-check-c++-modules c-neutralize-syntax-in-CPP c-restore-<>-properties c-change-expand-fl-region) @@ -1018,6 +1020,16 @@ e.g. identifiers with template arguments such as \"A\" in C++." ""))) (c-lang-defvar c-identifier-key (c-lang-const c-identifier-key)) +(c-lang-defconst c-module-name-re + "This regexp matches (a component of) a module name. +Currently (2022-09) just C++ Mode uses this." + t nil + c++ (concat (c-lang-const c-symbol-key) + "\\(?:\\."
+ (c-lang-const c-symbol-key) + "\\)*")) +(c-lang-defvar c-module-name-re (c-lang-const c-module-name-re)) + (c-lang-defconst c-identifier-last-sym-match ;; This was a docstring constant in 5.30 but it's no longer used. ;; It's only kept to avoid breaking third party code. @@ -2624,6 +2636,7 @@ If any of these also are on `c-type-list-kwds', `c-ref-list-kwds', `c-<>-type-kwds', or `c-<>-arglist-kwds' then the associated clauses will be handled." t nil + c++ '("export") objc '("@class" "@defs" "@end" "@property" "@dynamic" "@synthesize" "@compatibility_alias") java '("import" "package") @@ -2937,7 +2950,8 @@ assumed to be set if this isn't nil." (c-lang-defconst c-<>-sexp-kwds ;; All keywords that can be followed by an angle bracket sexp. t (c--delete-duplicates (append (c-lang-const c-<>-type-kwds) - (c-lang-const c-<>-arglist-kwds)) + (c-lang-const c-<>-arglist-kwds) + (c-lang-const c-import-<>-kwds)) :test 'string-equal)) (c-lang-defconst c-opt-<>-sexp-key @@ -3099,6 +3113,25 @@ This construct is \" :\"." idl nil awk nil) +(c-lang-defconst c-import-<>-kwds + "Keywords which can start an expression like \"import <...>\" in C++20. +The <, and > operators are like those of #include <...>, they are +not really template operators." + t nil + c++ '("import")) + +(c-lang-defconst c-module-kwds + "The keywords which introduce module constructs in C++20 onwards." + t nil + c++ '("module" "import" "export")) + +(c-lang-defconst c-module-key + ;; Adorned regexp matching module declaration keywords, or nil if there are + ;; none. + t (if (c-lang-const c-module-kwds) + (c-make-keywords-re t (c-lang-const c-module-kwds)))) +(c-lang-defvar c-module-key (c-lang-const c-module-key)) + (c-lang-defconst c-constant-kwds "Keywords for constants." t nil -- 2.39.2