From c4d34d24e36c7f7c54cf3ec3e5d76e3e8fc005aa Mon Sep 17 00:00:00 2001 From: Alan Mackenzie Date: Thu, 12 Aug 2021 19:04:28 +0000 Subject: [PATCH] CC Mode: Enhance C++ Mode raw strings to multi-line strings for any language * lisp/progmodes/cc-defs.el (cadar, caddr, cdddr): Add defsubsts for these for when they are missing from the host Emacs. (c-point): Add new `position' 'boll "beginning of logical line". (c-clear-char-properties): Return the position of the lowest removed property. * lisp/progmodes/cc-engine.el (c-full-pp-to-literal): Fix for rare case where LIMIT < START in parse-partial-sexp. (c-old-beg-rs, c-old-end-rs, c-raw-string-end-delim-disrupted) (c-raw-string-pos, c-raw-string-in-end-delim, c-depropertize-raw-string) (c-depropertize-raw-strings-in-region, c-before-change-check-raw-strings) (c-propertize-raw-string-id, c-propertize-raw-string-opener): Old functions and variables removed or renamed "raw" -> "ml" and adapted. (c-old-beg-ml, c-old-1-beg-ml, c-old-end-ml, c-beg-pos, c-end-pos) (c-ml-string-end-delim-disrupted, c-depropertize-ml-string-delims) (c-ml-string-delims-around-point,c-position-wrt-ml-delims) (c-before-change-check-ml-strings, c-after-change-unmark-ml-strings) (c-maybe-re-mark-ml-string, c-propertize-ml-string-id) (c-propertize-ml-string-opener, c-depropertize-ml-string) (c-depropertize-ml-strings-in-region): New functions and variables adapted and possibly renamed from "raw" -> "ml". (c-ml-string-make-closer-re, c-ml-string-make-opener-re) (c-c++-make-ml-string-closer-re, c-c++-make-ml-string-opener-re) (c-get-ml-closer, c-ml-string-opener-around-point) (c-ml-string-opener-intersects-region, c-ml-string-opener-at-or-around-point) (c-ml-string-back-to-neutral, c-ml-string-in-end-delim, c-neutralize-pos) (c-neutralized-prop): New functions and variables. * lisp/progmodes/cc-fonts.el (c-basic-matchers-before): Replace c-font-lock-raw-strings with c-font-lock-ml-strings. 
(c-font-lock-ml-strings): New function taking the place of the old c-font-lock-raw-strings. * lisp/progmodes/cc-langs.el (c-get-state-before-change-functions): Move c-depropertize-CPP to the second item of the C++ entry, and replace c-before-change-check-raw-strings by c-before-change-check-ml-strings. Add a new entry for Pike Mode. (c-before-font-lock-functions): Replace c-after-change-unmark-raw-strings by c-after-change-unmark-ml-strings in the C++ entry, and add a new entry for Pike Mode. (c-ml-string-backslash-escapes, c-ml-string-non-punc-skip-chars) (c-ml-string-opener-re, c-ml-string-max-opener-len, c-ml-string-any-closer-re) (c-ml-string-max-closer-len, c-ml-string-max-closer-len-no-leader) (c-ml-string-back-closer-re, c-make-ml-string-closer-re-function) (c-make-ml-string-opener-re-function, c-ml-string-cpp-or-opener-re) (c-cpp-or-ml-match-offset): New c-lang-defconsts and c-lang-defvars. (c-multiline-string-start-char): Remove the Pike Mode setting. * lisp/progmodes/cc-mode.el (c-depropertize-CPP): Test for general ml strings rather than C++ raw strings. (c-unescaped-nls-in-string-p): Handle languages with ml strings. (c-clear-string-fences): Fix bug with wrong parenthesisation. (c-before-change-check-unbalanced-strings) (c-after-change-mark-abnormal-strings, c-after-change-escape-NL-in-string): Adapt for multi-line strings. --- lisp/progmodes/cc-defs.el | 53 +- lisp/progmodes/cc-engine.el | 1339 ++++++++++++++++++++++------------- lisp/progmodes/cc-fonts.el | 121 ++-- lisp/progmodes/cc-langs.el | 195 ++++- lisp/progmodes/cc-mode.el | 69 +- 5 files changed, 1196 insertions(+), 581 deletions(-) diff --git a/lisp/progmodes/cc-defs.el b/lisp/progmodes/cc-defs.el index 5d93435066f..01bd64cb5c3 100644 --- a/lisp/progmodes/cc-defs.el +++ b/lisp/progmodes/cc-defs.el @@ -174,6 +174,10 @@ This variant works around bugs in `eval-when-compile' in various ;;; Macros. 
+(or (fboundp 'cadar) (defsubst cadar (elt) (car (cdar elt)))) +(or (fboundp 'caddr) (defsubst caddr (elt) (car (cddr elt)))) +(or (fboundp 'cdddr) (defsubst cdddr (elt) (cdr (cddr elt)))) + (defmacro c--mapcan (fun liszt) ;; CC Mode equivalent of `mapcan' which bridges the difference ;; between the host [X]Emacsen." @@ -236,6 +240,7 @@ The current point is used if POINT isn't specified. POSITION can be one of the following symbols: `bol' -- beginning of line +`boll' -- beginning of logical line (i.e. without preceding escaped NL) `eol' -- end of line `eoll' -- end of logical line (i.e. without escaped NL) `bod' -- beginning of defun @@ -266,6 +271,15 @@ to it is returned. This function does not modify the point or the mark." (beginning-of-line) (point)))) + ((eq position 'boll) + `(save-excursion + ,@(if point `((goto-char ,point))) + (while (progn (beginning-of-line) + (when (not (bobp)) + (eq (char-before (1- (point))) ?\\))) + (backward-char)) + (point))) + ((eq position 'eol) (if (and (cc-bytecomp-fboundp 'line-end-position) (not point)) '(line-end-position) @@ -1254,6 +1268,9 @@ MODE is either a mode symbol or a list of mode symbols." ;; region that has been put with `c-put-char-property'. PROPERTY is ;; assumed to be constant. ;; + ;; The returned value is the buffer position of the lowest character + ;; whose PROPERTY was removed, or nil if there was none. + ;; ;; Note that this function does not clean up the property from the ;; lists of the `rear-nonsticky' properties in the region, if such ;; are used. Thus it should not be used for common properties like @@ -1262,20 +1279,28 @@ MODE is either a mode symbol or a list of mode symbols." ;; This macro does hidden buffer changes. (declare (debug t)) (setq property (eval property)) - (if c-use-extents - ;; XEmacs. - `(map-extents (lambda (ext ignored) - (delete-extent ext)) - nil ,from ,to nil nil ',property) - ;; Emacs. 
- (if (and (fboundp 'syntax-ppss) - (eq `,property 'syntax-table)) - `(let ((-from- ,from) (-to- ,to)) - (setq c-syntax-table-hwm - (min c-syntax-table-hwm - (c-min-property-position -from- -to- ',property))) - (remove-text-properties -from- -to- '(,property nil))) - `(remove-text-properties ,from ,to '(,property nil))))) + `(let* ((-to- ,to) + (ret (c-min-property-position ,from -to- ',property))) + (if (< ret -to-) + (progn + ,(cond + (c-use-extents + ;; XEmacs + `(map-extents (lambda (ext ignored) + (delete-extent ext)) + nil ret -to- nil nil ',property)) + ((and (fboundp 'syntax-ppss) + (eq property 'syntax-table)) + ;; Emacs 'syntax-table + `(progn + (setq c-syntax-table-hwm + (min c-syntax-table-hwm ret)) + (remove-text-properties ret -to- '(,property nil)))) + (t + ;; Emacs other property. + `(remove-text-properties ret -to- '(,property nil)))) + ret) + nil))) (defmacro c-clear-syn-tab-properties (from to) ;; Remove all occurrences of the `syntax-table' and `c-fl-syn-tab' text diff --git a/lisp/progmodes/cc-engine.el b/lisp/progmodes/cc-engine.el index 984a75c4b83..4222dbefa9d 100644 --- a/lisp/progmodes/cc-engine.el +++ b/lisp/progmodes/cc-engine.el @@ -170,6 +170,7 @@ (cc-bytecomp-defun c-clear-syn-tab) (cc-bytecomp-defun c-clear-string-fences) (cc-bytecomp-defun c-restore-string-fences) +(cc-bytecomp-defun c-remove-string-fences) ;; Make declarations for all the `c-lang-defvar' variables in cc-langs. @@ -3140,21 +3141,21 @@ comment at the start of cc-engine.el for more info." 
(setq base far-base s far-s end nil)))) - (when - (or - (and (> here base) (null end)) - (null (nth 8 s)) - (and end (>= here end)) - (not - (or - (and (nth 3 s) ; string - (not (eq (char-before here) ?\\))) - (and (nth 4 s) (not (nth 7 s)) ; Block comment - (not (memq (char-before here) - c-block-comment-awkward-chars))) - (and (nth 4 s) (nth 7 s) ; Line comment - (not (memq (char-before here) '(?\\ ?\n))))))) + (cond + ((or (and (> here base) (null end)) + (null (nth 8 s)) + (and end (>= here end))) (setq s (parse-partial-sexp base here nil nil s))) + ((or (and (nth 3 s) ; string + (eq (char-before here) ?\\)) + (and (nth 4 s) (not (nth 7 s)) ; block comment + (memq (char-before here) c-block-comment-awkward-chars)) + (and (nth 4 s) (nth 7 s) ; line comment + (memq (char-before here) '(?\\ ?\n)))) + (setq s + (if (>= here base) + (parse-partial-sexp base here nil nil s) + (parse-partial-sexp (nth 8 s) here))))) (cond ((or (nth 3 s) (and (nth 4 s) @@ -7167,554 +7168,932 @@ comment at the start of cc-engine.el for more info." (goto-char c-new-END))))) -;; Functions to handle C++ raw strings. +;; Handling of CC Mode multi-line strings. ;; -;; A valid C++ raw string looks like -;; R"()" -;; , where is an identifier from 0 to 16 characters long, not containing -;; spaces, control characters, or left/right paren. can include -;; anything which isn't the terminating )", including new lines, "s, -;; parentheses, etc. +;; By a "multi-line string" is meant a string opened by a "decorated" +;; double-quote mark, and which can continue over several lines without the +;; need to escape the newlines, terminating at a closer, a possibly +;; "decorated" double-quote mark. The string can usually contain double +;; quotes without them being quoted, whether or not backslashes quote the +;; following character being a matter of configuration. 
;; -;; CC Mode handles C++ raw strings by the use of `syntax-table' text +;; CC Mode handles multi-line strings by the use of `syntax-table' text ;; properties as follows: ;; -;; (i) On a validly terminated raw string, no `syntax-table' text properties -;; are applied to the opening and closing delimiters, but any " in the -;; contents is given the property value "punctuation" (`(1)') to prevent it -;; interacting with the "s in the delimiters. +;; (i) On a validly terminated ml string, syntax-table text-properties are +;; applied as needed to the opener, so that the " character in the opener +;; (or (usually) the first of them if there are several) retains its normal +;; syntax, and any other characters with obtrusive syntax are given +;; "punctuation" '(1) properties. Similarly, the " character in the closer +;; retains its normal syntax, and characters with obtrusive syntax are +;; "punctuated out" as before. ;; -;; The font locking routine `c-font-lock-raw-strings' (in cc-fonts.el) -;; recognizes valid raw strings, and fontifies the delimiters (apart from -;; the parentheses) with the default face and the parentheses and the -;; with font-lock-string-face. +;; The font locking routine `c-font-lock-ml-strings' (in cc-fonts.el) +;; recognizes validly terminated ml strings and fontifies (typically) the +;; innermost character of each delimiter in font-lock-string-face and the +;; rest of those delimiters in the default face. The contents, of course, +;; are in font-lock-string-face. ;; -;; (ii) A valid, but unterminated, raw string opening delimiter gets the -;; "punctuation" value (`(1)') of the `syntax-table' text property, and the -;; open parenthesis gets the "string fence" value (`(15)'). When such a -;; delimiter is found, no attempt is made in any way to "correct" any text -;; properties after the delimiter. 
+;; (ii) A valid, but unterminated, ml string's opening delimiter gets the +;; "punctuation" value (`(1)') of the `syntax-table' text property on its ", +;; and the last char of the opener gets the "string fence" value '(15). +;; (The latter takes precedence over the former.) When such a delimiter is +;; found, no attempt is made in any way to "correct" any text properties +;; after the delimiter. ;; -;; `c-font-lock-raw-strings' puts c-font-lock-warning-face on the entire -;; unmatched opening delimiter (from the R up to the open paren), and allows -;; the rest of the buffer to get font-lock-string-face, caused by the -;; unmatched "string fence" `syntax-table' text property value. +;; `c-font-lock-ml-strings' puts c-font-lock-warning-face on the entire +;; unmatched opening delimiter, and allows the tail of the buffer to get +;; font-lock-string-face, caused by the unmatched "string fence" +;; `syntax-table' text property value. ;; -;; (iii) Inside a macro, a valid raw string is handled as in (i). An -;; unmatched opening delimiter is handled slightly differently. In addition -;; to the "punctuation" and "string fence" properties on the delimiter, -;; another "string fence" `syntax-table' property is applied to the last -;; possible character of the macro before the terminating linefeed (if there -;; is such a character after the "("). This "last possible" character is +;; (iii) Inside a macro, a valid ml string is handled as in (i). An unmatched +;; opening delimiter is handled slightly differently. In addition to the +;; "punctuation" and "string fence" properties on the delimiter, another +;; "string fence" `syntax-table' property is applied to the last possible +;; character of the macro before the terminating linefeed (if there is such +;; a character after the delimiter). This "last possible" character is ;; never a backslash escaping the end of line. 
If the character preceding ;; this "last possible" character is itself a backslash, this preceding -;; character gets a "punctuation" `syntax-table' value. If the "(" is -;; already at the end of the macro, it gets the "punctuation" value, and no -;; "string fence"s are used. +;; character gets a "punctuation" `syntax-table' value. If the last +;; character of the closing delimiter is already at the end of the macro, it +;; gets the "punctuation" value, and no "string fence"s are used. ;; ;; The effect on the fontification of either of these tactics is that the ;; rest of the macro (if any) after the "(" gets font-lock-string-face, but ;; the rest of the file is fontified normally. -;; The values of the function `c-raw-string-pos' at before-change-functions' -;; BEG and END. -(defvar c-old-beg-rs nil) -(defvar c-old-end-rs nil) -;; Whether a buffer change has disrupted or will disrupt the terminating id of -;; a raw string. -(defvar c-raw-string-end-delim-disrupted nil) - -(defun c-raw-string-pos () - ;; Get POINT's relationship to any containing raw string. - ;; If point isn't in a raw string, return nil. - ;; Otherwise, return the following list: - ;; - ;; (POS B\" B\( E\) E\") - ;; - ;; , where POS is the symbol `open-delim' if point is in the opening - ;; delimiter, the symbol `close-delim' if it's in the closing delimiter, and - ;; nil if it's in the string body. B\", B\(, E\), E\" are the positions of - ;; the opening and closing quotes and parentheses of a correctly terminated - ;; raw string. (N.B.: E\) and E\" are NOT on the "outside" of these - ;; characters.) If the raw string is not terminated, E\) and E\" are set to +(defun c-ml-string-make-closer-re (_opener) + "Return c-ml-string-any-closer-re. + +This is a suitable language specific value of +`c-make-ml-string-closer-re-function' for most languages with +multi-line strings (but not C++, for example)." 
+ c-ml-string-any-closer-re) + +(defun c-ml-string-make-opener-re (_closer) + "Return c-ml-string-opener-re. + +This is a suitable language specific value of +`c-make-ml-string-opener-re-function' for most languages with +multi-line strings (but not C++, for example)." + c-ml-string-opener-re) + +(defun c-c++-make-ml-string-closer-re (opener) + "Construct a regexp for a C++ raw string closer matching OPENER." + (concat "\\()" (regexp-quote (substring opener 2 -1)) "\\(\"\\)\\)")) + +(defun c-c++-make-ml-string-opener-re (closer) + "Construct a regexp for a C++ raw string opener matching CLOSER." + (concat "\\(R\\(\"\\)" (regexp-quote (substring closer 1 -1)) "(\\)")) + +;; The positions of various components of multi-line strings surrounding BEG, +;; END and (1- BEG) (of before-change-functions) as returned by +;; `c-ml-string-delims-around-point'. +(defvar c-old-beg-ml nil) +(defvar c-old-1-beg-ml nil) ; only non-nil when `c-old-beg-ml' is nil. +(defvar c-old-end-ml nil) +;; The values of the function `c-position-wrt-ml-delims' at +;; before-change-function's BEG and END. +(defvar c-beg-pos nil) +(defvar c-end-pos nil) +;; Whether a buffer change has disrupted or will disrupt the terminator of a +;; multi-line string. +(defvar c-ml-string-end-delim-disrupted nil) + +(defun c-depropertize-ml-string-delims (string-delims) + ;; Remove any syntax-table text properties from the multi-line string + ;; delimiters specified by STRING-DELIMS, the output of + ;; `c-ml-string-delims-around-point'. 
+ (let (found) + (if (setq found (c-clear-char-properties (caar string-delims) + (cadar string-delims) + 'syntax-table)) + (c-truncate-lit-pos-cache found)) + (when (cdr string-delims) + (if (setq found (c-clear-char-properties (cadr string-delims) + (caddr string-delims) + 'syntax-table)) + (c-truncate-lit-pos-cache found))))) + +(defun c-get-ml-closer (open-delim) + ;; Return the closer, a three element dotted list of the closer's start, its + ;; end and the position of the double quote, matching the given multi-line + ;; string opener OPEN-DELIM, also such a three element dotted list. Otherwise return + ;; nil. All pertinent syntax-table text properties must be in place. + (save-excursion + (goto-char (cadr open-delim)) + (and (not (equal (c-get-char-property (1- (point)) 'syntax-table) + '(15))) + (re-search-forward (funcall c-make-ml-string-closer-re-function + (buffer-substring-no-properties + (car open-delim) (cadr open-delim))) + nil t) + (cons (match-beginning 1) + (cons (match-end 1) (match-beginning 2)))))) + +(defun c-ml-string-opener-around-point () + ;; If point is inside an ml string opener, return a dotted list of the start + ;; and end of that opener, and the position of its double-quote. That list + ;; will not include any "context characters" before or after the opener. If + ;; an opener is found, the match-data will indicate it, with (match-string + ;; 1) being the entire delimiter, and (match-string 2) the "main" double + ;; quote. Otherwise the match-data is undefined. 
+ (let ((here (point)) found) + (goto-char (max (- here (1- c-ml-string-max-opener-len)) (point-min))) + (while + (and + (setq found + (search-forward-regexp + c-ml-string-opener-re + (min (+ here (1- c-ml-string-max-opener-len)) (point-max)) + 'bound)) + (<= (match-end 1) here))) + (prog1 + (and found + (< (match-beginning 1) here) + (cons (match-beginning 1) + (cons (match-end 1) (match-beginning 2)))) + (goto-char here)))) + +(defun c-ml-string-opener-intersects-region (&optional start finish) + ;; If any part of the region [START FINISH] is inside an ml-string opener, + ;; return a dotted list of the start, end and double-quote position of that + ;; opener. That list will not include any "context characters" before or + ;; after the opener. If an opener is found, the match-data will indicate + ;; it, with (match-string 1) being the entire delimiter, and (match-string + ;; 2) the "main" double-quote. Otherwise, the match-data is undefined. + ;; Both START and FINISH default to point. FINISH may not be at an earlier + ;; buffer position than START. + (let ((here (point)) found) + (or finish (setq finish (point))) + (or start (setq start (point))) + (goto-char (max (- start (1- c-ml-string-max-opener-len)) (point-min))) + (while + (and + (setq found + (search-forward-regexp + c-ml-string-opener-re + (min (+ finish (1- c-ml-string-max-opener-len)) (point-max)) + 'bound)) + (<= (match-end 1) start))) + (prog1 + (and found + (< (match-beginning 1) finish) + (cons (match-beginning 1) + (cons (match-end 1) (match-beginning 2)))) + (goto-char here)))) + +(defun c-ml-string-opener-at-or-around-point (&optional position) + ;; If POSITION (default point) is at or inside an ml string opener, return a + ;; dotted list of the start and end of that opener, and the position of the + ;; double-quote in it. That list will not include any "context characters" + ;; before or after the opener. 
+ (let ((here (point)) + found) + (or position (setq position (point))) + (goto-char (max (- position (1- c-ml-string-max-opener-len)) (point-min))) + (while + (and + (setq found + (search-forward-regexp + c-ml-string-opener-re + (min (+ position c-ml-string-max-opener-len) (point-max)) + 'bound)) + (<= (match-end 1) position))) + (prog1 + (and found + (<= (match-beginning 1) position) + (cons (match-beginning 1) + (cons (match-end 1) (match-beginning 2)))) + (goto-char here)))) + +(defun c-ml-string-back-to-neutral (opening-point) + ;; Given OPENING-POINT, the position of the start of a multiline string + ;; opening delimiter, move point back to a neutral position within the ml + ;; string. It is assumed that point is within the innards of or the closing + ;; delimiter of the string whose opener starts at OPENING-POINT. + (let ((opener-end (save-excursion + (goto-char opening-point) + (looking-at c-ml-string-opener-re) + (match-end 1)))) + (if (not c-ml-string-back-closer-re) + (goto-char (max (c-point 'boll) opener-end)) + (re-search-backward c-ml-string-back-closer-re + (max opener-end + (c-point 'eopl)) + 'bound)))) + +(defun c-ml-string-in-end-delim (beg end open-delim) + ;; If the region (BEG END) intersects or touches a possible multiline string + ;; terminator, return a cons of the position of the start and end of the + ;; first such terminator. The syntax-table text properties must be in a + ;; consistent state when using this function. OPEN-DELIM is the three + ;; element dotted list of the start, end, and double quote position of the + ;; multiline string opener that BEG is in, or nil if it isn't in one. 
+ (save-excursion + (goto-char beg) + (when open-delim + (if (<= beg (cadr open-delim)) + (goto-char (cadr open-delim)) + (c-ml-string-back-to-neutral (car open-delim)))) + (or (and c-ml-string-back-closer-re + (looking-at c-ml-string-any-closer-re) + (eq (c-in-literal) 'string) + (goto-char (match-end 0))) + (progn + (while + (and + (search-forward-regexp + c-ml-string-any-closer-re + (min (+ end c-ml-string-max-closer-len-no-leader) (point-max)) + t) + (save-excursion + (goto-char (match-end 1)) + (not (c-in-literal))) + (<= (point) beg) + (not (save-excursion + (goto-char (match-beginning 2)) + (c-literal-start))))))) + + (unless (or (and (not (eobp)) + (<= (point) beg)) + (> (match-beginning 0) beg) + (progn (goto-char (match-beginning 2)) + (not (c-literal-start)))) + (cons (match-beginning 1) (match-end 1))))) + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(defun c-ml-string-delims-around-point () + ;; Get POINT's relationship to any containing multi-line string or such a + ;; multi-line string which point is at the end of. + ;; + ;; If point isn't thus situated, return nil. + ;; Otherwise return the following cons: + ;; + ;; (OPENER . CLOSER) + ;; + ;; , where each of OPENER and CLOSER is a dotted list of the form + ;; + ;; (START-DELIM END-DELIM . QUOTE-POSITION) + ;; + ;; , the bounds of the delimiters and the buffer position of the ?" in the + ;; delimiter. If the ml-string is not validly terminated, CLOSER is instead ;; nil. ;; ;; Note: this function is dependent upon the correct syntax-table text ;; properties being set. 
- (let ((state (c-semi-pp-to-literal (point))) - open-quote-pos open-paren-pos close-paren-pos close-quote-pos id) - (save-excursion - (when - (and - (cond - ((null (cadr state)) - (or (eq (char-after) ?\") - (search-backward "\"" (max (- (point) 17) (point-min)) t))) - ((and (eq (cadr state) 'string) - (goto-char (nth 2 state)) - (cond - ((eq (char-after) ?\")) - ((eq (char-after) ?\() - (let ((here (point))) - (goto-char (max (- (point) 18) (point-min))) - (while - (and - (search-forward-regexp - c-c++-raw-string-opener-re - (1+ here) 'limit) - (< (point) here))) - (and (eq (point) (1+ here)) - (match-beginning 1) - (goto-char (1- (match-beginning 1))))))) - (not (bobp))))) - (c-at-c++-raw-string-opener)) - (setq open-quote-pos (point) - open-paren-pos (match-end 1) - id (match-string-no-properties 1)) - (goto-char (1+ open-paren-pos)) - (when (and (not (c-get-char-property open-paren-pos 'syntax-table)) - (search-forward (concat ")" id "\"") nil t)) - (setq close-paren-pos (match-beginning 0) - close-quote-pos (1- (point)))))) - (and open-quote-pos - (list - (cond - ((<= (point) open-paren-pos) - 'open-delim) - ((and close-paren-pos - (> (point) close-paren-pos)) - 'close-delim) - (t nil)) - open-quote-pos open-paren-pos close-paren-pos close-quote-pos)))) - -(defun c-raw-string-in-end-delim (beg end) - ;; If the region (BEG END) intersects a possible raw string terminator, - ;; return a cons of the position of the ) and the position of the " in the - ;; first one found. - (save-excursion - (goto-char (max (- beg 17) (point-min))) - (while - (and - (search-forward-regexp ")\\([^ ()\\\n\r\t]\\{0,16\\}\\)\"" - (min (+ end 17) (point-max)) t) - (<= (point) beg))) - (unless (or (<= (point) beg) - (>= (match-beginning 0) end)) - (cons (match-beginning 0) (match-end 1))))) - -(defun c-depropertize-raw-string (id open-quote open-paren bound) - ;; Point is immediately after a raw string opening delimiter. 
Remove any - ;; `syntax-table' text properties associated with the delimiter (if it's - ;; unmatched) or the raw string. - ;; - ;; ID, a string, is the delimiter's identifier. OPEN-QUOTE and OPEN-PAREN - ;; are the buffer positions of the delimiter's components. BOUND is the - ;; bound for searching for a matching closing delimiter; it is usually nil, - ;; but if we're inside a macro, it's the end of the macro (i.e. just before - ;; the terminating \n). - ;; - ;; Point is moved to after the (terminated) raw string, or left after the - ;; unmatched opening delimiter, as the case may be. The return value is of - ;; no significance. - (let ((open-paren-prop (c-get-char-property open-paren 'syntax-table)) - first) - ;; If the delimiter is "unclosed", or sombody's used " in their id, clear - ;; the 'syntax-table property from all of them. - (setq first (c-clear-char-property-with-value-on-char - open-quote open-paren 'syntax-table '(1) ?\")) - (if first (c-truncate-lit-pos-cache first)) + (let ((here (point)) + (state (c-semi-pp-to-literal (point))) + open-dlist close-dlist ret found opener) (cond - ((null open-paren-prop) - ;; Should be a terminated raw string... - (when (search-forward (concat ")" id "\"") nil t) - ;; Yes, it is. :-) - ;; Clear any '(1)s from "s in the identifier. - (setq first (c-clear-char-property-with-value-on-char - (1+ (match-beginning 0)) (1- (match-end 0)) - 'syntax-table '(1) ?\")) - (if first (c-truncate-lit-pos-cache first)) - ;; Clear any random `syntax-table' text properties from the contents. 
- (let* ((closing-paren (match-beginning 0)) - (first-st - (and - (< (1+ open-paren) closing-paren) - (or - (and (c-get-char-property (1+ open-paren) 'syntax-table) - (1+ open-paren)) - (and - (setq first - (c-next-single-property-change - (1+ open-paren) 'syntax-table nil closing-paren)) - (< first closing-paren) - first))))) - (when first-st - (c-clear-char-properties first-st (match-beginning 0) - 'syntax-table) - (c-truncate-lit-pos-cache first-st)) - (when (c-get-char-property (1- (match-end 0)) 'syntax-table) - ;; Was previously an unterminated (ordinary) string - (save-excursion - (goto-char (1- (match-end 0))) - (when (c-safe (c-forward-sexp)) ; to '(1) at EOL. - (c-clear-char-property (1- (point)) 'syntax-table)) - (c-clear-char-property (1- (match-end 0)) 'syntax-table) - (c-truncate-lit-pos-cache (1- (match-end 0)))))))) - ((or (and (equal open-paren-prop '(15)) (null bound)) - (equal open-paren-prop '(1))) - ;; An unterminated raw string either not in a macro, or in a macro with - ;; the open parenthesis right up against the end of macro - (c-clear-char-property open-quote 'syntax-table) - (c-truncate-lit-pos-cache open-quote) - (c-clear-char-property open-paren 'syntax-table)) - (t - ;; An unterminated string in a macro, with at least one char after the - ;; open paren - (c-clear-char-property open-quote 'syntax-table) - (c-truncate-lit-pos-cache open-quote) - (c-clear-char-property open-paren 'syntax-table) - (c-clear-char-property-with-value (1+ open-paren) bound 'syntax-table - '(15)))))) - -(defun c-depropertize-raw-strings-in-region (start finish) - ;; Remove any `syntax-table' text properties associated with C++ raw strings - ;; contained in the region (START FINISH). Point is undefined at entry and - ;; exit, and the return value has no significance. 
- (goto-char start) - (while (and (< (point) finish) - (re-search-forward - (concat "\\(" ; 1 - c-anchored-cpp-prefix ; 2 - "\\)\\|\\(" ; 3 - c-c++-raw-string-opener-re ; 4 - "\\)") - finish t)) - (when (save-excursion - (goto-char (match-beginning 0)) (not (c-in-literal))) - (if (match-beginning 4) ; the id - ;; We've found a raw string - (c-depropertize-raw-string - (match-string-no-properties 4) ; id - (1+ (match-beginning 3)) ; open quote - (match-end 4) ; open paren - nil) ; bound - ;; We've found a CPP construct. Search for raw strings within it. - (goto-char (match-beginning 2)) ; the "#" - (c-end-of-macro) - (let ((eom (point))) - (goto-char (match-end 2)) ; after the "#". - (while (and (< (point) eom) - (c-syntactic-re-search-forward - c-c++-raw-string-opener-re eom t)) - (c-depropertize-raw-string - (match-string-no-properties 1) ; id - (1+ (match-beginning 0)) ; open quote - (match-end 1) ; open paren - eom))))))) ; bound. - -(defun c-before-change-check-raw-strings (beg end) - ;; This function clears `syntax-table' text properties from C++ raw strings - ;; whose delimiters are about to change in the region (c-new-BEG c-new-END). - ;; BEG and END are the standard arguments supplied to any before-change - ;; function. + ((or + ;; Is HERE between the start of an opener and the "? + (and (null (cadr state)) + (progn + ;; Search for the start of the opener. + (goto-char (max (- (point) (1- c-ml-string-max-opener-len)) + (point-min))) + (setq found nil) + ;; In the next loop, skip over any complete ml strings, or an ml + ;; string opener which is in a macro not containing HERE, or an + ;; apparent "opener" which is in a comment or string. 
+ (while + (and (re-search-forward c-ml-string-opener-re + (+ here (1- c-ml-string-max-opener-len)) + t) + (< (match-beginning 1) here) + (or + (save-excursion + (goto-char (match-beginning 1)) + (or (c-in-literal) + (and (c-beginning-of-macro) + (< (progn (c-end-of-macro) (point)) + here)))) + (and + (setq found (match-beginning 1)) + (<= (point) here) + (save-match-data + (re-search-forward + (funcall c-make-ml-string-closer-re-function + (match-string-no-properties 1)) + here t)) + (<= (point) here)))) + (setq found nil)) + found)) + ;; Is HERE after the "? + (and (eq (cadr state) 'string) + (goto-char (nth 2 state)) + (c-ml-string-opener-at-or-around-point))) + (setq open-dlist (cons (match-beginning 1) + (cons (match-end 1) (match-beginning 2)))) + (goto-char (cadr open-dlist)) + (setq ret + (cons open-dlist + (if (re-search-forward + (funcall c-make-ml-string-closer-re-function + (match-string-no-properties 1)) + nil t) + (cons (match-beginning 1) + (cons (match-end 1) (match-beginning 2))) + nil))) + (goto-char here) + ret) + ;; Is HERE between the " and the end of the closer? 
+ ((and (null (cadr state)) + (progn + (if (null c-ml-string-back-closer-re) + (goto-char (max (- here (1- c-ml-string-max-closer-len)) + (point-min))) + (goto-char here) + (re-search-backward c-ml-string-back-closer-re nil t)) + (re-search-forward c-ml-string-any-closer-re + (+ here -1 c-ml-string-max-closer-len-no-leader) + t)) + (>= (match-end 1) here) + (<= (match-end 2) here) + (setq close-dlist (cons (match-beginning 1) + (cons (match-end 1) (match-beginning 2)))) + (goto-char (car close-dlist)) + (setq state (c-semi-pp-to-literal (point))) + (eq (cadr state) 'string) + (goto-char (nth 2 state)) + (setq opener (c-ml-string-opener-around-point)) + (goto-char (cadr opener)) + (setq open-dlist (cons (match-beginning 1) + (cons (match-end 1) (match-beginning 2)))) + (re-search-forward (funcall c-make-ml-string-closer-re-function + (match-string-no-properties 1)) + nil t)) + (goto-char here) + (cons open-dlist close-dlist)) + + (t (goto-char here) + nil)))) + +(defun c-position-wrt-ml-delims (ml-string-delims) + ;; Given ML-STRING-DELIMS, a structure produced by + ;; `c-ml-string-delims-around-point' called at point, return one of the + ;; following indicating where POINT is with respect to the multi-line + ;; string: + ;; o - nil; not in the string. + ;; o - open-delim: in the open-delimiter. + ;; o - close-delim: in the close-delimiter. + ;; o - after-close: just after the close-delimiter + ;; o - string: inside the delimited string. + (cond + ((null ml-string-delims) + nil) + ((< (point) (cadar ml-string-delims)) + 'open-delim) + ((or (null (cdr ml-string-delims)) + (<= (point) (cadr ml-string-delims))) + 'string) + ((eq (point) (caddr ml-string-delims)) + 'after-close) + (t 'close-delim))) + +(defun c-before-change-check-ml-strings (beg end) + ;; This function clears `syntax-table' text properties from multi-line + ;; strings whose delimiters are about to change in the region (c-new-BEG + ;; c-new-END). 
BEG and END are the standard arguments supplied to any + ;; before-change function. ;; ;; Point is undefined on both entry and exit, and the return value has no ;; significance. ;; ;; This function is called as a before-change function solely due to its - ;; membership of the C++ value of `c-get-state-before-change-functions'. + ;; membership of the mode-specific value of + ;; `c-get-state-before-change-functions'. (goto-char end) - (setq c-raw-string-end-delim-disrupted nil) + (setq c-ml-string-end-delim-disrupted nil) ;; We use the following to detect a R"( being swallowed into a string by ;; the pending change. (setq c-old-END-literality (c-in-literal)) + (goto-char beg) + (setq c-old-beg-ml (c-ml-string-delims-around-point)) + (setq c-beg-pos (c-position-wrt-ml-delims c-old-beg-ml)) + (setq c-old-1-beg-ml + (and (not (or c-old-beg-ml (bobp))) + (goto-char (1- beg)) + (c-ml-string-delims-around-point))) + (goto-char end) + (setq c-old-end-ml + (if (or (eq end beg) + (and c-old-beg-ml + (>= end (caar c-old-beg-ml)) + (or (null (cdr c-old-beg-ml)) + (< end (caddr c-old-beg-ml))))) + c-old-beg-ml + (c-ml-string-delims-around-point))) + (setq c-end-pos (c-position-wrt-ml-delims c-old-end-ml)) + (c-save-buffer-state - ((term-del (c-raw-string-in-end-delim beg end)) + ((term-del (c-ml-string-in-end-delim beg end (car c-old-beg-ml))) Rquote close-quote) - (setq c-old-beg-rs (progn (goto-char beg) (c-raw-string-pos)) - c-old-end-rs (progn (goto-char end) (c-raw-string-pos))) (cond - ;; We're not changing, or we're obliterating raw strings. - ((and (null c-old-beg-rs) (null c-old-end-rs))) - ;; We're changing the putative terminating delimiter of a raw string + ;; We're not changing, or we're obliterating ml strings. + ((and (null c-beg-pos) (null c-end-pos))) + ;; We're changing the putative terminating delimiter of an ml string ;; containing BEG.
- ((and c-old-beg-rs term-del - (or (null (nth 3 c-old-beg-rs)) - (<= (car term-del) (nth 3 c-old-beg-rs)))) - (setq Rquote (1- (cadr c-old-beg-rs)) - close-quote (1+ (cdr term-del))) - (setq c-raw-string-end-delim-disrupted t) - (c-depropertize-raw-strings-in-region Rquote close-quote) + ((and c-beg-pos term-del + (or (null (cdr c-old-beg-ml)) + (<= (car term-del) (cadr c-old-beg-ml)))) + (setq Rquote (caar c-old-beg-ml) + close-quote (cdr term-del)) + (setq c-ml-string-end-delim-disrupted t) + (c-depropertize-ml-strings-in-region Rquote close-quote) (setq c-new-BEG (min c-new-BEG Rquote) c-new-END (max c-new-END close-quote))) ;; We're breaking an escaped NL in a raw string in a macro. - ((and c-old-end-rs + ((and c-old-end-ml (< beg end) (goto-char end) (eq (char-before) ?\\) (c-beginning-of-macro)) (let ((bom (point)) (eom (progn (c-end-of-macro) (point)))) - (c-depropertize-raw-strings-in-region bom eom) + (c-depropertize-ml-strings-in-region bom eom) (setq c-new-BEG (min c-new-BEG bom) c-new-END (max c-new-END eom)))) ;; We're changing only the contents of a raw string. - ((and (equal (cdr c-old-beg-rs) (cdr c-old-end-rs)) - (null (car c-old-beg-rs)) (null (car c-old-end-rs)))) + ;; Any critical deletion of "s will be handled in + ;; `c-after-change-unmark-ml-strings'. + ((and (equal c-old-beg-ml c-old-end-ml) + (eq c-beg-pos 'string) (eq c-end-pos 'string))) ((or ;; We're removing (at least part of) the R" of the starting delim of a ;; raw string: - (null c-old-beg-rs) - (and (eq beg (cadr c-old-beg-rs)) + (null c-old-beg-ml) + (and (eq beg (caar c-old-beg-ml)) (< beg end)) ;; Or we're removing the ( of the starting delim of a raw string. 
- (and (eq (car c-old-beg-rs) 'open-delim) - (or (null c-old-end-rs) - (not (eq (car c-old-end-rs) 'open-delim)) - (not (equal (cdr c-old-beg-rs) (cdr c-old-end-rs)))))) - (let ((close (nth 4 (or c-old-end-rs c-old-beg-rs)))) - (setq Rquote (1- (cadr (or c-old-end-rs c-old-beg-rs))) - close-quote (if close (1+ close) (point-max)))) - (c-depropertize-raw-strings-in-region Rquote close-quote) + (and (eq c-beg-pos 'open-delim) + (or (null c-old-end-ml) + (not (eq c-end-pos 'open-delim)) + (not (equal c-old-beg-ml c-old-end-ml)))) + ;; Or we're disrupting a starting delim by typing into it, or removing + ;; characters from it. + (and (eq c-beg-pos 'open-delim) + (eq c-end-pos 'open-delim) + (equal c-old-beg-ml c-old-end-ml))) + (let ((close (caddr (or c-old-end-ml c-old-beg-ml)))) + (setq Rquote (caar (or c-old-end-ml c-old-beg-ml)) + close-quote (or close (point-max)))) + (c-depropertize-ml-strings-in-region Rquote close-quote) (setq c-new-BEG (min c-new-BEG Rquote) - c-new-END (max c-new-END close-quote))) - ;; We're changing only the text of the identifier of the opening - ;; delimiter of a raw string. - ((and (eq (car c-old-beg-rs) 'open-delim) - (equal c-old-beg-rs c-old-end-rs)))))) - -(defun c-propertize-raw-string-id (start end) - ;; If the raw string identifier between buffer positions START and END - ;; contains any double quote characters, put a punctuation syntax-table text - ;; property on them. The return value is of no significance. - (save-excursion - (goto-char start) - (while (and (skip-chars-forward "^\"" end) - (< (point) end)) - (c-put-char-property (point) 'syntax-table '(1)) - (c-truncate-lit-pos-cache (point)) - (forward-char)))) + c-new-END (max c-new-END close-quote)))))) -(defun c-propertize-raw-string-opener (id open-quote open-paren bound) - ;; Point is immediately after a raw string opening delimiter. 
Apply any - ;; pertinent `syntax-table' text properties to the delimiter and also the - ;; raw string, should there be a valid matching closing delimiter. - ;; - ;; ID, a string, is the delimiter's identifier. OPEN-QUOTE and OPEN-PAREN - ;; are the buffer positions of the delimiter's components. BOUND is the - ;; bound for searching for a matching closing delimiter; it is usually nil, - ;; but if we're inside a macro, it's the end of the macro (i.e. the position - ;; of the closing newline). - ;; - ;; Point is moved to after the (terminated) raw string and t is returned, or - ;; it is left after the unmatched opening delimiter and nil is returned. - (c-propertize-raw-string-id (1+ open-quote) open-paren) - (prog1 - (if (search-forward (concat ")" id "\"") bound t) - (let ((end-string (match-beginning 0)) - (after-quote (match-end 0))) - (c-propertize-raw-string-id - (1+ (match-beginning 0)) (1- (match-end 0))) - (goto-char open-paren) - (while (progn (skip-syntax-forward "^\"" end-string) - (< (point) end-string)) - (c-put-char-property (point) 'syntax-table '(1)) ; punctuation - (c-truncate-lit-pos-cache (point)) - (forward-char)) - (goto-char after-quote) - t) - (c-put-char-property open-quote 'syntax-table '(1)) ; punctuation - (c-truncate-lit-pos-cache open-quote) - (c-put-char-property open-paren 'syntax-table '(15)) ; generic string - (when bound - ;; In a CPP construct, we try to apply a generic-string - ;; `syntax-table' text property to the last possible character in - ;; the string, so that only characters within the macro get - ;; "stringed out". - (goto-char bound) - (if (save-restriction - (narrow-to-region (1+ open-paren) (point-max)) - (re-search-backward - (eval-when-compile - ;; This regular expression matches either an escape pair - ;; (which isn't an escaped NL) (submatch 5) or a - ;; non-escaped character (which isn't itself a backslash) - ;; (submatch 10). 
The long preambles to these - ;; (respectively submatches 2-4 and 6-9) ensure that we - ;; have the correct parity for sequences of backslashes, - ;; etc.. - (concat "\\(" ; 1 - "\\(\\`[^\\]?\\|[^\\][^\\]\\)\\(\\\\\\(.\\|\n\\)\\)*" ; 2-4 - "\\(\\\\.\\)" ; 5 - "\\|" - "\\(\\`\\|[^\\]\\|\\(\\`[^\\]?\\|[^\\][^\\]\\)\\(\\\\\\(.\\|\n\\)\\)+\\)" ; 6-9 - "\\([^\\]\\)" ; 10 - "\\)" - "\\(\\\\\n\\)*\\=")) ; 11 - (1+ open-paren) t)) - (if (match-beginning 10) - (progn - (c-put-char-property (match-beginning 10) 'syntax-table '(15)) - (c-truncate-lit-pos-cache (match-beginning 10))) - (c-put-char-property (match-beginning 5) 'syntax-table '(1)) - (c-put-char-property (1+ (match-beginning 5)) 'syntax-table '(15)) - (c-truncate-lit-pos-cache (1+ (match-beginning 5)))) - ;; (c-put-char-property open-paren 'syntax-table '(1)) - ) - (goto-char bound)) - nil))) - -(defun c-after-change-unmark-raw-strings (beg end _old-len) - ;; This function removes `syntax-table' text properties from any raw strings +(defun c-after-change-unmark-ml-strings (beg end old-len) + ;; This function removes `syntax-table' text properties from any ml strings ;; which have been affected by the current change. These are those which - ;; have been "stringed out" and from newly formed raw strings, or any - ;; existing raw string which the new text terminates. BEG, END, and - ;; _OLD-LEN are the standard arguments supplied to any + ;; have been "stringed out" and from newly formed ml strings, or any + ;; existing ml string which the new text terminates. BEG, END, and + ;; OLD-LEN are the standard arguments supplied to any ;; after-change-function. ;; ;; Point is undefined on both entry and exit, and the return value has no ;; significance. ;; ;; This functions is called as an after-change function by virtue of its - ;; membership of the C++ value of `c-before-font-lock-functions'. + ;; membership of the mode's value of `c-before-font-lock-functions'. 
;; (when (< beg end) - (c-save-buffer-state (found eoll state id found-beg) - ;; Has an inserted " swallowed up a R"(, turning it into "...R"(? + ;; + ;; Maintainers' note: Be careful with the use of `c-old-beg-ml' and + ;; `c-old-end-ml'; since text has been inserted or removed, most of the + ;; components in these variables will no longer be valid. (caar + ;; c-old-beg-ml) is normally OK, (cadar c-old-beg-ml) often is, any others + ;; will need adjustments. + (c-save-buffer-state (found eoll state opener) + ;; Has an inserted " swallowed up a R"(, turning it into "...R"(? + (goto-char end) + (setq eoll (c-point 'eoll)) + (when (and (null c-old-END-literality) + (search-forward-regexp c-ml-string-opener-re eoll t)) + (setq state (c-semi-pp-to-literal end)) + (when (eq (cadr state) 'string) + (unwind-protect + ;; Temporarily insert a closing string delimiter.... + (progn + (goto-char end) + (cond + ((c-characterp (nth 3 (car state))) + (insert (nth 3 (car state)))) + ((eq (nth 3 (car state)) t) + (insert ?\") + (c-put-char-property end 'syntax-table '(15)))) + (c-truncate-lit-pos-cache end) + ;; ....ensure c-new-END extends right to the end of the about + ;; to be un-stringed raw string.... + (save-excursion + (goto-char (1+ (match-end 1))) ; Count inserted " too. + (setq c-new-END + (max c-new-END + (if (re-search-forward + (funcall c-make-ml-string-closer-re-function + (match-string-no-properties 1)) + nil t) + (1- (match-end 1)) ; 1- For the inserted ". + eoll)))) + + ;; ...and clear `syntax-table' text properties from the + ;; following raw strings. + (c-depropertize-ml-strings-in-region (point) (1+ eoll))) + ;; Remove the temporary string delimiter. + (goto-char end) + (delete-char 1) + (c-truncate-lit-pos-cache end)))) + + ;; Have we just created a new starting id?
+ (goto-char beg) + (setq opener + (if (eq beg end) + (c-ml-string-opener-at-or-around-point end) + (c-ml-string-opener-intersects-region beg end))) + (when + (and opener (<= (car opener) end) + (setq state (c-semi-pp-to-literal (car opener))) + (not (cadr state))) + (setq c-new-BEG (min c-new-BEG (car opener))) + (goto-char (cadr opener)) + (when (re-search-forward + (funcall c-make-ml-string-closer-re-function + (buffer-substring-no-properties + (car opener) (cadr opener))) + nil t) ; No bound + (setq c-new-END (max c-new-END (match-end 1)))) + (goto-char c-new-BEG) + (while (c-search-forward-char-property-with-value-on-char + 'syntax-table '(15) ?\" c-new-END) + (c-remove-string-fences (1- (point)))) + (c-depropertize-ml-strings-in-region c-new-BEG c-new-END)) + + ;; Have we matched up with an existing terminator by typing into or + ;; deleting from an opening delimiter? ... or by messing up a raw string's + ;; terminator so that it now matches a later terminator? + (when + (cond + ((or c-ml-string-end-delim-disrupted + (and c-old-beg-ml + (eq c-beg-pos 'open-delim))) + (goto-char (caar c-old-beg-ml))) + ((and (< beg end) + (not c-old-beg-ml) + c-old-1-beg-ml + (save-excursion + (goto-char (1- beg)) + (c-ml-string-back-to-neutral (caar c-old-1-beg-ml)) + (re-search-forward + (funcall c-make-ml-string-closer-re-function + (buffer-substring-no-properties + (caar c-old-1-beg-ml) + (cadar c-old-1-beg-ml))) + nil 'bound) + (> (point) beg))) + (goto-char (caar c-old-1-beg-ml)) + (setq c-new-BEG (min c-new-BEG (point))) + (c-truncate-lit-pos-cache (point)))) + + (when (looking-at c-ml-string-opener-re) + (goto-char (match-end 1)) + (when (re-search-forward (funcall c-make-ml-string-closer-re-function + (match-string-no-properties 1)) + nil t) ; No bound + ;; If what is to be the new delimiter was previously an unterminated + ;; ordinary string, clear the c-fl-syn-tab properties from this old + ;; string. 
+ (when (c-get-char-property (match-beginning 2) 'c-fl-syn-tab) + (c-remove-string-fences (match-beginning 2))) + (setq c-new-END (point-max)) + (c-clear-char-properties (caar (or c-old-beg-ml c-old-1-beg-ml)) + c-new-END + 'syntax-table) + (c-truncate-lit-pos-cache + (caar (or c-old-beg-ml c-old-1-beg-ml)))))) + + ;; Have we disturbed the innards of an ml string, possibly by deleting "s? + (when (and + c-old-beg-ml + (eq c-beg-pos 'string) + (eq beg end)) + (goto-char beg) + (c-ml-string-back-to-neutral (caar c-old-beg-ml)) + (let ((bound (if (cdr c-old-end-ml) + (min (+ (- (caddr c-old-end-ml) old-len) + c-ml-string-max-closer-len-no-leader) + (point-max)) + (point-max))) + (new-END-end-ml-string + (if (cdr c-old-end-ml) + (- (caddr c-old-end-ml) old-len) + (point-max)))) + (when (and + (re-search-forward + (funcall c-make-ml-string-closer-re-function + (buffer-substring-no-properties + (caar c-old-beg-ml) (cadar c-old-beg-ml))) + bound 'bound) + (< (match-end 1) new-END-end-ml-string)) + (setq c-new-END (max new-END-end-ml-string c-new-END)) + (c-clear-char-properties (caar c-old-beg-ml) c-new-END + 'syntax-table) + (setq c-new-BEG (min (caar c-old-beg-ml) c-new-BEG)) + (c-truncate-lit-pos-cache (caar c-old-beg-ml))))) + + ;; Have we terminated an existing raw string by inserting or removing + ;; text? + (when + (and + (< beg end) + (eq c-old-END-literality 'string) + c-old-beg-ml) + ;; Have we just made or modified a closing delimiter? (goto-char end) - (setq eoll (c-point 'eoll)) - (when (and (null c-old-END-literality) - (search-forward-regexp c-c++-raw-string-opener-re eoll t)) - (setq state (c-semi-pp-to-literal end)) - (when (eq (cadr state) 'string) - (unwind-protect - ;; Temporarily insert a closing string delimiter.... 
- (progn - (goto-char end) - (cond - ((c-characterp (nth 3 (car state))) - (insert (nth 3 (car state)))) - ((eq (nth 3 (car state)) t) - (insert ?\") - (c-put-char-property end 'syntax-table '(15)))) - (c-truncate-lit-pos-cache end) - ;; ....ensure c-new-END extends right to the end of the about - ;; to be un-stringed raw string.... - (save-excursion - (goto-char (match-beginning 1)) - (let ((end-bs (c-raw-string-pos))) - (setq c-new-END - (max c-new-END - (if (nth 4 end-bs) - (1+ (nth 4 end-bs)) - eoll))))) - - ;; ...and clear `syntax-table' text propertes from the - ;; following raw strings. - (c-depropertize-raw-strings-in-region (point) (1+ eoll))) - ;; Remove the temporary string delimiter. - (goto-char end) - (delete-char 1)))) - - ;; Have we just created a new starting id? - (goto-char (max (- beg 18) (point-min))) + (c-ml-string-back-to-neutral (caar c-old-beg-ml)) (while (and (setq found - (search-forward-regexp c-c++-raw-string-opener-re - c-new-END 'bound)) - (<= (match-end 0) beg))) + (search-forward-regexp + c-ml-string-any-closer-re + (+ (c-point 'eol end) + (1- c-ml-string-max-closer-len-no-leader)) + t)) + (< (match-end 1) beg)) + (goto-char (match-end 1))) (when (and found (<= (match-beginning 0) end)) - (setq c-new-BEG (min c-new-BEG (match-beginning 0))) - (c-depropertize-raw-strings-in-region c-new-BEG c-new-END)) - - ;; Have we invalidated an opening delimiter by typing into it? - (when (and c-old-beg-rs - (eq (car c-old-beg-rs) 'open-delim) - (equal (c-get-char-property (cadr c-old-beg-rs) - 'syntax-table) - '(1))) - (goto-char (1- (cadr c-old-beg-rs))) - (unless (looking-at c-c++-raw-string-opener-re) - (c-clear-char-property (1+ (point)) 'syntax-table) - (c-truncate-lit-pos-cache (1+ (point))) - (if (c-search-forward-char-property 'syntax-table '(15) - (c-point 'eol)) - (c-clear-char-property (1- (point)) 'syntax-table)))) - - ;; Have we matched up with an existing terminator by typing into an - ;; opening delimiter? ... 
or by messing up a raw string's terminator so - ;; that it now matches a later terminator? - (when - (or c-raw-string-end-delim-disrupted - (and c-old-beg-rs - (eq (car c-old-beg-rs) 'open-delim))) - (goto-char (cadr c-old-beg-rs)) - (when (looking-at c-c++-raw-string-opener-1-re) - (setq id (match-string-no-properties 1)) - (when (search-forward (concat ")" id "\"") nil t) ; No bound. - (setq c-new-END (point-max)) - (c-clear-char-properties (cadr c-old-beg-rs) c-new-END - 'syntax-table) - (c-truncate-lit-pos-cache (cadr c-old-beg-rs))))) - ;; Have we terminated an existing raw string by inserting or removing - ;; text? - (when (eq c-old-END-literality 'string) - ;; Have we just made or modified a closing delimiter? - (goto-char (max (- beg 18) (point-min))) - (while - (and - (setq found - (search-forward-regexp ")\\([^ ()\\\n\r\t]\\{0,16\\}\\)\"" - (+ end 17) t)) - (< (match-end 0) beg))) - (when (and found (<= (match-beginning 0) end)) - (setq id (match-string-no-properties 1)) - (goto-char (match-beginning 0)) + (let ((opener-re (funcall c-make-ml-string-opener-re-function + (match-string 1)))) (while (and - (setq found (search-backward (concat "R\"" id "(") nil t)) + (setq found (re-search-backward opener-re nil t)) (setq state (c-semi-pp-to-literal (point))) - (memq (nth 3 (car state)) '(t ?\")))) - (when found - (setq c-new-BEG (min (point) c-new-BEG) - c-new-END (point-max)) - (c-clear-syn-tab-properties (point) c-new-END) - (c-truncate-lit-pos-cache (point))))) - - ;; Are there any raw strings in a newly created macro? - (when (< beg end) - (goto-char beg) - (setq found-beg (point)) - (when (search-forward-regexp c-anchored-cpp-prefix end t) + (memq (nth 3 (car state)) '(t ?\"))))) + (when found + (setq c-new-BEG (min (point) c-new-BEG) + c-new-END (point-max)) + (c-clear-syn-tab-properties (point) c-new-END) + (c-truncate-lit-pos-cache (point))))) + + ;; Are there any raw strings in a newly created macro? 
+ (goto-char (c-point 'bol beg)) + (while (and (< (point) (c-point 'eol end)) + (re-search-forward c-anchored-cpp-prefix (c-point 'eol end) + 'boundt)) + (when (and (<= beg (match-end 1)) + (>= end (match-beginning 1))) + (goto-char (match-beginning 1)) (c-end-of-macro) - (c-depropertize-raw-strings-in-region found-beg (point)))))) + (c-depropertize-ml-strings-in-region + (match-beginning 1) (point)))))) -(defun c-maybe-re-mark-raw-string () +(defun c-maybe-re-mark-ml-string () ;; When this function is called, point is immediately after a " which opens - ;; a string. If this " is the characteristic " of a raw string - ;; opener, apply the pertinent `syntax-table' text properties to the - ;; entire raw string (when properly terminated) or just the delimiter - ;; (otherwise). In either of these cases, return t, otherwise return nil. - ;; - (let (in-macro macro-end) + ;; a string. If this " is the characteristic " of a multi-line string + ;; opener, apply the pertinent `syntax-table' text properties to the entire + ;; ml string (when properly terminated) or just the delimiter (otherwise). + ;; In either of these cases, return t, otherwise return nil. Point is moved + ;; to after the terminated raw string, or to the end of the containing + ;; macro, or to point-max. + ;; + (let (delim in-macro macro-end) (when (and - (eq (char-before (1- (point))) ?R) - (looking-at "\\([^ ()\\\n\r\t]\\{0,16\\}\\)(")) + (setq delim (c-ml-string-opener-at-or-around-point (1- (point)))) + (save-excursion + (goto-char (car delim)) + (not (c-in-literal)))) (save-excursion (setq in-macro (c-beginning-of-macro)) (setq macro-end (when in-macro (c-end-of-macro) - (point) ;; (min (1+ (point)) (point-max)) + (point) ))) (when (not - (c-propertize-raw-string-opener - (match-string-no-properties 1) ; id - (1- (point)) ; open quote - (match-end 1) ; open paren - macro-end)) ; bound (end of macro) or nil. + (c-propertize-ml-string-opener + delim + macro-end)) ; bound (end of macro) or nil. 
(goto-char (or macro-end (point-max)))) t))) +(defun c-propertize-ml-string-id (delim) + ;; Apply punctuation ('(1)) syntax-table text properties to the opening or + ;; closing delimiter given by the three element dotted list DELIM, such that + ;; its "total syntactic effect" is that of a single ". + (save-excursion + (goto-char (car delim)) + (while (and (skip-chars-forward c-ml-string-non-punc-skip-chars + (cadr delim)) + (< (point) (cadr delim))) + (when (not (eq (point) (cddr delim))) + (c-put-char-property (point) 'syntax-table '(1)) + (c-truncate-lit-pos-cache (point))) + (forward-char)))) + +(defun c-propertize-ml-string-opener (delim bound) + ;; DELIM defines the opening delimiter of a multi-line string in the + ;; way returned by `c-ml-string-opener-around-point'. Apply any + ;; pertinent `syntax-table' text properties to this opening delimiter and in + ;; the case of a terminated ml string, also to the innards of the string and + ;; the terminating delimiter. + ;; + ;; BOUND is the end of the macro we're inside (i.e. the position of the + ;; closing newline), if any, otherwise nil. + ;; + ;; Point is undefined at the function start. For a terminated ml string, + ;; point is left after the terminating delimiter and t is returned. For an + ;; unterminated string, point is left at the end of the macro, if any, or + ;; after the unmatched opening delimiter, and nil is returned. 
+ (c-propertize-ml-string-id delim) + (goto-char (cadr delim)) + (if (re-search-forward + (funcall c-make-ml-string-closer-re-function + (buffer-substring-no-properties + (car delim) (cadr delim))) + bound t) + + (let ((end-delim + (cons (match-beginning 1) + (cons (match-end 1) (match-beginning 2))))) + (c-propertize-ml-string-id end-delim) + (goto-char (cadr delim)) + (while (progn (skip-syntax-forward c-ml-string-non-punc-skip-chars + (car end-delim)) + (< (point) (car end-delim))) + (c-put-char-property (point) 'syntax-table '(1)) ; punctuation + (c-truncate-lit-pos-cache (point)) + (forward-char)) + (goto-char (cadr end-delim)) + t) + (c-put-char-property (cddr delim) 'syntax-table '(1)) + (c-put-char-property (1- (cadr delim)) 'syntax-table '(15)) + (c-truncate-lit-pos-cache (1- (cddr delim))) + (when bound + ;; In a CPP construct, we try to apply a generic-string + ;; `syntax-table' text property to the last possible character in + ;; the string, so that only characters within the macro get + ;; "stringed out". + (goto-char bound) + (if (save-restriction + (narrow-to-region (cadr delim) (point-max)) + (re-search-backward + (eval-when-compile + ;; This regular expression matches either an escape pair + ;; (which isn't an escaped NL) (submatch 5) or a + ;; non-escaped character (which isn't itself a backslash) + ;; (submatch 10). The long preambles to these + ;; (respectively submatches 2-4 and 6-9) ensure that we + ;; have the correct parity for sequences of backslashes, + ;; etc.. 
+ (concat "\\(" ; 1 + "\\(\\`[^\\]?\\|[^\\][^\\]\\)\\(\\\\\\(.\\|\n\\)\\)*" ; 2-4 + "\\(\\\\.\\)" ; 5 + "\\|" + "\\(\\`\\|[^\\]\\|\\(\\`[^\\]?\\|[^\\][^\\]\\)\\(\\\\\\(.\\|\n\\)\\)+\\)" ; 6-9 + "\\([^\\]\\)" ; 10 + "\\)" + "\\(\\\\\n\\)*\\=")) ; 11 + (cadr delim) t)) + (if (match-beginning 10) + (progn + (c-put-char-property (match-beginning 10) 'syntax-table '(15)) + (c-truncate-lit-pos-cache (match-beginning 10))) + (c-put-char-property (match-beginning 5) 'syntax-table '(1)) + (c-put-char-property (1+ (match-beginning 5)) 'syntax-table '(15)) + (c-truncate-lit-pos-cache (match-beginning 5)))) + (goto-char bound)) + nil)) + +(defvar c-neutralize-pos nil) + ;; Buffer position of character neutralized by punctuation syntax-table + ;; text property ('(1)), or nil if there's no such character. +(defvar c-neutralized-prop nil) + ;; syntax-table text property that was on the character at + ;; `c-neutralize-pos' before it was replaced with '(1), or nil if none. + +(defun c-depropertize-ml-string (string-delims bound) + ;; Remove any `syntax-table' text properties associated with the opening + ;; delimiter of a multi-line string (if it's unmatched) or with the entire + ;; string. Exception: A single punctuation ('(1)) property will be left on + ;; a string character to make the entire set of multi-line strings + ;; syntactically neutral. This is done using the global variable + ;; `c-neutralize-pos', the position of this property (or nil if there is + ;; none). + ;; + ;; STRING-DELIMS, of the form of the output from + ;; `c-ml-string-delims-around-point' defines the current ml string. BOUND + ;; is the bound for searching for a matching closing delimiter; it is + ;; usually nil, but if we're inside a macro, it's the end of the macro + ;; (i.e. just before the terminating \n). + ;; + ;; Point is undefined on input, and is moved to after the (terminated) raw + ;; string, or left after the unmatched opening delimiter, as the case may + ;; be. 
The return value is of no significance. + + ;; Handle the special case of a closing " previously having been an + ;; unterminated ordinary string. + (when + (and + (cdr string-delims) + (equal (c-get-char-property (cdddr string-delims) ; pos of closing ". + 'syntax-table) + '(15))) + (goto-char (cdddr string-delims)) + (when (c-safe (c-forward-sexp)) ; To '(15) at EOL. + (c-clear-char-property (1- (point)) 'syntax-table) + (c-truncate-lit-pos-cache (1- (point))))) + ;; The '(15) in the closing delimiter will be cleared by the following. + + (c-depropertize-ml-string-delims string-delims) + (let ((bound1 (if (cdr string-delims) + (caddr string-delims) ; end of closing delimiter. + bound)) + first s) + (if (and + bound1 + (setq first (c-clear-char-properties (cadar string-delims) bound1 + 'syntax-table))) + (c-truncate-lit-pos-cache first)) + (setq s (parse-partial-sexp (or c-neutralize-pos (caar string-delims)) + (or bound1 (point-max)))) + (cond + ((not (nth 3 s))) ; Nothing changed by this ml-string. + ((not c-neutralize-pos) ; "New" unbalanced quote in this ml-s. + (setq c-neutralize-pos (nth 8 s)) + (setq c-neutralized-prop (c-get-char-property c-neutralize-pos + 'syntax-table)) + (c-put-char-property c-neutralize-pos 'syntax-table '(1)) + (c-truncate-lit-pos-cache c-neutralize-pos)) + ((eq (nth 3 s) (char-after c-neutralize-pos)) + ;; New unbalanced quote balances old one. + (if c-neutralized-prop + (c-put-char-property c-neutralize-pos 'syntax-table + c-neutralized-prop) + (c-clear-char-property c-neutralize-pos 'syntax-table)) + (c-truncate-lit-pos-cache c-neutralize-pos) + (setq c-neutralize-pos nil)) + ;; New unbalanced quote doesn't balance old one. Nothing to do. + ))) + +(defun c-depropertize-ml-strings-in-region (start finish) + ;; Remove any `syntax-table' text properties associated with multi-line + ;; strings contained in the region (START FINISH). Point is undefined at + ;; entry and exit, and the return value has no significance. 
+ (setq c-neutralize-pos nil) + (goto-char start) + (while (and (< (point) finish) + (re-search-forward + c-ml-string-cpp-or-opener-re + finish t)) + (if (match-beginning (+ c-cpp-or-ml-match-offset 1)) ; opening delimiter + ;; We've found a raw string + (let ((open-delim + (cons (match-beginning (+ c-cpp-or-ml-match-offset 1)) + (cons (match-end (+ c-cpp-or-ml-match-offset 1)) + (match-beginning (+ c-cpp-or-ml-match-offset 2)))))) + (c-depropertize-ml-string + (cons open-delim + (when + (and + (re-search-forward + (funcall c-make-ml-string-closer-re-function + (match-string-no-properties + (+ c-cpp-or-ml-match-offset 1))) + (min (+ finish c-ml-string-max-closer-len-no-leader) + (point-max)) + t) + (<= (match-end 1) finish)) + (cons (match-beginning 1) + (cons (match-end 1) (match-beginning 2))))) + nil)) ; bound + ;; We've found a CPP construct. Search for raw strings within it. + (goto-char (match-beginning 2)) ; the "#" + (c-end-of-macro) + (let ((eom (point))) + (goto-char (match-end 2)) ; after the "#". + (while (and (< (point) eom) + (c-syntactic-re-search-forward + c-ml-string-opener-re eom t)) + (save-excursion + (let ((open-delim (cons (match-beginning 1) + (cons (match-end 1) + (match-beginning 2))))) + (c-depropertize-ml-string + (cons open-delim + (when (re-search-forward + (funcall c-make-ml-string-closer-re-function + (match-string-no-properties 1)) + eom t) + (cons (match-beginning 1) + (cons (match-end 1) (match-beginning 2))))) + eom))))))) ; bound. + (when c-neutralize-pos + (if c-neutralized-prop + (c-put-char-property c-neutralize-pos 'syntax-table + c-neutralized-prop) + (c-clear-char-property c-neutralize-pos 'syntax-table)) + (c-truncate-lit-pos-cache c-neutralize-pos))) + ;; Handling of small scale constructs like types and names. 
diff --git a/lisp/progmodes/cc-fonts.el b/lisp/progmodes/cc-fonts.el index a7c87125cdd..3c429155abb 100644 --- a/lisp/progmodes/cc-fonts.el +++ b/lisp/progmodes/cc-fonts.el @@ -781,9 +781,9 @@ casts and declarations are fontified. Used on level 2 and higher." ;; Invalid single quotes. c-font-lock-invalid-single-quotes - ;; Fontify C++ raw strings. - ,@(when (c-major-mode-is 'c++-mode) - '(c-font-lock-raw-strings)) + ;; Fontify multiline strings. + ,@(when (c-lang-const c-ml-string-opener-re) + '(c-font-lock-ml-strings)) ;; Fontify keyword constants. ,@(when (c-lang-const c-constant-kwds) @@ -1737,8 +1737,8 @@ casts and declarations are fontified. Used on level 2 and higher." (c-font-lock-declarators limit t in-typedef (not (c-bs-at-toplevel-p (point))))))))))) -(defun c-font-lock-raw-strings (limit) - ;; Fontify C++ raw strings. +(defun c-font-lock-ml-strings (limit) + ;; Fontify multi-line strings. ;; ;; This function will be called from font-lock for a region bounded by POINT ;; and LIMIT, as though it were to identify a keyword for @@ -1748,52 +1748,75 @@ casts and declarations are fontified. Used on level 2 and higher." (let* ((state (c-semi-pp-to-literal (point))) (string-start (and (eq (cadr state) 'string) (car (cddr state)))) - (raw-id (and string-start - (c-at-c++-raw-string-opener string-start) - (match-string-no-properties 1))) - (content-start (and raw-id (point)))) + (open-delim (and string-start + (save-excursion + (goto-char (1+ string-start)) + (c-ml-string-opener-around-point)))) + (string-delims (and open-delim + (cons open-delim (c-get-ml-closer open-delim)))) + found) ;; We go round the next loop twice per raw string, once for each "end". 
(while (< (point) limit) - (if raw-id - ;; Search for the raw string end delimiter - (progn - (when (search-forward-regexp (concat ")\\(" (regexp-quote raw-id) "\\)\"") - limit 'limit) - (c-put-font-lock-face content-start (match-beginning 1) - 'font-lock-string-face) - (c-remove-font-lock-face (match-beginning 1) (point))) - (setq raw-id nil)) - ;; Search for the start of a raw string. - (when (search-forward-regexp - "R\\(\"\\)\\([^ ()\\\n\r\t]\\{0,16\\}\\)(" limit 'limit) - (when - ;; Make sure we're not in a comment or string. - (and - (not (memq (c-get-char-property (match-beginning 0) 'face) - '(font-lock-comment-face font-lock-comment-delimiter-face - font-lock-string-face))) - (or (and (eobp) - (eq (c-get-char-property (1- (point)) 'face) - 'font-lock-warning-face)) - (not (eq (c-get-char-property (point) 'face) 'font-lock-comment-face)) - ;; (eq (c-get-char-property (point) 'face) 'font-lock-string-face) - (and (equal (c-get-char-property (match-end 2) 'syntax-table) '(1)) - (equal (c-get-char-property (match-beginning 1) 'syntax-table) - '(1))))) - (let ((paren-prop (c-get-char-property (1- (point)) 'syntax-table))) - (if paren-prop - (progn - (c-put-font-lock-face (match-beginning 0) (match-end 0) - 'font-lock-warning-face) - (when - (and - (equal paren-prop '(15)) - (not (c-search-forward-char-property 'syntax-table '(15) limit))) - (goto-char limit))) - (c-remove-font-lock-face (match-beginning 0) (match-end 2)) - (setq raw-id (match-string-no-properties 2)) - (setq content-start (match-end 0))))))))) - nil) + (cond + ;; Point is not in an ml string + ((not string-delims) + (while (and (setq found (re-search-forward c-ml-string-opener-re + limit 'limit)) + (> (match-beginning 0) (point-min)) + (memq (c-get-char-property (1- (match-beginning 0)) 'face) + '(font-lock-comment-face font-lock-string-face + font-lock-comment-delimiter-face)))) + (when found + (setq open-delim (cons (match-beginning 1) + (cons (match-end 1) (match-beginning 2))) + 
string-delims (cons open-delim (c-get-ml-closer open-delim))) + (goto-char (caar string-delims)))) + + ;; Point is in the body of an ml string. + ((and string-delims + (>= (point) (cadar string-delims)) + (or (not (cdr string-delims)) + (< (point) (cadr string-delims)))) + (if (cdr string-delims) + (goto-char (cadr string-delims)) + (if (equal (c-get-char-property (1- (cadar string-delims)) + 'syntax-table) + '(15)) ; "Always" the case. + ;; The next search should be successful for an unterminated ml + ;; string inside a macro, but not for any other unterminated + ;; string. + (progn + (or (c-search-forward-char-property 'syntax-table '(15) limit) + (goto-char limit)) + (setq string-delims nil)) + (c-benign-error "Missing '(15) syntax-table property at %d" + (1- (cadar string-delims))) + (setq string-delims nil)))) + + ;; Point is at or in a closing delimiter + ((and string-delims + (cdr string-delims) + (>= (point) (cadr string-delims))) + (c-put-font-lock-face (cadr string-delims) (1+ (cadr string-delims)) + 'font-lock-string-face) + (c-remove-font-lock-face (1+ (cadr string-delims)) + (caddr string-delims)) + (goto-char (caddr string-delims)) + (setq string-delims nil)) + + ;; point is at or in an opening delimiter. + (t + (if (cdr string-delims) + (progn + (c-remove-font-lock-face (caar string-delims) + (1- (cadar string-delims))) + (c-put-font-lock-face (1- (cadar string-delims)) + (cadar string-delims) + 'font-lock-string-face)) + (c-put-font-lock-face (caar string-delims) (cadar string-delims) + 'font-lock-warning-face)) + (goto-char (cadar string-delims))))) + nil)) (defun c-font-lock-c++-lambda-captures (limit) ;; Fontify the lambda capture component of C++ lambda declarations. 
diff --git a/lisp/progmodes/cc-langs.el b/lisp/progmodes/cc-langs.el index 35efadfd8d8..0b125bc43fa 100644 --- a/lisp/progmodes/cc-langs.el +++ b/lisp/progmodes/cc-langs.el @@ -453,9 +453,9 @@ so that all identifiers are recognized as words.") ;; The value here may be a list of functions or a single function. t 'c-before-change-check-unbalanced-strings c++ '(c-extend-region-for-CPP - c-before-change-check-raw-strings - c-before-change-check-<>-operators c-depropertize-CPP + c-before-change-check-ml-strings + c-before-change-check-<>-operators c-truncate-bs-cache c-before-change-check-unbalanced-strings c-parse-quotes-before-change) @@ -467,6 +467,8 @@ so that all identifiers are recognized as words.") java '(c-parse-quotes-before-change c-before-change-check-unbalanced-strings c-before-change-check-<>-operators) + pike '(c-before-change-check-ml-strings + c-before-change-check-unbalanced-strings) awk 'c-awk-record-region-clear-NL) (c-lang-defvar c-get-state-before-change-functions (let ((fs (c-lang-const c-get-state-before-change-functions))) @@ -506,7 +508,7 @@ parameters \(point-min) and \(point-max).") c-change-expand-fl-region) c++ '(c-depropertize-new-text c-after-change-escape-NL-in-string - c-after-change-unmark-raw-strings + c-after-change-unmark-ml-strings c-parse-quotes-after-change c-after-change-mark-abnormal-strings c-extend-font-lock-region-for-macros @@ -519,6 +521,11 @@ parameters \(point-min) and \(point-max).") c-after-change-mark-abnormal-strings c-restore-<>-properties c-change-expand-fl-region) + pike '(c-depropertize-new-text + c-after-change-escape-NL-in-string + c-after-change-unmark-ml-strings + c-after-change-mark-abnormal-strings + c-change-expand-fl-region) awk '(c-depropertize-new-text c-awk-extend-and-syntax-tablify-region)) (c-lang-defvar c-before-font-lock-functions @@ -620,6 +627,176 @@ Note that to set up a language to use this, additionally: '(?\"))) (c-lang-defvar c-string-delims (c-lang-const c-string-delims)) + +;; The next 
section of the code defines multi-line ("ml") strings for each +;; language. By default, there are no ml strings in a language. To configure +;; them, set each needed lang const in the section. See further details in +;; cc-engine.el (search for "Handling of CC Mode multi-line strings."). +(c-lang-defconst c-ml-string-backslash-escapes + ;; N.B. if `c-ml-string-backslash-escapes' is non-nil, you probably need a + ;; `c-ml-string-any-closer-re' that scans backslashed characters, etc. + "If non-nil, a \\ character escapes the next character in a ml string. +Otherwise such a \\ will be marked to be handled as any other character." + t nil + pike t + ) + +(c-lang-defconst c-ml-string-non-punc-skip-chars + ;; A `skip-chars-forward' argument which skips over all ml string characters + ;; which don't need to be marked with punctuation ('(1)) syntax. + t (if (c-lang-const c-ml-string-backslash-escapes) + "^\"" + "^\"\\")) +(c-lang-defvar c-ml-string-non-punc-skip-chars + (c-lang-const c-ml-string-non-punc-skip-chars)) + +(c-lang-defconst c-ml-string-opener-re + "If non-nil, a regexp that matches a multi-line string opener. +It may also match context. + +Such an opener must be at least 2 characters long, and must +contain a \" character. (match-string 1) matches the actual +delimiter and (match-string 2) matches the actual \". If a +delimiter contains several \"s, it is recommended to configure +the first of them as \"the\" \"." + t nil + pike "\\(#\\(\"\\)\\)" + c++ "\\(R\\(\"\\)[^ ()\\\n\r\t]\\{0,16\\}(\\)") +(c-lang-defvar c-ml-string-opener-re (c-lang-const c-ml-string-opener-re)) + +(c-lang-defconst c-ml-string-max-opener-len + "If non-nil, the maximum length of a multi-line string opener." + t nil + pike 2 + c++ 19) +(c-lang-defvar c-ml-string-max-opener-len + (c-lang-const c-ml-string-max-opener-len)) + +(c-lang-defconst c-ml-string-any-closer-re + "If non-nil, a regexp that matches any multi-line string closer. +It may also match context. 
+ +A search for this regexp starting at the end of the corresponding +opener must find the first closer as the first match. + +Such a closer must include a \" character. (match-string 1) +matches the actual delimiter and (match-string 2) matches the +actual \". If a delimiter contains several \"s, it is +recommended to regard the last of them as \"the\" \"." + t nil + pike "\\(?:\\=\\|[^\\\"]\\)\\(?:\\\\.\\)*\\(\\(\"\\)\\)" + c++ "\\()[^ ()\\\n\r\t]\\{0,16\\}\\(\"\\)\\)") +;; csharp "\\(?:\\=\\|[^\"]\\)\\(?:\"\"\\)*\\(\\(\"\\)\\)\\(?:[^\"]\\|\\'\\)" +(c-lang-defvar c-ml-string-any-closer-re + (c-lang-const c-ml-string-any-closer-re)) + +(c-lang-defconst c-ml-string-max-closer-len + "If non-nil, the maximum length of a multi-line string closer. +This must include the length of any \"context trailer\" following +the actual closer and any \"context leader\" preceding it. This +variable is ignored when `c-ml-string-back-closer-re' is non-nil." + t nil + c++ 18) +(c-lang-defvar c-ml-string-max-closer-len + (c-lang-const c-ml-string-max-closer-len)) + +(c-lang-defconst c-ml-string-max-closer-len-no-leader + "If non-nil, the maximum length of a ml string closer without its leader. +By \"leader\" is meant the context bytes preceding the actual +multi-line string closer, that part of +`c-ml-string-any-closer-re''s match preceding (match-beginning 1)." + t nil + pike 1 + ;; 2 + ;; 3 + c++ 18) +(c-lang-defvar c-ml-string-max-closer-len-no-leader + (c-lang-const c-ml-string-max-closer-len-no-leader)) + +(c-lang-defconst c-ml-string-back-closer-re + "A regexp to move back out of a putative ml closer point is in. + +This variable need only be non-nil for languages with multi-line +string closers that can contain an indefinite length \"leader\" +preceding the actual closer. It was designed for formats where +an unbounded number of \\s or \"s might precede the closer +proper, for example in Pike Mode or csharp-mode. 
+ +If point is in a putative multi-line string closer, a backward +regexp search with `c-ml-string-back-closer-re' will leave point +in a \"safe place\", from where a forward regexp search with +`c-ml-string-any-closer-re' can test whether the original +position was inside an actual closer. + +When non-nil, this variable should end in \"\\\\\\==\". Note that +such a backward search will match a minimal string, so a +\"context character\" is probably needed at the start of the +regexp. The value for csharp-mode would be something like +\"\\\\(?:\\\\`\\\\|[^\\\"]\\\\)\\\"*\\\\\\==\"." + t nil + pike "\\(?:\\`\\|[^\\\"]\\)\\(?:\\\\.\\)*\\=" + ;;pike ;; 2 + ;; "\\(?:\\`\\|[^\"]\\)\"*\\=" + ) +(c-lang-defvar c-ml-string-back-closer-re + (c-lang-const c-ml-string-back-closer-re)) + +(c-lang-defconst c-make-ml-string-closer-re-function + "If non-nil, a function which creates a closer regexp matching an opener. + +Such a function is given one argument, a multi-line opener (a +string), and returns a regexp which will match the corresponding +closer. When this regexp matches, (match-string 1) should be the +actual closing delimiter, and (match-string 2) the \"active\" \" +it contains. + +A forward regexp search for this regexp starting at the end of +the opener must find the closer as its first match." + t (if (c-lang-const c-ml-string-any-closer-re) + 'c-ml-string-make-closer-re) + c++ 'c-c++-make-ml-string-closer-re) +(c-lang-defvar c-make-ml-string-closer-re-function + (c-lang-const c-make-ml-string-closer-re-function)) + +(c-lang-defconst c-make-ml-string-opener-re-function + "If non-nil, a function which creates an opener regexp matching a closer. + +Such a function is given one argument, a multi-line closer (a +string), and returns a regexp which will match the corresponding +opener. When this regexp matches, (match-string 1) should be the +actual opening delimiter, and (match-string 2) the \"active\" \" +it contains. 
+ +A backward regexp search for this regexp starting at the start of +the closer might not find the opener as its first match, should +there be copies of the opener contained in the multi-line string." + t (if (c-lang-const c-ml-string-opener-re) + 'c-ml-string-make-opener-re) + c++ 'c-c++-make-ml-string-opener-re) +(c-lang-defvar c-make-ml-string-opener-re-function + (c-lang-const c-make-ml-string-opener-re-function)) + +(c-lang-defconst c-ml-string-cpp-or-opener-re + ;; A regexp which matches either a macro or a multi-line string opener. + t (concat "\\(" + (or (c-lang-const c-anchored-cpp-prefix) "\\`a\\`") + "\\)\\|\\(" + (or (c-lang-const c-ml-string-opener-re) "\\`a\\`") + "\\)")) +(c-lang-defvar c-ml-string-cpp-or-opener-re + (c-lang-const c-ml-string-cpp-or-opener-re)) + +(c-lang-defconst c-cpp-or-ml-match-offset + ;; The offset to be added onto match numbers for a multi-line string in + ;; matches for `c-ml-string-cpp-or-opener-re'. + t (if (c-lang-const c-anchored-cpp-prefix) + (+ 2 (regexp-opt-depth (c-lang-const c-anchored-cpp-prefix))) + 2)) +(c-lang-defvar c-cpp-or-ml-match-offset + (c-lang-const c-cpp-or-ml-match-offset)) +;; End of ml string section. + + (c-lang-defconst c-has-quoted-numbers "Whether the language has numbers quoted like 4'294'967'295." t nil @@ -860,9 +1037,15 @@ literals." "Set if the language supports multiline string literals without escaped newlines. If t, all string literals are multiline. If a character, only literals where the open quote is immediately preceded by that -literal are multiline." - t nil - pike ?#) +literal are multiline. + +Note that from CC Mode 5.36, this character use is obsolete, +having been superseded by the \"multi-line string\" mechanism. +If both mechanisms are set for a language, the newer one prevails +over the old `c-multiline-string-start-char'. See the variables +in the page containing `c-ml-string-opener-re' in cc-langs.el for +further directions." 
+ t nil) (c-lang-defvar c-multiline-string-start-char (c-lang-const c-multiline-string-start-char)) diff --git a/lisp/progmodes/cc-mode.el b/lisp/progmodes/cc-mode.el index 51085495bd8..a5df8449ea0 100644 --- a/lisp/progmodes/cc-mode.el +++ b/lisp/progmodes/cc-mode.el @@ -1003,8 +1003,8 @@ Note that the style variables are always made local to the buffer." (goto-char (match-beginning 1)) (setq m-beg (point)) (c-end-of-macro) - (when (c-major-mode-is 'c++-mode) - (save-excursion (c-depropertize-raw-strings-in-region m-beg (point)))) + (when c-ml-string-opener-re + (save-excursion (c-depropertize-ml-strings-in-region m-beg (point)))) (c-clear-char-property-with-value m-beg (point) 'syntax-table '(1))) (while (and (< (point) end) @@ -1014,8 +1014,8 @@ Note that the style variables are always made local to the buffer." (setq m-beg (point)) (c-end-of-macro)) (when (and ss-found (> (point) end)) - (when (c-major-mode-is 'c++-mode) - (save-excursion (c-depropertize-raw-strings-in-region m-beg (point)))) + (when c-ml-string-opener-re + (save-excursion (c-depropertize-ml-strings-in-region m-beg (point)))) (c-clear-char-property-with-value m-beg (point) 'syntax-table '(1))) (while (and (< (point) c-new-END) @@ -1023,8 +1023,8 @@ Note that the style variables are always made local to the buffer." (goto-char (match-beginning 1)) (setq m-beg (point)) (c-end-of-macro) - (when (c-major-mode-is 'c++-mode) - (save-excursion (c-depropertize-raw-strings-in-region m-beg (point)))) + (when c-ml-string-opener-re + (save-excursion (c-depropertize-ml-strings-in-region m-beg (point)))) (c-clear-char-property-with-value m-beg (point) 'syntax-table '(1))))) @@ -1174,12 +1174,15 @@ Note that the style variables are always made local to the buffer." ))))) (defun c-unescaped-nls-in-string-p (&optional quote-pos) - ;; Return whether unescaped newlines can be inside strings. + ;; Return whether unescaped newlines can be inside strings. 
If the current + ;; language handles multi-line strings, the value of this function is always + ;; nil. ;; ;; QUOTE-POS, if present, is the position of the opening quote of a string. ;; Depending on the language, there might be a special character before it ;; signifying the validity of such NLs. (cond + (c-ml-string-opener-re nil) ((null c-multiline-string-start-char) nil) ((c-characterp c-multiline-string-start-char) (and quote-pos @@ -1323,13 +1326,13 @@ Note that the style variables are always made local to the buffer." (setq pos (c-min-property-position pos c-max-syn-tab-mkr 'c-fl-syn-tab)) (when (< pos c-max-syn-tab-mkr) - (goto-char pos)) - (when (and (save-match-data - (c-search-backward-char-property-with-value-on-char - 'c-fl-syn-tab '(15) ?\" - (max (- (point) 500) (point-min)))) - (not (equal (c-get-char-property (point) 'syntax-table) '(1)))) - (setq pos (1+ pos))) + (goto-char pos) + (when (and (save-match-data + (c-search-backward-char-property-with-value-on-char + 'c-fl-syn-tab '(15) ?\" + (max (- (point) 500) (point-min)))) + (not (equal (c-get-char-property (point) 'syntax-table) '(1)))) + (setq pos (1+ pos)))) (while (< pos c-max-syn-tab-mkr) (setq pos (c-min-property-position pos c-max-syn-tab-mkr 'c-fl-syn-tab)) @@ -1435,7 +1438,8 @@ Note that the style variables are always made local to the buffer." ;; quotes up until the next unescaped EOL. Also guard against the change ;; being the insertion of \ before an EOL, escaping it. (cond - ((c-characterp c-multiline-string-start-char) + ((and (not c-ml-string-opener-re) + (c-characterp c-multiline-string-start-char)) ;; The text about to be inserted might contain a multiline string ;; opener. Set c-new-END after anything which might be affected. ;; Go to the end of the putative multiline string. @@ -1461,7 +1465,8 @@ Note that the style variables are always made local to the buffer." 
(< (point) (point-max)))))) (setq c-new-END (max (point) c-new-END))) - (c-multiline-string-start-char + ((and (not c-ml-string-opener-re) + c-multiline-string-start-char) (setq c-bc-changed-stringiness (not (eq (eq end-literal-type 'string) (eq beg-literal-type 'string)))) @@ -1506,7 +1511,7 @@ Note that the style variables are always made local to the buffer." ;; Opening " at EOB. (c-clear-syn-tab (1- (point)))) (when (and (c-search-backward-char-property 'syntax-table '(15) c-new-BEG) - (memq (char-after) c-string-delims)) ; Ignore an unterminated raw string's (. + (memq (char-after) c-string-delims)) ; Ignore an unterminated ml string's (. ;; Opening " on last line of text (without EOL). (c-remove-string-fences) (setq c-new-BEG (min c-new-BEG (point)))))) @@ -1520,13 +1525,15 @@ Note that the style variables are always made local to the buffer." (unless (or (and - ;; Don't set c-new-BEG/END if we're in a raw string. + ;; Don't set c-new-BEG/END if we're in an ml string. (eq beg-literal-type 'string) - (c-at-c++-raw-string-opener (car beg-limits))) + (c-ml-string-opener-at-or-around-point (car beg-limits))) (and c-multiline-string-start-char + (not c-ml-string-opener-re) (not (c-characterp c-multiline-string-start-char)))) (when (and (eq end-literal-type 'string) - (not (eq (char-before (cdr end-limits)) ?\()) + (or (memq (char-before (cdr end-limits)) c-string-delims) + (memq (char-before (cdr end-limits)) '(?\n ?\r))) (memq (char-after (car end-limits)) c-string-delims)) (setq c-new-END (max c-new-END (cdr end-limits))) (when (equal (c-get-char-property (car end-limits) 'syntax-table) @@ -1549,6 +1556,7 @@ Note that the style variables are always made local to the buffer." ;; This function is called exclusively as an after-change function via ;; `c-before-font-lock-functions'. (if (and c-multiline-string-start-char + (not c-ml-string-opener-re) (not (c-characterp c-multiline-string-start-char))) ;; Only the last " might need to be marked. 
(c-save-buffer-state @@ -1591,6 +1599,7 @@ Note that the style variables are always made local to the buffer." ((and (null beg-literal-type) (goto-char beg) (and (not (bobp)) + (not c-ml-string-opener-re) (eq (char-before) c-multiline-string-start-char)) (memq (char-after) c-string-delims)) (cons (point) @@ -1615,6 +1624,7 @@ Note that the style variables are always made local to the buffer." (point)) c-new-END)) s) + (goto-char (cond ((null beg-literal-type) c-new-BEG) @@ -1638,8 +1648,9 @@ Note that the style variables are always made local to the buffer." (and (memq (char-before) c-string-delims) (not (nth 4 s))))) ; Check we're actually out of the ; comment. not stuck at EOB - (unless (and (c-major-mode-is 'c++-mode) - (c-maybe-re-mark-raw-string)) + (unless + (and c-ml-string-opener-re + (c-maybe-re-mark-ml-string)) (if (c-unescaped-nls-in-string-p (1- (point))) (looking-at "\\(\\\\\\(.\\|\n\\)\\|[^\"]\\)*") (looking-at (cdr (assq (char-before) c-string-innards-re-alist)))) @@ -1678,21 +1689,15 @@ Note that the style variables are always made local to the buffer." (progn (goto-char end) (setq lit-start (c-literal-start))) (memq (char-after lit-start) c-string-delims) - (or (not (c-major-mode-is 'c++-mode)) + (or (not c-ml-string-opener-re) (progn (goto-char lit-start) - (and (not (and (eq (char-before) ?R) - (looking-at c-c++-raw-string-opener-1-re))) - (not (and (eq (char-after) ?\() - (equal (c-get-char-property - (point) 'syntax-table) - '(15)))))) + (not (c-ml-string-opener-at-or-around-point))) (save-excursion (c-beginning-of-macro)))) (goto-char (1+ end)) ; After the \ - ;; Search forward for EOLL - (setq lim (re-search-forward "\\(?:\\\\\\(?:.\\|\n\\)\\|[^\\\n\r]\\)*" - nil t)) + ;; Search forward for EOLL. + (setq lim (c-point 'eoll)) (goto-char (1+ end)) (when (c-search-forward-char-property-with-value-on-char 'syntax-table '(15) ?\" lim) -- 2.39.5