From: Alan Mackenzie Date: Sat, 1 Jul 2017 15:43:07 +0000 (+0000) Subject: Make C++ digit separators work. Amend the handling of single quotes generally X-Git-Tag: emacs-26.0.90~521^2~11^2~18 X-Git-Url: http://git.eshelyaron.com/gitweb/?a=commitdiff_plain;h=59d07875df9d44568d93a7517853e6a5ccaf1e5b;p=emacs.git Make C++ digit separators work. Amend the handling of single quotes generally Single quotes, even in strings and comments, are now marked with the "punctuation" syntax-table property, except where they are validly bounding a character literal. They are font locked with font-lock-warning-face except where they are valid. This is done in C, C++, ObjC, and Java Modes. * lisp/progmodes/cc-defs.el (c-clear-char-property-with-value-on-char-function) (c-clear-char-property-with-value-on-char, c-put-char-properties-on-char): New functions/macros. * lisp/progmodes/cc-fonts.el (c-font-lock-invalid-single-quotes): New function. (c-basic-matchers-before): invoke c-font-lock-invalid-single-quotes. * lisp/progmodes/cc-langs.el (c-get-state-before-change-functions): Remove c-before-after-change-digit-quote from wherever it occurs. Insert c-parse-quotes-before-change into the entries for the languages where it is needed. (c-before-font-lock-functions): Remove c-before-after-change-digit-quote from wherever it occurs. Insert c-parse-quotes-after-change into the entries for the languages which need it. (c-has-quoted-numbers): New lang-defconst/-defvar. * lisp/progmodes/cc-mode.el (c-before-after-change-digit-quote): Remove. (c-maybe-quoted-number-head, c-maybe-quoted-number-tail) (c-maybe-quoted-number): New defconsts. (c-quoted-number-head-before-point, c-quoted-number-tail-after-point) (c-quoted-number-straddling-point, c-parse-quotes-before-change) (c-parse-quotes-after-change): New functions. --- diff --git a/lisp/progmodes/cc-defs.el b/lisp/progmodes/cc-defs.el index 377a981598a..85a4085e490 100644 --- a/lisp/progmodes/cc-defs.el +++ b/lisp/progmodes/cc-defs.el @@ -1171,6 +1171,63 @@ been put there by c-put-char-property. POINT remains unchanged." nil ,from ,to ,value nil -property-)) ;; GNU Emacs `(c-clear-char-property-with-value-function ,from ,to ,property ,value))) + +(defun c-clear-char-property-with-value-on-char-function (from to property + value char) + "Remove all text-properties PROPERTY with value VALUE on +characters with value CHAR from the region [FROM, TO), as tested +by `equal'. These properties are assumed to be over individual +characters, having been put there by c-put-char-property. POINT +remains unchanged." + (let ((place from) + ) + (while ; loop round occurrences of (PROPERTY VALUE) + (progn + (while ; loop round changes in PROPERTY till we find VALUE + (and + (< place to) + (not (equal (get-text-property place property) value))) + (setq place (c-next-single-property-change place property nil to))) + (< place to)) + (if (eq (char-after place) char) + (remove-text-properties place (1+ place) (cons property nil))) + ;; Do we have to do anything with stickiness here? + (setq place (1+ place))))) + +(defmacro c-clear-char-property-with-value-on-char (from to property value char) + "Remove all text-properties PROPERTY with value VALUE on +characters with value CHAR from the region [FROM, TO), as tested +by `equal'. These properties are assumed to be over individual +characters, having been put there by c-put-char-property. POINT +remains unchanged." + (if c-use-extents + ;; XEmacs + `(let ((-property- ,property) + (-char- ,char)) + (map-extents (lambda (ext val) + (if (and (equal (extent-property ext -property-) val) + (eq (char-after + (extent-start-position ext)) + -char-)) + (delete-extent ext))) + nil ,from ,to ,value nil -property-)) + ;; Gnu Emacs + `(c-clear-char-property-with-value-on-char-function ,from ,to ,property + ,value ,char))) + +(defmacro c-put-char-properties-on-char (from to property value char) + ;; This needs to be a macro because `property' passed to + ;; `c-put-char-property' must be a constant. + "Put the text property PROPERTY with value VALUE on characters +with value CHAR in the region [FROM to)." + `(let ((skip-string (concat "^" (list ,char))) + (-to- ,to)) + (save-excursion + (goto-char ,from) + (while (progn (skip-chars-forward skip-string -to-) + (< (point) -to-)) + (c-put-char-property (point) ,property ,value) + (forward-char))))) ;; Macros to put overlays (Emacs) or extents (XEmacs) on buffer text. ;; For our purposes, these are characterized by being possible to @@ -1228,6 +1285,8 @@ been put there by c-put-char-property. POINT remains unchanged." (def-edebug-spec c-put-char-property t) (def-edebug-spec c-get-char-property t) (def-edebug-spec c-clear-char-property t) +(def-edebug-spec c-clear-char-property-with-value-on-char t) +(def-edebug-spec c-put-char-properties-on-char t) (def-edebug-spec c-clear-char-properties t) (def-edebug-spec c-put-overlay t) (def-edebug-spec c-delete-overlay t) diff --git a/lisp/progmodes/cc-fonts.el b/lisp/progmodes/cc-fonts.el index 00812530357..66f2575f49f 100644 --- a/lisp/progmodes/cc-fonts.el +++ b/lisp/progmodes/cc-fonts.el @@ -702,6 +702,36 @@ stuff. Used on level 1 and higher." t) (c-put-font-lock-face start (1+ start) 'font-lock-warning-face))))) +(defun c-font-lock-invalid-single-quotes (limit) + ;; This function will be called from font-lock for a region bounded by POINT + ;; and LIMIT, as though it were to identify a keyword for + ;; font-lock-keyword-face. It always returns NIL to inhibit this and + ;; prevent a repeat invocation. See elisp/lispref page "Search-based + ;; Fontification". + ;; + ;; This function fontifies invalid single quotes with + ;; `font-lock-warning-face'. These are the single quotes which + ;; o - aren't inside a literal; + ;; o - are marked with a syntax-table text property value '(1); and + ;; o - are NOT marked with a non-null c-digit-separator property. + (let ((limits (c-literal-limits)) + state beg end) + (if limits + (goto-char (cdr limits))) ; Even for being in a ' ' + (while (< (point) limit) + (setq beg (point)) + (setq state (parse-partial-sexp (point) limit nil nil nil 'syntax-table)) + (setq end (point)) + (goto-char beg) + (while (progn (skip-chars-forward "^'" end) + (< (point) end)) + (if (and (equal (c-get-char-property (point) 'syntax-table) '(1)) + (not (c-get-char-property (point) 'c-digit-separator))) + (c-put-font-lock-face (point) (1+ (point)) font-lock-warning-face)) + (forward-char)) + (parse-partial-sexp end limit nil nil state 'syntax-table))) + nil) + (c-lang-defconst c-basic-matchers-before "Font lock matchers for basic keywords, labels, references and various other easily recognizable things that should be fontified before generic @@ -723,6 +753,9 @@ casts and declarations are fontified. Used on level 2 and higher." (concat ".\\(" c-string-limit-regexp "\\)") '((c-font-lock-invalid-string))) + ;; Invalid single quotes. + c-font-lock-invalid-single-quotes + ;; Fontify C++ raw strings. ,@(when (c-major-mode-is 'c++-mode) '(c-font-lock-raw-strings)) diff --git a/lisp/progmodes/cc-langs.el b/lisp/progmodes/cc-langs.el index 1ce0fbf7d05..8be806094cd 100644 --- a/lisp/progmodes/cc-langs.el +++ b/lisp/progmodes/cc-langs.el @@ -474,18 +474,19 @@ so that all identifiers are recognized as words.") ;; The value here may be a list of functions or a single function. t nil c++ '(c-extend-region-for-CPP -; c-before-after-change-extend-region-for-lambda-capture ; doesn't seem needed. c-before-change-check-raw-strings c-before-change-check-<>-operators c-depropertize-CPP - c-before-after-change-digit-quote c-invalidate-macro-cache - c-truncate-bs-cache) + c-truncate-bs-cache + c-parse-quotes-before-change) (c objc) '(c-extend-region-for-CPP c-depropertize-CPP c-invalidate-macro-cache - c-truncate-bs-cache) - ;; java 'c-before-change-check-<>-operators + c-truncate-bs-cache + c-parse-quotes-before-change) + java 'c-parse-quotes-before-change + ;; 'c-before-change-check-<>-operators awk 'c-awk-record-region-clear-NL) (c-lang-defvar c-get-state-before-change-functions (let ((fs (c-lang-const c-get-state-before-change-functions))) @@ -515,18 +516,19 @@ parameters \(point-min) and \(point-max).") t '(c-depropertize-new-text c-change-expand-fl-region) (c objc) '(c-depropertize-new-text + c-parse-quotes-after-change c-extend-font-lock-region-for-macros c-neutralize-syntax-in-and-mark-CPP c-change-expand-fl-region) c++ '(c-depropertize-new-text + c-parse-quotes-after-change c-extend-font-lock-region-for-macros -; c-before-after-change-extend-region-for-lambda-capture ; doesn't seem needed. - c-before-after-change-digit-quote c-after-change-re-mark-raw-strings c-neutralize-syntax-in-and-mark-CPP c-restore-<>-properties c-change-expand-fl-region) java '(c-depropertize-new-text + c-parse-quotes-after-change c-restore-<>-properties c-change-expand-fl-region) awk '(c-depropertize-new-text @@ -609,6 +611,12 @@ EOL terminated statements." (c c++ objc) t) (c-lang-defvar c-has-bitfields (c-lang-const c-has-bitfields)) +(c-lang-defconst c-has-quoted-numbers + "Whether the language has numbers quoted like 4'294'967'295." + t nil + c++ t) +(c-lang-defvar c-has-quoted-numbers (c-lang-const c-has-quoted-numbers)) + (c-lang-defconst c-modified-constant "Regexp that matches a “modified” constant literal such as \"L\\='a\\='\", a “long character”. In particular, this recognizes forms of constant diff --git a/lisp/progmodes/cc-mode.el b/lisp/progmodes/cc-mode.el index a501ebba256..c5ee60f7d79 100644 --- a/lisp/progmodes/cc-mode.el +++ b/lisp/progmodes/cc-mode.el @@ -1083,101 +1083,215 @@ Note that the style variables are always made local to the buffer." (forward-line)) ; no infinite loop with, e.g., "#//" ))))) -(defun c-before-after-change-digit-quote (beg end &optional old-len) - ;; This function either removes or applies the punctuation value ('(1)) of - ;; the `syntax-table' text property on single quote marks which are - ;; separator characters in long integer literals, e.g. "4'294'967'295". It - ;; applies to both decimal/octal and hex literals. (FIXME (2016-06-10): it - ;; should also apply to binary literals.) +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Parsing of quotes. +;; +;; Valid digit separators in numbers will get the syntax-table "punctuation" +;; property, '(1), and also the text property `c-digit-separator' value t. +;; +;; Invalid other quotes (i.e. those not validly bounding a single character, +;; or escaped character) will get the syntax-table "punctuation" property, +;; '(1), too. +;; +;; Note that, for convenience, these properties are applied even inside +;; comments and strings. + +(defconst c-maybe-quoted-number-head + (concat + "\\(0\\(" + "\\([Xx]\\([0-9a-fA-F]\\('[0-9a-fA-F]\\|[0-9a-fA-F]\\)*'?\\)?\\)" + "\\|" + "\\([Bb]\\([01]\\('[01]\\|[01]\\)*'?\\)?\\)" + "\\|" + "\\('[0-7]\\|[0-7]\\)*'?" + "\\)" + "\\|" + "[1-9]\\('[0-9]\\|[0-9]\\)*'?" + "\\)") + "Regexp matching the head of a numeric literal, including with digit separators.") + +(defun c-quoted-number-head-before-point () + ;; Return non-nil when the head of a possibly quoted number is found + ;; immediately before point. The value returned in this case is the buffer + ;; position of the start of the head. + (when c-has-quoted-numbers + (save-excursion + (let ((here (point)) + ) + (skip-chars-backward "0-9a-fA-F'") + (if (and (memq (char-before) '(?x ?X)) + (eq (char-before (1- (point))) ?0)) + (backward-char 2)) + (while (and (search-forward-regexp c-maybe-quoted-number-head here t) + (< (match-end 0) here))) + (and (eq (match-end 0) here) (match-beginning 0)))))) + +(defconst c-maybe-quoted-number-tail + (concat + "\\(" + "\\([xX']?[0-9a-fA-F]\\('[0-9a-fA-F]\\|[0-9a-fA-F]\\)*\\)" + "\\|" + "\\([bB']?[01]\\('[01]\\|[01]\\)*\\)" + "\\|" + "\\('?[0-9]\\('[0-9]\\|[0-9]\\)*\\)" + "\\)") + "Regexp matching the tail of a numeric literal, including with digit separators. +Note that this is a strict tail, so won't match, e.g. \"0x....\".") + +(defun c-quoted-number-tail-after-point () + ;; Return non-nil when a proper tail of a possibly quoted number is found + ;; immediately after point. The value returned in this case is the buffer + ;; position of the end of the tail. + (when c-has-quoted-numbers + (and (looking-at c-maybe-quoted-number-tail) + (match-end 0)))) + +(defconst c-maybe-quoted-number + (concat + "\\(0\\(" + "\\([Xx][0-9a-fA-F]\\('[0-9a-fA-F]\\|[0-9a-fA-F]\\)*\\)" + "\\|" + "\\([Bb][01]\\('[01]\\|[01]\\)*\\)" + "\\|" + "\\('[0-7]\\|[0-7]\\)*" + "\\)" + "\\|" + "[1-9]\\('[0-9]\\|[0-9]\\)*" + "\\)") + "Regexp matching a numeric literal, including with digit separators.") + +(defun c-quoted-number-straddling-point () + ;; Return non-nil if a definitely quoted number starts before point and ends + ;; after point. In this case the number is bounded by (match-beginning 0) + ;; and (match-end 0). + (when c-has-quoted-numbers + (save-excursion + (let ((here (point)) + (bound (progn (skip-chars-forward "0-9a-fA-F'") (point)))) + (goto-char here) + (when (< (skip-chars-backward "0-9a-fA-F'") 0) + (if (and (memq (char-before) '(?x ?X)) + (eq (char-before (1- (point))) ?0)) + (backward-char 2)) + (while (and (search-forward-regexp c-maybe-quoted-number bound t) + (<= (match-end 0) here))) + (and (< (match-beginning 0) here) + (> (match-end 0) here) + (save-match-data + (goto-char (match-beginning 0)) + (save-excursion (search-forward "'" (match-end 0) t))))))))) + +(defun c-parse-quotes-before-change (beg end) + ;; This function analyzes 's near the region (c-new-BEG c-new-END), amending + ;; those two variables as needed to include 's into that region when they + ;; might be syntactically relevant to the change in progress. ;; - ;; In both uses of the function, the `syntax-table' properties are - ;; removed/applied only on quote marks which appear to be digit separators. + ;; Having amended that region, the function removes pertinent text + ;; properties (syntax-table properties with value '(1) and c-digit-separator + ;; props with value t) from 's in it. This operation is performed even + ;; within strings and comments. ;; - ;; Point is undefined on both entry and exit to this function, and the - ;; return value has no significance. The function is called solely as a - ;; before-change function (see `c-get-state-before-change-functions') and as - ;; an after change function (see `c-before-font-lock-functions', with the - ;; parameters BEG, END, and (optionally) OLD-LEN being given the standard - ;; values for before/after-change functions. - (c-save-buffer-state ((num-begin c-new-BEG) digit-re try-end) + ;; This function is called exclusively as a before-change function via the + ;; variable `c-get-state-before-change-functions'. + (c-save-buffer-state (p-limit limits found) + ;; Special consideraton for deleting \ from '\''. + (if (and (> end beg) + (eq (char-before end) ?\\) + (<= c-new-END end)) + (setq c-new-END (min (1+ end) (point-max)))) + + ;; Do we have a ' (or something like ',',',',',') within range of + ;; c-new-BEG? + (goto-char c-new-BEG) + (setq p-limit (max (- (point) 2) (point-min))) + (while (and (skip-chars-backward "^\\\\'" p-limit) + (> (point) p-limit)) + (when (eq (char-before) ?\\) + (setq p-limit (max (1- p-limit) (point-min)))) + (backward-char) + (setq c-new-BEG (point))) + (beginning-of-line) + (while (and + (setq found (search-forward-regexp "\\('\\([^'\\]\\|\\\\.\\)\\)*'" + c-new-BEG 'limit)) + (< (point) (1- c-new-BEG)))) + (if found + (setq c-new-BEG + (if (and (eq (point) (1- c-new-BEG)) + (eq (char-after) ?')) ; "''" before c-new-BEG. + (1- c-new-BEG) + (match-beginning 0)))) + + ;; Check for a number with quote separators straddling c-new-BEG + (when c-has-quoted-numbers + (goto-char c-new-BEG) + (when ;; (c-quoted-number-straddling-point) + (c-quoted-number-head-before-point) + (setq c-new-BEG (match-beginning 0)))) + + ;; Do we have a ' (or something like ',',',',...,',') within range of + ;; c-new-END? (goto-char c-new-END) - (when (looking-at "\\(x\\)?[0-9a-fA-F']+") - (setq c-new-END (match-end 0))) + (setq p-limit (min (+ (point) 2) (point-max))) + (while (and (skip-chars-forward "^\\\\'" p-limit) + (< (point) p-limit)) + (when (eq (char-after) ?\\) + (setq p-limit (min (1+ p-limit) (point-max)))) + (forward-char) + (setq c-new-END (point))) + (if (looking-at "[^']?\\('\\([^'\\]\\|\\\\.\\)\\)*'") + (setq c-new-END (match-end 0))) + + ;; Check for a number with quote separators straddling c-new-END. + (when c-has-quoted-numbers + (goto-char c-new-END) + (when ;; (c-quoted-number-straddling-point) + (c-quoted-number-tail-after-point) + (setq c-new-END (match-end 0)))) + + ;; Remove the '(1) syntax-table property from all "'"s within (c-new-BEG + ;; c-new-END). + (c-clear-char-property-with-value-on-char + c-new-BEG c-new-END + 'syntax-table '(1) + ?') + ;; Remove the c-digit-separator text property from the same "'"s. + (when c-has-quoted-numbers + (c-clear-char-property-with-value-on-char + c-new-BEG c-new-END + 'c-digit-separator t + ?')))) + +(defun c-parse-quotes-after-change (beg end old-len) + ;; This function applies syntax-table properties (value '(1)) and + ;; c-digit-separator properties as needed to 's within the range (c-new-BEG + ;; c-new-END). This operation is performed even within strings and + ;; comments. + ;; + ;; This function is called exclusively as an after-change function via the + ;; variable `c-before-font-lock-functions'. + (c-save-buffer-state (p-limit limits num-beg num-end clear-from-BEG-to) + ;; Apply the needed syntax-table and c-digit-separator text properties to + ;; quotes. (goto-char c-new-BEG) - (when (looking-at "\\(x?\\)[0-9a-fA-F']") - (if (re-search-backward "\\(0x\\)?[0-9a-fA-F]*\\=" nil t) - (setq c-new-BEG (point)))) - - (while - (re-search-forward "[0-9a-fA-F]'[0-9a-fA-F]" c-new-END t) - (setq try-end (1- (point))) - (re-search-backward "[^0-9a-fA-F']" num-begin t) - (setq digit-re - (cond - ((and (not (bobp)) (eq (char-before) ?0) (memq (char-after) '(?x ?X))) - "[0-9a-fA-F]") - ((and (eq (char-after (1+ (point))) ?0) - (memq (char-after (+ 2 (point))) '(?b ?B))) - "[01]") - ((memq (char-after (1+ (point))) '(?0 ?1 ?2 ?3 ?4 ?5 ?6 ?7 ?8 ?9)) - "[0-9]") - (t nil))) - (when digit-re - (cond ((eq (char-after) ?x) (forward-char)) - ((looking-at ".?0[Bb]") (goto-char (match-end 0))) - ((looking-at digit-re)) - (t (forward-char))) - (when (not (c-in-literal)) - (let ((num-end ; End of valid sequence of digits/quotes. - (save-excursion - (re-search-forward - (concat "\\=\\(" digit-re "+'\\)*" digit-re "+") nil t) - (point)))) - (setq try-end ; End of sequence of digits/quotes + (while (and (< (point) c-new-END) + (search-forward "'" c-new-END 'limit)) + (cond ((and (eq (char-before (1- (point))) ?\\) + ;; Check we've got an odd number of \s, here. (save-excursion - (re-search-forward - (concat "\\=\\(" digit-re "\\|'\\)+") nil t) - (point))) - (while (re-search-forward - (concat digit-re "\\('\\)" digit-re) num-end t) - (if old-len ; i.e. are we in an after-change function? - (c-put-char-property (match-beginning 1) 'syntax-table '(1)) - (c-clear-char-property (match-beginning 1) 'syntax-table)) - (backward-char))))) - (goto-char try-end) - (setq num-begin (point))))) - -;; The following doesn't seem needed at the moment (2016-08-15). -;; (defun c-before-after-change-extend-region-for-lambda-capture -;; (_beg _end &optional _old-len) -;; ;; In C++ Mode, extend the region (c-new-BEG c-new-END) to cover any lambda -;; ;; function capture lists we happen to be inside. This function is expected -;; ;; to be called both as a before-change and after change function. -;; ;; -;; ;; Note that these things _might_ be nested, with a capture list looking -;; ;; like: -;; ;; -;; ;; [ ...., &foo = [..](){...}(..), ... ] -;; ;; -;; ;; . What a wonderful language is C++. ;-) -;; (c-save-buffer-state (paren-state pos) -;; (goto-char c-new-BEG) -;; (setq paren-state (c-parse-state)) -;; (while (setq pos (c-pull-open-brace paren-state)) -;; (goto-char pos) -;; (when (c-looking-at-c++-lambda-capture-list) -;; (setq c-new-BEG (min c-new-BEG pos)) -;; (if (c-go-list-forward) -;; (setq c-new-END (max c-new-END (point)))))) - -;; (goto-char c-new-END) -;; (setq paren-state (c-parse-state)) -;; (while (setq pos (c-pull-open-brace paren-state)) -;; (goto-char pos) -;; (when (c-looking-at-c++-lambda-capture-list) -;; (setq c-new-BEG (min c-new-BEG pos)) -;; (if (c-go-list-forward) -;; (setq c-new-END (max c-new-END (point)))))))) + (backward-char) + (eq (logand (skip-chars-backward "\\\\") 1) 1)))) ; not a real '. + ((c-quoted-number-straddling-point) + (setq num-beg (match-beginning 0) + num-end (match-end 0)) + (c-put-char-properties-on-char num-beg num-end + 'syntax-table '(1) ?') + (c-put-char-properties-on-char num-beg num-end + 'c-digit-separator t ?') + (goto-char num-end)) + ((looking-at "\\([^\\']\\|\\\\.\\)'") ; balanced quoted expression. + (goto-char (match-end 0))) + (t (c-put-char-property (1- (point)) 'syntax-table '(1))))))) (defun c-before-change (beg end) ;; Function to be put on `before-change-functions'. Primarily, this calls