From: Alan Mackenzie Date: Fri, 20 Jun 2025 21:13:45 +0000 (+0000) Subject: CC Mode: Remove workarounds for missing "\_<", "\_>" in XEmacs X-Git-Url: http://git.eshelyaron.com/gitweb/?a=commitdiff_plain;h=0874bfe1a744b0f5f607fad6c985abb356d48a45;p=emacs.git CC Mode: Remove workarounds for missing "\_<", "\_>" in XEmacs Also start using `symbols' as the PAREN argument to regexp-opt. lisp/progmodes/cc-defs.el (c-make-keywords-re): Simplify, remove the redundant `appendable' value of the ADORN parameter, use the `symbols' value of the PAREN argument to regexp-opt. (c-emacs-features): Test that "\_<" and "\_>" work in regexps before allowing CC Mode to start. lisp/progmodes/cc-engine.el (c-looking-at-decl-block): When GOTO-START is set, leave point at the _last_ Java annotation, if any. Clarify this in the doc string. (c-guess-basic-syntax CASE 5G): Replace a not needed t GOTO-START argument to c-looking-at-decl-block with nil. lisp/progmodes/cc-langs.el (c-make-mode-syntax-table): Make the syntax of ?@ in Java Mode symbol for processing annotations. (c-opt-identifier-concat-key, c-opt-identifier-prefix-key) (c-opt-after-id-concat-key, c-identifier-key) (c-cpp-message-directives-re, c-cpp-include-key, c-pack-key) (c-brace-stack-thing-key, c-brace-stack-no-semi-key): Replace the `appendable' value of the ADORN argument to c-make-keywords-re with t. (c-annotation-re): New lang const and lang var. (c-keywords-obarray): Replace an explicit (apply 'nconc (mapcar ... )) with c--mapcan, now that the obscure bug with mapcan in Emacs has been resolved. 
lisp/progmodes/cc-align.el (c-lineup-gnu-DEFUN-intro-cont) (c-lineup-java-throws, c-snug-do-while) lisp/progmodes/cc-awk.el (c-awk-pre-exp-alphanum-kwd-re) (c-awk-after-if-for-while-condition-p) (c-awk-after-function-decl-param-list) (c-awk-after-continue-token) (c-awk-after-rbrace-or-statement-semicolon) (awk-font-lock-keywords) lisp/progmodes/cc-cmds.el (c-defun-name-1) lisp/progmodes/cc-engine.el (c-beginning-of-statement-1) (c-forward-<>-arglist, c-forward-label, c-skip-conditional) (c-looking-at-c++-lambda-expression, c-add-stmt-syntax) (c-guess-basic-syntax CASE 11, CASE 12, CASE 13, CASE 15) (CASE 5C.3, CASE 5D.4, CASE 7D) lisp/progmodes/cc-fonts.el (c-cpp-matchers) (c-basic-matchers-before, c-font-lock-declarations) (c-font-lock-c++-modules, c-simple-decl-matchers) (c-complex-decl-matchers, c-basic-matchers-after) lisp/progmodes/cc-langs.el (c-paragraph-start) (c-stmt-block-only-keywords-regexp) (c-type-decl-prefix-keywords-key, c-type-decl-prefix-key) (c-type-decl-suffix-key, c-known-type-key) (c-enum-clause-introduction-re) lisp/progmodes/cc-menus.el (cc-imenu-c++-generic-expression) lisp/progmodes/cc-vars.el (c-make-noise-macro-regexps) (c-make-macro-with-semi-re): Replace explicit "\<" and "\>" with "\_<" and "\_>". Replace explicit specification of non-'_' character after word end with "\_>". Replace concatenation of "\<" and "\>" with regexps with (regexp-opt ... 'symbols). Use (regexp-opt ... 'symbols) in general. (cherry picked from commit 1a8e8203a158f6bdc74e38f2cdb3214b6c06b9d8) --- diff --git a/lisp/progmodes/cc-align.el b/lisp/progmodes/cc-align.el index 556730fce0b..a293fd858b7 100644 --- a/lisp/progmodes/cc-align.el +++ b/lisp/progmodes/cc-align.el @@ -117,7 +117,7 @@ Works with: topmost-intro-cont." (save-excursion (let (case-fold-search) (goto-char (c-langelem-pos langelem)) - (if (looking-at "\\") + (if (looking-at "\\_") c-basic-offset)))) (defun c-block-in-arglist-dwim (arglist-start) @@ -554,7 +554,7 @@ Works with: func-decl-cont." 
(throws (catch 'done (goto-char (c-langelem-pos langelem)) (while (zerop (c-forward-token-2 1 t lim)) - (if (looking-at "throws\\>[^_]") + (if (looking-at "throws\\_>") (throw 'done t)))))) (if throws (if (zerop (c-forward-token-2 1 nil (c-point 'eol))) @@ -1513,7 +1513,7 @@ ACTION associated with `block-close' syntax." (progn (goto-char (c-langelem-pos langelem)) (if (eq (char-after) ?{) (c-safe (c-forward-sexp -1))) - (looking-at "\\[^_]"))) + (looking-at "\\_"))) '(before) '(before after))))) diff --git a/lisp/progmodes/cc-awk.el b/lisp/progmodes/cc-awk.el index d9172bce8fc..245901c6e6d 100644 --- a/lisp/progmodes/cc-awk.el +++ b/lisp/progmodes/cc-awk.el @@ -244,9 +244,7 @@ ;; will only work when there won't be a preceding " or / before the sought / ;; to foul things up. (defconst c-awk-pre-exp-alphanum-kwd-re - (concat "\\(^\\|\\=\\|[^_\n\r]\\)\\<" - (regexp-opt '("print" "return" "case") t) - "\\>\\([^_\n\r]\\|$\\)")) + (regexp-opt '("print" "return" "case") 'symbols)) ;; Matches all AWK keywords which can precede expressions (including ;; /regexp/). (defconst c-awk-kwd-regexp-sign-re @@ -343,12 +341,12 @@ (save-excursion (let ((par-pos (c-safe (scan-lists (point) -1 0)))) (when par-pos - (goto-char par-pos) ; back over "(...)" - (c-backward-token-1) ; BOB isn't a problem. - (or (looking-at "\\(if\\|for\\)\\>\\([^_]\\|$\\)") - (and (looking-at "while\\>\\([^_]\\|$\\)") ; Ensure this isn't a do-while. - (not (eq (c-beginning-of-statement-1 do-lim) - 'beginning))))))))) + (goto-char par-pos) ; back over "(...)" + (c-backward-token-1) ; BOB isn't a problem. + (or (looking-at "\\(if\\|for\\)\\_>") + (and (looking-at "while\\_>") ; Ensure this isn't a do-while. + (not (eq (c-beginning-of-statement-1 do-lim) + 'beginning))))))))) (defun c-awk-after-function-decl-param-list () ;; Are we just after the ) in "function foo (bar)" ? 
@@ -360,9 +358,10 @@ (when par-pos (goto-char par-pos) ; back over "(...)" (c-backward-token-1) ; BOB isn't a problem - (and (looking-at "[_a-zA-Z][_a-zA-Z0-9]*\\>") + (and (looking-at "[_a-zA-Z][_a-zA-Z0-9]*\\_>" + ) (progn (c-backward-token-1) - (looking-at "func\\(tion\\)?\\>")))))))) + (looking-at "func\\(tion\\)?\\_>")))))))) ;; 2002/11/8: FIXME! Check c-backward-token-1/2 for success (0 return code). (defun c-awk-after-continue-token () @@ -374,7 +373,7 @@ (c-backward-token-1) ; FIXME 2002/10/27. What if this fails? (if (and (looking-at "[&|]") (not (bobp))) (backward-char)) ; c-backward-token-1 doesn't do this :-( - (looking-at "[,{?:]\\|&&\\|||\\|do\\>\\|else\\>"))) + (looking-at "[,{?:]\\|&&\\|||\\|do\\_>\\|else\\_>"))) (defun c-awk-after-rbrace-or-statement-semicolon () ;; Are we just after a } or a ; which closes a statement? @@ -390,7 +389,7 @@ (goto-char par-pos) ; go back to containing ( (not (and (looking-at "(") (c-backward-token-1) ; BOB isn't a problem - (looking-at "for\\>"))))))))) + (looking-at "for\\_>"))))))))) (defun c-awk-back-to-contentful-text-or-NL-prop () ;; Move back to just after the first found of either (i) an EOL which has @@ -982,18 +981,19 @@ 'font-lock-warning-face))) nil)))) + ;; Variable names. ,(cons - (concat "\\<" - (regexp-opt - '("ARGC" "ARGIND" "ARGV" "BINMODE" "CONVFMT" "ENVIRON" - "ERRNO" "FIELDWIDTHS" "FILENAME" "FNR" "FPAT" "FS" "FUNCTAB" - "IGNORECASE" "LINT" "NF" "NR" "OFMT" "OFS" "ORS" "PREC" - "PROCINFO" "RLENGTH" "ROUNDMODE" "RS" "RSTART" "RT" "SUBSEP" - "SYMTAB" "TEXTDOMAIN") t) "\\>") - 'font-lock-variable-name-face) - - ;; Special file names. (acm, 2002/7/22) + (regexp-opt + '("ARGC" "ARGIND" "ARGV" "BINMODE" "CONVFMT" "ENVIRON" + "ERRNO" "FIELDWIDTHS" "FILENAME" "FNR" "FPAT" "FS" "FUNCTAB" + "IGNORECASE" "LINT" "NF" "NR" "OFMT" "OFS" "ORS" "PREC" + "PROCINFO" "RLENGTH" "ROUNDMODE" "RS" "RSTART" "RT" "SUBSEP" + "SYMTAB" "TEXTDOMAIN") + 'symbols) + 'font-lock-variable-name-face) + + ;; Special file names. 
(acm, 2002/7/22) ;; The following regexp was created by first evaluating this in GNU Emacs 21.1: ;; (regexp-opt '("/dev/stdin" "/dev/stdout" "/dev/stderr" "/dev/fd/n" "/dev/pid" ;; "/dev/ppid" "/dev/pgrpid" "/dev/user") 'words) @@ -1004,7 +1004,7 @@ ;; The surrounding quotes are fontified along with the filename, since, semantically, ;; they are an indivisible unit. ("\\(\"/dev/\\(fd/[0-9]+\\|p\\(\\(\\(gr\\)?p\\)?id\\)\\|\ -std\\(err\\|in\\|out\\)\\|user\\)\\)\\>\ +std\\(err\\|in\\|out\\)\\|user\\)\\)\\_>\ \\(\\(\"\\)\\|\\([^\"/\n\r][^\"\n\r]*\\)?$\\)" (1 font-lock-variable-name-face t) (8 font-lock-variable-name-face t t)) @@ -1015,38 +1015,34 @@ std\\(err\\|in\\|out\\)\\|user\\)\\)\\>\ ;; , replacing "lport", "rhost", and "rport" with "[[:alnum:]]+". ;; This cannot be combined with the above pattern, because the match number ;; for the (optional) closing \" would then exceed 9. - ("\\(\"/inet[46]?/\\(\\(raw\\|\\(tc\\|ud\\)p\\)/[[:alnum:]]+/[[:alnum:]]+/[[:alnum:]]+\\)\\)\\>\ + ("\\(\"/inet[46]?/\\(\\(raw\\|\\(tc\\|ud\\)p\\)/[[:alnum:]]+/[[:alnum:]]+/[[:alnum:]]+\\)\\)\\_>\ \\(\\(\"\\)\\|\\([^\"/\n\r][^\"\n\r]*\\)?$\\)" (1 font-lock-variable-name-face t) (6 font-lock-variable-name-face t t)) ;; Keywords. - ,(concat "\\<" - (regexp-opt - '("BEGIN" "BEGINFILE" "END" "ENDFILE" - "break" "case" "continue" "default" "delete" - "do" "else" "exit" "for" "getline" "if" "in" "next" - "nextfile" "return" "switch" "while") - t) "\\>") + ,(regexp-opt + '("BEGIN" "BEGINFILE" "END" "ENDFILE" + "break" "case" "continue" "default" "delete" + "do" "else" "exit" "for" "getline" "if" "in" "next" + "nextfile" "return" "switch" "while") + 'symbols) ;; Builtins. (eval . 
(list - ,(concat - "\\<" - (regexp-opt - '("adump" "and" "asort" "asorti" "atan2" "bindtextdomain" "close" - "compl" "cos" "dcgettext" "dcngettext" "exp" "extension" "fflush" - "gensub" "gsub" "index" "int" "isarray" "length" "log" "lshift" - "match" "mkbool" "mktime" "or" "patsplit" "print" "printf" "rand" - "rshift" "sin" "split" "sprintf" "sqrt" "srand" "stopme" - "strftime" "strtonum" "sub" "substr" "system" - "systime" "tolower" "toupper" "typeof" "xor") - t) - "\\>") + ,(regexp-opt + '("adump" "and" "asort" "asorti" "atan2" "bindtextdomain" "close" + "compl" "cos" "dcgettext" "dcngettext" "exp" "extension" "fflush" + "gensub" "gsub" "index" "int" "isarray" "length" "log" "lshift" + "match" "mkbool" "mktime" "or" "patsplit" "print" "printf" "rand" + "rshift" "sin" "split" "sprintf" "sqrt" "srand" "stopme" + "strftime" "strtonum" "sub" "substr" "system" + "systime" "tolower" "toupper" "typeof" "xor") + 'symbols) 0 c-preprocessor-face-name)) ;; Directives - (eval . '("@\\(include\\|load\\|namespace\\)\\>" 0 ,c-preprocessor-face-name)) + (eval . '("@\\(include\\|load\\|namespace\\)\\_>" 0 ,c-preprocessor-face-name)) ;; gawk debugging keywords. (acm, 2002/7/21) ;; (Removed, 2003/6/6. These functions are now fontified as built-ins) diff --git a/lisp/progmodes/cc-cmds.el b/lisp/progmodes/cc-cmds.el index 9230faa56da..d5881f808ff 100644 --- a/lisp/progmodes/cc-cmds.el +++ b/lisp/progmodes/cc-cmds.el @@ -2105,7 +2105,7 @@ with a brace block." ;; Pick out the defun name, according to the type of defun. (cond - ((looking-at "DEFUN\\s-*(") ;"DEFUN\\_>") think of XEmacs! + ((looking-at "DEFUN\\_>") ;; DEFUN ("file-name-directory", Ffile_name_directory, Sfile_name_directory, ...) 
==> Ffile_name_directory ;; DEFUN(POSIX::STREAM-LOCK, stream lockp &key BLOCK SHARED START LENGTH) ==> POSIX::STREAM-LOCK (down-list 1) diff --git a/lisp/progmodes/cc-defs.el b/lisp/progmodes/cc-defs.el index fd1bb566c60..f40025d7388 100644 --- a/lisp/progmodes/cc-defs.el +++ b/lisp/progmodes/cc-defs.el @@ -2146,103 +2146,48 @@ Notably, null elements in LIST are ignored." (mapconcat 'identity (delete nil (append list nil)) separator)) (defun c-make-keywords-re (adorn list &optional mode) - "Make a regexp that matches all the strings the list. + "Make a regexp that matches any string in LIST. Duplicates and nil elements in the list are removed. The resulting regexp may contain zero or more submatch expressions. -If ADORN is t there will be at least one submatch and the first -surrounds the matched alternative, and the regexp will also not match -a prefix of any identifier. Adorned regexps cannot be appended. The -language variable `c-nonsymbol-key' is used to make the adornment. - -A value `appendable' for ADORN is like above, but all alternatives in -the list that end with a word constituent char will have \\> appended -instead, so that the regexp remains appendable. Note that this -variant doesn't always guarantee that an identifier prefix isn't -matched since the symbol constituent `_' is normally considered a -nonword token by \\>. - -The optional MODE specifies the language to get `c-nonsymbol-key' from -when it's needed. The default is the current language taken from -`c-buffer-is-cc-mode'." - - (setq list (delete nil (delete-dups list))) - (if list - (let (re) - - (if (eq adorn 'appendable) - ;; This is kludgy but it works: Search for a string that - ;; doesn't occur in any word in LIST. Append it to all - ;; the alternatives where we want to add \>. Run through - ;; `regexp-opt' and then replace it with \>. 
- (let ((unique "") (list1 (copy-tree list)) pos) - (while (let (found) - (setq unique (concat unique "@") - pos list) - (while (and pos - (if (string-match unique (car pos)) - (progn (setq found t) - nil) - t)) - (setq pos (cdr pos))) - found)) - (setq pos list1) - (while pos - (if (string-match "\\w\\'" (car pos)) - (setcar pos (concat (car pos) unique))) - (setq pos (cdr pos))) - (setq re (regexp-opt list1)) - (setq pos 0) - (while (string-match unique re pos) - (setq pos (+ (match-beginning 0) 2) - re (replace-match "\\>" t t re)))) - - (setq re (regexp-opt list))) - - ;; Emacs 20 and XEmacs (all versions so far) has a buggy - ;; regexp-opt that doesn't always cope with strings containing - ;; newlines. This kludge doesn't handle shy parens correctly - ;; so we can't advice regexp-opt directly with it. - (let (fail-list) - (while list - (and (string-match "\n" (car list)) ; To speed it up a little. - (not (string-match (concat "\\`\\(" re "\\)\\'") - (car list))) - (setq fail-list (cons (car list) fail-list))) - (setq list (cdr list))) - (when fail-list - (setq re (concat re - "\\|" - (mapconcat - (if (eq adorn 'appendable) - (lambda (str) - (if (string-match "\\w\\'" str) - (concat (regexp-quote str) - "\\>") - (regexp-quote str))) - 'regexp-quote) - (sort fail-list - (lambda (a b) - (> (length a) (length b)))) - "\\|"))))) - - ;; Add our own grouping parenthesis around re instead of - ;; passing adorn to `regexp-opt', since in XEmacs it makes the - ;; top level grouping "shy". - (cond ((eq adorn 'appendable) - (concat "\\(" re "\\)")) - (adorn - (concat "\\(" re "\\)" - "\\(" - (c-get-lang-constant 'c-nonsymbol-key nil mode) - "\\|$\\)")) - (t - re))) - - ;; Produce a regexp that doesn't match anything. - (if adorn - (concat "\\(" regexp-unmatchable "\\)") - regexp-unmatchable))) +In the typical case when all members of LIST are valid symbols, the +resulting regexp is bracketed in \\_<\\( .... \\)\\_>. 
+ +Otherwise, if ADORN is t there will be at least one submatch and the +first surrounds the matched alternative, and the regexp will also not +match a prefix of any identifier. Adorned regexps can now (2025-06) be +appended to. In versions prior to 2025-06, there was also the value +`appendable' for ADORN. Since normal adorned regexps can now be +appended to anyway, this is no longer needed, but older code using it +will still work. + +The optional MODE specifies the language whose syntax table will be used +to characterize the input strings. The default is the current language +taken from `c-buffer-is-cc-mode'." + (c-with-syntax-table + ;; If we're being called at run time, we use the mode's run time syntax + ;; table. Otherwise, generate one as needed for the current MODE. + (let ((cur-syn-tab-sym + (intern (concat (symbol-name (or mode c-buffer-is-cc-mode)) + "-syntax-table")))) + (if (and (boundp cur-syn-tab-sym) + (syntax-table-p (symbol-value cur-syn-tab-sym))) + (symbol-value cur-syn-tab-sym) + (funcall (c-get-lang-constant 'c-make-mode-syntax-table nil mode)))) + (let ((liszt (remq nil list))) + (cond + ((null liszt) + (if adorn + "\\(\\`a\\`\\)" + "\\`a\\`")) + ((catch 'symbols + (dolist (elt liszt) + (unless (string-match "\\`\\(\\sw\\|\\s_\\)*\\'" elt) + (throw 'symbols nil))) + t) + (regexp-opt liszt 'symbols)) + (adorn (regexp-opt liszt t)) + (t (regexp-opt liszt)))))) (put 'c-make-keywords-re 'lisp-indent-function 1) @@ -2363,12 +2308,26 @@ non-nil, a caret is prepended to invert the set." ;; Find out if "\\s|" (generic string delimiters) work. (c-safe (modify-syntax-entry ?x "|") - (if (string-match "\\s|" "x") - (setq list (cons 'gen-string-delim list)))) - - ;; See if POSIX char classes work. - (when (and (string-match "[[:alpha:]]" "a") - ;; All versions of Emacs 21 so far haven't fixed + (if (string-match "\\s|" "x") + (setq list (cons 'gen-string-delim list)))) + + ;; Check that "\\_<" and "\\_>" work in regular expressions. 
+ (modify-syntax-entry ?_ "_") + (modify-syntax-entry ?* ".") + (modify-syntax-entry ?a "w") + (let ((s "*aaa_aaa*")) + (unless + (and + (c-safe (string-match "\\_<.*\\_>" s)) + (equal (match-string 0 s) "aaa_aaa")) + (error (concat + "CC Mode is incompatible with this version of Emacs - " + "support for \"\\_<\" and \"\\_>\" in regular expressions " + "is required.")))) + + ;; See if POSIX char classes work. + (when (and (string-match "[[:alpha:]]" "a") + ;; All versions of Emacs 21 so far haven't fixed ;; char classes in `skip-chars-forward' and ;; `skip-chars-backward'. (progn diff --git a/lisp/progmodes/cc-engine.el b/lisp/progmodes/cc-engine.el index 3a7ace4e535..7def2991374 100644 --- a/lisp/progmodes/cc-engine.el +++ b/lisp/progmodes/cc-engine.el @@ -1012,12 +1012,12 @@ comment at the start of cc-engine.el for more info." (setq ret 'previous pos saved) - ;; Begin at start and not pos to detect macros if we stand - ;; directly after the #. - (goto-char start) - (if (looking-at "\\<\\|\\W") - ;; Record this as the first token if not starting inside it. - (setq tok start)) + ;; Begin at start and not pos to detect macros if we stand + ;; directly after the #. + (goto-char start) + (if (looking-at "\\_<\\|\\W") + ;; Record this as the first token if not starting inside it. + (setq tok start)) ;; The following while loop goes back one sexp (balanced parens, ;; etc. with contents, or symbol or suchlike) each iteration. This @@ -8824,7 +8824,7 @@ multi-line strings (but not C++, for example)." (if (and (c-major-mode-is 'c++-mode) (save-excursion (and (zerop (c-backward-token-2)) - (looking-at "import\\>\\(?:[^_$]\\|$\\)")))) + (looking-at "import\\_>")))) (when (looking-at "<\\(?:\\\\.\\|[^\\\n\r\t>]\\)*\\(>\\)?") (if (match-beginning 1) ; A terminated <..> (progn @@ -11984,9 +11984,8 @@ This function might do hidden buffer changes." 
((and (c-major-mode-is 'c++-mode) (search-forward-regexp - "\\=p\\(r\\(ivate\\|otected\\)\\|ublic\\)\\>[^_]" nil t) - (progn (backward-char) - (c-forward-syntactic-ws limit) + "\\=p\\(r\\(ivate\\|otected\\)\\|ublic\\)\\_>" nil t) + (progn (c-forward-syntactic-ws limit) (looking-at ":\\([^:]\\|\\'\\)"))) ; A single colon. (forward-char) (setq label-type t)) @@ -11999,7 +11998,7 @@ This function might do hidden buffer changes." (setq qt-symbol-idx (and (c-major-mode-is 'c++-mode) (string-match - "\\(p\\(r\\(ivate\\|otected\\)\\|ublic\\)\\|more\\)\\>" + "\\(p\\(r\\(ivate\\|otected\\)\\|ublic\\)\\|more\\)\\_>" (buffer-substring start (point))))) (c-forward-syntactic-ws limit) (cond @@ -12012,7 +12011,7 @@ This function might do hidden buffer changes." 'qt-1kwd-colon 'goto-target))) ((and qt-symbol-idx - (search-forward-regexp "\\=\\(slots\\|Q_SLOTS\\)\\>" limit t) + (search-forward-regexp "\\=\\(slots\\|Q_SLOTS\\)\\_>" limit t) (progn (c-forward-syntactic-ws limit) (looking-at ":\\([^:]\\|\\'\\)"))) ; A single colon (forward-char) @@ -12428,14 +12427,14 @@ comment at the start of cc-engine.el for more info." ;; This function might do hidden buffer changes. (c-forward-sexp (cond ;; else if() - ((looking-at (concat "\\" "\\([ \t\n]\\|\\\\\n\\)+" - "if\\>\\([^_]\\|$\\)")) + "\\_")) 3) ;; do, else, try, finally - ((looking-at (concat "\\<\\(" + ((looking-at (concat "\\_<\\(" "do\\|else\\|try\\|finally" - "\\)\\>\\([^_]\\|$\\)")) + "\\)\\_>")) 1) ;; for, if, while, switch, catch, synchronized, foreach (t 2)))) @@ -12719,9 +12718,11 @@ comment at the start of cc-engine.el for more info." ;; ;; If the check is successful, the return value is the start of the ;; keyword that tells what kind of construct it is, i.e. typically - ;; what `c-decl-block-key' matched. Also, if GOTO-START is set then - ;; the point will be at the start of the construct, before any - ;; leading specifiers, otherwise it's at the returned position. + ;; what `c-decl-block-key' matched. 
Also, if GOTO-START is set then point + ;; will be left at the start of the construct. This is often at the + ;; return value, but if there is a template preceding it, point will be + ;; left at its start. If there are Java annotations preceding it, point + ;; will be left at the last of these. ;; ;; The point is clobbered if the check is unsuccessful. ;; @@ -12864,7 +12865,9 @@ comment at the start of cc-engine.el for more info." ;; but that'd only occur in invalid code so there's ;; no use spending effort on it. (let ((end (match-end 0)) - (kwd-sym (c-keyword-sym (match-string 0)))) + (kwd-sym (c-keyword-sym (match-string 0))) + (annotation (and c-annotation-re + (looking-at c-annotation-re)))) (unless (and kwd-sym ;; Moving over a protection kwd and the following @@ -12874,7 +12877,12 @@ comment at the start of cc-engine.el for more info." (not (c-keyword-member kwd-sym 'c-protection-kwds)) (c-forward-keyword-clause 0)) (goto-char end) - (c-forward-syntactic-ws)))) + (c-forward-syntactic-ws) + (when annotation + (setq first-specifier-pos (match-beginning 0)) + (when (and (eq (char-after) ?\() + (c-go-list-forward (point) kwd-start)) + (c-forward-syntactic-ws)))))) ((c-syntactic-re-search-forward c-symbol-start kwd-start 'move t) @@ -14090,7 +14098,7 @@ comment at the start of cc-engine.el for more info." (c-forward-syntactic-ws lim) (when (looking-at c-requires-clause-key) (c-forward-c++-requires-clause lim nil))) - (when (looking-at "\\(alignas\\)\\([^a-zA-Z0-9_$]\\|$\\)") + (when (looking-at "\\_<\\(alignas\\)\\_>") (c-forward-keyword-clause 1)) (when (and (eq (char-after) ?\() (c-go-list-forward nil lim)) @@ -14104,7 +14112,7 @@ comment at the start of cc-engine.el for more info." 
(and (<= (point) lim-or-max) (cond ((save-excursion - (and (looking-at "throw\\([^a-zA-Z0-9_]\\|$\\)") + (and (looking-at "\\_") (progn (goto-char (match-beginning 1)) (c-forward-syntactic-ws lim) (eq (char-after) ?\()) @@ -14346,10 +14354,10 @@ comment at the start of cc-engine.el for more info." ((and (eq step-type 'up) (>= (point) old-boi) - (looking-at "else\\>[^_]") + (looking-at "else\\_>") (save-excursion (goto-char old-pos) - (looking-at "if\\>[^_]"))) + (looking-at "if\\_>"))) ;; Special case to avoid deeper and deeper indentation ;; of "else if" clauses. ) @@ -14416,7 +14424,7 @@ comment at the start of cc-engine.el for more info." (if (and c-recognize-paren-inexpr-blocks (progn (c-backward-syntactic-ws containing-sexp) - (or (not (looking-at "\\>")) + (or (not (looking-at "\\_>")) (not (c-on-identifier)))) (save-excursion (goto-char (1+ paren-pos)) @@ -14881,13 +14889,13 @@ comment at the start of cc-engine.el for more info." (setq macro-start nil)) ;; CASE 11: an else clause? - ((looking-at "else\\>[^_]") + ((looking-at "else\\_>") (c-beginning-of-statement-1 containing-sexp) (c-add-stmt-syntax 'else-clause nil t containing-sexp paren-state)) ;; CASE 12: while closure of a do/while construct? - ((and (looking-at "while\\>[^_]") + ((and (looking-at "while\\_>") (save-excursion (prog1 (eq (c-beginning-of-statement-1 containing-sexp) 'beginning) @@ -14901,9 +14909,9 @@ comment at the start of cc-engine.el for more info." ;; after every try, catch and finally. ((save-excursion (and (cond ((c-major-mode-is 'c++-mode) - (looking-at "catch\\>[^_]")) + (looking-at "catch\\_>")) ((c-major-mode-is 'java-mode) - (looking-at "\\(catch\\|finally\\)\\>[^_]"))) + (looking-at "\\(catch\\|finally\\)\\_>"))) (and (c-safe (c-backward-syntactic-ws) (c-backward-sexp) t) @@ -14914,7 +14922,7 @@ comment at the start of cc-engine.el for more info." 
(if (eq (char-after) ?\() (c-safe (c-backward-sexp) t) t)) - (looking-at "\\(try\\|catch\\)\\>[^_]") + (looking-at "\\(try\\|catch\\)\\_>") (setq placeholder (point)))) (goto-char placeholder) (c-add-stmt-syntax 'catch-clause nil t @@ -15035,7 +15043,7 @@ comment at the start of cc-engine.el for more info." (save-excursion (setq tmpsymbol (if (and (eq (c-beginning-of-statement-1 lim) 'up) - (looking-at "switch\\>[^_]")) + (looking-at "switch\\_>")) ;; If the surrounding statement is a switch then ;; let's analyze all labels as switch labels, so ;; that they get lined up consistently. @@ -15330,7 +15338,7 @@ comment at the start of cc-engine.el for more info." (let ((where (cdr injava-inher)) (cont (car injava-inher))) (goto-char where) - (cond ((looking-at "throws\\>[^_]") + (cond ((looking-at "throws\\_>") (c-add-syntax 'func-decl-cont (progn (c-beginning-of-statement-1 lim) (c-point 'boi)))) @@ -15469,7 +15477,7 @@ comment at the start of cc-engine.el for more info." (save-excursion (c-beginning-of-statement-1 lim) (setq placeholder (point)) - (if (looking-at "static\\>[^_]") + (if (looking-at "static\\_>") (c-forward-token-2 1 nil indent-point)) (and (looking-at c-class-key) (zerop (c-forward-token-2 2 nil indent-point)) @@ -15542,7 +15550,7 @@ comment at the start of cc-engine.el for more info." (eq containing-decl-open containing-sexp)) (save-excursion (goto-char containing-decl-open) - (setq tmp-pos (c-looking-at-decl-block t))) + (setq tmp-pos (c-looking-at-decl-block nil))) (c-add-class-syntax 'class-close containing-decl-open containing-decl-start @@ -15919,7 +15927,7 @@ comment at the start of cc-engine.el for more info." 
((progn (goto-char containing-sexp) (and (c-safe (c-forward-sexp -1) t) - (looking-at "\\[^_]"))) + (looking-at "\\_"))) (goto-char (1+ containing-sexp)) (c-forward-syntactic-ws indent-point) (if (eq char-before-ip ?\;) diff --git a/lisp/progmodes/cc-fonts.el b/lisp/progmodes/cc-fonts.el index bbaf83c244b..fa3cb9458db 100644 --- a/lisp/progmodes/cc-fonts.el +++ b/lisp/progmodes/cc-fonts.el @@ -680,7 +680,7 @@ stuff. Used on level 1 and higher." (eval . (list ,(c-make-syntactic-matcher (concat noncontinued-line-end (c-lang-const c-opt-cpp-prefix) - "if\\(n\\)def\\>")) + "if\\(n\\)def\\_>")) ,(+ ncle-depth 1) c-negation-char-face-name 'append)) @@ -814,18 +814,18 @@ casts and declarations are fontified. Used on level 2 and higher." (if (c-major-mode-is 'pike-mode) ;; No symbol is a keyword after "->" in Pike. `((eval . (list ,(concat "\\(\\=.?\\|[^>]\\|[^-]>\\)" - "\\<\\(" re "\\)\\>") + "\\_<\\(" re "\\)\\_>") 2 c-constant-face-name))) - `((eval . (list ,(concat "\\<\\(" re "\\)\\>") + `((eval . (list ,(concat "\\_<\\(" re "\\)\\_>") 1 c-constant-face-name)))))) ;; Fontify all keywords except the primitive types. ,(if (c-major-mode-is 'pike-mode) ;; No symbol is a keyword after "->" in Pike. `(,(concat "\\(\\=.?\\|[^>]\\|[^-]>\\)" - "\\<" (c-lang-const c-regular-keywords-regexp)) + "\\_<" (c-lang-const c-regular-keywords-regexp)) 2 font-lock-keyword-face) - `(,(concat "\\<" (c-lang-const c-regular-keywords-regexp)) + `(,(concat "\\_<" (c-lang-const c-regular-keywords-regexp)) 1 font-lock-keyword-face)) ;; Fontify leading identifiers in fully qualified names like @@ -876,7 +876,7 @@ casts and declarations are fontified. Used on level 2 and higher." ;; places). `(lambda (limit) (while (re-search-forward - ,(concat "\\(\\<" ; 1 + ,(concat "\\(\\_<" "\\(" (c-lang-const c-symbol-key) "\\)" ; 2 (c-lang-const c-simple-ws) "*" (c-lang-const c-opt-identifier-concat-key) @@ -899,22 +899,14 @@ casts and declarations are fontified. Used on level 2 and higher." 
,@(when (c-major-mode-is 'c++-mode) '(c-font-lock-c++-modules)) - ;; The next regexp is highlighted with narrowing. This is so that the - ;; final "context" bit of the regexp, "\\(?:[^=]\\|$\\)", which cannot - ;; match anything non-empty at LIMIT, will match "$" instead. ,@(when (c-lang-const c-equals-nontype-decl-kwds) - `((,(byte-compile - `(lambda (limit) - (save-restriction - (narrow-to-region (point-min) limit) - ,(c-make-font-lock-search-form - (concat (c-lang-const c-equals-nontype-decl-key) ;no \\( - (c-lang-const c-simple-ws) "+\\(" - (c-lang-const c-symbol-key) "\\)" - (c-lang-const c-simple-ws) "*" - "=\\(?:[^=]\\|$\\)") - `((,(+ 1 (c-lang-const c-simple-ws-depth)) - 'font-lock-type-face t))))))))) + `((,(concat (c-lang-const c-equals-nontype-decl-key) + (c-lang-const c-simple-ws) "+\\(" + (c-lang-const c-symbol-key) "\\)") + (,(+ 1 (regexp-opt-depth + (c-lang-const c-equals-nontype-decl-key)) + (c-lang-const c-simple-ws-depth)) + font-lock-type-face t)))) ;; Fontify the special declarations in Objective-C. ,@(when (c-major-mode-is 'objc-mode) @@ -933,11 +925,9 @@ casts and declarations are fontified. Used on level 2 and higher." ;; The @interface/@implementation/@protocol directives. ,(c-make-font-lock-search-function - (concat "\\<" - (regexp-opt - '("@interface" "@implementation" "@protocol") - t) - "\\>") + (regexp-opt + '("@interface" "@implementation" "@protocol") + 'symbols) '((c-fontify-types-and-refs (;; The font-lock package in Emacs is known to clobber ;; `parse-sexp-lookup-properties' (when it exists). @@ -1631,8 +1621,7 @@ casts and declarations are fontified. Used on level 2 and higher." ;; We skip over it to prevent recognition of "more slots: " ;; as a bitfield declaration. (when (and (c-major-mode-is 'c++-mode) - (looking-at - (concat "\\(more\\)\\([^" c-symbol-chars "]\\|$\\)"))) + (looking-at "\\_<\\(more\\)\\_>")) (goto-char (match-end 1)) (c-forward-syntactic-ws)) @@ -2167,9 +2156,8 @@ casts and declarations are fontified. 
Used on level 2 and higher." ;; prevent a repeat invocation. See elisp/lispref page "Search-based ;; Fontification". (while (and (< (point) limit) - (re-search-forward - "\\<\\(module\\|export\\|import\\)\\>\\(?:[^_$]\\|$\\)" - limit t)) + (re-search-forward "\\_<\\(module\\|export\\|import\\)\\_>" + limit t)) (goto-char (match-end 1)) (let (name-bounds pos beg end module-names) ; A list of conses of start and end @@ -2182,8 +2170,7 @@ casts and declarations are fontified. Used on level 2 and higher." ((save-excursion (when (equal (match-string-no-properties 1) "export") (c-forward-syntactic-ws limit) - (re-search-forward "\\=\\(module\\)\\>\\(?:[^_$]\\|$\\)" - limit t)) + (re-search-forward "\\=\\_<\\(module\\)\\_>" limit t)) (and (equal (match-string-no-properties 1) "module") (< (point) limit) (progn (c-forward-syntactic-ws limit) @@ -2202,8 +2189,7 @@ casts and declarations are fontified. Used on level 2 and higher." ((save-excursion (when (equal (match-string-no-properties 1) "export") (c-forward-syntactic-ws limit) - (re-search-forward "\\=\\(import\\)\\>\\(?:[^_$]\\|$\\)" - limit t)) + (re-search-forward "\\=\\_<\\(import\\)\\_>" limit t)) (and (equal (match-string-no-properties 1) "import") (< (point) limit) (progn (c-forward-syntactic-ws limit) @@ -2324,7 +2310,7 @@ on level 2 only and so aren't combined with `c-complex-decl-matchers'." (regexp-opt-depth prefix-re) (c-lang-const c-simple-ws-depth)))) `((,(c-make-font-lock-search-function - (concat "\\<\\(" prefix-re "\\)" ; 1 + (concat "\\_<\\(" prefix-re "\\)" ; 1 (c-lang-const c-simple-ws) "+" (concat "\\(" ; 2 + prefix-re + c-simple-ws (c-lang-const c-symbol-key) @@ -2340,9 +2326,7 @@ on level 2 only and so aren't combined with `c-complex-decl-matchers'." ;; Fontify special declarations that lack a type. 
,@(when (c-lang-const c-typeless-decl-kwds) `((,(c-make-font-lock-search-function - (concat "\\<\\(" - (regexp-opt (c-lang-const c-typeless-decl-kwds)) - "\\)\\>") + (regexp-opt (c-lang-const c-typeless-decl-kwds) 'symbols) '((c-font-lock-declarators limit t nil nil) (save-match-data (goto-char (match-end 1)) @@ -2425,15 +2409,15 @@ on level 2 only and so aren't combined with `c-complex-decl-matchers'." (if (c-major-mode-is 'pike-mode) ;; No symbol is a keyword after "->" in Pike. `(,(concat "\\(\\=.?\\|[^>]\\|[^-]>\\)" - "\\<\\(" re "\\)\\>") + "\\_<\\(" re "\\)\\_>") 2 font-lock-type-face) - `(,(concat "\\<\\(" re "\\)\\>") + `(,(concat "\\_<\\(" re "\\)\\_>") 1 'font-lock-type-face))) ;; Fontify the type in C++ "new" expressions. ,@(when (c-major-mode-is 'c++-mode) ;; This pattern is a probably a "(MATCHER . ANCHORED-HIGHLIGHTER)" ;; (see Elisp page "Search-based Fontification"). - '(("\\" + `(("\\_" (c-font-lock-c++-new)))) ;; Maybe fontify identifiers containing a dollar sign with @@ -2525,7 +2509,7 @@ higher." (c-make-keywords-re nil (c-lang-const c-before-label-kwds)))) `(list - ,(concat "\\<\\(" c-before-label-re "\\)\\>" + ,(concat "\\_<\\(" c-before-label-re "\\)\\_>" "\\s *" "\\(" ; identifier-offset (c-lang-const c-symbol-key) @@ -2538,29 +2522,29 @@ higher." 
(c-lang-const c-ref-list-kwds) (c-lang-const c-colon-type-list-kwds)) `((,(c-make-font-lock-BO-decl-search-function - (concat "\\<\\(" + (concat "\\_<\\(" (c-make-keywords-re nil (append (c-lang-const c-type-list-kwds) (c-lang-const c-ref-list-kwds) (c-lang-const c-colon-type-list-kwds))) - "\\)\\>") + "\\)\\_>") '((c-fontify-types-and-refs ((c-promote-possible-types t)) (c-forward-keyword-clause 1) (if (> (point) limit) (goto-char limit)))))))) ,@(when (c-lang-const c-paren-type-kwds) `((,(c-make-font-lock-search-function - (concat "\\<\\(" + (concat "\\_<\\(" (c-make-keywords-re nil (c-lang-const c-paren-type-kwds)) - "\\)\\>") + "\\)\\_>") '((c-fontify-types-and-refs ((c-promote-possible-types t)) (c-forward-keyword-clause 1) (if (> (point) limit) (goto-char limit)))))))) ,@(when (c-major-mode-is 'java-mode) - '((eval . (list "\\<\\(@[a-zA-Z0-9]+\\)\\>" 1 c-annotation-face)))) - )) + '((eval . (list "\\_<\\(@[a-zA-Z0-9]+\\)\\_>" 1 c-annotation-face)))) + )) (c-lang-defconst c-matchers-1 t (c-lang-const c-cpp-matchers)) diff --git a/lisp/progmodes/cc-langs.el b/lisp/progmodes/cc-langs.el index 668891ab6fc..b7162fdd38a 100644 --- a/lisp/progmodes/cc-langs.el +++ b/lisp/progmodes/cc-langs.el @@ -380,7 +380,8 @@ The syntax tables aren't stored directly since they're quite large." ;; every keyword is a single symbol. '(modify-syntax-entry ?@ "_" table)) ((c-major-mode-is 'java-mode) - '(modify-syntax-entry ?@ "'" table)) + ;; In Java, @ can be the start of an annotation symbol. + `(modify-syntax-entry ?@ "_" table)) ((c-major-mode-is 'pike-mode) '(modify-syntax-entry ?@ "." table))) table))) @@ -949,7 +950,7 @@ This value is by default merged into `c-operators'." '(left-assoc right-assoc) t))) (when ops - (c-make-keywords-re 'appendable ops)))) + (c-make-keywords-re t ops)))) (c-lang-defvar c-opt-identifier-concat-key (c-lang-const c-opt-identifier-concat-key)) @@ -965,7 +966,7 @@ This value is by default merged into `c-operators'." 
'(prefix) t))) (when ops - (c-make-keywords-re 'appendable ops)))) + (c-make-keywords-re t ops)))) (c-lang-defvar c-opt-identifier-prefix-key (c-lang-const c-opt-identifier-prefix-key)) @@ -994,7 +995,7 @@ it's not taken care of by default." ;; considered internal - change `c-after-id-concat-ops' instead. t (concat (c-lang-const c-symbol-start) (if (c-lang-const c-after-id-concat-ops) - (concat "\\|" (c-make-keywords-re 'appendable + (concat "\\|" (c-make-keywords-re t (c-lang-const c-after-id-concat-ops))) ""))) @@ -1034,7 +1035,7 @@ e.g. identifiers with template arguments such as \"A<X,Y>\" in C++." (if (c-lang-const c-after-id-concat-ops) (concat "\\(" - (c-make-keywords-re 'appendable + (c-make-keywords-re t (c-lang-const c-after-id-concat-ops)) (concat ;; For flexibility, consider the symbol match @@ -1054,6 +1055,13 @@ e.g. identifiers with template arguments such as \"A<X,Y>\" in C++." ""))) (c-lang-defvar c-identifier-key (c-lang-const c-identifier-key)) +(c-lang-defconst c-annotation-re + "Regexp that matches the first token of an annotation or nil. +Currently only used in Java Mode." + t nil + java "\\_<@[[:alnum:]]+\\_>") +(c-lang-defvar c-annotation-re (c-lang-const c-annotation-re)) + (c-lang-defconst c-module-name-re "This regexp matches (a component of) a module name. Currently (2022-09) just C++ Mode uses this." @@ -1172,7 +1180,7 @@ string message." (c-lang-defconst c-cpp-message-directives-re ;; Appendable regexp matching any of the tokens in `c-cpp-message-directives'. - t (c-make-keywords-re 'appendable (c-lang-const c-cpp-message-directives))) + t (c-make-keywords-re t (c-lang-const c-cpp-message-directives))) (c-lang-defconst noncontinued-line-end t "\\(\\=\\|\\(\\=\\|[^\\]\\)[\n\r]\\)") @@ -1210,8 +1218,7 @@ file name in angle brackets or quotes." 
(c-lang-const c-cpp-include-directives)) (concat (c-lang-const c-anchored-cpp-prefix) - (c-make-keywords-re 'appendable - (c-lang-const c-cpp-include-directives)) + (c-make-keywords-re t (c-lang-const c-cpp-include-directives)) "[ \t]*") regexp-unmatchable)) (c-lang-defvar c-cpp-include-key (c-lang-const c-cpp-include-key)) @@ -1823,7 +1830,7 @@ This doesn't count the merely contextual bits of the regexp match." t nil c++ '("...")) (c-lang-defconst c-pack-key - t (c-make-keywords-re 'appendable (c-lang-const c-pack-ops))) + t (c-make-keywords-re t (c-lang-const c-pack-ops))) (c-lang-defvar c-pack-key (c-lang-const c-pack-key)) (c-lang-defconst c-auto-ops @@ -2258,8 +2265,8 @@ This works in Emacs >= 25.1." (c-lang-defconst c-paragraph-start "Regexp to append to `paragraph-start'." t "$" - java "\\(@[a-zA-Z]+\\>\\|$\\)" ; For Javadoc. - pike "\\(@[a-zA-Z_-]+\\>\\([^{]\\|$\\)\\|$\\)") ; For Pike refdoc. + java "\\(@[a-zA-Z]+\\_>\\|$\\)" ; For Javadoc. + pike "\\(@[a-zA-Z_-]+\\_>\\([^{]\\|$\\)\\|$\\)") ; For Pike refdoc. (c-lang-defvar c-paragraph-start (c-lang-const c-paragraph-start)) (c-lang-defconst c-paragraph-separate @@ -2794,12 +2801,9 @@ Not to be confused with `c-requires-clause-kwds'." c++ '("requires")) (c-lang-defconst c-fun-name-substitute-key - ;; An unadorned regular expression which matches any member of + ;; An adorned regular expression which matches any member of ;; `c-fun-name-substitute-kwds'. t (c-make-keywords-re t (c-lang-const c-fun-name-substitute-kwds))) -;; We use 'appendable, so that we get "\\>" on the regexp, but without a further -;; character, which would mess up backward regexp search from just after the -;; keyword. If only XEmacs had \\_>. ;-( (c-lang-defvar c-fun-name-substitute-key (c-lang-const c-fun-name-substitute-key)) @@ -2812,7 +2816,6 @@ This should not be confused with `c-fun-name-substitute-kwds'." (c-lang-defconst c-requires-clause-key ;; A regexp matching any member of `c-requires-clause-kwds'. 
t (c-make-keywords-re t (c-lang-const c-requires-clause-kwds))) -;; See `c-fun-name-substitute-key' for the justification of appendable. (c-lang-defvar c-requires-clause-key (c-lang-const c-requires-clause-key)) (c-lang-defconst c-modifier-kwds @@ -3497,7 +3500,7 @@ Note that Java specific rules are currently applied to tell this from (c-lang-defconst c-brace-stack-thing-key ;; Regexp matching any keyword or operator relevant to the brace stack (see ;; `c-update-brace-stack' in cc-engine.el). - t (c-make-keywords-re 'appendable + t (c-make-keywords-re t (append (c-lang-const c-flat-decl-block-kwds) (if (c-lang-const c-recognize-<>-arglists) @@ -3509,7 +3512,7 @@ Note that Java specific rules are currently applied to tell this from ;; Regexp matching any keyword or operator relevant to the brace stack when ;; a semicolon is not relevant (see `c-update-brace-stack' in ;; cc-engine.el). - t (c-make-keywords-re 'appendable + t (c-make-keywords-re t (append (c-lang-const c-flat-decl-block-kwds) (if (c-lang-const c-recognize-<>-arglists) @@ -3625,7 +3628,7 @@ Note that Java specific rules are currently applied to tell this from (c-lang-defconst c-stmt-block-only-keywords-regexp ;; A regexp matching a keyword in `c-stmt-block-only-keywords'. Such a ;; match can start and end only at token boundaries. - t (concat "\\(\\<\\|\\=\\)" + t (concat "\\(\\_<\\|\\=\\)" (c-make-keywords-re t (c-lang-const c-stmt-block-only-keywords)))) (c-lang-defvar c-stmt-block-only-keywords-regexp (c-lang-const c-stmt-block-only-keywords-regexp)) @@ -3682,12 +3685,9 @@ Note that Java specific rules are currently applied to tell this from ;; Emacs has an odd bug that causes `mapcan' to fail ;; with unintelligible errors. (XEmacs works.) ;; (2015-06-24): This bug has not yet been fixed. 
- ;;(mapcan (lambda (lang-const) - ;; (list lang-const t)) - ;; lang-const-list) - (apply 'nconc (mapcar (lambda (lang-const) - (list lang-const t)) - lang-const-list)))) + (c--mapcan (lambda (lang-const) + (list lang-const t)) + lang-const-list))) obarray)) (c-lang-defconst c-regular-keywords-regexp @@ -4071,16 +4071,13 @@ possible for good performance." (c-lang-defconst c-type-decl-prefix-keywords-key ;; Regexp matching any keyword operator that might precede the identifier in - ;; a declaration, e.g. "const" or nil. It doesn't test there is no "_" - ;; following the keyword. + ;; a declaration, e.g. "const" or nil. t (if (or (c-lang-const c-type-modifier-kwds) (c-lang-const c-modifier-kwds)) - (concat (regexp-opt (c--delete-duplicates (append (c-lang-const c-type-modifier-kwds) (c-lang-const c-modifier-kwds)) :test 'string-equal) - t) - "\\>"))) + 'symbols))) (c-lang-defconst c-maybe-typeless-specifier-re "Regexp matching keywords which might, but needn't, declare variables with @@ -4098,16 +4095,14 @@ The operator found is either the first submatch (if it is not a keyword) or the second submatch (if it is)." t (if (c-lang-const c-type-decl-prefix-keywords-key) (concat "\\(\\`a\\`\\)\\|" ; 1 - will never match. - (c-lang-const c-type-decl-prefix-keywords-key) ; 2 - "\\([^_]\\|$\\)") ; 3 + (c-lang-const c-type-decl-prefix-keywords-key)) ; 2 "\\`a\\`") ;; Default to a regexp that never matches. ;; Check that there's no "=" afterwards to avoid matching tokens ;; like "*=". (c objc) (concat "\\(" ; 1 "[*(]" "\\)\\|" - (c-lang-const c-type-decl-prefix-keywords-key) ; 2 - "\\([^=_]\\|$\\)") ; 3 + (c-lang-const c-type-decl-prefix-keywords-key)) ; 2 c++ (concat "\\(" ; 1 "&&" "\\|" @@ -4122,10 +4117,10 @@ keyword) or the second submatch (if it is)." ;; `c-font-lock-declarators' and ;; `c-font-lock-declarations' that check for a ;; complete name followed by ":: *". 
- (c-lang-const c-identifier-start) + (c-lang-const c-identifier-start) ; 5 "\\)") - "\\)" - "\\([^=_]\\|$\\)") ; 5 + "\\)" ; 2 + "\\([^=_]\\|$\\)") ; 6 pike "\\(\\*\\)\\([^=]\\|$\\)") (c-lang-defvar c-type-decl-prefix-key (c-lang-const c-type-decl-prefix-key) @@ -4165,7 +4160,7 @@ is in effect when this is matched (see `c-identifier-syntax-table')." ;; function argument list parenthesis. t (if (c-lang-const c-type-modifier-kwds) (concat "\\((\\|" - (regexp-opt (c-lang-const c-type-modifier-kwds) t) "\\>" + (regexp-opt (c-lang-const c-type-modifier-kwds) 'symbols) "\\)") "\\((\\)") (c c++ objc) (concat @@ -4180,8 +4175,7 @@ is in effect when this is matched (see `c-identifier-syntax-table')." (regexp-opt (append (c-lang-const c-fun-name-substitute-kwds) (c-lang-const c-type-modifier-kwds)) - t) - "\\>") + 'symbols)) "") "\\)") java "\\([[()]\\)" @@ -4259,8 +4253,8 @@ is in effect or not." ;; Regexp matching the known type identifiers. This is initialized ;; from the type keywords and `*-font-lock-extra-types'. The first ;; submatch is the one that matches the type. Note that this regexp - ;; assumes that symbol constituents like '_' and '$' have word - ;; syntax. + ;; assumes that symbol constituents like '_' and '$' have word or + ;; symbol syntax. (let* ((extra-types (when (boundp (c-mode-symbol "font-lock-extra-types")) (c-mode-var "font-lock-extra-types"))) @@ -4274,14 +4268,14 @@ is in effect or not." (unless (string-match "[][.*+?^$\\]" re) re)) extra-types)))) - (concat "\\<\\(" + (concat "\\_<\\(" (c-concat-separated (append (list (c-make-keywords-re nil (append (c-lang-const c-primitive-type-kwds) plain-strings))) regexp-strings) "\\|") - "\\)\\>"))) + "\\)\\_>"))) (c-lang-defconst c-special-brace-lists "List of open- and close-chars that makes up a pike-style brace list, @@ -4368,9 +4362,7 @@ the invalidity of the putative template construct." ;; needed. 
t (if (c-lang-const c-enum-list-kwds) (concat - "\\<\\(" - (c-make-keywords-re nil (c-lang-const c-enum-list-kwds)) - "\\)\\>" + (c-make-keywords-re t (c-lang-const c-enum-list-kwds)) ;; Disallow various common punctuation chars that can't come ;; before the '{' of the enum list, to avoid searching too far. "[^][{};/#=]*" diff --git a/lisp/progmodes/cc-menus.el b/lisp/progmodes/cc-menus.el index b3c50aa04d9..9881617e20f 100644 --- a/lisp/progmodes/cc-menus.el +++ b/lisp/progmodes/cc-menus.el @@ -85,13 +85,13 @@ A sample value might look like: `\\(_P\\|_PROTO\\)'.") ;; work by backtracking from the end of the definition. (nil ,(concat - "^\\<.*" - "[^" c-alnum "_:<>~]" ; match any non-identifier char - ; (note: this can be `\n') + "^\\_<.*" + "[^" c-alnum "_:<>~]" ; match any non-identifier char + ; (note: this can be `\n') "\\(" "\\([" c-alnum "_:<>~]*::\\)?" ; match an operator - "operator\\>[ \t]*" - "\\(()\\|[^(]*\\)" ; special case for `()' operator + "operator\\_>[ \t]*" + "\\(()\\|[^(]*\\)" ; special case for `()' operator "\\)" "[ \t]*([^)]*)[ \t]*[^ \t;]" ; followed by ws, arg list, @@ -116,7 +116,7 @@ A sample value might look like: `\\(_P\\|_PROTO\\)'.") ;; General function name regexp (nil ,(concat - "^\\<" ; line MUST start with word char + "^\\_<" ; line MUST start with symbol char ;; \n added to prevent overflow in regexp matcher. 
;; https://lists.gnu.org/r/emacs-pretest-bug/2007-02/msg00021.html "[^()\n]*" ; no parentheses before @@ -136,7 +136,7 @@ A sample value might look like: `\\(_P\\|_PROTO\\)'.") ,@(if cc-imenu-c-prototype-macro-regexp `((nil ,(concat - "^\\<.*" ; line MUST start with word char + "^\\_<.*" ; line MUST start with symbol char "[^" c-alnum "_]" ; match any non-identifier char "\\([" c-alpha "_][" c-alnum "_]*\\)" ; match function name "[ \t]*" ; whitespace before macro name diff --git a/lisp/progmodes/cc-vars.el b/lisp/progmodes/cc-vars.el index 2b62ace76bf..4df6017bc56 100644 --- a/lisp/progmodes/cc-vars.el +++ b/lisp/progmodes/cc-vars.el @@ -1765,8 +1765,7 @@ this implicitly by reinitializing C/C++/Objc Mode on any buffer)." (setq c-noise-macro-with-parens-name-re (cond ((null c-noise-macro-with-parens-names) regexp-unmatchable) ((consp c-noise-macro-with-parens-names) - (concat (regexp-opt c-noise-macro-with-parens-names t) - "\\([^[:alnum:]_$]\\|$\\)")) + (regexp-opt c-noise-macro-with-parens-names 'symbols)) ((stringp c-noise-macro-with-parens-names) (copy-sequence c-noise-macro-with-parens-names)) (t (error "c-make-noise-macro-regexps: \ @@ -1774,8 +1773,7 @@ c-noise-macro-with-parens-names is invalid: %s" c-noise-macro-with-parens-names) (setq c-noise-macro-name-re (cond ((null c-noise-macro-names) regexp-unmatchable) ((consp c-noise-macro-names) - (concat (regexp-opt c-noise-macro-names t) - "\\([^[:alnum:]_$]\\|$\\)")) + (regexp-opt c-noise-macro-names 'symbols)) ((stringp c-noise-macro-names) (copy-sequence c-noise-macro-names)) (t (error "c-make-noise-macro-regexps: \ @@ -1819,11 +1817,7 @@ variables.") ((stringp c-macro-names-with-semicolon) (copy-sequence c-macro-names-with-semicolon)) ((consp c-macro-names-with-semicolon) - (concat - "\\<" - (regexp-opt c-macro-names-with-semicolon) - "\\>")) ; N.B. the PAREN param of regexp-opt isn't supported by - ; all XEmacsen. 
+ (regexp-opt c-macro-names-with-semicolon 'symbols)) ((null c-macro-names-with-semicolon) nil) (t (error "c-make-macro-with-semi-re: Invalid \