(concat ; Assume n groups before this...
"\\(" ; n+1=name-group
cperl-white-and-comment-rex ; n+2=pre-name
- "\\(::[a-zA-Z_0-9:']+\\|[a-zA-Z_'][a-zA-Z_0-9:']*\\)" ; n+3=name
+ (rx-to-string `(group ,cperl--normal-identifier-rx))
"\\)" ; END n+1=name-group
(if named "" "?")
"\\(" ; n+4=proto-group
'(?w ?_))
(progn
(backward-sexp)
- (looking-at "[a-zA-Z_][a-zA-Z0-9_]*:[^:]"))))
+ (looking-at (rx (sequence (eval cperl--label-rx)
+ (not (in ":"))))))))
(defun cperl-get-state (&optional parse-start start-state)
"Return list (START STATE DEPTH PRESTART),
(progn
(forward-sexp -1)
(skip-chars-backward " \t")
- (looking-at "[ \t]*[a-zA-Z_][a-zA-Z_0-9]*[ \t]*:")))
+ (looking-at
+ (rx (sequence (0+ blank)
+ (eval cperl--label-rx))))))
(get-text-property (point) 'first-format-line)))
;; Look at previous line that's at column 0
"\\<" cperl-sub-regexp "\\>" ; sub with proto/attr
"\\("
cperl-white-and-comment-rex
- "\\(::[a-zA-Z_:'0-9]*\\|[a-zA-Z_'][a-zA-Z_:'0-9]*\\)\\)?" ; name
+ (rx (group (eval cperl--normal-identifier-rx)))
+ "\\)"
"\\("
cperl-maybe-white-and-comment-rex
"\\(([^()]*)\\|:[^:]\\)\\)" ; prototype or attribute start
(t t))))
;; <file> or <$file>
(and (eq c ?\<)
- ;; Do not stringify <FH>, <$fh> :
+ ;; Stringify what looks like a glob, but
+ ;; do not stringify file handles <FH>, <$fh> :
(save-match-data
(looking-at
- "\\$?\\([_a-zA-Z:][_a-zA-Z0-9:]*\\)?>"))))
+ (rx (sequence (opt "$")
+ (eval cperl--normal-identifier-rx)))))))
tb (match-beginning 0))
(goto-char (match-beginning b1))
(cperl-backward-to-noncomment (point-min))
(error nil)))
(if (or bb
(looking-at ; $foo -> {s}
- "[$@]\\$*\\([a-zA-Z0-9_:]+\\|[^{]\\)\\([ \t\n]*->\\)?[ \t\n]*{")
+ (rx
+ (sequence
+ (in "$@") (0+ "$")
+ (or
+ (eval cperl--normal-identifier-rx)
+ (not (in "{")))
+ (opt (sequence (eval cperl--ws*-rx))
+ "->")
+ (eval cperl--ws*-rx)
+ "{")))
(and ; $foo[12] -> {s}
(memq (following-char) '(?\{ ?\[))
(progn
(setq bb t))
((and (eq (following-char) ?:)
(eq b1 ?\{) ; Check for $ { s::bar }
- (looking-at "::[a-zA-Z0-9_:]*[ \t\n\f]*}")
+ ;; (looking-at "::[a-zA-Z0-9_:]*[ \t\n\f]*}")
+ (looking-at
+ (rx (sequence "::"
+ (eval cperl--normal-identifier-rx)
+ (eval cperl--ws*-rx)
+ "}")))
(progn
(goto-char (1- go))
(skip-chars-backward " \t\n\f")
"\\(" ;; XXXX 1-char variables, exc. |()\s
"[$@]"
"\\("
- "[_a-zA-Z:][_a-zA-Z0-9:]*"
+ (rx (eval cperl--normal-identifier-rx))
"\\|"
"{[^{}]*}" ; only one-level allowed
"\\|"
(progn
(backward-sexp)
;; sub {BLK}, print {BLK} $data, but NOT `bless', `return', `tr', `constant'
+ ;; a-zA-Z is fine here, these are Perl keywords
(or (and (looking-at "[a-zA-Z0-9_:]+[ \t\n\f]*[{#]") ; Method call syntax
(not (looking-at "\\(bless\\|return\\|q[wqrx]?\\|tr\\|[smy]\\|constant\\)\\>")))
;; sub bless::foo {}
cperl-maybe-white-and-comment-rex
"\\(state\\|my\\|local\\|our\\)\\)?"
cperl-maybe-white-and-comment-rex
- "\\$[_a-zA-Z0-9]+\\)?\\)\\>"))
+ (rx
+ (sequence
+ "$"
+ (eval cperl--basic-identifier-rx)))
+ "\\)?\\)\\>"))
(progn
(goto-char top)
(forward-sexp 1)
;; Looking at:
;; foreach my $var (
(if (looking-at
- "[ \t]*\\<for\\(each\\)?[ \t]+\\(state\\|my\\|local\\|our\\)[ \t]*\\$[_a-zA-Z0-9]+\\(\t*\\|[ \t][ \t]+\\)[^ \t\n#]")
+ (rx (sequence (0+ blank) symbol-start
+ "for" (opt "each")
+ (1+ blank)
+ (or "state" "my" "local" "our")
+ (0+ blank)
+ "$" (eval cperl--basic-identifier-rx)
+ (1+ blank)
+ (not (in " \t\n#")))))
(progn
(forward-sexp 3)
(delete-horizontal-space)
;; Looking at (with or without "}" at start, ending after "({"):
;; } foreach my $var () OR {
(if (looking-at
- "[ \t]*\\(}[ \t]*\\)?\\<\\(els\\(e\\|if\\)\\|continue\\|if\\|unless\\|while\\|for\\(each\\)?\\(\\([ \t]+\\(state\\|my\\|local\\|our\\)\\)?[ \t]*\\$[_a-zA-Z0-9]+\\)?\\|until\\)\\>\\([ \t]*(\\|[ \t\n]*{\\)\\|[ \t]*{")
+ (rx (sequence
+ (0+ blank)
+ (opt (sequence "}" (0+ blank) ))
+ symbol-start
+ (or "else" "elsif" "continue" "if" "unless" "while" "until"
+ (sequence (or "for" "foreach")
+ (opt
+ (opt (sequence (1+ blank)
+ (or "state" "my" "local" "our")))
+ (0+ blank)
+ "$" (eval cperl--basic-identifier-rx))))
+ symbol-end
+ (group-n 1
+ (or
+ (or (sequence (0+ blank) "(")
+ (sequence (eval cperl--ws*-rx) "{"))
+ (sequence (0+ blank) "{"))))))
(progn
- (setq ml (match-beginning 8)) ; "(" or "{" after control word
+ (setq ml (match-beginning 1)) ; "(" or "{" after control word
(re-search-forward "[({]")
(forward-char -1)
(setq p (point))
(setq lst index-sub-alist)
(while lst
(setq elt (car lst) lst (cdr lst))
- (cond ((string-match "\\(::\\|'\\)[_a-zA-Z0-9]+$" (car elt))
+ (cond ((string-match
+ (rx (sequence (or "::" "'")
+ (eval cperl--basic-identifier-rx)
+ string-end))
+ (car elt))
(setq pack (substring (car elt) 0 (match-beginning 0)))
(if (setq group (assoc pack hier-list))
(if (listp (cdr group))
(defun cperl-init-faces ()
(condition-case errs
(progn
- (let (t-font-lock-keywords t-font-lock-keywords-1 font-lock-anchored)
- (setq font-lock-anchored t)
+ (let (t-font-lock-keywords t-font-lock-keywords-1)
(setq
t-font-lock-keywords
(list
(if (eq (char-after (cperl-1- (match-end 0))) ?\{ )
'font-lock-function-name-face
'font-lock-variable-name-face))))
- '("\\<\\(package\\|require\\|use\\|import\\|no\\|bootstrap\\)[ \t]+\\([a-zA-Z_][a-zA-Z_0-9:]*\\)[ \t;]" ; require A if B;
- 2 font-lock-function-name-face)
+ `(,(rx (sequence symbol-start
+ (or "package" "require" "use" "import"
+ "no" "bootstrap")
+ (eval cperl--ws+-rx)
+ (group-n 1 (eval cperl--normal-identifier-rx))
+ (any " \t;"))) ; require A if B;
+ 1 font-lock-function-name-face)
'("^[ \t]*format[ \t]+\\([a-zA-Z_][a-zA-Z_0-9:]*\\)[ \t]*=[ \t]*$"
1 font-lock-function-name-face)
- (cond (font-lock-anchored
- '("\\([]}\\%@>*&]\\|\\$[a-zA-Z0-9_:]*\\)[ \t]*{[ \t]*\\(-?[a-zA-Z0-9_:]+\\)[ \t]*}"
- (2 font-lock-string-face t)
- ("\\=[ \t]*{[ \t]*\\(-?[a-zA-Z0-9_:]+\\)[ \t]*}"
- nil nil
- (1 font-lock-string-face t))))
- (t '("\\([]}\\%@>*&]\\|\\$[a-zA-Z0-9_:]*\\)[ \t]*{[ \t]*\\(-?[a-zA-Z0-9_:]+\\)[ \t]*}"
- 2 font-lock-string-face t)))
- '("[[ \t{,(]\\(-?[a-zA-Z0-9_:]+\\)[ \t]*=>" 1
- font-lock-string-face t)
+ ;; bareword hash key: $foo{bar}
+ `(,(rx (or (in "]}\\%@>*&") ; What Perl is this?
+ (sequence "$" (eval cperl--normal-identifier-rx)))
+ (0+ blank) "{" (0+ blank)
+ (group-n 1 (sequence (opt "-")
+ (eval cperl--basic-identifier-rx)))
+ (0+ blank) "}")
+;; '("\\([]}\\%@>*&]\\|\\$[a-zA-Z0-9_:]*\\)[ \t]*{[ \t]*\\(-?[a-zA-Z0-9_:]+\\)[ \t]*}"
+ (1 font-lock-string-face t)
+ ;; anchored bareword hash key: $foo{bar}{baz}
+ (,(rx point
+ (0+ blank) "{" (0+ blank)
+ (group-n 1 (sequence (opt "-")
+ (eval cperl--basic-identifier-rx)))
+ (0+ blank) "}")
+ ;; ("\\=[ \t]*{[ \t]*\\(-?[a-zA-Z0-9_:]+\\)[ \t]*}"
+ nil nil
+ (1 font-lock-string-face t)))
+ ;; hash element assignments with bareword key => value
+ `(,(rx (in "[ \t{,()")
+ (group-n 1 (sequence (opt "-")
+ (eval cperl--basic-identifier-rx)))
+ (0+ blank) "=>")
+ 1 font-lock-string-face t)
+;; '("[[ \t{,(]\\(-?[a-zA-Z0-9_:]+\\)[ \t]*=>" 1
+;; font-lock-string-face t)
;; labels
`(,(rx
(sequence
;;; '("[$*]{?\\(\\sw+\\)" 1 font-lock-variable-name-face)
;;; '("\\([@%]\\|\\$#\\)\\(\\sw+\\)"
;;; (2 (cons font-lock-variable-name-face '(underline))))
- (cond (font-lock-anchored
;; 1=my_etc, 2=white? 3=(+white? 4=white? 5=var
- `(,(concat "\\<\\(state\\|my\\|local\\|our\\)"
- cperl-maybe-white-and-comment-rex
- "\\(("
- cperl-maybe-white-and-comment-rex
- "\\)?\\([$@%*]\\([a-zA-Z0-9_:]+\\|[^a-zA-Z0-9_]\\)\\)")
- (5 ,(if cperl-font-lock-multiline
- 'font-lock-variable-name-face
- '(progn (setq cperl-font-lock-multiline-start
- (match-beginning 0))
- 'font-lock-variable-name-face)))
- (,(concat "\\="
- cperl-maybe-white-and-comment-rex
- ","
- cperl-maybe-white-and-comment-rex
- "\\([$@%*]\\([a-zA-Z0-9_:]+\\|[^a-zA-Z0-9_]\\)\\)")
- ;; Bug in font-lock: limit is used not only to limit
- ;; searches, but to set the "extend window for
- ;; facification" property. Thus we need to minimize.
- ,(if cperl-font-lock-multiline
- '(if (match-beginning 3)
- (save-excursion
- (goto-char (match-beginning 3))
- (condition-case nil
- (forward-sexp 1)
- (error
- (condition-case nil
- (forward-char 200)
- (error nil)))) ; typeahead
- (1- (point))) ; report limit
- (forward-char -2)) ; disable continued expr
- '(if (match-beginning 3)
- (point-max) ; No limit for continuation
- (forward-char -2))) ; disable continued expr
- ,(if cperl-font-lock-multiline
- nil
- '(progn ; Do at end
- ;; "my" may be already fontified (POD),
- ;; so cperl-font-lock-multiline-start is nil
- (if (or (not cperl-font-lock-multiline-start)
- (> 2 (count-lines
- cperl-font-lock-multiline-start
- (point))))
- nil
- (put-text-property
- (1+ cperl-font-lock-multiline-start) (point)
- 'syntax-type 'multiline))
- (setq cperl-font-lock-multiline-start nil)))
- (3 font-lock-variable-name-face))))
- (t '("^[ \t{}]*\\(state\\|my\\|local\\|our\\)[ \t]*\\(([ \t]*\\)?\\([$@%*][a-zA-Z0-9_:]+\\)"
- 3 font-lock-variable-name-face)))
- '("\\<for\\(each\\)?\\([ \t]+\\(state\\|my\\|local\\|our\\)\\)?[ \t]*\\(\\$[a-zA-Z_][a-zA-Z_0-9]*\\)[ \t]*("
+ `(,(rx (sequence (or "state" "my" "local" "our"))
+ (eval cperl--ws*-rx)
+ (opt (sequence "(" (eval cperl--ws*-rx)))
+ (group
+ (in "$@%*")
+ (or
+ (eval cperl--normal-identifier-rx)
+ (eval cperl--special-identifier-rx))
+ )
+ )
+ ;; (concat "\\<\\(state\\|my\\|local\\|our\\)"
+ ;; cperl-maybe-white-and-comment-rex
+ ;; "\\(("
+ ;; cperl-maybe-white-and-comment-rex
+ ;; "\\)?\\([$@%*]\\([a-zA-Z0-9_:]+\\|[^a-zA-Z0-9_]\\)\\)")
+ ;; (5 ,(if cperl-font-lock-multiline
+ (1 ,(if cperl-font-lock-multiline
+ 'font-lock-variable-name-face
+ '(progn (setq cperl-font-lock-multiline-start
+ (match-beginning 0))
+ 'font-lock-variable-name-face)))
+ (,(rx (sequence point
+ (eval cperl--ws*-rx)
+ ","
+ (eval cperl--ws*-rx)
+ (group
+ (in "$@%*")
+ (or
+ (eval cperl--normal-identifier-rx)
+ (eval cperl--special-identifier-rx))
+ )
+ )
+ )
+ ;; ,(concat "\\="
+ ;; cperl-maybe-white-and-comment-rex
+ ;; ","
+ ;; cperl-maybe-white-and-comment-rex
+ ;; "\\([$@%*]\\([a-zA-Z0-9_:]+\\|[^a-zA-Z0-9_]\\)\\)")
+ ;; Bug in font-lock: limit is used not only to limit
+ ;; searches, but to set the "extend window for
+ ;; facification" property. Thus we need to minimize.
+ ,(if cperl-font-lock-multiline
+ '(if (match-beginning 1)
+ (save-excursion
+ (goto-char (match-beginning 1))
+ (condition-case nil
+ (forward-sexp 1)
+ (error
+ (condition-case nil
+ (forward-char 200)
+ (error nil)))) ; typeahead
+ (1- (point))) ; report limit
+ (forward-char -2)) ; disable continued expr
+ '(if (match-beginning 1)
+ (point-max) ; No limit for continuation
+ (forward-char -2))) ; disable continued expr
+ ,(if cperl-font-lock-multiline
+ nil
+ '(progn ; Do at end
+ ;; "my" may be already fontified (POD),
+ ;; so cperl-font-lock-multiline-start is nil
+ (if (or (not cperl-font-lock-multiline-start)
+ (> 2 (count-lines
+ cperl-font-lock-multiline-start
+ (point))))
+ nil
+ (put-text-property
+ (1+ cperl-font-lock-multiline-start) (point)
+ 'syntax-type 'multiline))
+ (setq cperl-font-lock-multiline-start nil)))
+ (1 font-lock-variable-name-face)))
+ ;; foreach my $foo (
+ `(,(rx symbol-start "for" (opt "each")
+ (opt (sequence (1+ blank)
+ (or "state" "my" "local" "our")))
+ (0+ blank)
+ (group-n 1 (sequence "$"
+ (eval cperl--basic-identifier-rx)))
+ (0+ blank) "(")
+;; '("\\<for\\(each\\)?\\([ \t]+\\(state\\|my\\|local\\|our\\)\\)?[ \t]*\\(\\$[a-zA-Z_][a-zA-Z_0-9]*\\)[ \t]*("
4 font-lock-variable-name-face)
;; Avoid $!, and s!!, qq!! etc. when not fontifying syntactically
'("\\(?:^\\|[^smywqrx$]\\)\\(!\\)" 1 font-lock-negation-char-face)
'("\\[\\(\\^\\)" 1 font-lock-negation-char-face prepend)))
(setq
t-font-lock-keywords-1
- '(
- ("\\(\\([@%]\\|\\$#\\)[a-zA-Z_:][a-zA-Z0-9_:]*\\)" 1
+ `(
+ ;; arrays and hashes. Access to elements is fixed below
+ (,(rx (group-n 1 (group-n 2 (or (in "@%") "$#"))
+ (eval cperl--normal-identifier-rx)))
+ 1
+;; ("\\(\\([@%]\\|\\$#\\)[a-zA-Z_:][a-zA-Z0-9_:]*\\)" 1
(if (eq (char-after (match-beginning 2)) ?%)
'cperl-hash-face
'cperl-array-face)
nil) ; arrays and hashes
- ("\\(\\([$@%]+\\)[a-zA-Z_:][a-zA-Z0-9_:]*\\)[ \t]*\\([[{]\\)"
+ ;; access to array/hash elements
+ (,(rx (group-n 1 (group-n 2 (in "$@%"))
+ (eval cperl--normal-identifier-rx))
+ (0+ blank)
+ (group-n 3 (in "[{")))
+;; ("\\(\\([$@%]+\\)[a-zA-Z_:][a-zA-Z0-9_:]*\\)[ \t]*\\([[{]\\)"
1
(if (= (- (match-end 2) (match-beginning 2)) 1)
(if (eq (char-after (match-beginning 3)) ?{)
'cperl-hash-face
'cperl-array-face) ; arrays and hashes
font-lock-variable-name-face) ; Just to put something
- t)
- ("\\(@\\|\\$#\\)\\(\\$+\\([a-zA-Z_:][a-zA-Z0-9_:]*\\|[^ \t\n]\\)\\)"
+ t) ; override previous
+ ;; @$ array dereferences, $#$ last array index
+ (,(rx (group-n 1 (or "@" "$#"))
+ (group-n 2 (sequence "$"
+ (or (eval cperl--normal-identifier-rx)
+ (not (in " \t\n"))))))
+ ;; ("\\(@\\|\\$#\\)\\(\\$+\\([a-zA-Z_:][a-zA-Z0-9_:]*\\|[^ \t\n]\\)\\)"
(1 'cperl-array-face)
(2 font-lock-variable-name-face))
- ("\\(%\\)\\(\\$+\\([a-zA-Z_:][a-zA-Z0-9_:]*\\|[^ \t\n]\\)\\)"
+ ;; %$ hash dereferences
+ (,(rx (group-n 1 "%")
+ (group-n 2 (sequence "$"
+ (or (eval cperl--normal-identifier-rx)
+ (not (in " \t\n"))))))
+ ;; ("\\(%\\)\\(\\$+\\([a-zA-Z_:][a-zA-Z0-9_:]*\\|[^ \t\n]\\)\\)"
(1 'cperl-hash-face)
(2 font-lock-variable-name-face))
;;("\\([smy]\\|tr\\)\\([^a-z_A-Z0-9]\\)\\(\\([^\n\\]*||\\)\\)\\2")
(indent-region beg end nil)
(goto-char beg)
(setq col (current-column))
+ ;; Assuming that lineup is done on Perl syntax, this regexp
+ ;; doesn't need to be unicode aware -- haj, 2021-09-10
(if (looking-at "[a-zA-Z0-9_]")
(if (looking-at "\\<[a-zA-Z0-9_]+\\>")
(setq search
"Run etags with appropriate options for Perl files.
If optional argument ALL is `recursive', will process Perl files
in subdirectories too."
+ ;; Apparently etags doesn't support UTF-8 encoded sources, and usage
+ ;; of etags has been commented out in the menu since ... well,
+ ;; forever. So, let's just stick to ASCII here. -- haj, 2021-09-14
(interactive)
(let ((cmd "etags")
(args `("-l" "none" "-r"
;; Search for the function
(progn ;;save-match-data
(while (re-search-forward
+ ;; FIXME: Should XS code be unicode aware? Recent C
+ ;; compilers (Gcc 10+) are, but I guess this isn't used
+ ;; much. -- haj, 2021-09-14
"^\\([ \t]*MODULE\\>[^\n]*\\<PACKAGE[ \t]*=[ \t]*\\([a-zA-Z_][a-zA-Z_0-9:]*\\)\\>\\|\\([a-zA-Z_][a-zA-Z_0-9]*\\)(\\|[ \t]*BOOT:\\)"
nil t)
(cond
(setq lst
(mapcar
(lambda (elt)
- (cond ((string-match "^[_a-zA-Z]" (car elt))
+ (cond ((string-match (rx line-start (or alpha "_")) (car elt))
(goto-char (cdr elt))
(beginning-of-line) ; pos should be of the start of the line
(list (car elt)
","
(number-to-string (1- (elt elt 1))) ; Char pos 0-based
"\n")
- (if (and (string-match "^[_a-zA-Z]+::" (car elt))
- (string-match (concat "^" cperl-sub-regexp "[ \t]+\\([_a-zA-Z]+\\)[^:_a-zA-Z]")
- (elt elt 3)))
+ (if (and (string-match (rx line-start
+ (eval cperl--basic-identifier-rx) "++")
+ (car elt))
+ (string-match (rx-to-string `(sequence line-start
+ (regexp ,cperl-sub-regexp)
+ (1+ (in " \t"))
+ ,cperl--normal-identifier-rx))
+ (elt elt 3)))
;; Need to insert the name without package as well
(setq lst (cons (cons (substring (elt elt 3)
(match-beginning 1)
;;(concat "\\("
(mapconcat
#'identity
- '("[$@%*&][0-9a-zA-Z_:]+\\([ \t]*[[{]\\)?" ; Usual variable
+ '("[$@%*&][[:alnum:]_:]+\\([ \t]*[[{]\\)?" ; Usual variable
"[$@]\\^[a-zA-Z]" ; Special variable
"[$@][^ \n\t]" ; Special variable
"-[a-zA-Z]" ; File test
"\\\\[a-zA-Z0]" ; Special chars
"^=[a-z][a-zA-Z0-9_]*" ; POD sections
"[-!&*+,./<=>?\\^|~]+" ; Operator
- "[a-zA-Z_0-9:]+" ; symbol or number
+ "[[:alnum:]_:]+" ; symbol or number
"x="
"#!")
;;"\\)\\|\\("
;; Does not save-excursion
;; Get to the something meaningful
(or (eobp) (eolp) (forward-char 1))
- (re-search-backward "[-a-zA-Z0-9_:!&*+,./<=>?\\^|~$%@]"
+ (re-search-backward "[-[:alnum:]_:!&*+,./<=>?\\^|~$%@]"
(point-at-bol)
'to-beg)
;; (cond
;; (or (bobp) (backward-char 1))))
;; Try to backtrace
(cond
- ((looking-at "[a-zA-Z0-9_:]") ; symbol
- (skip-chars-backward "a-zA-Z0-9_:")
+ ((looking-at "[[:alnum:]_:]") ; symbol
+ (skip-chars-backward "[:alnum:]_:")
(cond
((and (eq (preceding-char) ?^) ; $^I
(eq (char-after (- (point) 2)) ?\$))
(eq (current-column) 1))
(forward-char -1))) ; =head1
(if (and (eq (preceding-char) ?\<)
- (looking-at "\\$?[a-zA-Z0-9_:]+>")) ; <FH>
+ (looking-at "\\$?[[:alnum:]_:]+>")) ; <FH>
(forward-char -1)))
((and (looking-at "=") (eq (preceding-char) ?x)) ; x=
(forward-char -1))
(not (eq (char-after (- (point) 2)) ?\$))) ; $-
(forward-char -1))
((and (eq (following-char) ?\>)
- (string-match "[a-zA-Z0-9_]" (char-to-string (preceding-char)))
+ (string-match "[[:alnum:]_]" (char-to-string (preceding-char)))
(save-excursion
(forward-sexp -1)
(and (eq (preceding-char) ?\<)
- (looking-at "\\$?[a-zA-Z0-9_:]+>")))) ; <FH>
+ (looking-at "\\$?[[:alnum:]_:]+>")))) ; <FH>
(search-backward "<"))))
((and (eq (following-char) ?\$)
(eq (preceding-char) ?\<)
- (looking-at "\\$?[a-zA-Z0-9_:]+>")) ; <$fh>
+ (looking-at "\\$?[[:alnum:]_:]+>")) ; <$fh>
(forward-char -1)))
(if (looking-at cperl-have-help-regexp)
(buffer-substring (match-beginning 0) (match-end 0))))
(should (equal (get-text-property (match-beginning 0) 'face)
'font-lock-keyword-face))))
+(ert-deftest cperl-test-fontify-special-variables ()
+ "Test fontification of variables like $^T or ${^ENCODING}.
+These can occur as \"local\" aliases."
+ (skip-unless (eq cperl-test-mode #'cperl-mode))
+ (with-temp-buffer
+ (insert "local ($^I, ${^UNICODE});\n")
+ (goto-char (point-min))
+ (funcall cperl-test-mode)
+ (font-lock-ensure)
+ (search-forward "$")
+ (should (equal (get-text-property (point) 'face)
+ 'font-lock-variable-name-face))
+ (search-forward "$")
+ (should (equal (get-text-property (point) 'face)
+ 'font-lock-variable-name-face))))
+
(ert-deftest cperl-test-identify-heredoc ()
"Test whether a construct containing \"<<\" followed by a
bareword is properly identified for a here-document if
(ert-deftest cperl-test-ws-rx ()
"Tests capture of very simple regular expressions (yawn)."
+ (skip-unless (eq cperl-test-mode #'cperl-mode))
(let ((valid
'(" " "\t" "\n"))
(invalid
(ert-deftest cperl-test-ws+-rx ()
"Tests sequences of whitespace and comment lines."
+ (skip-unless (eq cperl-test-mode #'cperl-mode))
(let ((valid
`(" " "\t#\n" "\n# \n"
,(concat "# comment\n" "# comment\n" "\n" "#comment\n")))
(ert-deftest cperl-test-version-regexp ()
"Tests the regexp for recommended syntax of versions in Perl."
+ (skip-unless (eq cperl-test-mode #'cperl-mode))
(let ((valid
'("1" "1.1" "1.1_1" "5.032001"
"v120.100.103"))
(ert-deftest cperl-test-package-regexp ()
"Tests the regular expression of Perl package names with versions.
Also includes valid cases with whitespace in strange places."
+ (skip-unless (eq cperl-test-mode #'cperl-mode))
(let ((valid
'("package Foo"
"package Foo::Bar"
(cperl-test--validate-regexp (rx (eval cperl--package-rx))
valid invalid)))
+(ert-deftest cperl-test-identifier-rx ()
+ "Test valid and invalid identifiers (no sigils)."
+ (skip-unless (eq cperl-test-mode #'cperl-mode))
+ (let ((valid
+ '("foo" "FOO" "f_oo" "a123"
+ "manĝis")) ; Unicode is allowed!
+ (invalid
+ '("$foo" ; no sigils allowed (yet)
+ "Foo::bar" ; no package qualifiers allowed
+ "lots_of_€"))) ; € is not alphabetic
+ (cperl-test--validate-regexp (rx (eval cperl--basic-identifier-rx))
+ valid invalid)))
+
+;;; Test unicode identifier in various places
+
+(defun cperl--test-unicode-setup (code string)
+ "Insert CODE, prepare it for tests, and find STRING.
+Invoke the appropriate major mode, ensure fontification, and set
+point after the first occurrence of STRING (no regexp!)."
+ (insert code)
+ (funcall cperl-test-mode)
+ (font-lock-ensure)
+ (goto-char (point-min))
+ (search-forward string))
+
+(ert-deftest cperl-test-unicode-labels ()
+ "Verify that non-ASCII labels are processed correctly."
+ (with-temp-buffer
+ (cperl--test-unicode-setup "LABEł: for ($manĝi) { say; }" "LAB")
+ (should (equal (get-text-property (point) 'face)
+ 'font-lock-constant-face))))
+
+(ert-deftest cperl-test-unicode-sub ()
+ (with-temp-buffer
+ (cperl--test-unicode-setup
+ (concat "use strict;\n" ; distinguish bob from b-o-f
+ "sub ℏ {\n"
+ " 6.62607015e-34\n"
+ "};")
+ "sub ") ; point is before "ℏ"
+
+ ;; Testing fontification
+ ;; FIXME 2021-09-10: This tests succeeds because cperl-mode
+ ;; accepts almost anything as a sub name for fontification. For
+ ;; example, it fontifies "sub @ {...;}" which is a syntax error in
+ ;; Perl. I let this pass for the moment.
+ (should (equal (get-text-property (point) 'face)
+ 'font-lock-function-name-face))
+
+ ;; Testing `beginning-of-defun'. Not available in perl-mode,
+ ;; where it jumps to the beginning of the buffer.
+ (when (eq cperl-test-mode #'cperl-mode)
+ (goto-char (point-min))
+ (search-forward "-34")
+ (beginning-of-defun)
+ (should (looking-at "sub")))))
+
+(ert-deftest cperl-test-unicode-varname ()
+ (with-temp-buffer
+ (cperl--test-unicode-setup
+ (concat "use strict;\n"
+ "my $π = 3.1415926535897932384626433832795028841971;\n"
+ "\n"
+ "my $manĝi = $π;\n"
+ "__END__\n")
+ "my $") ; perl-mode doesn't fontify the sigil, so include it here
+
+ ;; Testing fontification
+ ;; FIXME 2021-09-10: This test succeeds in cperl-mode because the
+ ;; π character is "not ASCII alphabetic", so it treats $π as a
+ ;; punctuation variable. The following two `should' forms with a
+ ;; longer variable name were added for stronger verification.
+ (should (equal (get-text-property (point) 'face)
+ 'font-lock-variable-name-face))
+ ;; Test both ends of a longer variable name
+ (search-forward "my $") ; again skip the sigil
+ (should (equal (get-text-property (point) 'face)
+ 'font-lock-variable-name-face))
+ (search-forward "manĝi")
+ (should (equal (get-text-property (1- (match-end 0)) 'face)
+ 'font-lock-variable-name-face))))
+
+(ert-deftest cperl-test-unicode-varname-list ()
+ "Verify that all elements of a variable list are fontified."
+
+ (let ((hash-face (if (eq cperl-test-mode #'perl-mode)
+ 'perl-non-scalar-variable
+ 'cperl-hash-face))
+ (array-face (if (eq cperl-test-mode #'perl-mode)
+ 'perl-non-scalar-variable
+ 'cperl-array-face)))
+ (with-temp-buffer
+ (cperl--test-unicode-setup
+ "my (%äsh,@ärräy,$scâlâr);" "%")
+ (should (equal (get-text-property (point) 'face)
+ hash-face))
+ (search-forward "@")
+ (should (equal (get-text-property (point) 'face)
+ array-face))
+ (search-forward "scâlâr")
+ (should (equal (get-text-property (match-beginning 0) 'face)
+ 'font-lock-variable-name-face))
+ (should (equal (get-text-property (1- (match-end 0)) 'face)
+ 'font-lock-variable-name-face)))
+
+ ;; Now with package-qualified variables
+ (with-temp-buffer
+ (cperl--test-unicode-setup
+ "local (%Søme::äsh,@Søme::ärräy,$Søme::scâlâr);" "%")
+ (should (equal (get-text-property (point) 'face)
+ hash-face))
+ (search-forward "Søme::") ; test basic identifier
+ (should (equal (get-text-property (point) 'face)
+ hash-face))
+ (search-forward "@") ; test package name
+ (should (equal (get-text-property (point) 'face)
+ array-face))
+ (search-forward "Søme::") ; test basic identifier
+ (should (equal (get-text-property (point) 'face)
+ array-face))
+ (search-forward "Søme") ; test package name
+ (should (equal (get-text-property (match-beginning 0) 'face)
+ 'font-lock-variable-name-face))
+ (should (equal (get-text-property (1- (match-end 0)) 'face)
+ 'font-lock-variable-name-face))
+ (search-forward "scâlâr") ; test basic identifier
+ (should (equal (get-text-property (match-beginning 0) 'face)
+ 'font-lock-variable-name-face))
+ (should (equal (get-text-property (1- (match-end 0)) 'face)
+ 'font-lock-variable-name-face)))))
+
+(ert-deftest cperl-test-unicode-arrays ()
+ "Test fontification of array access."
+ ;; Perl mode just looks at the sigil, for element access
+ (skip-unless (eq cperl-test-mode #'cperl-mode))
+ ;; simple array element
+ (with-temp-buffer
+ (cperl--test-unicode-setup
+ "$ärräy[1] = 7;" "$")
+ (should (equal (get-text-property (point) 'face)
+ 'cperl-array-face)))
+ ;; array slice
+ (with-temp-buffer
+ (cperl--test-unicode-setup
+ "@ärräy[(1..3)] = (4..6);" "@")
+ (should (equal (get-text-property (point) 'face)
+ 'cperl-array-face)))
+ ;; array max index
+ (with-temp-buffer
+ (cperl--test-unicode-setup
+ "$#ärräy = 1;" "$")
+ (should (equal (get-text-property (point) 'face)
+ 'cperl-array-face)))
+ ;; array dereference
+ (with-temp-buffer
+ (cperl--test-unicode-setup
+ "@$ärräy = (1,2,3)" "@")
+ (should (equal (get-text-property (1- (point)) 'face)
+ 'cperl-array-face))
+ (should (equal (get-text-property (1+ (point)) 'face)
+ 'font-lock-variable-name-face))))
+
+(ert-deftest cperl-test-unicode-hashes ()
+ "Test fontification of hash access."
+ ;; Perl mode just looks at the sigil, for element access
+ (skip-unless (eq cperl-test-mode #'cperl-mode))
+ ;; simple hash element
+ (with-temp-buffer
+ (cperl--test-unicode-setup
+ "$häsh{'a'} = 7;" "$")
+ (should (equal (get-text-property (point) 'face)
+ 'cperl-hash-face)))
+ ;; hash array slice
+ (with-temp-buffer
+ (cperl--test-unicode-setup
+ "@häsh{(1..3)} = (4..6);" "@")
+ (should (equal (get-text-property (point) 'face)
+ 'cperl-hash-face)))
+ ;; hash subset
+ (with-temp-buffer
+ (cperl--test-unicode-setup
+ "my %hash = %häsh{'a',2,3};" "= %")
+ (should (equal (get-text-property (point) 'face)
+ 'cperl-hash-face)))
+ ;; hash dereference
+ (with-temp-buffer
+ (cperl--test-unicode-setup
+ "%$äsh = (key => 'value');" "%")
+ (should (equal (get-text-property (1- (point)) 'face)
+ 'cperl-hash-face))
+ (should (equal (get-text-property (1+ (point)) 'face)
+ 'font-lock-variable-name-face))))
+
+(ert-deftest cperl-test-unicode-hashref ()
+ "Verify that a hashref access disambiguates {s}.
+CPerl mode takes the token \"s\" as a substitution unless
+detected otherwise. Not for perl-mode: it doesn't stringify
+bareword hash keys and doesn't recognize a substitution
+\"s}foo}bar}\""
+ (skip-unless (eq cperl-test-mode #'cperl-mode))
+ (with-temp-buffer
+ (cperl--test-unicode-setup "$häshref->{s} # }}" "{")
+ (should (equal (get-text-property (point) 'face)
+ 'font-lock-string-face))
+ (should (equal (get-text-property (1+ (point)) 'face)
+ nil))))
+
+(ert-deftest cperl-test-unicode-proto ()
+ ;; perl-mode doesn't fontify prototypes at all
+ (skip-unless (eq cperl-test-mode #'cperl-mode))
+ (with-temp-buffer
+ (cperl--test-unicode-setup
+ (concat "sub prötötyped ($) {\n"
+ " ...;"
+ "}\n")
+ "prötötyped (")
+
+ (should (equal (get-text-property (point) 'face)
+ 'font-lock-string-face))))
+
+(ert-deftest cperl-test-unicode-fhs ()
+ (with-temp-buffer
+ (cperl--test-unicode-setup
+ (concat "while (<BAREWÖRD>) {\n"
+ " ...;)\n"
+ "}\n")
+ "while (<") ; point is before the first char of the handle
+ ;; Testing fontification
+ ;; FIXME 2021-09-10: perl-mode.el and cperl-mode.el handle these
+ ;; completely differently. perl-mode interprets barewords as
+ ;; constants, cperl-mode does not fontify them. Both treat
+ ;; non-barewords as globs, which are not fontified by perl-mode,
+ ;; but fontified as strings in cperl-mode. We keep (and test)
+ ;; that behavior "as is" because both bareword filehandles and
+ ;; <glob> syntax are no longer recommended.
+ (let ((bareword-face
+ (if (equal cperl-test-mode 'perl-mode) 'font-lock-constant-face
+ nil)))
+ (should (equal (get-text-property (point) 'face)
+ bareword-face)))))
+
+(ert-deftest cperl-test-unicode-hashkeys ()
+ "Test stringification of bareword hash keys. Not in perl-mode.
+perl-mode generally does not stringify bareword hash keys."
+ (skip-unless (eq cperl-test-mode #'cperl-mode))
+ ;; Plain hash key
+ (with-temp-buffer
+ (cperl--test-unicode-setup
+ "$häsh { kéy }" "{ ")
+ (should (equal (get-text-property (point) 'face)
+ 'font-lock-string-face)))
+ ;; Nested hash key
+ (with-temp-buffer
+ (cperl--test-unicode-setup
+ "$häsh { kéy } { kèy }" "} { ")
+ (should (equal (get-text-property (point) 'face)
+ 'font-lock-string-face)))
+ ;; Key => value
+ (with-temp-buffer
+ (cperl--test-unicode-setup
+ "( kéy => 'value'," "( ")
+ (should (equal (get-text-property (point) 'face)
+ 'font-lock-string-face))))
+
+(ert-deftest cperl-test-word-at-point ()
+ "Test whether the function captures non-ASCII words."
+ (skip-unless (eq cperl-test-mode #'cperl-mode))
+ (let ((words '("rôle" "café" "ångström"
+ "Data::Dump::dump"
+ "_underscore")))
+ (dolist (word words)
+ (with-temp-buffer
+ (insert " + ") ; this will be the suffix
+ (beginning-of-line)
+ (insert ")") ; A non-word char
+ (insert word)
+ (should (string= word (cperl-word-at-point-hard)))))))
+
;;; Function test: Building an index for imenu
(ert-deftest cperl-test-imenu-index ()
"Versioned::Package::outer"
"lexical"
"Versioned::Block::signatured"
- "Package::in_package_again")))
+ "Package::in_package_again"
+ "Erdős::Number::erdős_number")))
(dolist (sub expected)
(should (assoc-string sub index)))))))