From 4a9b24e1780c980d033b44f3c86133bbab691ebe Mon Sep 17 00:00:00 2001 From: Alan Mackenzie Date: Sun, 1 Jul 2018 11:39:03 +0000 Subject: [PATCH] Initial commit. Allow wanted fontification of open string in any mode. The wanted fontification is for the string face to end at the first unescaped newline. This is achieved by a new syntax flag `s' on NL, which means "terminate any open string". src/syntax.c (SYNTAX_FLAGS_CLOSE_STRING, back_maybe_string): New functions. (Fstring_to_syntax, Finternal_describe_syntax_value, scan_lists) (scan_sexps_forward): Adapt to handle the `s' flag. lisp/font-lock.el (font-lock-warn-open-string): New defcustom. (font-lock-fontify-syntactically-region): Enhance to fontify " with warning-face. lisp/progmodes/sh-script.el (sh-mode-syntax-table): Add flag `s' to syntax entry for \n. --- lisp/font-lock.el | 24 ++++- lisp/progmodes/sh-script.el | 2 +- src/syntax.c | 193 ++++++++++++++++++++++++++++++++++-- 3 files changed, 208 insertions(+), 11 deletions(-) diff --git a/lisp/font-lock.el b/lisp/font-lock.el index be9fb4dc93f..f2b7fef5c23 100644 --- a/lisp/font-lock.el +++ b/lisp/font-lock.el @@ -287,6 +287,16 @@ If a number, only buffers greater than this size have fontification messages." (integer :tag "size")) :group 'font-lock :version "24.1") + +(defcustom font-lock-warn-open-string t + "Fontify the opening quote of an unterminated string with warning face? +This is done when this variable is non-nil. + +This works only when the syntax-table entry for newline contains the flag `s' +\(see page \"xxx\" in the Elisp manual)." + :type 'boolean + :group 'font-lock + :version "27.1") ;; Originally these variable values were face names such as `bold' etc. @@ -1597,18 +1607,30 @@ START should be at the beginning of a line." (replace-regexp-in-string "^ *" "" comment-end)))) ;; Find the `start' state. (state (syntax-ppss start)) - face beg) + face beg in-string s-c-start) (if loudly (message "Fontifying %s... (syntactically...)" (buffer-name))) ;; ;; Find each interesting place between here and `end'. (while (progn (when (or (nth 3 state) (nth 4 state)) + (setq s-c-start (nth 8 state)) + (setq in-string (nth 3 state)) (setq face (funcall font-lock-syntactic-face-function state)) (setq beg (max (nth 8 state) start)) (setq state (parse-partial-sexp (point) end nil nil state 'syntax-table)) (when face (put-text-property beg (point) 'face face)) +;;;; NEW STOUGH, 2018-06-29 + (put-text-property s-c-start (1+ s-c-start) + 'face + (if (and font-lock-warn-open-string + in-string + (not (nth 3 state)) + (not (eq in-string (char-before)))) + 'font-lock-warning-face + face)) +;;;; END OF NEW STOUGH (when (and (eq face 'font-lock-comment-face) (or font-lock-comment-start-skip comment-start-skip)) diff --git a/lisp/progmodes/sh-script.el b/lisp/progmodes/sh-script.el index aaa86b5816f..bf760e0a6cc 100644 --- a/lisp/progmodes/sh-script.el +++ b/lisp/progmodes/sh-script.el @@ -429,7 +429,7 @@ name symbol." (defvar sh-mode-syntax-table (sh-mode-syntax-table () ?\# "<" - ?\n ">#" + ?\n ">#s" ?\" "\"\"" ?\' "\"'" ?\` "\"`" diff --git a/src/syntax.c b/src/syntax.c index c5a4b03955b..b82b091ced2 100644 --- a/src/syntax.c +++ b/src/syntax.c @@ -33,7 +33,7 @@ along with GNU Emacs. If not, see . */ #define SYNTAX_ENTRY(c) syntax_property_entry (c, 1) #define SYNTAX_WITH_FLAGS(c) syntax_property_with_flags (c, 1) -/* Eight single-bit flags have the following meanings: +/* Nine single-bit flags have the following meanings: 1. This character is the first of a two-character comment-start sequence. 2. This character is the second of a two-character comment-start sequence. 3. This character is the first of a two-character comment-end sequence. @@ -42,6 +42,7 @@ along with GNU Emacs. If not, see . */ 6. The char is part of a delimiter for comments of style "b". 7. This character is part of a nestable comment sequence. 8. The char is part of a delimiter for comments of style "c". + 9. The char will close an open string (except one opened by a string-fence). Note that any two-character sequence whose first character has flag 1 and whose second character has flag 2 will be interpreted as a comment start. @@ -108,7 +109,11 @@ SYNTAX_FLAGS_COMMENT_NESTED (int flags) { return (flags >> 22) & 1; } - +static bool +SYNTAX_FLAGS_CLOSE_STRING (int flags) +{ + return (flags >> 24) & 1; +} /* FLAGS should be the flags of the main char of the comment marker, e.g. the second for comstart and the first for comend. */ static int @@ -1206,6 +1211,10 @@ the value of a `syntax-table' text property. */) case 'c': val |= 1 << 23; break; + + case 's': + val |= 1 << 24; + break; } if (val < ASIZE (Vsyntax_code_object) && NILP (match)) @@ -1257,6 +1266,8 @@ c (on any of its chars) using this flag: p means CHAR is a prefix character for `backward-prefix-chars'; such characters are treated as whitespace when they occur between expressions. + s means CHAR will terminate any open string (except one started by a + character with generic string fence syntax). usage: (modify-syntax-entry CHAR NEWENTRY &optional SYNTAX-TABLE) */) (Lisp_Object c, Lisp_Object newentry, Lisp_Object syntax_table) { @@ -1294,7 +1305,8 @@ DEFUN ("internal-describe-syntax-value", Finternal_describe_syntax_value, (Lisp_Object syntax) { int code, syntax_code; - bool start1, start2, end1, end2, prefix, comstyleb, comstylec, comnested; + bool start1, start2, end1, end2, prefix, comstyleb, comstylec, comnested, + strclose; char str[2]; Lisp_Object first, match_lisp, value = syntax; @@ -1335,6 +1347,7 @@ DEFUN ("internal-describe-syntax-value", Finternal_describe_syntax_value, comstyleb = SYNTAX_FLAGS_COMMENT_STYLEB (syntax_code); comstylec = SYNTAX_FLAGS_COMMENT_STYLEC (syntax_code); comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax_code); + strclose = SYNTAX_FLAGS_CLOSE_STRING (syntax_code); if (Smax <= code) { @@ -1368,6 +1381,8 @@ DEFUN ("internal-describe-syntax-value", Finternal_describe_syntax_value, insert ("c", 1); if (comnested) insert ("n", 1); + if (strclose) + insert ("s", 1); insert_string ("\twhich means: "); @@ -1439,6 +1454,9 @@ DEFUN ("internal-describe-syntax-value", Finternal_describe_syntax_value, insert1 (Fsubstitute_command_keys (prefixdoc)); } + if (strclose) + insert_string (",\n\t will close any string started by a char with \" syntax"); + return syntax; } @@ -2637,6 +2655,144 @@ syntax_multibyte (int c, bool multibyte_symbol_p) return ASCII_CHAR_P (c) || !multibyte_symbol_p ? SYNTAX (c) : Ssymbol; } +static bool +back_maybe_string (ptrdiff_t *from, ptrdiff_t *from_byte, + ptrdiff_t stop, bool multibyte_symbol_p) +{ + unsigned short int quit_count = 0; + enum syntaxcode code = Smax; + int syntax = Smax, prev_syntax; + ptrdiff_t at = *from, at_byte = *from_byte; + ptrdiff_t targ, targ_byte; + int c, stringterm; + ptrdiff_t defun_start; + ptrdiff_t defun_start_byte; + +#define DEC_AT \ + do { \ + rarely_quit (++quit_count); \ + prev_syntax = syntax; \ + DEC_BOTH (at, at_byte); \ + if (at >= stop) \ + UPDATE_SYNTAX_TABLE_BACKWARD (at); \ + if (char_quoted (at, at_byte)) \ + { \ + DEC_BOTH (at, at_byte); \ + syntax = code = Sword; \ + } \ + else \ + { \ + c = FETCH_CHAR_AS_MULTIBYTE (at_byte); \ + syntax = SYNTAX_WITH_FLAGS (c); \ + code = syntax_multibyte (c, multibyte_symbol_p); \ + } \ + if (SYNTAX_FLAGS_COMSTART_FIRST (syntax) \ + && SYNTAX_FLAGS_COMSTART_SECOND (prev_syntax)) \ + code = Scomment; \ + } while (0) + + /* Find the alleged string opener. */ + while ((at > stop) + && (code != Sstring) + && (!SYNTAX_FLAGS_CLOSE_STRING (syntax))) + { + DEC_AT; + } + if (code != Sstring) + goto lose; + stringterm = c; + targ = at; + targ_byte = at_byte; + + /* Now go back over paired delimiters which are STRINGTERM. */ + while (true) /* One quoted string per iteration. */ + { + DEC_AT; + /* Search back for a terminating string delimiter: */ + while ((at > stop) + && (code != Sstring) + && (code != Sstring_fence) + && (!SYNTAX_FLAGS_CLOSE_STRING (syntax))) + { + DEC_AT; + /* Check for comment and "other" strings. */ + } + if ((at <= stop) + || SYNTAX_FLAGS_CLOSE_STRING (syntax)) + goto done; + if (code == Sstring_fence) + stringterm = ST_STRING_STYLE; + else if (code == Sstring) + stringterm = c; + /* Now search back for the matching opening string delimiter: */ + DEC_AT; + while ((at > stop) + && !((stringterm == ST_STRING_STYLE) + && (syntax == Sstring_fence)) + && !((c == stringterm) + && (syntax == Sstring)) + && (!SYNTAX_FLAGS_CLOSE_STRING (syntax))) + { + if ((syntax == Sstring_fence) + || (syntax == Sstring) + || (syntax == Scomment)) + goto lossage; + DEC_AT; + } + if ((at <= stop) + || SYNTAX_FLAGS_CLOSE_STRING (syntax)) + goto lose; /* Even number of string delims in line. */ + } + + done: + UPDATE_SYNTAX_TABLE_FORWARD (targ); + *from = targ; + *from_byte = targ_byte; + return true; + lose: + UPDATE_SYNTAX_TABLE_FORWARD (*from); + return false; + + lossage: + /* We've encountered possible comments or strings with mixed + delimiters. Bail out and scan forward from a safe position. */ + { + struct lisp_parse_state state; + bool adjusted = true; + + defun_start = find_defun_start (*from, *from_byte); + defun_start_byte = find_start_value_byte; + adjusted = (defun_start > BEGV); + internalize_parse_state (Qnil, &state); + scan_sexps_forward (&state, + defun_start, defun_start_byte, + *from, TYPE_MINIMUM (EMACS_INT), + 0, 0); + if (!adjusted) + { + adjusted = true; + find_start_value + = CONSP (state.levelstarts) ? XINT (XCAR (state.levelstarts)) + : state.thislevelstart >= 0 ? state.thislevelstart + : find_start_value; + find_start_value_byte = CHAR_TO_BYTE (find_start_value); + } + + if ((state.instring != -1) + && (state.instring != ST_STRING_STYLE) + && (state.comstr_start >= stop)) + { + UPDATE_SYNTAX_TABLE_BACKWARD (state.comstr_start); + *from = state.comstr_start; + *from_byte = CHAR_TO_BYTE (*from); + return true; + } + /* Syntax table is already valid at *FROM, after the + `scan_sexps_forward' */ + return false; + } +} + static Lisp_Object scan_lists (EMACS_INT from, EMACS_INT count, EMACS_INT depth, bool sexpflag) { @@ -2803,13 +2959,16 @@ scan_lists (EMACS_INT from, EMACS_INT count, EMACS_INT depth, bool sexpflag) while (1) { enum syntaxcode c_code; + int c_code_flags; if (from >= stop) goto lose; UPDATE_SYNTAX_TABLE_FORWARD (from); c = FETCH_CHAR_AS_MULTIBYTE (from_byte); c_code = syntax_multibyte (c, multibyte_symbol_p); + c_code_flags = SYNTAX_WITH_FLAGS (c); if (code == Sstring - ? c == stringterm && c_code == Sstring + ? (c == stringterm && c_code == Sstring) + || SYNTAX_FLAGS_CLOSE_STRING (c_code_flags) : c_code == Sstring_fence) break; @@ -2965,6 +3124,10 @@ scan_lists (EMACS_INT from, EMACS_INT count, EMACS_INT depth, bool sexpflag) for very little gain, so we don't bother either. -sm */ if (found) from = out_charpos, from_byte = out_bytepos; + else if (SYNTAX_FLAGS_CLOSE_STRING (syntax) + && back_maybe_string (&from, &from_byte, stop, + multibyte_symbol_p)) + goto done2; break; case Scomment_fence: @@ -3006,7 +3169,14 @@ scan_lists (EMACS_INT from, EMACS_INT count, EMACS_INT depth, bool sexpflag) } if (!depth && sexpflag) goto done2; break; - default: + case Swhitespace: + case Spunct: + if (SYNTAX_FLAGS_CLOSE_STRING (syntax) + && back_maybe_string (&from, &from_byte, stop, + multibyte_symbol_p)) + goto done2; + break; + default: /* Ignore whitespace, punctuation, quote, endcomment. */ break; } @@ -3046,7 +3216,7 @@ function scans over parentheses until the depth goes to zero COUNT times. Hence, positive DEPTH moves out that number of levels of parentheses, while negative DEPTH moves to a deeper level. -Comments are ignored if `parse-sexp-ignore-comments' is non-nil. +Comments are skipped over if `parse-sexp-ignore-comments' is non-nil. If we reach the beginning or end of the accessible part of the buffer before we have scanned over COUNT lists, return nil if the depth at @@ -3065,7 +3235,7 @@ DEFUN ("scan-sexps", Fscan_sexps, Sscan_sexps, 2, 2, 0, If COUNT is negative, scan backwards. Returns the character number of the position thus found. -Comments are ignored if `parse-sexp-ignore-comments' is non-nil. +Comments are skipped over if `parse-sexp-ignore-comments' is non-nil. If the beginning or end of (the accessible part of) the buffer is reached in the middle of a parenthetical grouping, an error is signaled. @@ -3396,10 +3566,12 @@ do { prev_from = from; \ { int c; enum syntaxcode c_code; + int c_code_flags; if (from >= end) goto done; c = FETCH_CHAR_AS_MULTIBYTE (from_byte); c_code = SYNTAX (c); + c_code_flags = SYNTAX_WITH_FLAGS (c); /* Check C_CODE here so that if the char has a syntax-table property which says it is NOT @@ -3421,9 +3593,12 @@ do { prev_from = from; \ break; default: - break; + if (nofence + && SYNTAX_FLAGS_CLOSE_STRING (c_code_flags)) + goto string_end; + break; } - INC_FROM; + INC_FROM; rarely_quit (++quit_count); } } -- 2.39.5