From 8657afac774f36777d0fdd368e0bec64beca22ae Mon Sep 17 00:00:00 2001
From: =?utf8?q?Mattias=20Engdeg=C3=A5rd?=
Date: Fri, 16 Jun 2023 12:37:07 +0200
Subject: [PATCH] Correct conversion of strings to tree-sitter query syntax

The treesitter query syntax for string literals differs from that of
Elisp so we cannot just use the Lisp printer (bug#64017).

* src/treesit.c (treesit_query_string_string): New function.
(Ftreesit_pattern_expand): Use it.
* test/src/treesit-tests.el (treesit-query-api): Add test case.
---
 src/treesit.c             | 51 +++++++++++++++++++++++++++++++++++++++
 test/src/treesit-tests.el |  7 ++++++-
 2 files changed, 57 insertions(+), 1 deletion(-)

diff --git a/src/treesit.c b/src/treesit.c
index 0af0e347694..680e510b74d 100644
--- a/src/treesit.c
+++ b/src/treesit.c
@@ -2299,6 +2299,54 @@ produced by tree-sitter.  */)
 
 /*** Query functions */
 
+/* Convert a Lisp string to its printed representation in the tree-sitter
+   query syntax: a double-quoted literal in which every byte the query
+   parser treats specially is backslash-escaped.  STR is only read; the
+   result is a fresh string that is multibyte if and only if STR is.  */
+static Lisp_Object
+treesit_query_string_string (Lisp_Object str)
+{
+  /* Strings in the treesit query syntax only have the escapes
+     \n \r \t \0 and any other escaped char stands for that character.
+     Literal LF, NUL and " are forbidden.  Since backslash always starts
+     an escape, a literal backslash must itself be written as \\.  */
+  ptrdiff_t nbytes = SBYTES (str);
+  ptrdiff_t escapes = 0;
+  for (ptrdiff_t i = 0; i < nbytes; i++)
+    {
+      unsigned char c = SREF (str, i);
+      escapes += (c == '\0' || c == '\n' || c == '\r' || c == '\t'
+                  || c == '"' || c == '\\');
+    }
+  /* Each escape adds one backslash: one byte and one character.  */
+  ptrdiff_t nchars = SCHARS (str);
+  ptrdiff_t extra = escapes + 2;   /* backslashes + double quotes */
+  Lisp_Object dst = (STRING_MULTIBYTE (str)
+                     ? make_uninit_multibyte_string (nchars + extra,
+                                                     nbytes + extra)
+                     : make_uninit_string (nbytes + extra));
+  unsigned char *d = SDATA (dst);
+  *d++ = '"';
+  for (ptrdiff_t i = 0; i < nbytes; i++)
+    {
+      unsigned char c = SREF (str, i);
+      switch (c)
+        {
+        case '\0': *d++ = '\\'; *d++ = '0'; break;
+        case '\n': *d++ = '\\'; *d++ = 'n'; break;
+        case '\r': *d++ = '\\'; *d++ = 'r'; break;
+        case '\t': *d++ = '\\'; *d++ = 't'; break;
+        case '"':  *d++ = '\\'; *d++ = '"'; break;
+        case '\\': *d++ = '\\'; *d++ = '\\'; break;
+        default:   *d++ = c; break;
+        }
+    }
+  *d++ = '"';
+  /* Both passes must have agreed on which bytes needed escaping.  */
+  eassert (d == SDATA (dst) + SBYTES (dst));
+  return dst;
+}
+
 DEFUN ("treesit-pattern-expand",
        Ftreesit_pattern_expand,
        Streesit_pattern_expand, 1, 1, 0,
@@ -2349,6 +2397,9 @@ See Info node `(elisp)Pattern Matching' for detailed explanation.  */)
                                 pattern,
                                 Vtreesit_str_space),
                     closing_delimiter);
+  if (STRINGP (pattern))
+    return treesit_query_string_string (pattern);
+
   return Fprin1_to_string (pattern, Qnil, Qt);
 }
 
diff --git a/test/src/treesit-tests.el b/test/src/treesit-tests.el
index 7a8e53924eb..04aa91ddca6 100644
--- a/test/src/treesit-tests.el
+++ b/test/src/treesit-tests.el
@@ -461,7 +461,12 @@ BODY is the test body."
             "(type field: (_) @capture .) ? * + \"return\""
             (treesit-query-expand
              '((type field: (_) @capture :anchor)
-               :? :* :+ "return")))))))
+               :? :* :+ "return"))))
+
+      ;; Test string conversion in `treesit-pattern-expand'.
+      (should (equal
+               (treesit-pattern-expand "a\nb\rc\td\0e\"f\1g\\h")
+               "\"a\\nb\\rc\\td\\0e\\\"f\1g\\\\h\""))))) 
 
 ;;; Narrow
 
-- 
2.39.2