From: Yuan Fu Date: Fri, 20 Dec 2024 07:10:32 +0000 (-0800) Subject: Add automated process to verify tree-sitter queries X-Git-Url: http://git.eshelyaron.com/gitweb/?a=commitdiff_plain;h=26ada83be2028dff996d3eb0a30a8be936c3797c;p=emacs.git Add automated process to verify tree-sitter queries This allows us to inform packagers of the grammar version they want to use when packaging tree-sitter grammars with Emacs. * lisp/treesit.el (treesit--builtin-language-sources): New variable. (treesit--verify-major-mode-queries): (treesit-verify-major-mode-queries): New functions. (cherry picked from commit fe06a2baac291b2eceadd12db3623436ab8e2395) --- diff --git a/lisp/treesit.el b/lisp/treesit.el index 98903dffa69..8509f4b5443 100644 --- a/lisp/treesit.el +++ b/lisp/treesit.el @@ -3914,6 +3914,9 @@ See `treesit-language-source-alist' for details." (defvar treesit--install-language-grammar-out-dir-history nil "History for OUT-DIR for `treesit-install-language-grammar'.") +(defvar treesit--install-language-grammar-full-clone nil + "If non-nil, do a full clone when cloning git repos.") + ;;;###autoload (defun treesit-install-language-grammar (lang &optional out-dir) "Build and install the tree-sitter language grammar library for LANG. @@ -3934,57 +3937,77 @@ executable programs, such as the C/C++ compiler and linker. Interactively, prompt for the directory in which to install the compiled grammar files. Non-interactively, use OUT-DIR; if it's nil, the grammar is installed to the standard location, the -\"tree-sitter\" directory under `user-emacs-directory'." +\"tree-sitter\" directory under `user-emacs-directory'. + +Return the git revision of the installed grammar, but it only works when +`treesit--install-language-grammar-full-clone' is t." (interactive (list (intern (completing-read "Language: " (mapcar #'car treesit-language-source-alist))) 'interactive)) - (when-let ((recipe - (or (assoc lang treesit-language-source-alist) - (if (eq out-dir 'interactive) - (treesit--install-language-grammar-build-recipe - lang) - (signal 'treesit-error `("Cannot find recipe for this language" ,lang))))) - (default-out-dir - (or (car treesit--install-language-grammar-out-dir-history) - (locate-user-emacs-file "tree-sitter"))) - (out-dir + (let* ((recipe + (or (assoc lang treesit-language-source-alist) (if (eq out-dir 'interactive) - (read-string - (format "Install to (default: %s): " - default-out-dir) - nil - 'treesit--install-language-grammar-out-dir-history - default-out-dir) - ;; When called non-interactively, OUT-DIR should - ;; default to DEFAULT-OUT-DIR. - (or out-dir default-out-dir)))) - (condition-case err - (progn - (apply #'treesit--install-language-grammar-1 - (cons out-dir recipe)) - - ;; Check that the installed language grammar is loadable. - (pcase-let ((`(,available . ,err) - (treesit-language-available-p lang t))) - (if (not available) - (display-warning - 'treesit - (format "The installed language grammar for %s cannot be located or has problems (%s): %s" - lang (nth 0 err) - (string-join - (mapcar (lambda (x) (format "%s" x)) - (cdr err)) - " "))) - ;; If success, Save the recipe for the current session. - (setf (alist-get lang treesit-language-source-alist) - (cdr recipe))))) - (error - (display-warning - 'treesit - (format "Error encountered when installing language grammar: %s" - err)))))) + (treesit--install-language-grammar-build-recipe + lang) + (signal 'treesit-error `("Cannot find recipe for this language" ,lang))))) + (default-out-dir + (or (car treesit--install-language-grammar-out-dir-history) + (locate-user-emacs-file "tree-sitter"))) + (out-dir + (if (eq out-dir 'interactive) + (read-string + (format "Install to (default: %s): " + default-out-dir) + nil + 'treesit--install-language-grammar-out-dir-history + default-out-dir) + ;; When called non-interactively, OUT-DIR should + ;; default to DEFAULT-OUT-DIR. + (or out-dir default-out-dir))) + version) + (when recipe + (condition-case err + (progn + (setq version (apply #'treesit--install-language-grammar-1 + (cons out-dir recipe))) + + ;; Check that the installed language grammar is loadable. + (pcase-let ((`(,available . ,err) + (treesit-language-available-p lang t))) + (if (not available) + (display-warning + 'treesit + (format "The installed language grammar for %s cannot be located or has problems (%s): %s" + lang (nth 0 err) + (string-join + (mapcar (lambda (x) (format "%s" x)) + (cdr err)) + " "))) + ;; If success, Save the recipe for the current session. + (setf (alist-get lang treesit-language-source-alist) + (cdr recipe))))) + (error + (display-warning + 'treesit + (format "Error encountered when installing language grammar: %s" + err))))) + version)) + +(defun treesit--language-git-revision () + "Return the Git revision of current directory. + +Return the output of \"git describe\". If anything goes wrong, return +nil." + (with-temp-buffer + (cond + ((eq 0 (call-process "git" nil t nil "describe")) + (string-trim (buffer-string))) + ((eq 0 (progn (erase-buffer) + (call-process "git" nil t nil "rev-parse" "HEAD"))) + (string-trim (buffer-string))) + (t nil)))) (defun treesit--call-process-signal (&rest args) "Run `call-process' with ARGS. @@ -4007,16 +4030,19 @@ content as signal data, and erase buffer afterwards." "Clone repo pointed by URL at commit REVISION to WORKDIR. REVISION may be nil, in which case the cloned repo will be at its -default branch." +default branch. + +Use shallow clone by default. Do a full clone when +`treesit--install-language-grammar-full-clone' is t." (message "Cloning repository") ;; git clone xxx --depth 1 --quiet [-b yyy] workdir - (if revision - (treesit--call-process-signal - "git" nil t nil "clone" url "--depth" "1" "--quiet" - "-b" revision workdir) - (treesit--call-process-signal - "git" nil t nil "clone" url "--depth" "1" "--quiet" - workdir))) + (let ((args (list "git" nil t nil "clone" url "--quiet"))) + (when (not treesit--install-language-grammar-full-clone) + (setq args (append args (list "--depth" "1")))) + (when revision + (setq args (append args (list "-b" revision)))) + (setq args (append args (list workdir))) + (apply #'treesit--call-process-signal args))) (defun treesit--install-language-grammar-1 (out-dir lang url &optional revision source-dir cc c++) @@ -4029,7 +4055,11 @@ does not exist). For LANG, URL, REVISION, SOURCE-DIR, GRAMMAR-DIR, CC, C++, see `treesit-language-source-alist'. If anything goes wrong, this -function signals an error." +function signals an error. + +Return the git revision of the installed grammar. The revision is +generated by \"git describe\". It only works when +`treesit--install-language-grammar-full-clone' is t." (let* ((lang (symbol-name lang)) (maybe-repo-dir (expand-file-name url)) (url-is-dir (file-accessible-directory-p maybe-repo-dir)) @@ -4048,7 +4078,8 @@ function signals an error." (signal 'treesit-error '("Emacs cannot figure out the file extension for dynamic libraries for this system, because `dynamic-library-suffixes' is nil")))) (out-dir (or (and out-dir (expand-file-name out-dir)) (locate-user-emacs-file "tree-sitter"))) - (lib-name (concat "libtree-sitter-" lang soext))) + (lib-name (concat "libtree-sitter-" lang soext)) + version) (unwind-protect (with-temp-buffer (if url-is-dir @@ -4059,6 +4090,7 @@ function signals an error." ;; header files use relative path (#include "../xxx"). ;; cd "${sourcedir}" (setq default-directory source-dir) + (setq version (treesit--language-git-revision)) (message "Compiling library") ;; cc -fPIC -c -I. parser.c (treesit--call-process-signal @@ -4104,7 +4136,8 @@ function signals an error." ;; Remove workdir if it's not a repo owned by user and we ;; managed to create it in the first place. (when (and (not url-is-dir) (file-exists-p workdir)) - (delete-directory workdir t))))) + (delete-directory workdir t))) + version)) ;;; Etc @@ -4147,6 +4180,136 @@ function signals an error." functions-in-source) "\n")))) +(defvar treesit--builtin-language-sources + '((c "https://github.com/tree-sitter/tree-sitter-c") + (cpp "https://github.com/tree-sitter/tree-sitter-cpp") + (cmake "https://github.com/uyha/tree-sitter-cmake") + (dockerfile "https://github.com/camdencheek/tree-sitter-dockerfile") + (go "https://github.com/tree-sitter/tree-sitter-go") + (ruby "https://github.com/tree-sitter/tree-sitter-ruby")) + "A list of sources for the builtin modes. +The source information are in the format of +`treesit-language-source-alist'. This is for development only.") + +(defun treesit--verify-major-mode-queries (modes langs grammar-dir) + "Verify font-lock queries in MODES. + +LANGS is a list of languages, it should cover all the languages used by +MODES. GRAMMAR-DIR is a temporary direction in which grammars are +installed. + +If the font-lock queries work fine with the latest grammar, insert some +comments in the source file saying that the modes are known to work with +that version of grammar. At the end of the process, show a list of +queries that has problems with latest grammar." + (let ((treesit-extra-load-path (list grammar-dir)) + (treesit-language-source-alist treesit--builtin-language-sources) + (treesit--install-language-grammar-full-clone t) + (version-alist nil) + (invalid-feature-list nil) + (valid-modes nil) + (mode-language-alist nil) + (file-modes-alist nil)) + (dolist (lang langs) + (let ((ver (treesit-install-language-grammar lang grammar-dir))) + (if ver + (push (cons lang ver) version-alist) + (error "Cannot get version for %s" lang)))) + + ;; Validate font-lock queries for each major mode. + (dolist (mode modes) + (let ((settings + (with-temp-buffer + (ignore-errors + (funcall mode) + (font-lock-mode -1) + treesit-font-lock-settings))) + (all-queries-valid t)) + (dolist (setting settings) + (let* ((query (treesit-font-lock-setting-query setting)) + (language (treesit-query-language query)) + (feature (treesit-font-lock-setting-feature setting))) + ;; Record that MODE uses LANGUAGE. + (unless (memq language (alist-get mode mode-language-alist)) + (push language (alist-get mode mode-language-alist))) + ;; Validate query. + (when (not (ignore-errors + (treesit-query-compile language query t) + t)) + (push (list mode language feature) invalid-feature-list) + (setq all-queries-valid nil)))) + (when all-queries-valid + (push mode valid-modes)))) + + ;; Group modes by their source file. + (dolist (mode valid-modes) + (let ((source-file (replace-regexp-in-string + (rx ".elc" eos) + ".el" + (car (get mode 'function-history))))) + (unless (member mode (alist-get source-file file-modes-alist + nil nil #'equal)) + (push mode (alist-get source-file file-modes-alist + nil nil #'equal))))) + + ;; Update the "known-to-work" version comment for the modes. + (pcase-dolist (`(,source-file . ,modes) file-modes-alist) + (let (beg) + (with-temp-buffer + (insert-file-contents source-file) + (goto-char (point-min)) + (when (not (search-forward + ";;; Tree-sitter language versions\n" nil t)) + (re-search-forward (rx (or ";;; Commentary:" ";;; Code:"))) + (forward-line -1) + (insert "\n;;; Tree-sitter language versions\n\n") + (forward-line -1)) + (setq beg (point)) + (search-forward "\n\n") + (delete-region beg (point)) + (insert ";;\n") + (dolist (mode modes) + (insert (format ";; %s is known to work with the following languages and version:\n" mode)) + (dolist (lang (alist-get mode mode-language-alist)) + (insert (format ";; - tree-sitter-%s: %s\n" lang (alist-get lang version-alist)))) + (insert ";;\n")) + (insert + ";; We try our best to make builtin modes work with latest grammar +;; versions, so a more recent grammar version has a good chance to work. +;; Send us a bug report if it doesn't.") + (insert "\n\n") + (write-file source-file)))) + + (pop-to-buffer (get-buffer-create "*verify major mode queries*")) + (let ((inhibit-read-only t)) + (erase-buffer) + (insert "Verified grammar and versions:\n") + (pcase-dolist (`(,lang . ,version) version-alist) + (insert (format "- %s: %s\n" lang version))) + (insert "\n") + (if (null invalid-feature-list) + (insert "All the queries are valid with latest grammar.\n") + (insert "The following modes has invalid queries:\n") + (dolist (entry invalid-feature-list) + (insert (format "mode: %s language: %s feature: %s" + (nth 0 entry) + (nth 1 entry) + (nth 2 entry))))) + (special-mode)))) + +(defun treesit-verify-major-mode-queries () + "Varify font-lock queries in builtin major modes. + +If the font-lock queries work fine with the latest grammar, insert some +comments in the source file saying that the modes are known to work with +that version of grammar. At the end of the process, show a list of +queries that has problems with latest grammar." + (interactive) + (treesit--verify-major-mode-queries + '(cmake-ts-mode dockerfile-ts-mode go-ts-mode ruby-ts-mode) + '(cmake dockerfile go ruby) + "/tmp/tree-sitter-grammars")) + ;;; Shortdocs (defun treesit--generate-shortdoc-examples ()