From: Yuan Fu Date: Tue, 13 Dec 2022 04:25:53 +0000 (-0800) Subject: Add improved tree-sitter navigation X-Git-Tag: emacs-29.0.90~1207 X-Git-Url: http://git.eshelyaron.com/gitweb/?a=commitdiff_plain;h=03ad1a92a2dae107277805f5b24ce1dab3479059;p=emacs.git Add improved tree-sitter navigation This new set of functions (and tests) should eliminate defun-navigation bugs and limitations we currently have. This commit doesn't change any existing bahavior: treesit-beginning/end-of-defun and friends are unchanged. The plan is to later switch gear and replace the current functions with the new ones introduced in this change. This is a relatively big change, but I've setup a comprehensive test, and it should fix current bugs, so I think it's ok to put it on the release branch. The gist of the new navigation is to use treesit--defuns-around to find the previous sibling defun, next sibling defun, and the parent defun, then use this information to move to previous/next beginning/end of defun in treesit--navigate-defun. I also added comprehensive testing that tests all four possible operations (prev-beg, next-beg, prev-end, next-end) starting at all possible positions (between two sibling defuns, inside a sibling defun, etc). * lisp/treesit.el (treesit-defun-type-regexp): Expand definition to allow (REGEXP . FILTER). Old functions don't support this, but it should be fine since we are soon replacing them. (treesit-defun-tactic) (treesit-defun-skipper): New variables. (treesit-default-defun-skipper) (treesit--defuns-around) (treesit--top-level-defun) (treesit--navigate-defun): New functions. * test/src/treesit-tests.el (treesit--ert-insert-and-parse-marker) (treesit--ert-collect-positions) (treesit--ert-test-defun-navigation): New helper functions. (treesit--ert-defun-navigation-python-program) (treesit--ert-defun-navigation-js-program) (treesit--ert-defun-navigation-bash-program) (treesit--ert-defun-navigation-nested-master): New variables. (treesit-defun-navigation-nested-1) (treesit-defun-navigation-nested-2) (treesit-defun-navigation-nested-3) (treesit-defun-navigation-top-level): New tests. --- diff --git a/lisp/treesit.el b/lisp/treesit.el index f176664bfde..3d7ae7031ef 100644 --- a/lisp/treesit.el +++ b/lisp/treesit.el @@ -1569,8 +1569,25 @@ BACKWARD and ALL are the same as in `treesit-search-forward'." "A regexp that matches the node type of defun nodes. For example, \"(function|class)_definition\". +Sometimes not all nodes matched by the regexp are valid defuns. +In that case, set this variable to a cons cell of the +form (REGEXP . FILTER), where FILTER is a function that takes a +node (the matched node) and returns t if node is valid, or nil +for invalid node. + This is used by `treesit-beginning-of-defun' and friends.") +(defvar-local treesit-defun-tactic 'nested + "Determines how does Emacs treat nested defuns. +If the value is `top-level', Emacs only move across top-level +defuns, if the value is `nested', Emacs recognizes nested defuns.") + +(defvar-local treesit-defun-skipper #'treesit-default-defun-skipper + "A function called after tree-sitter navigation moved a step. +It is called with no arguments. By default, this function tries +to move to the beginning of a line, either by moving to the empty +newline after a defun, or the beginning of a defun.") + (defvar-local treesit-defun-prefer-top-level nil "When non-nil, Emacs prefers top-level defun. @@ -1639,6 +1656,196 @@ ARG is the same as in `beginning-of-defun'." (when top (goto-char (treesit-node-end top))))) +(defun treesit-default-defun-skipper () + "Skips spaces after navigating a defun. +This fucntion tries to move to the beginning of a line, either by +moving to the empty newline after a defun, or to the beginning of +the current line if the beginning of the defun is indented." + (cond ((and (looking-at (rx (* (or " " "\\t")) "\n")) + (not (looking-at (rx bol)))) + (goto-char (match-end 0))) + ((save-excursion + (skip-chars-backward " \t") + (eq (point) (line-beginning-position))) + (goto-char (line-beginning-position))))) + +;; prev-sibling: +;; 1. end-of-node before pos +;; 2. highest such node +;; +;; next-sibling: +;; 1. beg-of-node after pos +;; 2. highest such node +;; +;; parent: +;; 1. node covers pos +;; 2. smallest such node +(defun treesit--defuns-around (pos regexp &optional pred) + "Return the previous, next, and parent defun around POS. + +Return a list of (PREV NEXT PARENT), where PREV and NEXT are +previous and next sibling defuns around POS, and PARENT is the +parent defun surrouding POS. All of three could be nil if no +sound defun exists. + +REGEXP and PRED are the same as in `treesit-defun-type-regexp'." + (let* ((node (treesit-node-at pos)) + ;; NODE-BEFORE/AFTER = NODE when POS is completely in NODE, + ;; but if not, that means point could be in between two + ;; defun, in that case we want to use a node that's actually + ;; before/after point. + (node-before (if (>= (treesit-node-start node) pos) + (treesit-search-forward-goto node "" t t t) + node)) + (node-after (if (<= (treesit-node-end node) pos) + (treesit-search-forward-goto node "" nil nil t) + node)) + (result (list nil nil nil)) + (pred (or pred (lambda (_) t)))) + ;; 1. Find previous and next sibling defuns. + (cl-loop + for idx from 0 to 1 + for node in (list node-before node-after) + for backward in '(t nil) + for pos-pred in (list (lambda (n) (<= (treesit-node-end n) pos)) + (lambda (n) (>= (treesit-node-start n) pos))) + ;; If point is inside a defun, our process below will never + ;; return a next/prev sibling outside of that defun, effectively + ;; any prev/next sibling is locked inside the smallest defun + ;; covering point, which is the correct behavior. That's because + ;; when there exists a defun that covers point, + ;; `treesit-search-forward' will first reach that defun, after + ;; that we only go upwards in the tree, so other defuns outside + ;; of the covering defun is never reached. (Don't use + ;; `treesit-search-forward-goto' as it breaks when NODE-AFTER is + ;; the last token of a parent defun: it will skip the parent + ;; defun because it wants to ensure progress.) + do (cl-loop for cursor = (when node + (save-excursion + (treesit-search-forward + node regexp backward backward))) + then (treesit-node-parent cursor) + while cursor + if (and (string-match-p + regexp (treesit-node-type cursor)) + (funcall pred cursor) + (funcall pos-pred cursor)) + do (setf (nth idx result) cursor))) + ;; 2. Find the parent defun. + (setf (nth 2 result) + (cl-loop for cursor = (or (nth 0 result) + (nth 1 result) + node) + then (treesit-node-parent cursor) + while cursor + if (and (string-match-p + regexp (treesit-node-type cursor)) + (funcall pred cursor) + (not (member cursor result))) + return cursor)) + result)) + +(defun treesit--top-level-defun (node regexp &optional pred) + "Return the top-level parent defun of NODE. +REGEXP and PRED are the same as in `treesit-defun-type-regexp'." + (let* ((pred (or pred (lambda (_) t)))) + ;; `treesit-search-forward-goto' will make sure the matched node + ;; is before POS. + (cl-loop for cursor = node + then (treesit-node-parent cursor) + while cursor + if (and (string-match-p + regexp (treesit-node-type cursor)) + (funcall pred cursor)) + do (setq node cursor)) + node)) + +(defun treesit--navigate-defun (pos arg side &optional recursing) + "Navigate defun ARG steps from POS. + +If ARG is positive, move forward that many steps, if negative, +move backward. If SIDE is `beg', stop at the beginning of a +defun, if SIDE is `end', stop at the end. + +This function doesn't actaully move point, it just returns the +position it would move to. If there aren't enough defuns to move +across, return nil. + +RECURSING is an internal parameter, if non-nil, it means this +function is called recursively." + (pcase-let* + ((counter (abs arg)) + (`(,regexp . ,pred) + (if (consp treesit-defun-type-regexp) + treesit-defun-type-regexp + (cons treesit-defun-type-regexp nil))) + ;; Move POS to the beg/end of NODE. If NODE is nil, terminate. + ;; Return the position we moved to. + (advance (lambda (node) + (let ((dest (pcase side + ('beg (treesit-node-start node)) + ('end (treesit-node-end node))))) + (if (null dest) + (throw 'term nil) + dest))))) + (catch 'term + (while (> counter 0) + (pcase-let + ((`(,prev ,next ,parent) + (treesit--defuns-around pos regexp pred))) + ;; When PARENT is nil, nested and top-level are the same, + ;; there there is a PARENT, make PARENT to be the top-level + ;; parent and pretend there is no nested PREV and NEXT. + (when (and (eq treesit-defun-tactic 'top-level) + parent) + (setq parent (treesit--top-level-defun + parent regexp pred) + prev nil + next nil)) + ;; Move... + (if (> arg 0) + ;; ...forward. + (if (and (eq side 'beg) + ;; Should we skip the defun (recurse)? + (cond (next (not recursing)) ; [1] (see below) + (parent t) ; [2] + (t nil))) + ;; Special case: go to next beg-of-defun. Set POS + ;; to the end of next/parent defun, and run one more + ;; step. If there is a next defun, step over it, so + ;; we only need to recurse once, so we don't need to + ;; recurse if we are already recursing [1]. If there + ;; is no next but a parent, keep stepping out + ;; (recursing) until we got out of the parents until + ;; (1) there is a next sibling defun, or (2) no more + ;; parents [2]. + (setq pos + (or (treesit--navigate-defun + (treesit-node-end (or next parent)) + 1 'beg t) + (throw 'term nil))) + ;; Normal case. + (setq pos (funcall advance (or next parent)))) + ;; ...backward. + (if (and (eq side 'end) + (cond (prev (not recursing)) + (parent t) + (t nil))) + ;; Special case: go to prev end-of-defun. + (setq pos + (or (treesit--navigate-defun + (treesit-node-start (or prev parent)) + -1 'end t) + (throw 'term nil))) + ;; Normal case. + (setq pos (funcall advance (or prev parent))))) + ;; A successful step! Decrement counter. + (cl-decf counter)))) + ;; Counter equal to 0 means we successfully stepped ARG steps. + (if (eq counter 0) + pos + nil))) + ;;; Activating tree-sitter (defun treesit-ready-p (language &optional quiet) diff --git a/test/src/treesit-tests.el b/test/src/treesit-tests.el index 188a9557928..eaf2df62104 100644 --- a/test/src/treesit-tests.el +++ b/test/src/treesit-tests.el @@ -607,6 +607,288 @@ visible_end.)" (insert "]") (should (treesit-node-check array-node 'outdated)))) +;;; Defun navigation +;; +;; I've setup a framework for easier testing of defun navigation. +;; +;; To use it for a particular langauge, first write a test program +;; similar to `treesit--ert-defun-navigation-python-program', and +;; insert markers. Markers that marks BOLs are defined as follows: +;; +;; 100 Before 1st parent +;; 101 Beg of 1st parent +;; 102 End of 1st parent +;; 103 Beg of 2nd parent +;; 104 Beg of 1st method +;; 105 End of 1st method +;; 106 Beg of 2nd method +;; 107 End of 2nd method +;; 108 End of 2nd parent +;; 109 Beg of 3rd parent +;; 110 End of 3rd parent +;; 999 Dummy markers +;; +;; Then add marker 0-9 following the definition given in +;; `treesit--ert-defun-navigation-nested-master'. Then you can use +;; `treesit--ert-test-defun-navigation', pass the test program you +;; just wrote, and the appropriate master: +;; +;; - `treesit--ert-defun-navigation-nested-master' for nested defun +;; - `treesit--ert-defun-navigation-top-level-master' for top-level + + +(defun treesit--ert-insert-and-parse-marker (opening closing text) + "Insert TEXT and parse the marker positions in it. + +TEXT should be a string in which contains position markings +like (1). OPENING and CLOSING are position marking's delimiters, +for (1), OPENING and CLOSING should be \"(\" and \")\", +respectively. + +This function inserts TEXT, parses and removes all the markings, +and returns an alist of (NUMBER . POS), where number is each +marking's number, and POS is each marking's position." + (declare (indent 2)) + (let (result) + (insert text) + (goto-char (point-min)) + (while (re-search-forward + (rx-to-string `(seq ,opening (group (+ digit)) ,closing)) + nil t) + (let ((idx (string-to-number (match-string 1)))) + (push (cons idx (match-beginning 0)) result) + (delete-region (match-beginning 0) (match-end 0)))) + (nreverse result))) + +(defun treesit--ert-collect-positions (positions functions) + "Collect posifions after caling each function in FUNCTIONS. + +POSITIONS should be a list of buffer positions, FUNCTIONS should +be a list of functions. This function collects the return value +of each function in FUNCTIONS starting at each position in +POSITIONS. + +Return a list of (POS...) where each POS corresponds to a +function in FUNCTIONS. For example, if buffer content is +\"123\", POSITIONS is (2 3), FUNCTIONS is (point-min point-max), +the return value is ((1 3) (1 3))." + (cl-loop for pos in positions + collect (cl-loop for fn in functions + collect (progn + (goto-char pos) + (funcall fn))))) + +(defun treesit--ert-test-defun-navigation + (init program master &optional opening closing) + "Run defun navigation tests on PROGRAM and MASTER. + +INIT is a setup function that runs right after this function +creates a temporary buffer. It should take no argument. + +PROGRAM is a program source in string, MASTER is a list of +\(START PREV-BEG NEXT-END PREV-END NEXT-BEG), where START is the +starting marker position, and the rest are marker positions the +corresponding navigation should stop at (after running +`treesit-defun-skipper'). + +OPENING and CLOSING are the same as in +`treesit--ert-insert-and-parse-marker', by default they are \"[\" +and \"]\"." + (with-temp-buffer + (funcall init) + (let* ((opening (or opening "[")) + (closing (or closing "]")) + ;; Insert program and parse marker positions. + (marker-alist (treesit--ert-insert-and-parse-marker + opening closing program)) + ;; Translate marker positions into buffer positions. + (decoded-master + (cl-loop for record in master + collect + (cl-loop for pos in record + collect (alist-get pos marker-alist)))) + ;; Collect positions each function returns. + (positions + (treesit--ert-collect-positions + ;; The first columnn of DECODED-MASTER. + (mapcar #'car decoded-master) + ;; Four functions: next-end, prev-beg, next-beg, prev-end. + (mapcar (lambda (conf) + (lambda () + (if-let ((pos (funcall + #'treesit--navigate-defun + (point) (car conf) (cdr conf)))) + (save-excursion + (goto-char pos) + (funcall treesit-defun-skipper) + (point))))) + '((-1 . beg) + (1 . end) + (-1 . end) + (1 . beg)))))) + ;; Verify each position. + (cl-loop for record in decoded-master + for orig-record in master + for poss in positions + for name = (format "marker %d" (car orig-record)) + do (should (equal (cons name (cdr record)) + (cons name poss))))))) + +(defvar treesit--ert-defun-navigation-python-program + "[100] +[101]class Class1(): +[999] prop = 0 +[102] +[103]class Class2():[0] +[104] [1]def method1(): +[999] [2]return 0[3] +[105] [4] +[106] [5]def method2(): +[999] [6]return 0[7] +[107] [8] +[999] prop = 0[9] +[108] +[109]class Class3(): +[999] prop = 0[10] +[110] +" + "Python source for navigation test.") + +(defvar treesit--ert-defun-navigation-js-program + "[100] +[101]class Class1 { +[999]} +[102] +[103]class Class2 {[0] +[104] [1]method1() { +[999] [2]return 0; +[999] }[3] +[105] [4] +[106] [5]method2() { +[999] [6]return 0; +[999] }[7] +[107][8] +[999]}[9] +[108] +[109]class class3 { +[999]}[10] +[110] +" + "Javascript source for navigation test.") + +(defvar treesit--ert-defun-navigation-bash-program + "[100] +[101]parent1 () { +[999]} +[102] +[103]parent2 () {[0] +[104] [1]sibling1 () { +[999] [2]echo hi +[999] }[3] +[105] [4] +[106] [5]sibling2 () { +[999] [6]echo hi +[999] }[7] +[107][8] +[999]}[9] +[108] +[109]parent3 () { +[999]} +[110] +" + "Javascript source for navigation test.") + +(defvar treesit--ert-defun-navigation-nested-master + ;; START PREV-BEG NEXT-END PREV-END NEXT-BEG + '((0 103 105 102 106) ; Between Beg of parent & 1st sibling. + (1 103 105 102 106) ; Beg of 1st sibling. + (2 104 105 102 106) ; Inside 1st sibling. + (3 104 107 102 109) ; End of 1st sibling. + (4 104 107 102 109) ; Between 1st sibling & 2nd sibling. + (5 104 107 102 109) ; Beg of 2nd sibling. + (6 106 107 105 109) ; Inside 2nd sibling. + (7 106 108 105 109) ; End of 2nd sibling. + (8 106 108 105 109) ; Between 2nd sibling & end of parent. + (9 103 110 102 nil) ; End of parent. + + (100 nil 102 nil 103) ; Before 1st parent. + (101 nil 102 nil 103) ; Beg of 1st parent. + (102 101 108 nil 109) ; Between 1st & 2nd parent. + (103 101 108 nil 109) ; Beg of 2nd parent. + (110 109 nil 108 nil) ; After 3rd parent. + ) + "Master of nested navigation test. + +This basically says, e.g., \"start with point on marker 0, go to +the prev-beg, now point should be at marker 103\", etc.") + +(defvar treesit--ert-defun-navigation-top-level-master + ;; START PREV-BEG NEXT-END NEXT-BEG PREV-END + '((0 103 108 102 109) ; Between Beg of parent & 1st sibling. + (1 103 108 102 109) ; Beg of 1st sibling. + (2 103 108 102 109) ; Inside 1st sibling. + (3 103 108 102 109) ; End of 1st sibling. + (4 103 108 102 109) ; Between 1st sibling & 2nd sibling. + (5 103 108 102 109) ; Beg of 2nd sibling. + (6 103 108 102 109) ; Inside 2nd sibling. + (7 103 108 102 109) ; End of 2nd sibling. + (8 103 108 102 109) ; Between 2nd sibling & end of parent. + (9 103 110 102 nil) ; End of parent. + + ;; Top-level defuns should be identical to the nested test. + (100 nil 102 nil 103) ; Before 1st parent. + (101 nil 102 nil 103) ; Beg of 1st parent. + (102 101 108 nil 109) ; Between 1st & 2nd parent. + (103 101 108 nil 109) ; Beg of 2nd parent. + (110 109 nil 108 nil) ; After 3rd parent. + ) + "Master of top-level navigation test.") + +(ert-deftest treesit-defun-navigation-nested-1 () + "Test defun navigation." + (skip-unless (treesit-language-available-p 'python)) + ;; Nested defun navigation + (let ((treesit-defun-tactic 'nested)) + (require 'python) + (treesit--ert-test-defun-navigation + 'python-ts-mode + treesit--ert-defun-navigation-python-program + treesit--ert-defun-navigation-nested-master))) + +(ert-deftest treesit-defun-navigation-nested-2 () + "Test defun navigation using `js-ts-mode'." + (skip-unless (treesit-language-available-p 'javascript)) + ;; Nested defun navigation + (let ((treesit-defun-tactic 'nested)) + (require 'js) + (treesit--ert-test-defun-navigation + 'js-ts-mode + treesit--ert-defun-navigation-js-program + treesit--ert-defun-navigation-nested-master))) + +(ert-deftest treesit-defun-navigation-nested-3 () + "Test defun navigation using `bash-ts-mode'." + (skip-unless (treesit-language-available-p 'bash)) + ;; Nested defun navigation + (let ((treesit-defun-tactic 'nested)) + (treesit--ert-test-defun-navigation + (lambda () + (treesit-parser-create 'bash) + (setq-local treesit-defun-type-regexp "function_definition")) + treesit--ert-defun-navigation-bash-program + treesit--ert-defun-navigation-nested-master))) + +(ert-deftest treesit-defun-navigation-top-level () + "Test top-level only defun navigation." + (skip-unless (treesit-language-available-p 'python)) + ;; Nested defun navigation + (let ((treesit-defun-tactic 'top-level)) + (require 'python) + (treesit--ert-test-defun-navigation + 'python-ts-mode + treesit--ert-defun-navigation-python-program + treesit--ert-defun-navigation-top-level-master))) + ;; TODO ;; - Functions in treesit.el ;; - treesit-load-name-override-list