From ca79b138d424766e3bfe8e4ca5d2b315b9ea4408 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Jo=C3=A3o=20T=C3=A1vora?= Date: Sun, 26 Feb 2023 12:50:42 +0000 Subject: [PATCH] Eglot: rename and redocument encoding-related functions (bug#61726) * lisp/progmodes/eglot.el (eglot-current-column): Obsolete. (eglot-lsp-abiding-column): Obsolete. (eglot-current-column-function): Obsolete. (eglot-current-linepos-function): Rename from eglot-current-column-function. (eglot-utf-8-linepos): Rename from eglot-bytewise-column. (eglot-utf-16-linepos): Rename from eglot-lsp-abiding-column. (eglot-utf-32-linepos): Rename from eglot-current-column. (eglot-move-to-current-column): Obsolete. (eglot-move-to-lsp-abiding-column): Obsolete. (eglot-move-to-column-function): Obsolete. (eglot-move-to-linepos-function): Rename from eglot-move-to-column-function. (eglot-move-to-utf-8-linepos): Rename from eglot-move-to-bytewise-column. (eglot-move-to-utf-16-linepos): Rename from eglot-move-to-lsp-abiding-column. (eglot-move-to-utf-32-linepos): Rename from eglot-move-to-current-column. (eglot--managed-mode): Adjust. (eglot-client-capabilities): Trim whitespace. * test/lisp/progmodes/eglot-tests.el (eglot-test-lsp-abiding-column) (eglot-test-lsp-abiding-column-1): Use new function/variable names. --- lisp/progmodes/eglot.el | 136 ++++++++++++++++------------- test/lisp/progmodes/eglot-tests.el | 10 +-- 2 files changed, 81 insertions(+), 65 deletions(-) diff --git a/lisp/progmodes/eglot.el b/lisp/progmodes/eglot.el index 9e5dd268a94..e63dd563c44 100644 --- a/lisp/progmodes/eglot.el +++ b/lisp/progmodes/eglot.el @@ -816,9 +816,7 @@ treated as in `eglot--dbind'." `(:valueSet [,@(mapcar #'car eglot--tag-faces)]))) - :general - (list - :positionEncodings ["utf-32" "utf-8" "utf-16"]) + :general (list :positionEncodings ["utf-32" "utf-8" "utf-16"]) :experimental eglot--{}))) (cl-defgeneric eglot-workspace-folders (server) @@ -1442,25 +1440,31 @@ CONNECT-ARGS are passed as additional arguments to (let ((warning-minimum-level :error)) (display-warning 'eglot (apply #'format format args) :warning))) -(defun eglot-current-column () (- (point) (line-beginning-position))) + +;;; Encoding fever +;;; +(define-obsolete-function-alias + 'eglot-lsp-abiding-column 'eglot-utf-16-linepos "29.1") +(define-obsolete-function-alias + 'eglot-current-column 'eglot-utf-32-linepos "29.1") +(define-obsolete-variable-alias + 'eglot-current-column-function 'eglot-current-linepos-function "29.1") + +(defvar eglot-current-linepos-function #'eglot-utf-16-linepos + "Function calculating number of code units to line beginning. + +This is the inverse operation of +`eglot-move-to-linepos-function' (which see). It is a function of +no arguments returning the number of code units corresponding to +the current position of point relative to line beginning.") -(defun eglot-bytewise-column () - "Calculate current column using the LSP `utf-8' criterion." +(defun eglot-utf-8-linepos () + "Calculate number of code units to line beginning using UTF-8." (length (encode-coding-region (line-beginning-position) (point) 'utf-8-unix t))) -(defvar eglot-current-column-function #'eglot-lsp-abiding-column - "Function to calculate the current column. - -This is the inverse operation of -`eglot-move-to-column-function' (which see). It is a function of -no arguments returning a column number. For buffers managed by -fully LSP-compliant servers, this should be set to -`eglot-lsp-abiding-column' (the default), and -`eglot-current-column' for all others.") - -(defun eglot-lsp-abiding-column (&optional lbp) - "Calculate current COLUMN as defined by the LSP spec. +(defun eglot-utf-16-linepos (&optional lbp) + "Calculate number of code units to line beginning using UTF-16. LBP defaults to `line-beginning-position'." (/ (- (length (encode-coding-region (or lbp (line-beginning-position)) ;; Fix github#860 @@ -1468,60 +1472,71 @@ LBP defaults to `line-beginning-position'." 2) 2)) +(defun eglot-utf-32-linepos () + "Calculate number of code units to line beginning using UTF-32." + (- (point) (line-beginning-position))) + (defun eglot--pos-to-lsp-position (&optional pos) "Convert point POS to LSP position." (eglot--widening ;; LSP line is zero-origin; emacs is one-origin. (list :line (1- (line-number-at-pos pos t)) :character (progn (when pos (goto-char pos)) - (funcall eglot-current-column-function))))) + (funcall eglot-current-linepos-function))))) -(defvar eglot-move-to-column-function #'eglot-move-to-lsp-abiding-column - "Function to move to a column reported by the LSP server. +(define-obsolete-function-alias + 'eglot-move-to-current-column 'eglot-move-to-utf-32-linepos "29.1") +(define-obsolete-function-alias + 'eglot-move-to-lsp-abiding-column 'eglot-move-to-utf-16-linepos "29.1") +(define-obsolete-variable-alias +'eglot-move-to-column-function 'eglot-move-to-linepos-function "29.1") -According to the standard, LSP column/character offsets are based -on a count of UTF-16 code units, not actual visual columns. So -when LSP says position 3 of a line containing just \"aXbc\", -where X is a multi-byte character, it actually means `b', not -`c'. However, many servers don't follow the spec this closely. +(defvar eglot-move-to-linepos-function #'eglot-move-to-utf-16-linepos + "Function to move to a column reported by the LSP server. -For buffers managed by fully LSP-compliant servers, this should -be set to `eglot-move-to-lsp-abiding-column' (the default), and -`eglot-move-to-column' for all others.") +Per the LSP spec, character offsets in LSP Position objects count +UTF-16 code units, not actual code points. So when LSP says +position 3 of a line containing just \"aXbc\", where X is a funny +looking character in the UTF-16 \"supplementary plane\", it +actually means `b', not `c'. The default value +`eglot-move-to-utf-16-linepos' accounts for this. -(defun eglot-move-to-column (column) - "Move to COLUMN without closely following the LSP spec." - ;; We cannot use `move-to-column' here, because it moves to *visual* - ;; columns, which can be different from LSP columns in case of - ;; `whitespace-mode', `prettify-symbols-mode', etc. (github#296, - ;; github#297) - (goto-char (min (+ (line-beginning-position) column) - (line-end-position)))) +This variable also be set to `eglot-move-to-utf-8-linepos' or +`eglot-move-to-utf-32-linepos' for servers not closely following +the spec. Also, since LSP 3.17 server and client may agree on an +encoding and Eglot will set this variable automatically.") -(defun eglot-move-to-lsp-abiding-column (column) - "Move to COLUMN as computed by LSP's UTF-16 criterion." +(defun eglot-move-to-utf-8-linepos (n) + "Move to line's Nth code unit as computed by LSP's UTF-8 criterion." (let* ((bol (line-beginning-position)) - (goal-char (+ bol column)) + (goal-byte (+ (position-bytes bol) n)) (eol (line-end-position))) (goto-char bol) - (while (and (< (point) goal-char) - (< (point) eol)) - (if (<= #x010000 (char-after) #x10ffff) - (setq goal-char (1- goal-char))) + (while (and (< (position-bytes (point)) goal-byte) (< (point) eol)) + ;; raw bytes take 2 bytes in the buffer + (when (>= (char-after) #x3fff80) (setq goal-byte (1+ goal-byte))) (forward-char 1)))) -(defun eglot-move-to-bytewise-column (column) - "Move to COLUMN as computed using the LSP `utf-8' criterion." +(defun eglot-move-to-utf-16-linepos (n) + "Move to line's Nth code unit as computed by LSP's UTF-16 criterion." (let* ((bol (line-beginning-position)) - (goal-byte (+ (position-bytes bol) column)) - (eol (line-end-position))) + (goal-char (+ bol n)) + (eol (line-end-position))) (goto-char bol) - (while (and (< (position-bytes (point)) goal-byte) - (< (point) eol)) - (if (>= (char-after) #x3fff80) ; raw bytes take 2 bytes in the buffer - (setq goal-byte (1+ goal-byte))) + (while (and (< (point) goal-char) (< (point) eol)) + ;; code points in the "supplementary place" use two code units + (when (<= #x010000 (char-after) #x10ffff) (setq goal-char (1- goal-char))) (forward-char 1)))) +(defun eglot-move-to-utf-32-linepos (n) + "Move to line's Nth code unit as computed by LSP's UTF-32 criterion." + ;; We cannot use `move-to-column' here, because it moves to *visual* + ;; columns, which can be different from LSP characters in case of + ;; `whitespace-mode', `prettify-symbols-mode', etc. (github#296, + ;; github#297) + (goto-char (min (+ (line-beginning-position) n) + (line-end-position)))) + (defun eglot--lsp-position-to-point (pos-plist &optional marker) "Convert LSP position POS-PLIST to Emacs point. If optional MARKER, return a marker instead" @@ -1532,16 +1547,17 @@ If optional MARKER, return a marker instead" (forward-line (min most-positive-fixnum (plist-get pos-plist :line))) (unless (eobp) ;; if line was excessive leave point at eob - (let ((tab-width 1) - (col (plist-get pos-plist :character))) + (let ((col (plist-get pos-plist :character))) (unless (wholenump col) (eglot--warn "Caution: LSP server sent invalid character position %s. Using 0 instead." col) (setq col 0)) - (funcall eglot-move-to-column-function col))) + (funcall eglot-move-to-linepos-function col))) (if marker (copy-marker (point-marker)) (point))))) + +;;; More helpers (defconst eglot--uri-path-allowed-chars (let ((vec (copy-sequence url-path-allowed-chars))) (aset vec ?: nil) ;; see github#639 @@ -1778,11 +1794,11 @@ Use `eglot-managed-p' to determine if current buffer is managed.") (pcase (plist-get (eglot--capabilities (eglot-current-server)) :positionEncoding) ("utf-32" - (eglot--setq-saving eglot-current-column-function #'eglot-current-column) - (eglot--setq-saving eglot-move-to-column-function #'eglot-move-to-column)) + (eglot--setq-saving eglot-current-linepos-function #'eglot-utf-32-linepos) + (eglot--setq-saving eglot-move-to-linepos-function #'eglot-move-to-utf-32-linepos)) ("utf-8" - (eglot--setq-saving eglot-current-column-function #'eglot-bytewise-column) - (eglot--setq-saving eglot-move-to-column-function #'eglot-move-to-bytewise-column))) + (eglot--setq-saving eglot-current-linepos-function #'eglot-utf-8-linepos) + (eglot--setq-saving eglot-move-to-linepos-function #'eglot-move-to-utf-8-linepos))) (add-hook 'after-change-functions 'eglot--after-change nil t) (add-hook 'before-change-functions 'eglot--before-change nil t) (add-hook 'kill-buffer-hook #'eglot--managed-mode-off nil t) @@ -2616,7 +2632,7 @@ Try to visit the target file for a richer summary line." (add-face-text-property hi-beg hi-end 'xref-match t substring) (list substring (line-number-at-pos (point) t) - (eglot-current-column) (- end beg)))))) + (eglot-utf-32-linepos) (- end beg)))))) (`(,summary ,line ,column ,length) (cond (visiting (with-current-buffer visiting (funcall collect))) diff --git a/test/lisp/progmodes/eglot-tests.el b/test/lisp/progmodes/eglot-tests.el index 4b6528351b2..5d5de59a19a 100644 --- a/test/lisp/progmodes/eglot-tests.el +++ b/test/lisp/progmodes/eglot-tests.el @@ -856,8 +856,8 @@ pylsp prefers autopep over yafp, despite its README stating the contrary." '((c-mode . ("clangd"))))) (with-current-buffer (eglot--find-file-noselect "project/foo.c") - (setq-local eglot-move-to-column-function #'eglot-move-to-lsp-abiding-column) - (setq-local eglot-current-column-function #'eglot-lsp-abiding-column) + (setq-local eglot-move-to-linepos-function #'eglot-move-to-utf-16-linepos) + (setq-local eglot-current-linepos-function #'eglot-utf-16-linepos) (eglot--sniffing (:client-notifications c-notifs) (eglot--tests-connect) (end-of-line) @@ -866,12 +866,12 @@ pylsp prefers autopep over yafp, despite its README stating the contrary." (eglot--wait-for (c-notifs 2) (&key params &allow-other-keys) (should (equal 71 (cadddr (cadadr (aref (cadddr params) 0)))))) (beginning-of-line) - (should (eq eglot-move-to-column-function #'eglot-move-to-lsp-abiding-column)) - (funcall eglot-move-to-column-function 71) + (should (eq eglot-move-to-linepos-function #'eglot-move-to-utf-16-linepos)) + (funcall eglot-move-to-linepos-function 71) (should (looking-at "p"))))))) (ert-deftest eglot-test-lsp-abiding-column () - "Test basic `eglot-lsp-abiding-column' and `eglot-move-to-lsp-abiding-column'." + "Test basic LSP character counting logic." (skip-unless (executable-find "clangd")) (eglot-tests--lsp-abiding-column-1)) -- 2.39.5