From: Stephane Zermatten Date: Wed, 4 Jun 2025 18:00:42 +0000 (+0300) Subject: Handle OSC sequences in term (bug#78263) X-Git-Url: http://git.eshelyaron.com/gitweb/?a=commitdiff_plain;h=69b4ae451bfde502c1a1bb166747a837db522467;p=emacs.git Handle OSC sequences in term (bug#78263) Handlers can be registered in term-osc-handlers. By default, term supports OSC 0, 2, 7 and 8, using the handlers defined in ansi-osc. Unknown OSC sequences are ignored. * lisp/term.el (term-emulate-terminal): Handle OSC sequences. (term-osc-handlers): New configuration variable. (term-control-seq-regexp): Extended regexp. (term--osc-max-bytes): New private constant. * lisp/ansi-osc.el (ansi-osc-handlers): Handle OSC 0 like OSC 2. * test/lisp/term-tests.el (term-ignore-osc, term-handle-osc) (term-call-ansi-osc-handlers): New tests. (cherry picked from commit 6c13da2caf629b14853e9aa57e59aeeeec14e68f) --- diff --git a/lisp/ansi-osc.el b/lisp/ansi-osc.el index e5026ff5490..ab5fd1f93e8 100644 --- a/lisp/ansi-osc.el +++ b/lisp/ansi-osc.el @@ -62,7 +62,8 @@ pointed by `ansi-osc--marker'." (delete-region pos0 (point)) (setq ansi-osc--marker (copy-marker pos0)))))))) -(defvar-local ansi-osc-handlers '(("2" . ansi-osc-window-title-handler) +(defvar-local ansi-osc-handlers '(("0" . ansi-osc-window-title-handler) + ("2" . ansi-osc-window-title-handler) ("7" . ansi-osc-directory-tracker) ("8" . ansi-osc-hyperlink-handler)) "Alist of handlers for OSC escape sequences. diff --git a/lisp/term.el b/lisp/term.el index 4a5850978d9..f35c019039f 100644 --- a/lisp/term.el +++ b/lisp/term.el @@ -304,6 +304,7 @@ (require 'cl-lib)) (require 'comint) ; Password regexp. (require 'ansi-color) +(require 'ansi-osc) (require 'ring) (require 'shell) @@ -602,6 +603,33 @@ executed once, when the buffer is created." :type 'hook :group 'term) +(defvar term-osc-handlers nil + "Terminal-specific OSC sequence handler function alist. 
+ +OSC (Operating System Command) is a category of ANSI escape sequence +used in terminal applications to introduce custom commands. Terminals +ignore unknown OSC sequences by default. Handlers can be registered here +to add support for new OSC sequences to `term'. + +Functions in this alist are passed matching valid OSC sequences as +they're sent to the terminal. + +Valid OSC sequences are of the form + ESC ] code ; text BEL + ESC ] code ; text ESC \\ + +Each entry has the form (CODE . FUNCTION), where CODE is the string that +appears before the semicolon. + +FUNCTION is called with two arguments CODE and TEXT, with TEXT being the +content of the OSC sequence after the semicolon. When the function is +called, the term buffer is active and with point and content valid at +the time the OSC sequence appears in the stream. + +Any code not on this alist is further looked up in `ansi-osc-handlers', +which collects OSC handlers that can also work outside of a terminal +context. For details, see `ansi-osc-apply-on-region'.") + (defvar term-mode-map (let ((map (make-sparse-keymap))) (define-key map "\ep" 'term-previous-input) @@ -3026,6 +3054,10 @@ See `term-prompt-regexp'." "\e\\(?:[DM78c]\\|" ;; another Emacs specific control sequence, "AnSiT[^\n]+\n\\|" + ;; OSC (See [ECMA-48] section 8.3.89 "Operating System Command".) + ;; The spec only allows 0x08-0x0d 0x20-7e, but this regexp also + ;; allows non-ascii (UTF-8) characters. + "\\][^\x00-\x07\x0e-\x1f\x7f]*\\(?:\a\\|\e\\\\\\)?\\|" ;; or an escape sequence (section 5.4 "Control Sequences"), "\\[\\([\x30-\x3F]*\\)[\x20-\x2F]*[\x40-\x7E]\\)\\)") "Regexp matching control sequences handled by term.el.") @@ -3228,6 +3260,33 @@ See `term-prompt-regexp'." 
(split-string ctl-params ";")) (aref str (1- ctl-end)) private)))) + (?\] ;; An OSC sequence + (let ((seq-str (substring str (+ i 2) ctl-end))) + (string-match + "\\`\\(\\([0-9A-Za-z]+\\);\\)?.*?\\(\a\\|\e\\\\\\)?\\'" + seq-str) + (let ((code (match-string 2 seq-str)) + (text-start (match-end 1)) + (end-mark (match-beginning 3))) + (when (and code end-mark) + (when-let* ((func (cdr (or (assoc-string + code term-osc-handlers) + (assoc-string + code ansi-osc-handlers))))) + (with-demoted-errors "term OSC error: %S" + (funcall + func code + (decode-coding-string + (substring seq-str text-start end-mark) + locale-coding-system t))))) + (when (and (not end-mark) + (>= ctl-end str-length) + (< (- ctl-end i) term--osc-max-bytes)) + ;; Continue ignoring until the end marker. + (setq term-terminal-undecoded-bytes + (substring str i))))) + ;; Consume everything + (setq i ctl-end)) (?D ;; Scroll forward (apparently not documented in ;; [ECMA-48], [ctlseqs] mentions it as C1 ;; character "Index" though). diff --git a/test/lisp/term-tests.el b/test/lisp/term-tests.el index ffb341f3b52..4fcc564cc5d 100644 --- a/test/lisp/term-tests.el +++ b/test/lisp/term-tests.el @@ -419,6 +419,123 @@ This is a reduced example from GNU nano's initial screen." (term-test-screen-from-input 40 1 bytes))))) +(ert-deftest term-ignore-osc () + ;; BEL-terminated OSC sequence + (should (equal "test" + (term-test-screen-from-input + 40 1 "te\e]0;window title\ast"))) + ;; ESC \-terminated OSC sequence + (should (equal "test" + (term-test-screen-from-input + 40 1 "te\e]0;window title\e\\st"))) + ;; Long OSC sequence split into multiple chunks + (should (equal "test" + (term-test-screen-from-input + 40 1 '("te\e]0;win" "dow " " title\ast")))) + ;; OSC sequence that start and ends with the chunk + (should (equal "test" + (term-test-screen-from-input + 40 1 '("te" "\e]0;window " "title\a" "st")))) + + ;; Invalid control characters break out of the OSC sequence, for + ;; safety. 
+ (should (equal "tetitlest" + (term-test-screen-from-input + 40 1 '("te\e]0;window\x05title\ast")))) + + (let ((locale-coding-system 'utf-8-unix)) + ;; An OSC sequence with multibyte UTF-8 characters. This is not + ;; exactly standard-compliant, but too common not to support. + (should (equal "test" + (term-test-screen-from-input + 40 1 "te\e]0;\xce\xb1\xce\xb2\e\\st"))))) + +(ert-deftest term-handle-osc () + (let* ((captured nil) + (handler (lambda (code text) + (push (cons code text) + captured))) + (term-osc-handlers `(("2" . ,handler) + ("1994" . ,handler)))) + + ;; Send OSC sequences to handler + (should (equal "test" + (term-test-screen-from-input + 40 1 "te\e]2;foo\as\e]1994;bar\at"))) + (should (equal '(("2" . "foo") + ("1994" . "bar")) + (nreverse captured))) + + ;; OSC sequences and code can be chunked + (setq captured nil) + (should (equal "test" + (term-test-screen-from-input + 40 1 `("te\e]2;chunked fo" + "o\as\e]19" + "94;chunked ba" + "r\at")))) + (should (equal '(("2" . "chunked foo") + ("1994" . "chunked bar")) + (nreverse captured))) + + ;; OSC sequences can contain multibyte characters + (let ((locale-coding-system 'utf-8-unix)) + (setq captured nil) + (should (equal "test" + (term-test-screen-from-input + 40 1 "te\e]2;\xce\xb1\xce\xb2\e\\st"))) + (should (equal '(("2" . 
"αβ")) captured))) + + ;; Ignore unhandled and invalid OSC sequences + (setq captured nil) + (should (equal + "test" + (term-test-screen-from-input + 40 1 "t\e]3;unhandled\aest"))) + (should-not captured) + (should (equal + "test" + (term-test-screen-from-input + 40 1 "t\e]2missing semicolon\aest"))) + (should-not captured) + + (should (equal + "test" + (term-test-screen-from-input + 40 1 "t\e]2;not ended\003est"))) + (should-not captured))) + +(ert-deftest term-call-ansi-osc-handlers () + (let* ((captured nil) + (osc-handler (lambda (code text) + (push (list 'osc code text) + captured))) + (term-handler (lambda (code text) + (push (list 'term code text) + captured))) + (ansi-osc-handlers `(("1" . ,osc-handler) + ("2" . ,osc-handler) + ("3" . ,osc-handler))) + (term-osc-handlers `(("2" . ,term-handler) + ("3" . nil)))) + + (should + (equal + "test" + (term-test-screen-from-input + 40 1 (concat + "te" + "\e]1;a\a" ;; sent to osc-handler + "\e]2;b\a" ;; sent to term-handler + "\e]3;c\a" ;; ignored; disabled in term + "\e]4;d\a" ;; ignored; not registered + "st")))) + (should + (equal + '((osc "1" "a") + (term "2" "b")) + (nreverse captured))))) + (provide 'term-tests) ;;; term-tests.el ends here