From: Stefan Monnier Date: Sun, 15 Jun 2008 04:43:35 +0000 (+0000) Subject: (encoded-kbd-self-insert-utf-8): Catch and recover from case when the bytes X-Git-Tag: emacs-pretest-23.0.90~4771 X-Git-Url: http://git.eshelyaron.com/gitweb/?a=commitdiff_plain;h=813c812568b3bca29c24b5ef23d44bd098a2bbb0;p=emacs.git (encoded-kbd-self-insert-utf-8): Catch and recover from case when the bytes we thought we were reading turn out to be something else entirely, such as latin-1 chars from quail. See bug#396. --- diff --git a/lisp/ChangeLog b/lisp/ChangeLog index fd6246c5916..42b7c94c6aa 100644 --- a/lisp/ChangeLog +++ b/lisp/ChangeLog @@ -1,3 +1,10 @@ +2008-06-15 Stefan Monnier + + * international/encoded-kb.el (encoded-kbd-self-insert-utf-8): + Catch and recover from case when the bytes we thought we were reading + turn out to be something else entirely, such as latin-1 chars from + quail. See bug#396. + 2008-06-15 Dan Nicolaescu * vc.el (vc-deduce-fileset): Check if the buffer has a file. diff --git a/lisp/international/encoded-kb.el b/lisp/international/encoded-kb.el index ec887659e9c..4659b499112 100644 --- a/lisp/international/encoded-kb.el +++ b/lisp/international/encoded-kb.el @@ -219,8 +219,9 @@ The following key sequence may cause multilingual text insertion." (defun encoded-kbd-self-insert-utf-8 (arg) (interactive "p") - (let ((char (encoded-kbd-last-key)) - len) + (let* ((lead (encoded-kbd-last-key)) + (char lead) + len event) (cond ((< char #xE0) (setq len 1 char (logand char #x1F))) ((< char #xF0) @@ -230,8 +231,22 @@ The following key sequence may cause multilingual text insertion." (t (setq len 4 char 0))) (while (> len 0) - (setq char (logior (lsh char 6) (logand (read-char-exclusive) #x3F)) - len (1- len))) + (setq event (read-char-exclusive)) + (if (and (>= event #x80) (< event #xc0)) + ;; Valid utf-8 sequence. + (setq char (logior (lsh char 6) (- event #x80)) + len (1- len)) + ;; Invalid utf-8 sequence. 
Might be because Quail got involved + ;; in-between and the bytes we thought we were reading were actually + ;; latin-1 chars. Let's presume that `event' is the second "byte", + ;; i.e. there weren't any "apparently correct" bytes between `lead' and + ;; `event': it's easy to recover in this case, and the more general + ;; case seems pretty unlikely. + ;; FIXME: We should really do encoded-kbd decoding before processing + ;; input-methods. + (push event unread-command-events) + (setq char lead) + (setq len 0))) (vector char))) (defun encoded-kbd-setup-keymap (keymap coding)