From: Fabián Ezequiel Gallina Date: Sat, 27 Dec 2014 04:30:21 +0000 (-0300) Subject: python.el: Handle file encoding for shell. X-Git-Tag: emacs-24.4.90~100 X-Git-Url: http://git.eshelyaron.com/gitweb/?a=commitdiff_plain;h=2dd5163;p=emacs.git python.el: Handle file encoding for shell. * lisp/progmodes/python.el (python-rx-constituents): Add coding-cookie. (python-shell--save-temp-file): Write file with proper encoding. (python-shell-buffer-substring): Add coding cookie for detected encoding to generated content. Fix blank lines when removing if-name-main block. (python-shell-send-file): Handle file encoding. (python-info-encoding-from-cookie) (python-info-encoding): New functions. * test/automated/python-tests.el (python-shell-buffer-substring-1) (python-shell-buffer-substring-2, python-shell-buffer-substring-3) (python-shell-buffer-substring-4, python-shell-buffer-substring-5) (python-shell-buffer-substring-6, python-shell-buffer-substring-7) (python-shell-buffer-substring-8) (python-info-encoding-from-cookie-1) (python-info-encoding-from-cookie-2) (python-info-encoding-from-cookie-3) (python-info-encoding-from-cookie-4) (python-info-encoding-from-cookie-5) (python-info-encoding-from-cookie-6) (python-info-encoding-from-cookie-7, python-info-encoding-1) (python-info-encoding-2): New tests. --- diff --git a/lisp/ChangeLog b/lisp/ChangeLog index 22728121a15..b73732a1a7e 100644 --- a/lisp/ChangeLog +++ b/lisp/ChangeLog @@ -1,3 +1,16 @@ +2014-12-27 Fabián Ezequiel Gallina + + python.el: Handle file encoding for shell. + + * progmodes/python.el (python-rx-constituents): Add coding-cookie. + (python-shell--save-temp-file): Write file with proper encoding. + (python-shell-buffer-substring): Add coding cookie for detected + encoding to generated content. Fix blank lines when removing + if-name-main block. + (python-shell-send-file): Handle file encoding. + (python-info-encoding-from-cookie) + (python-info-encoding): New functions. 
+ 2014-12-24 Michael Albinus * net/tramp-sh.el (tramp-do-copy-or-rename-file-out-of-band): diff --git a/lisp/progmodes/python.el b/lisp/progmodes/python.el index 632659c28bb..02d0cbef262 100644 --- a/lisp/progmodes/python.el +++ b/lisp/progmodes/python.el @@ -386,7 +386,18 @@ (* ?\\ ?\\) (any ?\' ?\"))) (* ?\\ ?\\) ;; Match single or triple quotes of any kind. - (group (or "\"" "\"\"\"" "'" "'''")))))) + (group (or "\"" "\"\"\"" "'" "'''"))))) + (coding-cookie . ,(rx line-start ?# (* space) + (or + ;; # coding= + (: "coding" (or ?: ?=) (* space) (group-n 1 (+ (or word ?-)))) + ;; # -*- coding: -*- + (: "-*-" (* space) "coding:" (* space) + (group-n 1 (+ (or word ?-))) (* space) "-*-") + ;; # vim: set fileencoding= : + (: "vim:" (* space) "set" (+ space) + "fileencoding" (* space) ?= (* space) + (group-n 1 (+ (or word ?-))) (* space) ":"))))) "Additional Python specific sexps for `python-rx'") (defmacro python-rx (&rest regexps) @@ -2400,11 +2411,7 @@ there for compatibility with CEDET.") (concat (file-remote-p default-directory) "/tmp") temporary-file-directory)) (temp-file-name (make-temp-file "py")) - ;; XXX: Python's built-in compile function accepts utf-8 as - ;; input so there's no need to enforce a coding cookie. In - ;; the future making `coding-system-for-write' match the - ;; current buffer's coding may be a good idea. - (coding-system-for-write 'utf-8)) + (coding-system-for-write (python-info-encoding))) (with-temp-file temp-file-name (insert string) (delete-trailing-whitespace)) @@ -2511,16 +2518,28 @@ the python shell: \"if __name__ == '__main__'\" block will be removed. 2. When a subregion of the buffer is sent, it takes care of appending extra empty lines so tracebacks are correct. - 3. Wraps indented regions under an \"if True:\" block so the + 3. When the region sent is a substring of the current buffer, a + coding cookie is added. + 4. Wraps indented regions under an \"if True:\" block so the interpreter evaluates them correctly." 
- (let ((substring (buffer-substring-no-properties start end)) - (fillstr (make-string (1- (line-number-at-pos start)) ?\n)) - (toplevel-block-p (save-excursion - (goto-char start) - (or (zerop (line-number-at-pos start)) - (progn - (python-util-forward-comment 1) - (zerop (current-indentation))))))) + (let* ((substring (buffer-substring-no-properties start end)) + (buffer-substring-p (save-restriction + (widen) + (not (equal (list (point-min) (point-max)) + (list start end))))) + (encoding (python-info-encoding)) + (fillstr (concat + (when buffer-substring-p + (format "# -*- coding: %s -*-\n" encoding)) + (make-string + (- (line-number-at-pos start) + (if buffer-substring-p 2 1)) ?\n))) + (toplevel-block-p (save-excursion + (goto-char start) + (or (zerop (line-number-at-pos start)) + (progn + (python-util-forward-comment 1) + (zerop (current-indentation))))))) (with-temp-buffer (python-mode) (if fillstr (insert fillstr)) @@ -2536,17 +2555,26 @@ the python shell: (when (python-nav-if-name-main) (cons (point) (progn (python-nav-forward-sexp-safe) + ;; Include ending newline + (forward-line 1) (point))))))) ;; Oh destructuring bind, how I miss you. (if-name-main-start (car if-name-main-start-end)) - (if-name-main-end (cdr if-name-main-start-end))) + (if-name-main-end (cdr if-name-main-start-end)) + (fillstr (make-string + (- (line-number-at-pos if-name-main-end) + (line-number-at-pos if-name-main-start)) ?\n))) (when if-name-main-start-end (goto-char if-name-main-start) (delete-region if-name-main-start if-name-main-end) - (insert - (make-string - (- (line-number-at-pos if-name-main-end) - (line-number-at-pos if-name-main-start)) ?\n))))) + (insert fillstr)))) + ;; Ensure there's only one coding cookie in the generated string. 
+ (goto-char (point-min)) + (when (looking-at-p (python-rx coding-cookie)) + (forward-line 1) + (when (looking-at-p (python-rx coding-cookie)) + (delete-region + (line-beginning-position) (line-end-position)))) (buffer-substring-no-properties (point-min) (point-max))))) (defun python-shell-send-region (start end &optional nomain) @@ -2604,15 +2632,21 @@ If DELETE is non-nil, delete the file afterwards." (expand-file-name (or (file-remote-p file-name 'localname) file-name))) - temp-file-name))) + temp-file-name)) + (encoding + (with-temp-buffer + (insert-file-contents + (or temp-file-name file-name)) + (python-info-encoding)))) (when (not file-name) (error "If FILE-NAME is nil then TEMP-FILE-NAME must be non-nil")) (python-shell-send-string (format - (concat "__pyfile = open('''%s''');" - "exec(compile(__pyfile.read(), '''%s''', 'exec'));" - "__pyfile.close()%s") - (or temp-file-name file-name) file-name + (concat + "import codecs; __pyfile = codecs.open('''%s''', encoding='''%s''');" + "exec(compile(__pyfile.read().encode('''%s'''), '''%s''', 'exec'));" + "__pyfile.close()%s") + (or temp-file-name file-name) encoding encoding file-name (if delete (format "; import os; os.remove('''%s''')" (or temp-file-name file-name)) "")) @@ -3912,6 +3946,32 @@ operator." (* whitespace) line-end)) (string-equal "" (match-string-no-properties 1)))) +(defun python-info-encoding-from-cookie () + "Detect current buffer's encoding from its coding cookie. +Returns the encoding as a symbol." + (let ((first-two-lines + (save-excursion + (save-restriction + (widen) + (goto-char (point-min)) + (forward-line 2) + (buffer-substring-no-properties + (point) + (point-min)))))) + (when (string-match (python-rx coding-cookie) first-two-lines) + (intern (match-string-no-properties 1 first-two-lines))))) + +(defun python-info-encoding () + "Return encoding for file. +Try `python-info-encoding-from-cookie', if none is found then +default to utf-8." 
+ ;; If no encoding is defined, then it's safe to use UTF-8: Python 2 + ;; uses ASCII as default while Python 3 uses UTF-8. This means that + ;; in the worst case scenario python.el will make things work for + ;; Python 2 files with unicode data and no encoding defined. + (or (python-info-encoding-from-cookie) + 'utf-8)) + ;;; Utility functions diff --git a/test/ChangeLog b/test/ChangeLog index 14780c09e6f..101e9d9caa7 100644 --- a/test/ChangeLog +++ b/test/ChangeLog @@ -1,3 +1,19 @@ +2014-12-27 Fabián Ezequiel Gallina + + * automated/python-tests.el (python-shell-buffer-substring-1) + (python-shell-buffer-substring-2, python-shell-buffer-substring-3) + (python-shell-buffer-substring-4, python-shell-buffer-substring-5) + (python-shell-buffer-substring-6, python-shell-buffer-substring-7) + (python-shell-buffer-substring-8) + (python-info-encoding-from-cookie-1) + (python-info-encoding-from-cookie-2) + (python-info-encoding-from-cookie-3) + (python-info-encoding-from-cookie-4) + (python-info-encoding-from-cookie-5) + (python-info-encoding-from-cookie-6) + (python-info-encoding-from-cookie-7, python-info-encoding-1) + (python-info-encoding-2): New tests. + 2014-12-25 Michael Albinus * automated/tramp-tests.el (tramp-test17-insert-directory): Do not diff --git a/test/automated/python-tests.el b/test/automated/python-tests.el index d1713ac1851..8fcda58e1e0 100644 --- a/test/automated/python-tests.el +++ b/test/automated/python-tests.el @@ -2459,6 +2459,198 @@ and `python-shell-interpreter-args' in the new shell buffer." "^\\(o\\.t \\|\\)"))) (ignore-errors (delete-file startup-file))))) +(ert-deftest python-shell-buffer-substring-1 () + "Selecting a substring of the whole buffer must match its contents." 
+ (python-tests-with-temp-buffer + " +class Foo(models.Model): + pass + + +class Bar(models.Model): + pass +" + (should (string= (buffer-string) + (python-shell-buffer-substring (point-min) (point-max)))))) + +(ert-deftest python-shell-buffer-substring-2 () + "Main block should be removed if NOMAIN is non-nil." + (python-tests-with-temp-buffer + " +class Foo(models.Model): + pass + +class Bar(models.Model): + pass + +if __name__ == \"__main__\": + foo = Foo() + print (foo) +" + (should (string= (python-shell-buffer-substring (point-min) (point-max) t) + " +class Foo(models.Model): + pass + +class Bar(models.Model): + pass + + + + +")))) + +(ert-deftest python-shell-buffer-substring-3 () + "Main block should be removed if NOMAIN is non-nil." + (python-tests-with-temp-buffer + " +class Foo(models.Model): + pass + +if __name__ == \"__main__\": + foo = Foo() + print (foo) + +class Bar(models.Model): + pass +" + (should (string= (python-shell-buffer-substring (point-min) (point-max) t) + " +class Foo(models.Model): + pass + + + + + +class Bar(models.Model): + pass +")))) + +(ert-deftest python-shell-buffer-substring-4 () + "Coding cookie should be added for substrings." + (python-tests-with-temp-buffer + "# coding: latin-1 + +class Foo(models.Model): + pass + +if __name__ == \"__main__\": + foo = Foo() + print (foo) + +class Bar(models.Model): + pass +" + (should (string= (python-shell-buffer-substring + (python-tests-look-at "class Foo(models.Model):") + (progn (python-nav-forward-sexp) (point))) + "# -*- coding: latin-1 -*- + +class Foo(models.Model): + pass")))) + +(ert-deftest python-shell-buffer-substring-5 () + "The proper amount of blank lines is added for a substring." 
+ (python-tests-with-temp-buffer + "# coding: latin-1 + +class Foo(models.Model): + pass + +if __name__ == \"__main__\": + foo = Foo() + print (foo) + +class Bar(models.Model): + pass +" + (should (string= (python-shell-buffer-substring + (python-tests-look-at "class Bar(models.Model):") + (progn (python-nav-forward-sexp) (point))) + "# -*- coding: latin-1 -*- + + + + + + + + +class Bar(models.Model): + pass")))) + +(ert-deftest python-shell-buffer-substring-6 () + "Handle substring with coding cookie in the second line." + (python-tests-with-temp-buffer + " +# coding: latin-1 + +class Foo(models.Model): + pass + +if __name__ == \"__main__\": + foo = Foo() + print (foo) + +class Bar(models.Model): + pass +" + (should (string= (python-shell-buffer-substring + (python-tests-look-at "# coding: latin-1") + (python-tests-look-at "if __name__ == \"__main__\":")) + "# -*- coding: latin-1 -*- + + +class Foo(models.Model): + pass + +")))) + +(ert-deftest python-shell-buffer-substring-7 () + "Ensure first coding cookie gets precedence." + (python-tests-with-temp-buffer + "# coding: utf-8 +# coding: latin-1 + +class Foo(models.Model): + pass + +if __name__ == \"__main__\": + foo = Foo() + print (foo) + +class Bar(models.Model): + pass +" + (should (string= (python-shell-buffer-substring + (python-tests-look-at "# coding: latin-1") + (python-tests-look-at "if __name__ == \"__main__\":")) + "# -*- coding: utf-8 -*- + + +class Foo(models.Model): + pass + +")))) + +(ert-deftest python-shell-buffer-substring-8 () + "Ensure first coding cookie gets precedence when sending whole buffer." 
+ (python-tests-with-temp-buffer + "# coding: utf-8 +# coding: latin-1 + +class Foo(models.Model): + pass +" + (should (string= (python-shell-buffer-substring (point-min) (point-max)) + "# coding: utf-8 + + +class Foo(models.Model): + pass +")))) + ;;; Shell completion @@ -3773,6 +3965,85 @@ foo = True # another comment (forward-line 1) (should (python-info-current-line-empty-p)))) +(ert-deftest python-info-encoding-from-cookie-1 () + "Should detect it on first line." + (python-tests-with-temp-buffer + "# coding=latin-1 + +foo = True # another comment +" + (should (eq (python-info-encoding-from-cookie) 'latin-1)))) + +(ert-deftest python-info-encoding-from-cookie-2 () + "Should detect it on second line." + (python-tests-with-temp-buffer + " +# coding=latin-1 + +foo = True # another comment +" + (should (eq (python-info-encoding-from-cookie) 'latin-1)))) + +(ert-deftest python-info-encoding-from-cookie-3 () + "Should not be detected on third line (and following ones)." + (python-tests-with-temp-buffer + " + +# coding=latin-1 +foo = True # another comment +" + (should (not (python-info-encoding-from-cookie))))) + +(ert-deftest python-info-encoding-from-cookie-4 () + "Should detect Emacs style." + (python-tests-with-temp-buffer + "# -*- coding: latin-1 -*- + +foo = True # another comment" + (should (eq (python-info-encoding-from-cookie) 'latin-1)))) + +(ert-deftest python-info-encoding-from-cookie-5 () + "Should detect Vim style." + (python-tests-with-temp-buffer + "# vim: set fileencoding=latin-1 : + +foo = True # another comment" + (should (eq (python-info-encoding-from-cookie) 'latin-1)))) + +(ert-deftest python-info-encoding-from-cookie-6 () + "First cookie wins." + (python-tests-with-temp-buffer + "# -*- coding: iso-8859-1 -*- +# vim: set fileencoding=latin-1 : + +foo = True # another comment" + (should (eq (python-info-encoding-from-cookie) 'iso-8859-1)))) + +(ert-deftest python-info-encoding-from-cookie-7 () + "First cookie wins." 
+ (python-tests-with-temp-buffer + "# vim: set fileencoding=latin-1 : +# -*- coding: iso-8859-1 -*- + +foo = True # another comment" + (should (eq (python-info-encoding-from-cookie) 'latin-1)))) + +(ert-deftest python-info-encoding-1 () + "Should return the detected encoding from cookie." + (python-tests-with-temp-buffer + "# vim: set fileencoding=latin-1 : + +foo = True # another comment" + (should (eq (python-info-encoding) 'latin-1)))) + +(ert-deftest python-info-encoding-2 () + "Should default to utf-8." + (python-tests-with-temp-buffer + "# No encoding for you + +foo = True # another comment" + (should (eq (python-info-encoding) 'utf-8)))) + ;;; Utility functions