git.eshelyaron.com Git - emacs.git/commitdiff
python.el: Handle file encoding for shell.
author Fabián Ezequiel Gallina <fgallina@gnu.org>
Sat, 27 Dec 2014 04:30:21 +0000 (01:30 -0300)
committer Fabián Ezequiel Gallina <fgallina@gnu.org>
Sat, 27 Dec 2014 04:30:21 +0000 (01:30 -0300)
* lisp/progmodes/python.el (python-rx-constituents): Add coding-cookie.
(python-shell--save-temp-file): Write file with proper encoding.
(python-shell-buffer-substring): Add coding cookie for detected
encoding to generated content.  Fix blank lines when removing
if-name-main block.
(python-shell-send-file): Handle file encoding.
(python-info-encoding-from-cookie)
(python-info-encoding): New functions.

* test/automated/python-tests.el (python-shell-buffer-substring-1)
(python-shell-buffer-substring-2, python-shell-buffer-substring-3)
(python-shell-buffer-substring-4, python-shell-buffer-substring-5)
(python-shell-buffer-substring-6, python-shell-buffer-substring-7)
(python-shell-buffer-substring-8)
(python-info-encoding-from-cookie-1)
(python-info-encoding-from-cookie-2)
(python-info-encoding-from-cookie-3)
(python-info-encoding-from-cookie-4)
(python-info-encoding-from-cookie-5)
(python-info-encoding-from-cookie-6)
(python-info-encoding-from-cookie-7, python-info-encoding-1)
(python-info-encoding-2): New tests.

lisp/ChangeLog
lisp/progmodes/python.el
test/ChangeLog
test/automated/python-tests.el

index 22728121a1599c491e9f181d20b25492a5b026f1..b73732a1a7ef164a36e243cb79dee974f24b66ce 100644 (file)
@@ -1,3 +1,16 @@
+2014-12-27  Fabián Ezequiel Gallina  <fgallina@gnu.org>
+
+       python.el: Handle file encoding for shell.
+
+       * progmodes/python.el (python-rx-constituents): Add coding-cookie.
+       (python-shell--save-temp-file): Write file with proper encoding.
+       (python-shell-buffer-substring): Add coding cookie for detected
+       encoding to generated content.  Fix blank lines when removing
+       if-name-main block.
+       (python-shell-send-file): Handle file encoding.
+       (python-info-encoding-from-cookie)
+       (python-info-encoding): New functions.
+
 2014-12-24  Michael Albinus  <michael.albinus@gmx.de>
 
        * net/tramp-sh.el (tramp-do-copy-or-rename-file-out-of-band):
index 632659c28bb791d0b890dde7241daff378f2d257..02d0cbef26298b1337f5e194bf0b7514f8b25f6a 100644 (file)
                                          (* ?\\ ?\\) (any ?\' ?\")))
                                 (* ?\\ ?\\)
                                 ;; Match single or triple quotes of any kind.
-                                (group (or  "\"" "\"\"\"" "'" "'''"))))))
+                                (group (or  "\"" "\"\"\"" "'" "'''")))))
+      (coding-cookie . ,(rx line-start ?# (* space)
+                            (or
+                             ;; # coding=<encoding name>
+                             (: "coding" (or ?: ?=) (* space) (group-n 1 (+ (or word ?-))))
+                             ;; # -*- coding: <encoding name> -*-
+                             (: "-*-" (* space) "coding:" (* space)
+                                (group-n 1 (+ (or word ?-))) (* space) "-*-")
+                             ;; # vim: set fileencoding=<encoding name> :
+                             (: "vim:" (* space) "set" (+ space)
+                                "fileencoding" (* space) ?= (* space)
+                                (group-n 1 (+ (or word ?-))) (* space) ":")))))
     "Additional Python specific sexps for `python-rx'")
 
   (defmacro python-rx (&rest regexps)
@@ -2400,11 +2411,7 @@ there for compatibility with CEDET.")
               (concat (file-remote-p default-directory) "/tmp")
             temporary-file-directory))
          (temp-file-name (make-temp-file "py"))
-         ;; XXX: Python's built-in compile function accepts utf-8 as
-         ;; input so there's no need to enforce a coding cookie.  In
-         ;; the future making `coding-system-for-write' match the
-         ;; current buffer's coding may be a good idea.
-         (coding-system-for-write 'utf-8))
+         (coding-system-for-write (python-info-encoding)))
     (with-temp-file temp-file-name
       (insert string)
       (delete-trailing-whitespace))
@@ -2511,16 +2518,28 @@ the python shell:
      \"if __name__ == '__main__'\" block will be removed.
   2. When a subregion of the buffer is sent, it takes care of
      appending extra empty lines so tracebacks are correct.
-  3. Wraps indented regions under an \"if True:\" block so the
+  3. When the region sent is a substring of the current buffer, a
+     coding cookie is added.
+  4. Wraps indented regions under an \"if True:\" block so the
      interpreter evaluates them correctly."
-  (let ((substring (buffer-substring-no-properties start end))
-        (fillstr (make-string (1- (line-number-at-pos start)) ?\n))
-        (toplevel-block-p (save-excursion
-                            (goto-char start)
-                            (or (zerop (line-number-at-pos start))
-                                (progn
-                                  (python-util-forward-comment 1)
-                                  (zerop (current-indentation)))))))
+  (let* ((substring (buffer-substring-no-properties start end))
+         (buffer-substring-p (save-restriction
+                               (widen)
+                               (not (equal (list (point-min) (point-max))
+                                           (list start end)))))
+         (encoding (python-info-encoding))
+         (fillstr (concat
+                   (when buffer-substring-p
+                     (format "# -*- coding: %s -*-\n" encoding))
+                   (make-string
+                    (- (line-number-at-pos start)
+                       (if buffer-substring-p 2 1)) ?\n)))
+         (toplevel-block-p (save-excursion
+                             (goto-char start)
+                             (or (zerop (line-number-at-pos start))
+                                 (progn
+                                   (python-util-forward-comment 1)
+                                   (zerop (current-indentation)))))))
     (with-temp-buffer
       (python-mode)
       (if fillstr (insert fillstr))
@@ -2536,17 +2555,26 @@ the python shell:
                        (when (python-nav-if-name-main)
                          (cons (point)
                                (progn (python-nav-forward-sexp-safe)
+                                      ;; Include ending newline
+                                      (forward-line 1)
                                       (point)))))))
                ;; Oh destructuring bind, how I miss you.
                (if-name-main-start (car if-name-main-start-end))
-               (if-name-main-end (cdr if-name-main-start-end)))
+               (if-name-main-end (cdr if-name-main-start-end))
+               (fillstr (make-string
+                         (- (line-number-at-pos if-name-main-end)
+                            (line-number-at-pos if-name-main-start)) ?\n)))
           (when if-name-main-start-end
             (goto-char if-name-main-start)
             (delete-region if-name-main-start if-name-main-end)
-            (insert
-             (make-string
-              (- (line-number-at-pos if-name-main-end)
-                 (line-number-at-pos if-name-main-start)) ?\n)))))
+            (insert fillstr))))
+      ;; Ensure there's only one coding cookie in the generated string.
+      (goto-char (point-min))
+      (when (looking-at-p (python-rx coding-cookie))
+        (forward-line 1)
+        (when (looking-at-p (python-rx coding-cookie))
+          (delete-region
+           (line-beginning-position) (line-end-position))))
       (buffer-substring-no-properties (point-min) (point-max)))))
 
 (defun python-shell-send-region (start end &optional nomain)
@@ -2604,15 +2632,21 @@ If DELETE is non-nil, delete the file afterwards."
                           (expand-file-name
                            (or (file-remote-p file-name 'localname)
                                file-name)))
-                        temp-file-name)))
+                        temp-file-name))
+         (encoding
+          (with-temp-buffer
+            (insert-file-contents
+             (or temp-file-name file-name))
+            (python-info-encoding))))
     (when (not file-name)
       (error "If FILE-NAME is nil then TEMP-FILE-NAME must be non-nil"))
     (python-shell-send-string
      (format
-      (concat "__pyfile = open('''%s''');"
-              "exec(compile(__pyfile.read(), '''%s''', 'exec'));"
-              "__pyfile.close()%s")
-      (or temp-file-name file-name) file-name
+      (concat
+       "import codecs; __pyfile = codecs.open('''%s''', encoding='''%s''');"
+       "exec(compile(__pyfile.read().encode('''%s'''), '''%s''', 'exec'));"
+       "__pyfile.close()%s")
+      (or temp-file-name file-name) encoding encoding file-name
       (if delete (format "; import os; os.remove('''%s''')"
                          (or temp-file-name file-name))
         ""))
@@ -3912,6 +3946,32 @@ operator."
                 (* whitespace) line-end))
     (string-equal "" (match-string-no-properties 1))))
 
+(defun python-info-encoding-from-cookie ()
+  "Detect current buffer's encoding from its coding cookie.
+Return the encoding as a symbol, or nil when no cookie is found."
+  (let ((first-two-lines
+         (save-excursion
+           (save-restriction
+             (widen)  ; look at the real buffer start even when narrowed
+             (goto-char (point-min))
+             (forward-line 2)  ; only the first two lines are searched
+             (buffer-substring-no-properties
+              (point)
+              (point-min))))))
+    (when (string-match (python-rx coding-cookie) first-two-lines)
+      (intern (match-string-no-properties 1 first-two-lines)))))
+
+(defun python-info-encoding ()
+  "Return encoding for file.
+Try `python-info-encoding-from-cookie', if none is found then
+default to utf-8."
+  ;; If no encoding is defined, then it's safe to use UTF-8: Python 2
+  ;; uses ASCII as default while Python 3 uses UTF-8.  This means that
+  ;; in the worst case scenario python.el will make things work for
+  ;; Python 2 files with unicode data and no encoding defined.
+  (or (python-info-encoding-from-cookie)
+      'utf-8))
+
 \f
 ;;; Utility functions
 
index 14780c09e6faa79a6367708760045c495272f424..101e9d9caa72707f15cfbcdbd42a413fda93c2f6 100644 (file)
@@ -1,3 +1,19 @@
+2014-12-27  Fabián Ezequiel Gallina  <fgallina@gnu.org>
+
+       * automated/python-tests.el (python-shell-buffer-substring-1)
+       (python-shell-buffer-substring-2, python-shell-buffer-substring-3)
+       (python-shell-buffer-substring-4, python-shell-buffer-substring-5)
+       (python-shell-buffer-substring-6, python-shell-buffer-substring-7)
+       (python-shell-buffer-substring-8)
+       (python-info-encoding-from-cookie-1)
+       (python-info-encoding-from-cookie-2)
+       (python-info-encoding-from-cookie-3)
+       (python-info-encoding-from-cookie-4)
+       (python-info-encoding-from-cookie-5)
+       (python-info-encoding-from-cookie-6)
+       (python-info-encoding-from-cookie-7, python-info-encoding-1)
+       (python-info-encoding-2): New tests.
+
 2014-12-25  Michael Albinus  <michael.albinus@gmx.de>
 
        * automated/tramp-tests.el (tramp-test17-insert-directory): Do not
index d1713ac185182247ba3cb7db92bf2a435187029b..8fcda58e1e04e08b67705cc038e4dfc50ebc192c 100644 (file)
@@ -2459,6 +2459,198 @@ and `python-shell-interpreter-args' in the new shell buffer."
                            "^\\(o\\.t \\|\\)")))
       (ignore-errors (delete-file startup-file)))))
 
+(ert-deftest python-shell-buffer-substring-1 ()
+  "Sending the whole buffer must return its contents unchanged."
+  (python-tests-with-temp-buffer
+   "
+class Foo(models.Model):
+    pass
+
+
+class Bar(models.Model):
+    pass
+"
+   (should (string= (buffer-string)
+                    (python-shell-buffer-substring (point-min) (point-max))))))
+
+(ert-deftest python-shell-buffer-substring-2 ()
+  "Main block at end of buffer should be removed if NOMAIN is non-nil."
+  (python-tests-with-temp-buffer
+   "
+class Foo(models.Model):
+    pass
+
+class Bar(models.Model):
+    pass
+
+if __name__ == \"__main__\":
+    foo = Foo()
+    print (foo)
+"
+   (should (string= (python-shell-buffer-substring (point-min) (point-max) t)
+                    "
+class Foo(models.Model):
+    pass
+
+class Bar(models.Model):
+    pass
+
+
+
+
+"))))
+
+(ert-deftest python-shell-buffer-substring-3 ()
+  "Main block in mid-buffer should be removed if NOMAIN is non-nil."
+  (python-tests-with-temp-buffer
+   "
+class Foo(models.Model):
+    pass
+
+if __name__ == \"__main__\":
+    foo = Foo()
+    print (foo)
+
+class Bar(models.Model):
+    pass
+"
+   (should (string= (python-shell-buffer-substring (point-min) (point-max) t)
+                    "
+class Foo(models.Model):
+    pass
+
+
+
+
+
+class Bar(models.Model):
+    pass
+"))))
+
+(ert-deftest python-shell-buffer-substring-4 ()
+  "Coding cookie should be added for substrings."
+  (python-tests-with-temp-buffer
+   "# coding: latin-1
+
+class Foo(models.Model):
+    pass
+
+if __name__ == \"__main__\":
+    foo = Foo()
+    print (foo)
+
+class Bar(models.Model):
+    pass
+"
+   (should (string= (python-shell-buffer-substring
+                     (python-tests-look-at "class Foo(models.Model):")
+                     (progn (python-nav-forward-sexp) (point)))
+                    "# -*- coding: latin-1 -*-
+
+class Foo(models.Model):
+    pass"))))
+
+(ert-deftest python-shell-buffer-substring-5 ()
+  "The proper amount of blank lines is added for a substring."
+  (python-tests-with-temp-buffer
+   "# coding: latin-1
+
+class Foo(models.Model):
+    pass
+
+if __name__ == \"__main__\":
+    foo = Foo()
+    print (foo)
+
+class Bar(models.Model):
+    pass
+"
+   (should (string= (python-shell-buffer-substring
+                     (python-tests-look-at "class Bar(models.Model):")
+                     (progn (python-nav-forward-sexp) (point)))
+                    "# -*- coding: latin-1 -*-
+
+
+
+
+
+
+
+
+class Bar(models.Model):
+    pass"))))
+
+(ert-deftest python-shell-buffer-substring-6 ()
+  "Handle substring with coding cookie in the second line."
+  (python-tests-with-temp-buffer
+   "
+# coding: latin-1
+
+class Foo(models.Model):
+    pass
+
+if __name__ == \"__main__\":
+    foo = Foo()
+    print (foo)
+
+class Bar(models.Model):
+    pass
+"
+   (should (string= (python-shell-buffer-substring
+                     (python-tests-look-at "# coding: latin-1")
+                     (python-tests-look-at "if __name__ == \"__main__\":"))
+                    "# -*- coding: latin-1 -*-
+
+
+class Foo(models.Model):
+    pass
+
+"))))
+
+(ert-deftest python-shell-buffer-substring-7 ()
+  "Ensure first coding cookie gets precedence."
+  (python-tests-with-temp-buffer
+   "# coding: utf-8
+# coding: latin-1
+
+class Foo(models.Model):
+    pass
+
+if __name__ == \"__main__\":
+    foo = Foo()
+    print (foo)
+
+class Bar(models.Model):
+    pass
+"
+   (should (string= (python-shell-buffer-substring
+                     (python-tests-look-at "# coding: latin-1")
+                     (python-tests-look-at "if __name__ == \"__main__\":"))
+                    "# -*- coding: utf-8 -*-
+
+
+class Foo(models.Model):
+    pass
+
+"))))
+
+(ert-deftest python-shell-buffer-substring-8 ()
+  "Ensure first coding cookie gets precedence when sending whole buffer."
+  (python-tests-with-temp-buffer
+   "# coding: utf-8
+# coding: latin-1
+
+class Foo(models.Model):
+    pass
+"
+   (should (string= (python-shell-buffer-substring (point-min) (point-max))
+                    "# coding: utf-8
+
+
+class Foo(models.Model):
+    pass
+"))))
+
 \f
 ;;; Shell completion
 
@@ -3773,6 +3965,85 @@ foo = True  # another comment
    (forward-line 1)
    (should (python-info-current-line-empty-p))))
 
+(ert-deftest python-info-encoding-from-cookie-1 ()
+  "Should detect it on first line."
+  (python-tests-with-temp-buffer
+   "# coding=latin-1
+
+foo = True  # another comment
+"
+   (should (eq (python-info-encoding-from-cookie) 'latin-1))))
+
+(ert-deftest python-info-encoding-from-cookie-2 ()
+  "Should detect it on second line."
+  (python-tests-with-temp-buffer
+   "
+# coding=latin-1
+
+foo = True  # another comment
+"
+   (should (eq (python-info-encoding-from-cookie) 'latin-1))))
+
+(ert-deftest python-info-encoding-from-cookie-3 ()
+  "Should not be detected on third line (and following ones)."
+  (python-tests-with-temp-buffer
+   "
+
+# coding=latin-1
+foo = True  # another comment
+"
+   (should (not (python-info-encoding-from-cookie)))))
+
+(ert-deftest python-info-encoding-from-cookie-4 ()
+  "Should detect Emacs style."
+  (python-tests-with-temp-buffer
+   "# -*- coding: latin-1 -*-
+
+foo = True  # another comment"
+   (should (eq (python-info-encoding-from-cookie) 'latin-1))))
+
+(ert-deftest python-info-encoding-from-cookie-5 ()
+  "Should detect Vim style."
+  (python-tests-with-temp-buffer
+   "# vim: set fileencoding=latin-1 :
+
+foo = True  # another comment"
+   (should (eq (python-info-encoding-from-cookie) 'latin-1))))
+
+(ert-deftest python-info-encoding-from-cookie-6 ()
+  "First cookie wins: Emacs style on line 1 beats Vim style on line 2."
+  (python-tests-with-temp-buffer
+   "# -*- coding: iso-8859-1 -*-
+# vim: set fileencoding=latin-1 :
+
+foo = True  # another comment"
+   (should (eq (python-info-encoding-from-cookie) 'iso-8859-1))))
+
+(ert-deftest python-info-encoding-from-cookie-7 ()
+  "First cookie wins: Vim style on line 1 beats Emacs style on line 2."
+  (python-tests-with-temp-buffer
+   "# vim: set fileencoding=latin-1 :
+# -*- coding: iso-8859-1 -*-
+
+foo = True  # another comment"
+   (should (eq (python-info-encoding-from-cookie) 'latin-1))))
+
+(ert-deftest python-info-encoding-1 ()
+  "Should return the detected encoding from cookie."
+  (python-tests-with-temp-buffer
+   "# vim: set fileencoding=latin-1 :
+
+foo = True  # another comment"
+   (should (eq (python-info-encoding) 'latin-1))))
+
+(ert-deftest python-info-encoding-2 ()
+  "Should default to utf-8."
+  (python-tests-with-temp-buffer
+   "# No encoding for you
+
+foo = True  # another comment"
+   (should (eq (python-info-encoding) 'utf-8))))
+
 \f
 ;;; Utility functions