From 224aeb2877fffc894e9d1b31f7f44db0e55160c3 Mon Sep 17 00:00:00 2001 From: Lars Ingebrigtsen Date: Sat, 5 Feb 2022 08:50:05 +0100 Subject: [PATCH] Fix yank-media utf-16 detection * lisp/yank-media.el (yank-media--utf-16-p): Factor out into its own function for easier testing and fix the code. --- lisp/yank-media.el | 47 +++++++++++++++-------------------- test/lisp/yank-media-tests.el | 38 ++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+), 27 deletions(-) create mode 100644 test/lisp/yank-media-tests.el diff --git a/lisp/yank-media.el b/lisp/yank-media.el index 8c758777247..5cd75eb3186 100644 --- a/lisp/yank-media.el +++ b/lisp/yank-media.el @@ -155,33 +155,7 @@ non-supported selection data types." (format "%s" data)) ((string-match-p "\\`text/" (symbol-name data-type)) ;; We may have utf-16, which Emacs won't detect automatically. - (let ((coding-system - (and (zerop (mod (length data) 2)) - (let ((stats (vector 0 0))) - (dotimes (i (length data)) - (when (zerop (elt data i)) - (setf (aref stats (mod i 2)) - (1+ (aref stats (mod i 2)))))) - ;; We have some nuls... - (and (not (and (zerop (elt stats 0)) - (zerop (elt stats 1)))) - ;; If we have more than 90% every-other nul, then it's - ;; pretty likely to be utf-16. - (cond - ((> (if (zerop (elt stats 1)) - 1 - (/ (float (elt stats 0)) - (float (elt stats 1)))) - 0.9) - ;; Big endian. - 'utf-16-be) - ((> (if (zerop (elt stats 0)) - 1 - (/ (float (elt stats 1)) - (float (elt stats 0)))) - 0.9) - ;; Little endian. - 'utf-16-le))))))) + (let ((coding-system (yank-media--utf-16-p data))) (if coding-system (decode-coding-string data coding-system) ;; Some programs add a nul character at the end of text/* @@ -192,6 +166,25 @@ non-supported selection data types." 
(t data))) +(defun yank-media--utf-16-p (data) + (and (zerop (mod (length data) 2)) + (let ((stats (vector 0 0))) + (dotimes (i (length data)) + (when (zerop (elt data i)) + (setf (aref stats (mod i 2)) + (1+ (aref stats (mod i 2)))))) + ;; STATS now holds the nul counts at even and odd offsets of DATA. + ;; If more than 90% of either half is nul, it's pretty likely utf-16. + (cond + ((> (/ (float (elt stats 0)) (/ (length data) 2)) + 0.9) + ;; Nuls at the even offsets: big endian. + 'utf-16-be) + ((> (/ (float (elt stats 1)) (/ (length data) 2)) + 0.9) + ;; Nuls at the odd offsets: little endian. + 'utf-16-le))))) + (provide 'yank-media) ;;; yank-media.el ends here diff --git a/test/lisp/yank-media-tests.el b/test/lisp/yank-media-tests.el new file mode 100644 index 00000000000..4487ae150da --- /dev/null +++ b/test/lisp/yank-media-tests.el @@ -0,0 +1,38 @@ +;;; yank-media-tests.el --- Tests for yank-media.el -*- lexical-binding: t; -*- + +;; Copyright (C) 2022 Free Software Foundation, Inc. + +;; This file is part of GNU Emacs. + +;; GNU Emacs is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. + +;; GNU Emacs is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. 
+ +;;; Commentary: + +;; Tests for the utf-16 detection heuristic `yank-media--utf-16-p'. + +;;; Code: + +(require 'yank-media) +(require 'ert) +(require 'ert-x) + +(ert-deftest test-utf-16 () + (should-not (yank-media--utf-16-p "f")) + (should-not (yank-media--utf-16-p "fo")) + (should-not (yank-media--utf-16-p "\000ofo")) + (should (eq (yank-media--utf-16-p "\000o\000o") 'utf-16-be)) + (should (eq (yank-media--utf-16-p "o\000o\000") 'utf-16-le)) + (should-not (yank-media--utf-16-p "o\000\000o"))) + +;;; yank-media-tests.el ends here -- 2.39.5