From: Dave Love Date: Thu, 8 May 2003 17:54:14 +0000 (+0000) Subject: *** empty log message *** X-Git-Tag: ttn-vms-21-2-B4~10247 X-Git-Url: http://git.eshelyaron.com/gitweb/?a=commitdiff_plain;h=347003be13b7c02520571c77813950388d96267b;p=emacs.git *** empty log message *** --- diff --git a/etc/NEWS b/etc/NEWS index 96f39e7dea4..2a3fee0b6df 100644 --- a/etc/NEWS +++ b/etc/NEWS @@ -88,6 +88,8 @@ See the files mac/README and mac/INSTALL for build instructions. * Changes in Emacs 21.4 +** A UTF-7 coding system is available in the library `utf-7'. + ** GUD mode has its own tool bar for controlling execution of the inferior and other common debugger commands. @@ -224,13 +226,17 @@ Big 5 is then converted to CNS. library. These include complete versions of most of those in codepage.el, based on Unicode mappings. -** The utf-8 coding system has been enhanced. Untranslatable utf-8 -sequences (mostly representing CJK characters) are composed into -single quasi-characters. User option `utf-translate-cjk-mode' -arranges to translate many utf-8 CJK character sequences into real -Emacs characters in a similar way to the Mule-UCS system. The utf-8 -coding system will now encode characters from most of Emacs's +** The utf-8/16 coding systems have been enhanced. +By default, untranslatable utf-8 sequences (mostly representing CJK +characters) are simply composed into single quasi-characters. User +option `utf-translate-cjk' arranges to translate many utf-8 CJK +character sequences into real Emacs characters in a similar way to the +Mule-UCS system. This uses significant space, so is not the default. +You can augment/amend the CJK translation via hash tables +`ucs-mule-cjk-to-unicode' and `ucs-unicode-to-mule-cjk'. The utf-8 +coding system now also encodes characters from most of Emacs's one-dimensional internal charsets, specifically the ISO-8859 ones. +The utf-16 coding system is affected similarly. 
** iso-10646-1 (`Unicode') fonts can be used to display any range of characters encodable by the utf-8 coding system. Just specify the @@ -405,7 +411,7 @@ mode-line. ** Speedbar has moved from the "Tools" top level menu to "Show/Hide". +++ -** Emacs can now indicate in the mode-line the presence of new e-mails +** Emacs can now indicate in the mode-line the presence of new e-mail in a directory or in a file. See the documentation of the user option `display-time-mail-directory'. @@ -1208,7 +1214,7 @@ mode-lines in inverse-video. --- ** The obsolete C mode (c-mode.el) has been removed to avoid problems -with Custom. +with Custom. cplus-md.el, which required it, has also been removed. ** New package benchmark.el contains simple support for convenient timing measurements of code (including the garbage collection component). diff --git a/lisp/ChangeLog b/lisp/ChangeLog index fa9ed8aa5c3..fbbf7025bef 100644 --- a/lisp/ChangeLog +++ b/lisp/ChangeLog @@ -1,3 +1,7 @@ +2003-05-08 Dave Love + + * international/utf-7.el: New file. + 2003-05-07 Francis J. Wright * files.el (insert-file-contents-literally): Allow it to be called diff --git a/lisp/international/utf-7.el b/lisp/international/utf-7.el new file mode 100644 index 00000000000..4e453c4145d --- /dev/null +++ b/lisp/international/utf-7.el @@ -0,0 +1,139 @@ +;;; utf-7.el --- utf-7 coding system + +;; Copyright (C) 2003 Free Software Foundation, Inc. + +;; Author: Dave Love +;; Keywords: i18n, mail + +;; This file is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 2, or (at your option) +;; any later version. + +;; This file is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. 
+
+;; You should have received a copy of the GNU General Public License
+;; along with GNU Emacs; see the file COPYING.  If not, write to
+;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+;; Boston, MA 02111-1307, USA.
+
+;;; Commentary:
+
+;; Defines a coding system for UTF-7, defined in RFC 2152.  Non-ASCII
+;; segments are encoded as base64-encoded big endian UTF-16.  Also
+;; defines a variation required for IMAP (RFC 2060).
+
+;; The encoding and decoding was originally taken from Jon K Hellan's
+;; implementation in Gnus, but has been substantially re-done.
+
+;; This probably needs more attention.  In particular, it's not
+;; completely consistent with iconv's behaviour.  It's arguable
+;; whether the IMAP version should be a coding system since it's
+;; apparently only used for IMAP mailbox names, so it's commented out.
+
+;;; Code:
+
+(make-coding-system
+ 'utf-7 0 ?U
+ "UTF-7 encoding of Unicode (RFC 2152)"
+ nil
+ `((safe-chars . ,(coding-system-get 'utf-16-be 'safe-chars))
+   (mime-charset . utf-7)
+   (pre-write-conversion . utf-7-pre-write-conversion)
+   (post-read-conversion . utf-7-post-read-conversion)))
+
+;; (make-coding-system
+;;  'utf-7-imap 0 ?u
+;;  "UTF-7 encoding of Unicode, IMAP version (RFC 2060)"
+;;  nil
+;;  `((safe-chars . ,(coding-system-get 'utf-16-be 'safe-chars))
+;;    (pre-write-conversion . utf-7-imap-pre-write-conversion)
+;;    (post-read-conversion . utf-7-imap-post-read-conversion)))
+
+(defun utf-7-decode (len imap)
+  "Decode LEN bytes of UTF-7 at point.
+IMAP non-nil means use the IMAP version."
+  (save-excursion
+    (save-restriction
+      (narrow-to-region (point) (+ (point) len))
+      (let ((not-esc (if imap "^&" "^+"))
+            (skip-chars (if imap "A-Za-z0-9+," "A-Za-z0-9+/")))
+        (while (not (eobp))
+          (skip-chars-forward not-esc)
+          (unless (eobp)
+            (forward-char)
+            (let ((p (point))
+                  (run-length (skip-chars-forward skip-chars)))
+              (if (eq ?- (char-after))
+                  (delete-char 1))
+              (unless (= run-length 0) ; zero means an encoded lone esc-char
+                (let ((pl (mod (- run-length) 4)))
+                  (insert-char ?= pl)
+                  (if imap
+                      (subst-char-in-region p (point) ?, ?/))
+                  (base64-decode-region p (point)))
+                (decode-coding-region p (point) 'utf-16-be)
+                (save-excursion
+                  (goto-char p)
+                  (delete-backward-char 1)))))))
+      (- (point-max) (point-min)))))
+
+(defun utf-7-post-read-conversion (len)
+  (utf-7-decode len nil))
+
+;; (defun utf-7-imap-post-read-conversion (len)
+;;   (utf-7-decode len t))
+
+(defun utf-7-encode (from to imap)
+  "Encode the text between FROM and TO (or the string FROM) as UTF-7.
+IMAP non-nil means use the IMAP version.  Leaves the result in a new buffer."
+  (let* ((old-buf (current-buffer))
+         (esc (if imap ?& ?+))
+         ;; These are characters which can be encoded as is.
+         (skip-chars (if imap
+                         "\t\n\r\x20-\x25\x27-\x7e" ; rfc2060
+                       ;; This includes the rfc2152 optional set.
+                       ;; Perhaps it shouldn't (like iconv).
+                       "\t\n\r -*,-[]-}"))
+         (not-skip-chars (format "^%s%c" skip-chars esc)))
+    (set-buffer (generate-new-buffer " *temp*"))
+    (if (stringp from)
+        (insert from)
+      (insert-buffer-substring old-buf from to))
+    (goto-char (point-min))
+    (while (not (eobp))
+      (skip-chars-forward skip-chars)
+      (if (eq esc (char-after))         ; literal escape char becomes ESC -
+          (progn (forward-char)
+                 (insert ?-))
+        (unless (eobp)
+          (insert esc)
+          (let ((p (point)))
+            (skip-chars-forward not-skip-chars)
+            (save-restriction
+              ;; encode-coding-region doesn't preserve point
+              (narrow-to-region p (point))
+              (encode-coding-region p (point-max) 'utf-16-be)
+              (base64-encode-region p (point-max))
+              (if imap
+                  (subst-char-in-region p (point-max) ?/ ?,))
+              (goto-char p)
+              ;; As I read the RFC, this isn't correct, but it's
+              ;; consistent with iconv, at least regarding `='.
+              (skip-chars-forward "^= \t\n")
+              (delete-region (point) (point-max))))
+          (unless (eobp)
+            (insert ?-)))))
+    nil))
+
+(defun utf-7-pre-write-conversion (from to)
+  (utf-7-encode from to nil))
+
+;; (defun utf-7-imap-pre-write-conversion (from to)
+;;   (utf-7-encode from to t))
+
+(provide 'utf-7)
+;;; utf-7.el ends here
diff --git a/src/ChangeLog b/src/ChangeLog
index a7add6733e3..b6af51313b6 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,7 @@
+2003-05-08  Dave Love
+
+	* coding.c (Vlast_coding_system_used): Doc fix.
+
 2003-05-07  Jason Rumney
 
 	* fileio.c (Ffile_symlink_p): Let handlers handle symlinks even