mm-util.el (mm-extra-numeric-entities): New variable.
mm-url.el (mm-url-decode-entities):
mm-decode.el (mm-shr): Use it to decode extra numeric entities.
+2010-12-07 Katsumi Yamaoka <yamaoka@jpl.org>
+
+ * mm-util.el (mm-extra-numeric-entities): New variable.
+
+ * mm-url.el (mm-url-decode-entities):
+ * mm-decode.el (mm-shr): Use it to decode extra numeric entities.
+
2010-12-07 Stefan Monnier <monnier@iro.umontreal.ca>
* message.el: Use completion-at-point.
(when handle
(mm-with-part handle
(buffer-string))))))
- shr-inhibit-images shr-blocked-images charset)
+ shr-inhibit-images shr-blocked-images charset char)
(if (and (boundp 'gnus-summary-buffer)
(buffer-name gnus-summary-buffer))
(with-current-buffer gnus-summary-buffer
(narrow-to-region (point) (point))
(shr-insert-document
(mm-with-part handle
- (when (and charset
- (setq charset (mm-charset-to-coding-system charset))
- (not (eq charset 'ascii)))
- (insert (prog1
- (mm-decode-coding-string (buffer-string) charset)
- (erase-buffer)
- (mm-enable-multibyte))))
+ (insert (prog1
+ (if (and charset
+ (setq charset
+ (mm-charset-to-coding-system charset))
+ (not (eq charset 'ascii)))
+ (mm-decode-coding-string (buffer-string) charset)
+ (mm-string-as-multibyte (buffer-string)))
+ (erase-buffer)
+ (mm-enable-multibyte)))
+ (goto-char (point-min))
+ (setq case-fold-search t)
+ (while (re-search-forward
+ "&#\\(?:x\\([89][0-9a-f]\\)\\|\\(1[2-5][0-9]\\)\\);" nil t)
+ (when (setq char
+ (cdr (assq (if (match-beginning 1)
+ (string-to-number (match-string 1) 16)
+ (string-to-number (match-string 2)))
+ mm-extra-numeric-entities)))
+ (replace-match (char-to-string char))))
(libxml-parse-html-region (point-min) (point-max))))
(mm-handle-set-undisplayer
handle
(defun mm-url-decode-entities ()
"Decode all HTML entities."
(goto-char (point-min))
- (while (re-search-forward "&\\(#[0-9]+\\|#x[0-9a-f]+\\|[a-z]+[0-9]*\\);" nil t)
+ (while (re-search-forward "&\\(#[0-9]+\\|#x[0-9a-f]+\\|[a-z]+[0-9]*\\);"
+ nil t)
(let* ((entity (match-string 1))
(elem (if (eq (aref entity 0) ?\#)
- (let ((c (mm-ucs-to-char
- ;; Hex number: ㈒
- (if (eq (aref entity 1) ?x)
- (string-to-number (substring entity 2)
- 16)
- ;; Decimal number: 
- (string-to-number (substring entity 1))))))
+ (let ((c
+ ;; Hex number: ㈒
+ (if (eq (aref entity 1) ?x)
+ (string-to-number (substring entity 2)
+ 16)
+ ;; Decimal number: 
+ (string-to-number (substring entity 1)))))
+ (setq c (or (cdr (assq c mm-extra-numeric-entities))
+ (mm-ucs-to-char c)))
(if (mm-char-or-char-int-p c) c ?#))
(or (cdr (assq (intern entity)
mm-url-html-entities))
Setting it to nil is useful on Emacsen supporting Unicode if sending
mail with multiple parts is preferred to sending a Unicode one.")
+;; Lookup table for numeric character references whose number lies in
+;; the range #x80-#x9F.  The literal list below holds pairs of the form
+;; (ENTITY-NUMBER . UCS-CODE-POINT); the `mapcar' runs once, when this
+;; form is evaluated, and turns the cdr of every pair into a character
+;; with `mm-ucs-to-char', so the final value is an alist from entity
+;; number to character that callers can search with `assq'.
+;; NOTE(review): the pairs look like the windows-1252 layout (there is
+;; no entry for #x81, #x8D, #x8F, #x90 or #x9D) -- confirm.
+(defvar mm-extra-numeric-entities
+ (mapcar
+ (lambda (item)
+ ;; Keep the entity number as the key; convert only the code point.
+ (cons (car item) (mm-ucs-to-char (cdr item))))
+ '((#x80 . #x20AC) (#x82 . #x201A) (#x83 . #x0192) (#x84 . #x201E)
+ (#x85 . #x2026) (#x86 . #x2020) (#x87 . #x2021) (#x88 . #x02C6)
+ (#x89 . #x2030) (#x8A . #x0160) (#x8B . #x2039) (#x8C . #x0152)
+ (#x8E . #x017D) (#x91 . #x2018) (#x92 . #x2019) (#x93 . #x201C)
+ (#x94 . #x201D) (#x95 . #x2022) (#x96 . #x2013) (#x97 . #x2014)
+ (#x98 . #x02DC) (#x99 . #x2122) (#x9A . #x0161) (#x9B . #x203A)
+ (#x9C . #x0153) (#x9E . #x017E) (#x9F . #x0178)))
+ "*Alist of extra numeric entities and characters other than ISO 10646.
+This table is used for decoding extra numeric entities to characters,
+like \"&#x80;\" to the euro sign, mainly in html messages.")
+
+
;;; Internal variables:
;;; Functions: