From: ShengHuo ZHU Date: Wed, 31 Oct 2001 04:16:51 +0000 (+0000) Subject: * mm-util.el, nnultimate.el, nnweb.el, nnslashdot.el: Sync with X-Git-Tag: ttn-vms-21-2-B4~18968 X-Git-Url: http://git.eshelyaron.com/gitweb/?a=commitdiff_plain;h=95fa1ff74aa9ae40d5ef4b680ea606287c40327f;p=emacs.git * mm-util.el, nnultimate.el, nnweb.el, nnslashdot.el: Sync with the Gnus CVS. * mm-util.el (mm-mime-mule-charset-alist): Move down and call mm-coding-system-p. Don't correct it only in XEmacs. (mm-charset-to-coding-system): Use mm-coding-system-p and mm-get-coding-system-list. (mm-emacs-mule, mm-mule4-p): New. (mm-enable-multibyte, mm-disable-multibyte, mm-enable-multibyte-mule4, mm-disable-multibyte-mule4, mm-with-unibyte-current-buffer, mm-with-unibyte-current-buffer-mule4): Use them. (mm-find-mime-charset-region): Treat iso-2022-jp. From Dave Love : * mm-util.el (mm-mime-mule-charset-alist): Make it correct by construction. (mm-charset-synonym-alist): Remove windows-125[02]. Make other entries conditional on not having a coding system defined for them. (mm-mule-charset-to-mime-charset): Use find-coding-systems-for-charsets if defined. (mm-charset-to-coding-system): Don't use mm-get-coding-system-list. Look in mm-charset-synonym-alist later. Add last resort search of coding systems. (mm-enable-multibyte-mule4, mm-disable-multibyte-mule4) (mm-with-unibyte-current-buffer-mule4): Just treat Mule 5 like Mule 4. (mm-find-mime-charset-region): Re-write. (mm-with-unibyte-current-buffer): Restore buffer as well as multibyteness. --- diff --git a/lisp/gnus/ChangeLog b/lisp/gnus/ChangeLog index de61ebb79c0..14138f18820 100644 --- a/lisp/gnus/ChangeLog +++ b/lisp/gnus/ChangeLog @@ -1,3 +1,38 @@ +2001-10-30 ShengHuo ZHU + + * mm-util.el, nnultimate.el, nnweb.el, nnslashdot.el: Sync with + the Gnus CVS. + + * mm-util.el (mm-mime-mule-charset-alist): Move down and call + mm-coding-system-p. Don't correct it only in XEmacs. + (mm-charset-to-coding-system): Use mm-coding-system-p and + mm-get-coding-system-list. + (mm-emacs-mule, mm-mule4-p): New. + (mm-enable-multibyte, mm-disable-multibyte, + mm-enable-multibyte-mule4, mm-disable-multibyte-mule4, + mm-with-unibyte-current-buffer, + mm-with-unibyte-current-buffer-mule4): Use them. + (mm-find-mime-charset-region): Treat iso-2022-jp. + + From Dave Love : + + * mm-util.el (mm-mime-mule-charset-alist): Make it correct by + construction. + (mm-charset-synonym-alist): Remove windows-125[02]. Make other + entries conditional on not having a coding system defined for + them. + (mm-mule-charset-to-mime-charset): Use + find-coding-systems-for-charsets if defined. + (mm-charset-to-coding-system): Don't use + mm-get-coding-system-list. Look in mm-charset-synonym-alist + later. Add last resort search of coding systems. + (mm-enable-multibyte-mule4, mm-disable-multibyte-mule4) + (mm-with-unibyte-current-buffer-mule4): Just treat Mule 5 like + Mule 4. + (mm-find-mime-charset-region): Re-write. + (mm-with-unibyte-current-buffer): Restore buffer as well as + multibyteness. + 2001-10-30 Simon Josefsson * nnimap.el (nnimap-date-days-ago): Defeat locale. diff --git a/lisp/gnus/mm-util.el b/lisp/gnus/mm-util.el index 95ab4f6291f..69823c43d1c 100644 --- a/lisp/gnus/mm-util.el +++ b/lisp/gnus/mm-util.el @@ -1,4 +1,4 @@ -;;; mm-util.el --- utility functions for MIME things +;;; mm-util.el --- Utility functions for Mule and low level things ;; Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc. ;; Author: Lars Magne Ingebrigtsen @@ -27,63 +27,6 @@ (eval-when-compile (require 'cl)) (require 'mail-prsvr) -(defun mm-coding-system-p (sym) - "Return non-nil if SYM is a coding system." - (or (and (fboundp 'coding-system-p) (coding-system-p sym)) - (memq sym (mm-get-coding-system-list)))) - -(defvar mm-mime-mule-charset-alist - `((us-ascii ascii) - (iso-8859-1 latin-iso8859-1) - (iso-8859-2 latin-iso8859-2) - (iso-8859-3 latin-iso8859-3) - (iso-8859-4 latin-iso8859-4) - (iso-8859-5 cyrillic-iso8859-5) - ;; Non-mule (X)Emacs uses the last mule-charset for 8bit characters. - ;; The fake mule-charset, gnus-koi8-r, tells Gnus that the default - ;; charset is koi8-r, not iso-8859-5. - (koi8-r cyrillic-iso8859-5 gnus-koi8-r) - (iso-8859-6 arabic-iso8859-6) - (iso-8859-7 greek-iso8859-7) - (iso-8859-8 hebrew-iso8859-8) - (iso-8859-9 latin-iso8859-9) - (iso-8859-14 latin-iso8859-14) - (iso-8859-15 latin-iso8859-15) - (viscii vietnamese-viscii-lower) - (iso-2022-jp latin-jisx0201 japanese-jisx0208 japanese-jisx0208-1978) - (euc-kr korean-ksc5601) - (gb2312 chinese-gb2312) - (big5 chinese-big5-1 chinese-big5-2) - (tibetan tibetan) - (thai-tis620 thai-tis620) - (iso-2022-7bit ethiopic arabic-1-column arabic-2-column) - (iso-2022-jp-2 latin-iso8859-1 greek-iso8859-7 - latin-jisx0201 japanese-jisx0208-1978 - chinese-gb2312 japanese-jisx0208 - korean-ksc5601 japanese-jisx0212 - katakana-jisx0201) - (iso-2022-int-1 latin-iso8859-1 greek-iso8859-7 - latin-jisx0201 japanese-jisx0208-1978 - chinese-gb2312 japanese-jisx0208 - korean-ksc5601 japanese-jisx0212 - chinese-cns11643-1 chinese-cns11643-2) - (iso-2022-int-1 latin-iso8859-1 latin-iso8859-2 - cyrillic-iso8859-5 greek-iso8859-7 - latin-jisx0201 japanese-jisx0208-1978 - chinese-gb2312 japanese-jisx0208 - korean-ksc5601 japanese-jisx0212 - chinese-cns11643-1 chinese-cns11643-2 - chinese-cns11643-3 chinese-cns11643-4 - chinese-cns11643-5 chinese-cns11643-6 - chinese-cns11643-7) - ;; utf-8 comes either from Mule-UCS or Mule 5+. - ,@(if (mm-coding-system-p 'utf-8) - (list (cons 'utf-8 (delete 'ascii - (coding-system-get - 'mule-utf-8 - 'safe-charsets)))))) - "Alist of MIME-charset/MULE-charsets.") - (eval-and-compile (mapcar (lambda (elem) @@ -104,12 +47,6 @@ (make-char . (lambda (charset int) (int-to-char int))) - (read-coding-system - . (lambda (prompt) - "Prompt the user for a coding system." - (completing-read - prompt (mapcar (lambda (s) (list (symbol-name (car s)))) - mm-mime-mule-charset-alist)))) (read-charset . (lambda (prompt) "Return a charset." @@ -119,40 +56,85 @@ (mapcar (lambda (e) (list (symbol-name (car e)))) mm-mime-mule-charset-alist) nil t)))) + (subst-char-in-string + . (lambda (from to string) ;; stolen (and renamed) from nnheader.el + "Replace characters in STRING from FROM to TO." + (let ((string (substring string 0)) ;Copy string. + (len (length string)) + (idx 0)) + ;; Replace all occurrences of FROM with TO. + (while (< idx len) + (when (= (aref string idx) from) + (aset string idx to)) + (setq idx (1+ idx))) + string))) (string-as-unibyte . identity) - (multibyte-string-p . ignore) - ))) + (string-as-multibyte . identity) + (multibyte-string-p . ignore)))) (eval-and-compile (defalias 'mm-char-or-char-int-p - (cond + (cond ((fboundp 'char-or-char-int-p) 'char-or-char-int-p) - ((fboundp 'char-valid-p) 'char-valid-p) + ((fboundp 'char-valid-p) 'char-valid-p) (t 'identity)))) +(eval-and-compile + (defalias 'mm-read-coding-system + (cond + ((fboundp 'read-coding-system) + (if (and (featurep 'xemacs) + (<= (string-to-number emacs-version) 21.1)) + (lambda (prompt &optional default-coding-system) + (read-coding-system prompt)) + 'read-coding-system)) + (t (lambda (prompt &optional default-coding-system) + "Prompt the user for a coding system." + (completing-read + prompt (mapcar (lambda (s) (list (symbol-name (car s)))) + mm-mime-mule-charset-alist))))))) + (defvar mm-coding-system-list nil) (defun mm-get-coding-system-list () "Get the coding system list." (or mm-coding-system-list (setq mm-coding-system-list (mm-coding-system-list)))) +(defun mm-coding-system-p (sym) + "Return non-nil if SYM is a coding system." + (or (and (fboundp 'coding-system-p) (coding-system-p sym)) + (memq sym (mm-get-coding-system-list)))) + (defvar mm-charset-synonym-alist - `((big5 . cn-big5) - (gb2312 . cn-gb-2312) + `( + ;; Perfectly fine? A valid MIME name, anyhow. + ,(unless (mm-coding-system-p 'big5) + '(big5 . cn-big5)) + ;; Not in XEmacs, but it's not a proper MIME charset anyhow. + ,(unless (mm-coding-system-p 'x-ctext) + '(x-ctext . ctext)) + ;; Apparently not defined in Emacs 20, but is a valid MIME name. + ,(unless (mm-coding-system-p 'gb2312) + '(gb2312 . cn-gb-2312)) ;; Windows-1252 is actually a superset of Latin-1. See also ;; `gnus-article-dumbquotes-map'. - ,(unless (mm-coding-system-p 'windows-1252) ; should be defined eventually - '(windows-1252 . iso-8859-1)) + ;;,(unless (mm-coding-system-p 'windows-1252) + ; should be defined eventually + ;; '(windows-1252 . iso-8859-1)) + ;; ISO-8859-15 is very similar to ISO-8859-1. + ;;,(unless (mm-coding-system-p 'iso-8859-15) ; Emacs 21 defines it. + ;; '(iso-8859-15 . iso-8859-1)) ;; Windows-1250 is a variant of Latin-2 heavily used by Microsoft ;; Outlook users in Czech republic. Use this to allow reading of their ;; e-mails. cp1250 should be defined by M-x codepage-setup. - ,(unless (mm-coding-system-p 'windows-1250) ; should be defined eventually - '(windows-1250 . cp1250)) - (x-ctext . ctext)) + ;;,(unless (mm-coding-system-p 'windows-1250) + ; should be defined eventually + ;; '(windows-1250 . cp1250)) + ) "A mapping from invalid charset names to the real charset names.") (defvar mm-binary-coding-system - (cond + (cond ((mm-coding-system-p 'binary) 'binary) ((mm-coding-system-p 'no-conversion) 'no-conversion) (t nil)) @@ -169,30 +151,113 @@ "Text coding system for write.") (defvar mm-auto-save-coding-system - (cond + (cond ((mm-coding-system-p 'emacs-mule) (if (memq system-type '(windows-nt ms-dos ms-windows)) - (if (mm-coding-system-p 'emacs-mule-dos) + (if (mm-coding-system-p 'emacs-mule-dos) 'emacs-mule-dos mm-binary-coding-system) 'emacs-mule)) ((mm-coding-system-p 'escape-quoted) 'escape-quoted) (t mm-binary-coding-system)) "Coding system of auto save file.") +(defvar mm-universal-coding-system mm-auto-save-coding-system + "The universal Coding system.") + +;; Fixme: some of the cars here aren't valid MIME charsets. That +;; should only matter with XEmacs, though. +(defvar mm-mime-mule-charset-alist + `((us-ascii ascii) + (iso-8859-1 latin-iso8859-1) + (iso-8859-2 latin-iso8859-2) + (iso-8859-3 latin-iso8859-3) + (iso-8859-4 latin-iso8859-4) + (iso-8859-5 cyrillic-iso8859-5) + ;; Non-mule (X)Emacs uses the last mule-charset for 8bit characters. + ;; The fake mule-charset, gnus-koi8-r, tells Gnus that the default + ;; charset is koi8-r, not iso-8859-5. + (koi8-r cyrillic-iso8859-5 gnus-koi8-r) + (iso-8859-6 arabic-iso8859-6) + (iso-8859-7 greek-iso8859-7) + (iso-8859-8 hebrew-iso8859-8) + (iso-8859-9 latin-iso8859-9) + (iso-8859-14 latin-iso8859-14) + (iso-8859-15 latin-iso8859-15) + (viscii vietnamese-viscii-lower) + (iso-2022-jp latin-jisx0201 japanese-jisx0208 japanese-jisx0208-1978) + (euc-kr korean-ksc5601) + (gb2312 chinese-gb2312) + (big5 chinese-big5-1 chinese-big5-2) + (tibetan tibetan) + (thai-tis620 thai-tis620) + (iso-2022-7bit ethiopic arabic-1-column arabic-2-column) + (iso-2022-jp-2 latin-iso8859-1 greek-iso8859-7 + latin-jisx0201 japanese-jisx0208-1978 + chinese-gb2312 japanese-jisx0208 + korean-ksc5601 japanese-jisx0212 + katakana-jisx0201) + (iso-2022-int-1 latin-iso8859-1 greek-iso8859-7 + latin-jisx0201 japanese-jisx0208-1978 + chinese-gb2312 japanese-jisx0208 + korean-ksc5601 japanese-jisx0212 + chinese-cns11643-1 chinese-cns11643-2) + (iso-2022-int-1 latin-iso8859-1 latin-iso8859-2 + cyrillic-iso8859-5 greek-iso8859-7 + latin-jisx0201 japanese-jisx0208-1978 + chinese-gb2312 japanese-jisx0208 + korean-ksc5601 japanese-jisx0212 + chinese-cns11643-1 chinese-cns11643-2 + chinese-cns11643-3 chinese-cns11643-4 + chinese-cns11643-5 chinese-cns11643-6 + chinese-cns11643-7) + ,(if (or (not (fboundp 'charsetp)) ;; non-Mule case + (charsetp 'unicode-a) + (not (mm-coding-system-p 'mule-utf-8))) + '(utf-8 unicode-a unicode-b unicode-c unicode-d unicode-e) + ;; If we have utf-8 we're in Mule 5+. + (append '(utf-8) + (delete 'ascii + (coding-system-get 'mule-utf-8 'safe-charsets))))) + "Alist of MIME-charset/MULE-charsets.") + +;; Correct by construction, but should be unnecessary: +;; XEmacs hates it. +(when (and (not (featurep 'xemacs)) + (fboundp 'coding-system-list) + (fboundp 'sort-coding-systems)) + (setq mm-mime-mule-charset-alist + (apply + 'nconc + (mapcar + (lambda (cs) + (when (and (coding-system-get cs 'mime-charset) + (not (eq t (coding-system-get cs 'safe-charsets)))) + (list (cons (coding-system-get cs 'mime-charset) + (delq 'ascii + (coding-system-get cs 'safe-charsets)))))) + (sort-coding-systems (coding-system-list 'base-only)))))) + ;;; Internal variables: ;;; Functions: (defun mm-mule-charset-to-mime-charset (charset) "Return the MIME charset corresponding to the given Mule CHARSET." - (let ((alist mm-mime-mule-charset-alist) - out) - (while alist - (when (memq charset (cdar alist)) - (setq out (caar alist) - alist nil)) - (pop alist)) - out)) + (if (fboundp 'find-coding-systems-for-charsets) + (let (mime) + (dolist (cs (find-coding-systems-for-charsets (list charset))) + (unless mime + (when cs + (setq mime (coding-system-get cs 'mime-charset))))) + mime) + (let ((alist mm-mime-mule-charset-alist) + out) + (while alist + (when (memq charset (cdar alist)) + (setq out (caar alist) + alist nil)) + (pop alist)) + out))) (defun mm-charset-to-coding-system (charset &optional lbt) "Return coding-system corresponding to CHARSET. @@ -201,9 +266,6 @@ If optional argument LBT (`unix', `dos' or `mac') is specified, it is used as the line break code type of the coding system." (when (stringp charset) (setq charset (intern (downcase charset)))) - (setq charset - (or (cdr (assq charset mm-charset-synonym-alist)) - charset)) (when lbt (setq charset (intern (format "%s-%s" charset lbt)))) (cond @@ -215,58 +277,73 @@ used as the line break code type of the coding system." 'ascii) ;; Check to see whether we can handle this charset. (This depends ;; on there being some coding system matching each `mime-charset' - ;; coding sysytem property defined, as there should be.) - ((memq charset (mm-get-coding-system-list)) + ;; property defined, as there should be.) + ((and (mm-coding-system-p charset) +;;; Doing this would potentially weed out incorrect charsets. +;;; charset +;;; (eq charset (coding-system-get charset 'mime-charset)) + ) charset) - ;; Nope. - (t - nil))) - -(if (fboundp 'subst-char-in-string) - (defsubst mm-replace-chars-in-string (string from to) - (subst-char-in-string from to string)) - (defun mm-replace-chars-in-string (string from to) - "Replace characters in STRING from FROM to TO." - (let ((string (substring string 0)) ;Copy string. - (len (length string)) - (idx 0)) - ;; Replace all occurrences of FROM with TO. - (while (< idx len) - (when (= (aref string idx) from) - (aset string idx to)) - (setq idx (1+ idx))) - string))) - -(defsubst mm-enable-multibyte () - "Set the multibyte flag of the current buffer. + ;; Translate invalid charsets. + ((mm-coding-system-p (setq charset + (cdr (assq charset + mm-charset-synonym-alist)))) + charset) + ;; Last resort: search the coding system list for entries which + ;; have the right mime-charset in case the canonical name isn't + ;; defined (though it should be). + ((let (cs) + ;; mm-get-coding-system-list returns a list of cs without lbt. + ;; Do we need -lbt? + (dolist (c (mm-get-coding-system-list)) + (if (and (null cs) + (eq charset (coding-system-get c 'mime-charset))) + (setq cs c))) + cs)))) + +(defsubst mm-replace-chars-in-string (string from to) + (mm-subst-char-in-string from to string)) + +(eval-and-compile + (defvar mm-emacs-mule (and (not (featurep 'xemacs)) + (boundp 'default-enable-multibyte-characters) + default-enable-multibyte-characters + (fboundp 'set-buffer-multibyte)) + "Emacs mule.") + + (defvar mm-mule4-p (and mm-emacs-mule + (fboundp 'charsetp) + (not (charsetp 'eight-bit-control))) + "Mule version 4.") + + (if mm-emacs-mule + (defun mm-enable-multibyte () + "Set the multibyte flag of the current buffer. Only do this if the default value of `enable-multibyte-characters' is non-nil. This is a no-op in XEmacs." - (when (and (fboundp 'set-buffer-multibyte) - (boundp 'enable-multibyte-characters) - (default-value 'enable-multibyte-characters)) - (set-buffer-multibyte t))) + (set-buffer-multibyte t)) + (defalias 'mm-enable-multibyte 'ignore)) -(defsubst mm-disable-multibyte () - "Unset the multibyte flag of in the current buffer. + (if mm-emacs-mule + (defun mm-disable-multibyte () + "Unset the multibyte flag of in the current buffer. This is a no-op in XEmacs." - (when (fboundp 'set-buffer-multibyte) - (set-buffer-multibyte nil))) + (set-buffer-multibyte nil)) + (defalias 'mm-disable-multibyte 'ignore)) -(defsubst mm-enable-multibyte-mule4 () - "Enable multibyte in the current buffer. + (if mm-mule4-p + (defun mm-enable-multibyte-mule4 () + "Enable multibyte in the current buffer. Only used in Emacs Mule 4." - (when (and (fboundp 'set-buffer-multibyte) - (boundp 'enable-multibyte-characters) - (default-value 'enable-multibyte-characters) - (not (charsetp 'eight-bit-control))) - (set-buffer-multibyte t))) - -(defsubst mm-disable-multibyte-mule4 () - "Disable multibyte in the current buffer. + (set-buffer-multibyte t)) + (defalias 'mm-enable-multibyte-mule4 'ignore)) + + (if mm-mule4-p + (defun mm-disable-multibyte-mule4 () + "Disable multibyte in the current buffer. Only used in Emacs Mule 4." - (when (and (fboundp 'set-buffer-multibyte) - (not (charsetp 'eight-bit-control))) - (set-buffer-multibyte nil))) + (set-buffer-multibyte nil)) + (defalias 'mm-disable-multibyte-mule4 'ignore))) (defun mm-preferred-coding-system (charset) ;; A typo in some Emacs versions. @@ -294,10 +371,10 @@ If the charset is `composition', return the actual one." (progn (setq mail-parse-mule-charset (and (boundp 'current-language-environment) - (car (last - (assq 'charset - (assoc current-language-environment - language-info-alist)))))) + (car (last + (assq 'charset + (assoc current-language-environment + language-info-alist)))))) (if (or (not mail-parse-mule-charset) (eq mail-parse-mule-charset 'ascii)) (setq mail-parse-mule-charset @@ -309,6 +386,8 @@ If the charset is `composition', return the actual one." (defun mm-mime-charset (charset) "Return the MIME charset corresponding to the given Mule CHARSET." + (if (eq charset 'unknown) + (error "The message contains non-printable characters, please use attachment")) (if (and (fboundp 'coding-system-get) (fboundp 'get-charset-property)) ;; This exists in Emacs 20. (or @@ -317,6 +396,7 @@ If the charset is `composition', return the actual one." (mm-preferred-coding-system charset) 'mime-charset)) (and (eq charset 'ascii) 'us-ascii) + (mm-preferred-coding-system charset) (mm-mule-charset-to-mime-charset charset)) ;; This is for XEmacs. (mm-mule-charset-to-mime-charset charset))) @@ -330,21 +410,8 @@ If the charset is `composition', return the actual one." (setq result (cons head result))) (nreverse result))) -(defun mm-find-mime-charset-region (b e) - "Return the MIME charsets needed to encode the region between B and E." - (let ((charsets (mapcar 'mm-mime-charset - (delq 'ascii - (mm-find-charset-region b e))))) - (when (memq 'iso-2022-jp-2 charsets) - (setq charsets (delq 'iso-2022-jp charsets))) - (setq charsets (mm-delete-duplicates charsets)) - (if (and (> (length charsets) 1) - (fboundp 'find-coding-systems-region) - (let ((cs (find-coding-systems-region b e))) - (or (memq 'utf-8 cs) (memq 'mule-utf-8 cs)))) - '(utf-8) - charsets))) - +;; It's not clear whether this is supposed to mean the global or local +;; setting. I think it's used inconsistently. -- fx (defsubst mm-multibyte-p () "Say whether multibyte is enabled." (if (and (not (featurep 'xemacs)) @@ -352,6 +419,39 @@ If the charset is `composition', return the actual one." enable-multibyte-characters (featurep 'mule))) +(defun mm-find-mime-charset-region (b e) + "Return the MIME charsets needed to encode the region between B and E. +Nil means ASCII, a single-element list represents an appropriate MIME +charset, and a longer list means no appropriate charset." + ;; The return possibilities of this function are a mess... + (or (and + (mm-multibyte-p) + (fboundp 'find-coding-systems-region) + ;; Find the mime-charset of the most preferred coding + ;; system that has one. + (let ((systems (find-coding-systems-region b e)) + result) + ;; Fixme: The `mime-charset' (`x-ctext') of `compound-text' + ;; is not in the IANA list. + (setq systems (delq 'compound-text systems)) + (unless (equal systems '(undecided)) + (while systems + (let ((cs (coding-system-get (pop systems) 'mime-charset))) + (if cs + (setq systems nil + result (list cs)))))) + result)) + ;; Otherwise we're not multibyte, XEmacs or a single coding + ;; system won't cover it. + (let ((charsets + (mm-delete-duplicates + (mapcar 'mm-mime-charset + (delq 'ascii + (mm-find-charset-region b e)))))) + (if (memq 'iso-2022-jp-2 charsets) + (delq 'iso-2022-jp charsets) + charsets)))) + (defmacro mm-with-unibyte-buffer (&rest forms) "Create a temporary buffer, and evaluate FORMS there like `progn'. Use unibyte mode for this." @@ -364,15 +464,18 @@ Use unibyte mode for this." "Evaluate FORMS with current current buffer temporarily made unibyte. Also bind `default-enable-multibyte-characters' to nil. Equivalent to `progn' in XEmacs" - (let ((multibyte (make-symbol "multibyte"))) - `(if (fboundp 'set-buffer-multibyte) - (let ((,multibyte enable-multibyte-characters)) + (let ((multibyte (make-symbol "multibyte")) + (buffer (make-symbol "buffer"))) + `(if mm-emacs-mule + (let ((,multibyte enable-multibyte-characters) + (,buffer (current-buffer))) (unwind-protect (let (default-enable-multibyte-characters) (set-buffer-multibyte nil) ,@forms) + (set-buffer ,buffer) (set-buffer-multibyte ,multibyte))) - (progn + (let (default-enable-multibyte-characters) ,@forms)))) (put 'mm-with-unibyte-current-buffer 'lisp-indent-function 0) (put 'mm-with-unibyte-current-buffer 'edebug-form-spec '(body)) @@ -380,22 +483,19 @@ Equivalent to `progn' in XEmacs" (defmacro mm-with-unibyte-current-buffer-mule4 (&rest forms) "Evaluate FORMS there like `progn' in current buffer. Mule4 only." - (let ((multibyte (make-symbol "multibyte"))) - `(if (or (featurep 'xemacs) - (not (fboundp 'set-buffer-multibyte)) - (charsetp 'eight-bit-control)) ;; For Emacs Mule 4 only. - (progn - ,@forms) - (let ((,multibyte (default-value 'enable-multibyte-characters))) - (unwind-protect - (let ((buffer-file-coding-system mm-binary-coding-system) - (coding-system-for-read mm-binary-coding-system) - (coding-system-for-write mm-binary-coding-system)) - (set-buffer-multibyte nil) - (setq-default enable-multibyte-characters nil) - ,@forms) - (setq-default enable-multibyte-characters ,multibyte) - (set-buffer-multibyte ,multibyte)))))) + (let ((multibyte (make-symbol "multibyte")) + (buffer (make-symbol "buffer"))) + `(if mm-mule4-p + (let ((,multibyte enable-multibyte-characters) + (,buffer (current-buffer))) + (unwind-protect + (let (default-enable-multibyte-characters) + (set-buffer-multibyte nil) + ,@forms) + (set-buffer ,buffer) + (set-buffer-multibyte ,multibyte))) + (let (default-enable-multibyte-characters) + ,@forms)))) (put 'mm-with-unibyte-current-buffer-mule4 'lisp-indent-function 0) (put 'mm-with-unibyte-current-buffer-mule4 'edebug-form-spec '(body)) @@ -410,9 +510,14 @@ Mule4 only." "Return a list of Emacs charsets in the region B to E." (cond ((and (mm-multibyte-p) - (fboundp 'find-charset-region)) + (fboundp 'find-charset-region)) ;; Remove composition since the base charsets have been included. - (delq 'composition (find-charset-region b e))) + ;; Remove eight-bit-*, treat them as ascii. + (let ((css (find-charset-region b e))) + (mapcar (lambda (cs) (setq css (delq cs css))) + '(composition eight-bit-control eight-bit-graphic + control-1)) + css)) (t ;; We are in a unibyte buffer or XEmacs non-mule, so we futz around a bit. (save-excursion @@ -425,8 +530,8 @@ Mule4 only." (let (charset) (setq charset (and (boundp 'current-language-environment) - (car (last (assq 'charset - (assoc current-language-environment + (car (last (assq 'charset + (assoc current-language-environment language-info-alist)))))) (if (eq charset 'ascii) (setq charset nil)) (or charset @@ -476,15 +581,15 @@ If INHIBIT is non-nil, inhibit mm-inhibit-file-name-handlers. (auto-mode-alist (if inhibit nil (mm-auto-mode-alist))) (default-major-mode 'fundamental-mode) (enable-local-variables nil) - (after-insert-file-functions nil) + (after-insert-file-functions nil) (enable-local-eval nil) (find-file-hooks nil) - (inhibit-file-name-operation (if inhibit + (inhibit-file-name-operation (if inhibit 'insert-file-contents inhibit-file-name-operation)) (inhibit-file-name-handlers (if inhibit - (append mm-inhibit-file-name-handlers + (append mm-inhibit-file-name-handlers inhibit-file-name-handlers) inhibit-file-name-handlers))) (insert-file-contents filename visit beg end replace))) @@ -497,37 +602,47 @@ saying what text to write. Optional fourth argument specifies the coding system to use when encoding the file. If INHIBIT is non-nil, inhibit mm-inhibit-file-name-handlers." - (let ((coding-system-for-write - (or codesys mm-text-coding-system-for-write + (let ((coding-system-for-write + (or codesys mm-text-coding-system-for-write mm-text-coding-system)) - (inhibit-file-name-operation (if inhibit + (inhibit-file-name-operation (if inhibit 'append-to-file inhibit-file-name-operation)) (inhibit-file-name-handlers (if inhibit - (append mm-inhibit-file-name-handlers + (append mm-inhibit-file-name-handlers inhibit-file-name-handlers) inhibit-file-name-handlers))) (append-to-file start end filename))) -(defun mm-write-region (start end filename &optional append visit lockname +(defun mm-write-region (start end filename &optional append visit lockname coding-system inhibit) "Like `write-region'. If INHIBIT is non-nil, inhibit mm-inhibit-file-name-handlers." - (let ((coding-system-for-write - (or coding-system mm-text-coding-system-for-write + (let ((coding-system-for-write + (or coding-system mm-text-coding-system-for-write mm-text-coding-system)) - (inhibit-file-name-operation (if inhibit + (inhibit-file-name-operation (if inhibit 'write-region inhibit-file-name-operation)) (inhibit-file-name-handlers (if inhibit - (append mm-inhibit-file-name-handlers + (append mm-inhibit-file-name-handlers inhibit-file-name-handlers) inhibit-file-name-handlers))) (write-region start end filename append visit lockname))) +(defun mm-image-load-path (&optional package) + (let (dir result) + (dolist (path load-path (nreverse result)) + (if (file-directory-p + (setq dir (concat (file-name-directory + (directory-file-name path)) + "etc/" (or package "gnus/")))) + (push dir result)) + (push path result)))) + (provide 'mm-util) ;;; mm-util.el ends here diff --git a/lisp/gnus/nnslashdot.el b/lisp/gnus/nnslashdot.el index 246a3613a81..8290b2c73b1 100644 --- a/lisp/gnus/nnslashdot.el +++ b/lisp/gnus/nnslashdot.el @@ -1,5 +1,5 @@ ;;; nnslashdot.el --- interfacing with Slashdot -;; Copyright (C) 1999, 2000 Free Software Foundation, Inc. +;; Copyright (C) 1999, 2000, 2001 Free Software Foundation, Inc. ;; Author: Lars Magne Ingebrigtsen ;; Keywords: news @@ -57,6 +57,9 @@ "http://slashdot.org/article.pl?sid=%s&mode=nocomment" "Where nnslashdot will fetch the article from.") +(defvoo nnslashdot-backslash-url "http://slashdot.org/slashdot.xml" + "Where nnslashdot will fetch the stories from.") + (defvoo nnslashdot-threshold -1 "The article threshold.") @@ -86,19 +89,17 @@ (nnslashdot-possibly-change-server group server) (condition-case why (unless gnus-nov-is-evil - (if nnslashdot-threaded - (nnslashdot-threaded-retrieve-headers articles group) - (nnslashdot-sane-retrieve-headers articles group))) + (nnslashdot-retrieve-headers-1 articles group)) (search-failed (nnslashdot-lose why)))) -(deffoo nnslashdot-threaded-retrieve-headers (articles group) - (let ((last (car (last articles))) - (did nil) - (start 1) - (sid (caddr (assoc group nnslashdot-groups))) - (first-comments t) - (startats '(1)) - headers article subject score from date lines parent point s) +(deffoo nnslashdot-retrieve-headers-1 (articles group) + (let* ((last (car (last articles))) + (start (if nnslashdot-threaded 1 (pop articles))) + (entry (assoc group nnslashdot-groups)) + (sid (nth 2 entry)) + (first-comments t) + headers article subject score from date lines parent point cid + s startats changed) (save-excursion (set-buffer nnslashdot-buffer) (let ((case-fold-search t)) @@ -107,10 +108,10 @@ (nnweb-insert (format nnslashdot-article-url (nnslashdot-sid-strip sid)) t) (goto-char (point-min)) - (search-forward "Posted by ") - (when (looking-at "]+>\\([^<]+\\)") - (setq from (nnweb-decode-entities-string (match-string 1)))) - (search-forward " on ") + (re-search-forward "Posted by[ \t\r\n]+") + (when (looking-at "\\(]+>\\)?[ \t\r\n]*\\([^<\r\n]+\\)") + (setq from (nnweb-decode-entities-string (match-string 2)))) + (search-forward "on ") (setq date (nnslashdot-date-to-date (buffer-substring (point) (1- (search-forward "<"))))) (setq lines (/ (- (point) @@ -123,16 +124,16 @@ 1 group from date (concat "<" (nnslashdot-sid-strip sid) "%1@slashdot>") "" 0 lines nil nil)) - headers)) - (while (and (setq start (pop startats)) - (< start last)) + headers) + (setq start (if nnslashdot-threaded 2 (pop articles)))) + (while (and start (<= start last)) (setq point (goto-char (point-max))) (nnweb-insert (format nnslashdot-comments-url (nnslashdot-sid-strip sid) - nnslashdot-threshold 0 start) + nnslashdot-threshold 0 (- start 2)) t) - (when first-comments + (when (and nnslashdot-threaded first-comments) (setq first-comments nil) (goto-char (point-max)) (while (re-search-backward "startat=\\([0-9]+\\)" nil t) @@ -140,58 +141,68 @@ (unless (memq s startats) (push s startats))) (setq startats (sort startats '<))) + (setq article (if (and article (< start article)) article start)) (goto-char point) (while (re-search-forward "<\\(b\\|H4\\)>\\([^<]+\\).*score:\\([^)]+\\))" nil t) - (setq article (string-to-number (match-string 1)) + (setq cid (match-string 1) subject (match-string 3) score (match-string 5)) + (unless (assq article (nth 4 entry)) + (setcar (nthcdr 4 entry) (cons (cons article cid) (nth 4 entry))) + (setq changed t)) (when (string-match "^Re: *" subject) (setq subject (concat "Re: " (substring subject (match-end 0))))) - (setq subject (nnweb-decode-entities-string subject)) - (forward-line 1) + (setq subject (nnweb-decode-entities-string subject)) + (search-forward "
") (if (looking-at - "by ]+>\\([^<]+\\)
[ \t\n]*.*(\\([^)]+\\))") + "by[ \t\n]+]+>\\([^<]+\\)[ \t\n]*(\\(<[^>]+>\\)*\\([^<>)]+\\))") (progn (goto-char (- (match-end 0) 5)) - (setq from (concat + (setq from (concat (nnweb-decode-entities-string (match-string 1)) - " <" (match-string 2) ">"))) + " <" (match-string 3) ">"))) (setq from "") - (when (looking-at "by \\(.+\\) on ") + (when (looking-at "by \\([^<>]*\\) on ") (goto-char (- (match-end 0) 5)) (setq from (nnweb-decode-entities-string (match-string 1))))) (search-forward " on ") (setq date (nnslashdot-date-to-date - (buffer-substring (point) (progn (end-of-line) (point))))) - (setq lines (/ (abs (- (search-forward "\n\r") (point))))) + (setq lines (/ (abs (- (search-forward ""))) 70)) - (forward-line 4) - (setq parent - (if (looking-at ".*cid=\\([0-9]+\\)") - (match-string 1) - nil)) - (setq did t) + (if (not + (re-search-forward ".*cid=\\([0-9]+\\)\">Parent" nil t)) + (setq parent nil) + (setq parent (match-string 1)) + (when (string= parent "0") + (setq parent nil))) (push (cons - (1+ article) + article (make-full-mail-header - (1+ article) + article (concat subject " (" score ")") from date - (concat "<" (nnslashdot-sid-strip sid) "%" - (number-to-string (1+ article)) - "@slashdot>") + (concat "<" (nnslashdot-sid-strip sid) "%" cid "@slashdot>") (if parent - (concat "<" (nnslashdot-sid-strip sid) "%" - (number-to-string (1+ (string-to-number parent))) - "@slashdot>") + (concat "<" (nnslashdot-sid-strip sid) "%" + parent "@slashdot>") "") 0 lines nil nil)) - headers))))) + headers) + (while (and articles (<= (car articles) article)) + (pop articles)) + (setq article (1+ article))) + (if nnslashdot-threaded + (progn + (setq start (pop startats)) + (if start (setq start (+ start 2)))) + (setq start (pop articles)))))) + (if changed (nnslashdot-write-groups)) (setq nnslashdot-headers (sort headers 'car-less-than-car)) (save-excursion (set-buffer nntp-server-buffer) @@ -201,108 +212,6 @@ (nnheader-insert-nov (cdr header))))) 'nov)) -(deffoo nnslashdot-sane-retrieve-headers (articles group) - (let ((last (car (last articles))) - (did nil) - (start (max (1- (car articles)) 1)) - (sid (caddr (assoc group nnslashdot-groups))) - headers article subject score from date lines parent point) - (save-excursion - (set-buffer nnslashdot-buffer) - (erase-buffer) - (when (= start 1) - (nnweb-insert (format nnslashdot-article-url - (nnslashdot-sid-strip sid)) t) - (goto-char (point-min)) - (search-forward "Posted by ") - (when (looking-at "]+>\\([^<]+\\)") - (setq from (nnweb-decode-entities-string (match-string 1)))) - (search-forward " on ") - (setq date (nnslashdot-date-to-date - (buffer-substring (point) (1- (search-forward "<"))))) - (forward-line 2) - (setq lines (count-lines (point) - (re-search-forward - "A href=\"\\(http://slashdot.org\\)?/article"))) - (push - (cons - 1 - (make-full-mail-header - 1 group from date (concat "<" (nnslashdot-sid-strip sid) - "%1@slashdot>") - "" 0 lines nil nil)) - headers)) - (while (or (not article) - (and did - (< article last))) - (when article - (setq start (1+ article))) - (setq point (goto-char (point-max))) - (nnweb-insert - (format nnslashdot-comments-url (nnslashdot-sid-strip sid) - nnslashdot-threshold 4 start) - t) - (goto-char point) - (while (re-search-forward - "<\\(b\\|H4\\)>\\([^<]+\\).*score:\\([^)]+\\))" - nil t) - (setq article (string-to-number (match-string 1)) - subject (match-string 3) - score (match-string 5)) - (when (string-match "^Re: *" subject) - (setq subject (concat "Re: " (substring subject (match-end 0))))) - (setq subject (nnweb-decode-entities-string subject)) - (forward-line 1) - (if (looking-at - "by ]+>\\([^<]+\\)[ \t\n]*.*(\\([^)]+\\))") - (progn - (goto-char (- (match-end 0) 5)) - (setq from (concat - (nnweb-decode-entities-string (match-string 1)) - " <" (match-string 2) ">"))) - (setq from "") - (when (looking-at "by \\(.+\\) on ") - (goto-char (- (match-end 0) 5)) - (setq from (nnweb-decode-entities-string (match-string 1))))) - (search-forward " on ") - (setq date - (nnslashdot-date-to-date - (buffer-substring (point) (progn (end-of-line) (point))))) - (setq lines (/ (abs (- (search-forward ""))) - 70)) - (forward-line 2) - (setq parent - (if (looking-at ".*cid=\\([0-9]+\\)") - (match-string 1) - nil)) - (setq did t) - (push - (cons - (1+ article) - (make-full-mail-header - (1+ article) (concat subject " (" score ")") - from date - (concat "<" (nnslashdot-sid-strip sid) "%" - (number-to-string (1+ article)) - "@slashdot>") - (if parent - (concat "<" (nnslashdot-sid-strip sid) "%" - (number-to-string (1+ (string-to-number parent))) - "@slashdot>") - "") - 0 lines nil nil)) - headers)))) - (setq nnslashdot-headers - (sort headers (lambda (s1 s2) (< (car s1) (car s2))))) - (save-excursion - (set-buffer nntp-server-buffer) - (erase-buffer) - (mm-with-unibyte-current-buffer - (dolist (header nnslashdot-headers) - (nnheader-insert-nov (cdr header))))) - 'nov)) - (deffoo nnslashdot-request-group (group &optional server dont-check) (nnslashdot-possibly-change-server nil server) (let ((elem (assoc group nnslashdot-groups))) @@ -325,7 +234,7 @@ (deffoo nnslashdot-request-article (article &optional group server buffer) (nnslashdot-possibly-change-server group server) - (let (contents) + (let (contents cid) (condition-case why (save-excursion (set-buffer nnslashdot-buffer) @@ -333,23 +242,32 @@ (goto-char (point-min)) (when (and (stringp article) (string-match "%\\([0-9]+\\)@" article)) - (setq article (string-to-number (match-string 1 article)))) + (setq cid (match-string 1 article)) + (let ((map (nth 4 (assoc group nnslashdot-groups)))) + (while map + (if (equal (cdar map) cid) + (setq article (caar map) + map nil) + (setq map (cdr map)))))) (when (numberp article) (if (= article 1) (progn - (re-search-forward "Posted by *<[^>]+>[^>]*<[^>]+> *on ") + (re-search-forward + "Posted by") (search-forward "
") (setq contents (buffer-substring (point) (progn (re-search-forward - "

.*A href=\"\\(http://slashdot.org\\)?/article") + "< [ \t\r\n]*" (1- article))) + (setq cid (cdr (assq article + (nth 4 (assoc group nnslashdot-groups))))) + (search-forward (format "" cid)) (setq contents (buffer-substring - (re-search-forward "]+>") + (re-search-forward "]*>") (search-forward ""))))))) (search-failed (nnslashdot-lose why))) @@ -384,10 +302,10 @@ (let ((number 0) sid elem description articles gname) (condition-case why - ;; First we do the Ultramode to get info on all the latest groups. - (progn + ;; First we do the Ultramode to get info on all the latest groups. + (progn (mm-with-unibyte-buffer - (nnweb-insert "http://slashdot.org/slashdot.xml" t) + (nnweb-insert nnslashdot-backslash-url t) (goto-char (point-min)) (while (search-forward "" nil t) (narrow-to-region (point) (search-forward "")) @@ -404,7 +322,8 @@ (setq gname (concat description " (" sid ")")) (if (setq elem (assoc gname nnslashdot-groups)) (setcar (cdr elem) articles) - (push (list gname articles sid) nnslashdot-groups)) + (push (list gname articles sid (current-time) nil) + nnslashdot-groups)) (goto-char (point-max)) (widen))) ;; Then do the older groups. @@ -425,13 +344,14 @@ (setq gname (concat description " (" sid ")")) (if (setq elem (assoc gname nnslashdot-groups)) (setcar (cdr elem) articles) - (push (list gname articles sid) nnslashdot-groups))))) + (push (list gname articles sid (current-time) nil) + nnslashdot-groups))))) (incf number 30))) (search-failed (nnslashdot-lose why))) (nnslashdot-write-groups) (nnslashdot-generate-active) t)) - + (deffoo nnslashdot-request-newgroups (date &optional server) (nnslashdot-possibly-change-server nil server) (nnslashdot-generate-active) @@ -496,6 +416,24 @@ (setq nnslashdot-headers nil nnslashdot-groups nil)) +(deffoo nnslashdot-request-expire-articles + (articles group &optional server force) + (nnslashdot-possibly-change-server group server) + (let ((item (assoc group nnslashdot-groups))) + (when item + (if (fourth item) + (when (and (>= (length articles) (cadr item)) ;; All are expirable. + (nnmail-expired-article-p + group + (fourth item) + force)) + (setq nnslashdot-groups (delq item nnslashdot-groups)) + (nnslashdot-write-groups) + (setq articles nil)) ;; all expired. + (setcdr (cddr item) (list (current-time))) + (nnslashdot-write-groups)))) + articles) + (nnoo-define-skeleton nnslashdot) ;;; Internal functions @@ -508,18 +446,32 @@ (unless nnslashdot-groups (nnslashdot-read-groups))) +(defun nnslashdot-make-tuple (tuple n) + (prog1 + tuple + (while (> n 1) + (unless (cdr tuple) + (setcdr tuple (list nil))) + (setq tuple (cdr tuple) + n (1- n))))) + (defun nnslashdot-read-groups () (let ((file (expand-file-name "groups" nnslashdot-directory))) (when (file-exists-p file) (mm-with-unibyte-buffer (insert-file-contents file) (goto-char (point-min)) - (setq nnslashdot-groups (read (current-buffer))))))) + (setq nnslashdot-groups (read (current-buffer)))) + (if (and nnslashdot-groups (< (length (car nnslashdot-groups)) 5)) + (let ((groups nnslashdot-groups)) + (while groups + (nnslashdot-make-tuple (car groups) 5) + (setq groups (cdr groups)))))))) (defun nnslashdot-write-groups () (with-temp-file (expand-file-name "groups" nnslashdot-directory) - (prin1 nnslashdot-groups (current-buffer)))) - + (gnus-prin1 nnslashdot-groups))) + (defun nnslashdot-init (server) "Initialize buffers and such." (unless (file-exists-p nnslashdot-directory) @@ -528,7 +480,8 @@ (setq nnslashdot-buffer (save-excursion (nnheader-set-temp-buffer - (format " *nnslashdot %s*" server)))))) + (format " *nnslashdot %s*" server)))) + (push nnslashdot-buffer gnus-buffers))) (defun nnslashdot-date-to-date (sdate) (condition-case err @@ -552,11 +505,6 @@ (defun nnslashdot-lose (why) (error "Slashdot HTML has changed; please get a new version of nnslashdot")) -;(defun nnslashdot-sid-strip (sid) -; (if (string-match "^00/" sid) -; (substring sid (match-end 0)) -; sid)) - (defalias 'nnslashdot-sid-strip 'identity) (provide 'nnslashdot) diff --git a/lisp/gnus/nnultimate.el b/lisp/gnus/nnultimate.el index 6ccb0a2aec8..5ce8446da11 100644 --- a/lisp/gnus/nnultimate.el +++ b/lisp/gnus/nnultimate.el @@ -56,6 +56,8 @@ (defvoo nnultimate-groups nil) (defvoo nnultimate-headers nil) (defvoo nnultimate-articles nil) +(defvar nnultimate-table-regexp + "postings.*editpost\\|forumdisplay\\|Forum[0-9]+/HTML\\|getbio") ;;; Interface functions @@ -74,13 +76,17 @@ (old-total (or (nth 6 entry) 1)) (furl "forumdisplay.cgi?action=topics&number=%d&DaysPrune=1000") (furls (list (concat nnultimate-address (format furl sid)))) + (nnultimate-table-regexp + "postings.*editpost\\|forumdisplay\\|getbio") headers article subject score from date lines parent point contents tinfo fetchers map elem a href garticles topic old-max - inc datel table string current-page total-contents pages + inc datel table current-page total-contents pages farticles forum-contents parse furl-fetched mmap farticle) (setq map mapping) (while (and (setq article (car articles)) map) + ;; Skip past the articles in the map until we reach the + ;; article we're looking for. (while (and map (or (> article (caar map)) (< (cadar map) (caar map)))) @@ -101,7 +107,7 @@ fetchers)) (pop articles) (setq article (car articles))))) - ;; Now we have the mapping from/to Gnus/nnultimate article numbers, + ;; Now we have the mapping from/to Gnus/nnultimate article numbers, ;; so we start fetching the topics that we need to satisfy the ;; request. (if (not fetchers) @@ -128,22 +134,27 @@ (setq contents (ignore-errors (w3-parse-buffer (current-buffer)))) (setq table (nnultimate-find-forum-table contents)) - (setq string (mapconcat 'identity (nnweb-text table) "")) - (when (string-match "topic is \\([0-9]\\) pages" string) - (setq pages (string-to-number (match-string 1 string))) - (setcdr table nil) - (setq table (nnultimate-find-forum-table contents))) + (goto-char (point-min)) + (when (re-search-forward "topic is \\([0-9]+\\) pages" nil t) + (setq pages (string-to-number (match-string 1)))) (setq contents (cdr (nth 2 (car (nth 2 table))))) (setq total-contents (nconc total-contents contents)) (incf current-page)) - ;;(setq total-contents (nreverse total-contents)) - (dolist (art (cdr elem)) - (if (not (nth (1- (cdr art)) total-contents)) - () ;(debug) - (push (list (car art) - (nth (1- (cdr art)) total-contents) - subject) - nnultimate-articles))))) + (when t + (let ((i 0)) + (dolist (co total-contents) + (push (list (or (nnultimate-topic-article-to-article + group (car elem) (incf i)) + 1) + co subject) + nnultimate-articles)))) + (when nil + (dolist (art (cdr elem)) + (when (nth (1- (cdr art)) total-contents) + (push (list (car art) + (nth (1- (cdr art)) total-contents) + subject) + nnultimate-articles)))))) (setq nnultimate-articles (sort nnultimate-articles 'car-less-than-car)) ;; Now we have all the articles, conveniently in an alist @@ -161,17 +172,26 @@ (setq date (substring (car datel) (match-end 0)) datel nil)) (pop datel)) - (setq date (delete "" (split-string date "[- \n\t\r    ]"))) - (if (or (member "AM" date) - (member "PM" date)) + (when date + (setq date (delete "" (split-string + date "[-, \n\t\r    ]"))) + (if (or (member "AM" date) + (member "PM" date)) + (setq date (format + "%s %s %s %s" + (nth 1 date) + (if (and (>= (length (nth 0 date)) 3) + (assoc (downcase + (substring (nth 0 date) 0 3)) + parse-time-months)) + (substring (nth 0 date) 0 3) + (car (rassq (string-to-number (nth 0 date)) + parse-time-months))) + (nth 2 date) (nth 3 date))) (setq date (format "%s %s %s %s" - (car (rassq (string-to-number (nth 0 date)) + (car (rassq (string-to-number (nth 1 date)) parse-time-months)) - (nth 1 date) (nth 2 date) (nth 3 date))) - (setq date (format "%s %s %s %s" - (car (rassq (string-to-number (nth 1 date)) - parse-time-months)) - (nth 0 date) (nth 2 date) (nth 3 date)))) + (nth 0 date) (nth 2 date) (nth 3 date))))) (push (cons article @@ -180,7 +200,7 @@ from (or date "") (concat "<" (number-to-string sid) "%" (number-to-string article) - "@ultimate>") + "@ultimate." server ">") "" 0 (/ (length (mapconcat 'identity @@ -199,6 +219,16 @@ (nnheader-insert-nov (cdr header)))))) 'nov))) +(defun nnultimate-topic-article-to-article (group topic article) + (catch 'found + (dolist (elem (nth 5 (assoc group nnultimate-groups))) + (when (and (= topic (nth 2 elem)) + (>= article (nth 3 elem)) + (< article (+ (- (nth 1 elem) (nth 0 elem)) 1 + (nth 3 elem)))) + (throw 'found + (+ (nth 0 elem) (- article (nth 3 elem)))))))) + (deffoo nnultimate-request-group (group &optional server dont-check) (nnultimate-possibly-change-server nil server) (when (not nnultimate-groups) @@ -330,7 +360,7 @@ ;; the group is entered, there's 2 new articles in topic one ;; and 1 in topic three. Then Gnus article number 8-9 be 5-6 ;; in topic one and 10 will be the 2 in topic three. - (dolist (row (reverse forum-contents)) + (dolist (row (nreverse forum-contents)) (setq row (nth 2 row)) (when (setq a (nnweb-parse-find 'a row)) (setq subject (car (last (nnweb-text a))) @@ -403,7 +433,7 @@ nnultimate-groups-alist) (with-temp-file (expand-file-name "groups" nnultimate-directory) (prin1 nnultimate-groups-alist (current-buffer)))) - + (defun nnultimate-init (server) "Initialize buffers and such." (unless (file-exists-p nnultimate-directory) @@ -438,9 +468,7 @@ (nth 2 parse)))) (let ((href (cdr (assq 'href (nth 1 (nnweb-parse-find 'a parse 20))))) case-fold-search) - (when (and href (string-match - "postings\\|forumdisplay\\|Forum[0-9]+/HTML\\|getbio" - href)) + (when (and href (string-match nnultimate-table-regexp href)) t)))) (provide 'nnultimate) diff --git a/lisp/gnus/nnweb.el b/lisp/gnus/nnweb.el index c4ff7248e6b..740b182639f 100644 --- a/lisp/gnus/nnweb.el +++ b/lisp/gnus/nnweb.el @@ -1,5 +1,5 @@ ;;; nnweb.el --- retrieving articles via web search engines -;; Copyright (C) 1996, 1997, 1998, 1999, 2000 +;; Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001 ;; Free Software Foundation, Inc. ;; Author: Lars Magne Ingebrigtsen @@ -55,25 +55,48 @@ (defvoo nnweb-directory (nnheader-concat gnus-directory "nnweb/") "Where nnweb will save its files.") -(defvoo nnweb-type 'dejanews +(defvoo nnweb-type 'google "What search engine type is being used. -Valid types include `dejanews', `dejanewsold', `reference', +Valid types include `google', `dejanews', `dejanewsold', `reference', and `altavista'.") (defvar nnweb-type-definition - '((dejanews + '( + (google + ;;(article . nnweb-google-wash-article) + ;;(id . "http://groups.google.com/groups?as_umsgid=%s") (article . ignore) - (id . "http://search.dejanews.com/msgid.xp?MID=%s&fmt=text") - (map . nnweb-dejanews-create-mapping) - (search . nnweb-dejanews-search) - (address . "http://www.deja.com/=dnc/qs.xp") - (identifier . nnweb-dejanews-identity)) - (dejanewsold + (id . "http://groups.google.com/groups?selm=%s&output=gplain") + ;;(reference . nnweb-google-reference) + (reference . identity) + (map . nnweb-google-create-mapping) + (search . nnweb-google-search) + (address . "http://groups.google.com/groups") + (identifier . nnweb-google-identity)) + (dejanews ;; alias of google + ;;(article . nnweb-google-wash-article) + ;;(id . "http://groups.google.com/groups?as_umsgid=%s") (article . ignore) - (map . nnweb-dejanews-create-mapping) - (search . nnweb-dejanewsold-search) - (address . "http://www.deja.com/dnquery.xp") - (identifier . nnweb-dejanews-identity)) + (id . "http://groups.google.com/groups?selm=%s&output=gplain") + ;;(reference . nnweb-google-reference) + (reference . identity) + (map . nnweb-google-create-mapping) + (search . nnweb-google-search) + (address . "http://groups.google.com/groups") + (identifier . nnweb-google-identity)) +;;; (dejanews +;;; (article . ignore) +;;; (id . "http://search.dejanews.com/msgid.xp?MID=%s&fmt=text") +;;; (map . nnweb-dejanews-create-mapping) +;;; (search . nnweb-dejanews-search) +;;; (address . "http://www.deja.com/=dnc/qs.xp") +;;; (identifier . nnweb-dejanews-identity)) +;;; (dejanewsold +;;; (article . ignore) +;;; (map . nnweb-dejanews-create-mapping) +;;; (search . nnweb-dejanewsold-search) +;;; (address . "http://www.deja.com/dnquery.xp") +;;; (identifier . nnweb-dejanews-identity)) (reference (article . nnweb-reference-wash-article) (map . nnweb-reference-create-mapping) @@ -124,6 +147,8 @@ and `altavista'.") (deffoo nnweb-request-scan (&optional group server) (nnweb-possibly-change-server group server) + (if nnweb-ephemeral-p + (setq nnweb-hashtb (gnus-make-hashtable 4095))) (funcall (nnweb-definition 'map)) (unless nnweb-ephemeral-p (nnweb-write-active) @@ -134,9 +159,10 @@ and `altavista'.") (when (and group (not (equal group nnweb-group)) (not nnweb-ephemeral-p)) + (setq nnweb-group group + nnweb-articles nil) (let ((info (assoc group nnweb-group-alist))) (when info - (setq nnweb-group group) (setq nnweb-type (nth 2 info)) (setq nnweb-search (nth 3 info)) (unless dont-check @@ -175,17 +201,19 @@ and `altavista'.") (and (stringp article) (nnweb-definition 'id t) (let ((fetch (nnweb-definition 'id)) - art) + art active) (when (string-match "^<\\(.*\\)>$" article) (setq art (match-string 1 article))) - (and fetch - art - (mm-with-unibyte-current-buffer - (nnweb-fetch-url - (format fetch article))))))) + (when (and fetch art) + (setq url (format fetch art)) + (mm-with-unibyte-current-buffer + (nnweb-fetch-url url)) + (if (nnweb-definition 'reference t) + (setq article + (funcall (nnweb-definition + 'reference) article))))))) (unless nnheader-callback-function - (funcall (nnweb-definition 'article)) - (nnweb-decode-entities)) + (funcall (nnweb-definition 'article))) (nnheader-report 'nnweb "Fetched article %s" article) (cons group (and (numberp article) article)))))) @@ -290,10 +318,11 @@ and `altavista'.") (nnweb-open-server server))) (unless nnweb-group-alist (nnweb-read-active)) + (unless nnweb-hashtb + (setq nnweb-hashtb (gnus-make-hashtable 4095))) (when group (when (and (not nnweb-ephemeral-p) - (not (equal group nnweb-group))) - (setq nnweb-hashtb (gnus-make-hashtable 4095)) + (equal group nnweb-group)) (nnweb-request-group group nil t)))) (defun nnweb-init (server) @@ -393,7 +422,7 @@ and `altavista'.") (car (rassq (string-to-number (match-string 2 date)) parse-time-months)) - (match-string 3 date) + (match-string 3 date) (match-string 1 date))) (setq date "Jan 1 00:00:00 0000")) (incf i) @@ -559,6 +588,7 @@ and `altavista'.") (while (search-forward "," nil t) (replace-match " " t t))) (widen) + (nnweb-decode-entities) (set-marker body nil)))) (defun nnweb-reference-search (search) @@ -663,7 +693,8 @@ and `altavista'.") (while (re-search-forward "[0-9]+" nil t) (replace-match "<\\1> " t))) (widen) - (nnweb-remove-markup))) + (nnweb-remove-markup) + (nnweb-decode-entities))) (defun nnweb-altavista-search (search &optional part) (url-insert-file-contents @@ -682,6 +713,140 @@ and `altavista'.") (setq buffer-file-name nil) t) +;;; +;;; Deja bought by google.com +;;; + +(defun nnweb-google-wash-article () + (let ((case-fold-search t) url) + (goto-char (point-min)) + (re-search-forward "^

" nil t)
+    (narrow-to-region (point-min) (point))
+    (search-backward "" nil t)
+      (replace-match "\n"))
+    (nnweb-remove-markup)
+    (goto-char (point-min))
+    (while (re-search-forward "^[ \t]*\n" nil t)
+      (replace-match ""))
+    (goto-char (point-max))
+    (insert "\n")
+    (widen)
+    (narrow-to-region (point) (point-max))
+    (search-forward "" nil t)
+    (delete-region (point) (point-max))
+    (nnweb-remove-markup)
+    (widen)))
+
+(defun nnweb-google-parse-1 (&optional Message-ID)
+  (let ((i 0)
+	(case-fold-search t)
+	(active (cadr (assoc nnweb-group nnweb-group-alist)))
+	Subject Score Date Newsgroups From
+	map url mid)
+    (unless active
+      (push (list nnweb-group (setq active (cons 1 0))
+		  nnweb-type nnweb-search)
+	    nnweb-group-alist))
+    ;; Go through all the article hits on this page.
+    (goto-char (point-min))
+    (while (re-search-forward
+	    "a href=/groups\\(\\?[^ \">]*selm=\\([^ &\">]+\\)\\)" nil t)
+      (setq mid (match-string 2)
+	    url (format 
+		 "http://groups.google.com/groups?selm=%s&output=gplain" mid))
+      (narrow-to-region (search-forward ">" nil t)
+			(search-forward "" nil t))
+      (nnweb-remove-markup)
+      (nnweb-decode-entities)
+      (setq Subject (buffer-string))
+      (goto-char (point-max))
+      (widen)
+      (forward-line 1)
+      (when (looking-at "
]+>") + (goto-char (match-end 0))) + (if (not (looking-at "]+>")) + (skip-chars-forward " \t") + (narrow-to-region (point) + (search-forward "" nil t)) + (nnweb-remove-markup) + (nnweb-decode-entities) + (setq Newsgroups (buffer-string)) + (goto-char (point-max)) + (widen) + (skip-chars-forward "- \t")) + (when (looking-at + "\\([0-9]+[/ ][A-Za-z]+[/ ][0-9]+\\)[ \t]*by[ \t]*\\([^<]*\\) - ]+\\)" url) + (match-string 1 url) + url)) + ;;; ;;; General web/w3 interface utility functions ;;; @@ -689,7 +854,7 @@ and `altavista'.") (defun nnweb-insert-html (parse) "Insert HTML based on a w3 parse tree." (if (stringp parse) - (insert parse) + (insert (nnheader-string-as-multibyte parse)) (insert "<" (symbol-name (car parse)) " ") (insert (mapconcat (lambda (param) @@ -729,7 +894,7 @@ and `altavista'.") (while (re-search-forward "&\\(#[0-9]+\\|[a-z]+\\);" nil t) (let ((elem (if (eq (aref (match-string 1) 0) ?\#) (let ((c - (string-to-number (substring + (string-to-number (substring (match-string 1) 1)))) (if (mm-char-or-char-int-p c) c 32)) (or (cdr (assq (intern (match-string 1)) @@ -739,9 +904,9 @@ and `altavista'.") (setq elem (char-to-string elem))) (replace-match elem t t)))) -(defun nnweb-decode-entities-string (str) +(defun nnweb-decode-entities-string (string) (with-temp-buffer - (insert str) + (insert string) (nnweb-decode-entities) (buffer-substring (point-min) (point-max)))) @@ -760,12 +925,12 @@ and `altavista'.") "Insert the contents from an URL in the current buffer. If FOLLOW-REFRESH is non-nil, redirect refresh url in META." (let ((name buffer-file-name)) - (if follow-refresh + (if follow-refresh (save-restriction (narrow-to-region (point) (point)) (url-insert-file-contents url) (goto-char (point-min)) - (when (re-search-forward + (when (re-search-forward "]*URL=\\([^\"]+\\)\"" nil t) (let ((url (match-string 1))) (delete-region (point-min) (point-max)) @@ -822,6 +987,11 @@ If FOLLOW-REFRESH is non-nil, redirect refresh url in META." (listp (cdr element))) (nnweb-text-1 element))))) +(defun nnweb-replace-in-string (string match newtext) + (while (string-match match string) + (setq string (replace-match newtext t t string))) + string) + (provide 'nnweb) ;;; nnweb.el ends here