From e6d2d263da3c83033860f408bc400386b54ff288 Mon Sep 17 00:00:00 2001 From: Miles Bader Date: Fri, 6 Jun 2008 22:53:14 +0000 Subject: [PATCH] Merge from gnus--devo--0 Revision: emacs@sv.gnu.org/emacs--devo--0--patch-1215 --- doc/misc/ChangeLog | 17 +- doc/misc/gnus.texi | 656 ++++++++++++++++- lisp/ChangeLog | 4 + lisp/Makefile.in | 1 + lisp/gnus/ChangeLog | 4 + lisp/gnus/nnir.el | 1666 +++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 2343 insertions(+), 5 deletions(-) create mode 100644 lisp/gnus/nnir.el diff --git a/doc/misc/ChangeLog b/doc/misc/ChangeLog index ff2f0292c3f..578abef1051 100644 --- a/doc/misc/ChangeLog +++ b/doc/misc/ChangeLog @@ -1,3 +1,16 @@ +2008-06-05 Reiner Steib + + * gnus.texi (nnmairix): Markup and othe minor fixes. + +2008-06-05 David Engster + + * gnus.texi (nnmairix): New nodes describing nnmairix.el. + +2008-06-05 Reiner Steib + + * gnus.texi (Group Parameters): Change ~/.gnus to ~/.gnus.el + (Searching, nnir, nnmairix): New stub nodes. + 2008-05-30 "Felix -" (tiny change) * cl.texi (Iteration Clauses): Fix incorrect "identical" examples. @@ -40,10 +53,6 @@ * org.texi: Massive changes, in many parts of the file. -2008-04-13 Reiner Steib - - * gnus.texi (Searching, nnir.el, nnmairix.el): New nodes. - 2008-04-13 Reiner Steib * gnus.texi (Oort Gnus): Add message-fill-column. diff --git a/doc/misc/gnus.texi b/doc/misc/gnus.texi index 8273be8f6da..8e7ebd17906 100644 --- a/doc/misc/gnus.texi +++ b/doc/misc/gnus.texi @@ -1736,6 +1736,7 @@ long as Gnus is active. * Exiting Gnus:: Stop reading news and get some work done. * Group Topics:: A folding group mode divided into topics. * Non-ASCII Group Names:: Accessing groups of non-English names. +* Searching:: Mail search engines. * Misc Group Stuff:: Other stuff that you can to do. @end menu @@ -3167,7 +3168,7 @@ message). As a workaround, it might help to add the variable in question to @code{gnus-newsgroup-variables}. @xref{Various Summary Stuff}. 
So if you want to set @code{message-from-style} via the group parameters, then you may need the following statement elsewhere in your -@file{~/.gnus} file: +@file{~/.gnus.el} file: @lisp (add-to-list 'gnus-newsgroup-variables 'message-from-style) @@ -4356,6 +4357,659 @@ names should be the same in both groups. Otherwise the Newsgroups header will be displayed incorrectly in the article buffer. +@node Searching +@section Searching + +@menu +* nnir:: Searching on IMAP, with swish, namazu, etc. +* nnmairix:: Searching maildir, MH or mbox with Mairix. +@end menu + +@cindex Searching + +FIXME: This node is a stub. + +FIXME: Add a brief overview of Gnus search capabilities. A brief +comparison of nnir, nnmairix, contrib/gnus-namazu would be nice +as well. + +FIXME: Explain difference to @ref{Searching for Articles}, add reference +and back-reference. + +@node nnir +@subsection nnir + +FIXME: As a first step, convert the commentary of @file{nnir} to texi. +@cindex nnir + +@node nnmairix +@subsection nnmairix + +@cindex mairix +@cindex nnmairix +This paragraph describes how to set up mairix and the back end +@code{nnmairix} for indexing and searching your mail from within +Gnus. Additionally, you can create permanent ``smart'' groups which are +bound to mairix searches and are automatically updated. + +@menu +* About mairix:: About the mairix mail search engine +* nnmairix requirements:: What you will need for using nnmairix +* What nnmairix does:: What does nnmairix actually do? +* Setting up mairix:: Set up your mairix installation +* Configuring nnmairix:: Set up the nnmairix back end +* nnmairix keyboard shortcuts:: List of available keyboard shortcuts +* Propagating marks:: How to propagate marks from nnmairix groups +* nnmairix tips and tricks:: Some tips, tricks and examples +* nnmairix caveats:: Some more stuff you might want to know +@end menu + +@c FIXME: The markup in this section needs improvement. E.g. 
add +@c @sample{...}, maybe remove some @strong{...}, convert ` - ' to `---`, +@c ... + +@node About mairix +@subsubsection About mairix + +Mairix is a tool for indexing and searching words in locally stored +mail. It was written by Richard Curnow and is licensed under the +GPL. Mairix comes with most popular GNU/Linux distributions, but it also +runs under Windows (with cygwin), Mac OS X and Solaris. The homepage can +be found at +@uref{http://www.rpcurnow.force9.co.uk/mairix/index.html} + +Though mairix might not be as flexible as other search tools like +swish++ or namazu, which you can use via the @code{nnir} back end, it +has the prime advantage of being incredibly fast. On current systems, it +can easily search through headers and message bodies of thousands and +thousands of mails in well under a second. Building the database +necessary for searching might take a minute or two, but only has to be +done once fully. Afterwards, the updates are done incrementally and +therefore are really fast, too. Additionally, mairix is very easy to set +up. + +For maximum speed though, mairix should be used with mails stored in +@code{Maildir} or @code{MH} format (this includes the @code{nnml} back +end), although it also works with mbox. Mairix presents the search +results by populating a @emph{virtual} maildir/MH folder with symlinks +which point to the ``real'' message files (if mbox is used, copies are +made). Since mairix already presents search results in such a virtual +mail folder, it is very well suited for using it as an external program +for creating @emph{smart} mail folders, which represent certain mail +searches. This is similar to a Kiboze group (@pxref{Kibozed Groups}), +but much faster. + +@node nnmairix requirements +@subsubsection nnmairix requirements + +Mairix searches local mail - that means, mairix absolutely must have +direct access to your mail folders. If your mail resides on another +server (e.g. 
an @acronym{IMAP} server) and you happen to have shell +access, @code{nnmairix} supports running mairix remotely, e.g. via ssh. + +Additionally, @code{nnmairix} only supports the following Gnus back +ends: @code{nnml}, @code{nnmaildir}, and @code{nnimap}. You +@strong{must} use one of these back ends for using +@code{nnmairix}. Other back ends, like @code{nnmbox}, @code{nnfolder} or +@code{nnmh}, won't work. + +If you absolutely must use mbox and still want to use @code{nnmairix}, +you can set up a local @acronym{IMAP} server, which you then access via +@code{nnimap}. This is a rather massive setup for accessing some mbox +files, so just change to MH or Maildir already... + +@node What nnmairix does +@subsubsection What nnmairix does + +The back end @code{nnmairix} enables you to call mairix from within Gnus, +either to query mairix with a search term or to update the +database. While visiting a message in the summary buffer, you can use +several pre-defined shortcuts for calling mairix, e.g. to quickly +search for all mails from the sender of the current message or to +display the whole thread associated with the message, even if the +mails are in different folders. + +Additionally, you can create permanent @code{nnmairix} groups which are bound +to certain mairix searches. This way, you can easily create a group +containing mails from a certain sender, with a certain subject line or +even for one specific thread based on the Message-ID. If you check for +new mail in these folders (e.g. by pressing @kbd{g} or @kbd{M-g}), they +automatically update themselves by calling mairix. + +You might ask why you need @code{nnmairix} at all, since mairix already +creates the group, populates it with links to the mails so that you can +then access it with Gnus, right? Well, this @emph{might} work, but often +does not - at least not without problems. 
Most probably you will get +strange article counts, and sometimes you might see mails which Gnus +claims have already been canceled and are inaccessible. This is due to +the fact that Gnus isn't really amused when things are happening behind +its back. Another problem can be the mail back end itself, e.g. if you +use mairix with an @acronym{IMAP} server (I had Dovecot complaining +about corrupt index files when mairix changed the contents of the search +group). Using @code{nnmairix} should circumvent these problems. + +@code{nnmairix} is not really a mail back end - it's actually more like a +wrapper, sitting between a ``real'' mail back end where mairix stores the +searches and the Gnus front end. You can choose between three different +mail back ends for the mairix folders: @code{nnml}, @code{nnmaildir} or +@code{nnimap}. @code{nnmairix} will call the mairix binary so that the +search results are stored in folders named +@code{zz_mairix-<NAME>-<NUMBER>} on this mail back end, but it will +present these folders in the Gnus front end only with @code{<NAME>}. You +can use an existing mail back end where you already store your mail, but +if you're uncomfortable with @code{nnmairix} creating new mail groups +alongside your other mail, you can also create e.g. a new +@code{nnmaildir} server exclusively for mairix. However, a special case +exists if you want to use mairix remotely on an IMAP server with +@code{nnimap} - here the mairix folders and your other mail must be on +the same @code{nnimap} back end. + +@node Setting up mairix +@subsubsection Setting up mairix + +First: create a backup of your mail folders (@pxref{nnmairix caveats}). + +Setting up mairix is easy: simply create a @file{.mairixrc} file with +(at least) the following entries: + +@example +# Your Maildir/MH base folder +base=~/Maildir +@end example + +This is the base folder for your mails. All the following paths are +relative to this base folder.
If you want to use @code{nnmairix} with +@code{nnimap}, this base path has to point to the mail path where the +@acronym{IMAP} server stores the mail folders! + +@c FIXME: Add typical examples? +@example +maildir= ... your maildir folders which should be indexed ... +mh= ... your nnml/mh folders which should be indexed ... +mbox = ... your mbox files which should be indexed ... +@end example + +Specify all your maildir/nnml folders and mbox files (relative to the +base path!) you want to index with mairix. See the man-page for +mairixrc for details. + +@example +omit=zz_mairix-* +@end example + +@vindex nnmairix-group-prefix +This should make sure that you don't accidentally index the mairix +search results. You can change the prefix of these folders with the +variable @code{nnmairix-group-prefix}. + +@c FIXME: Add typical examples? +@example +mformat= ... 'maildir' or 'mh' ... +database= ... location of database file ... +@end example + +The @code{mformat} setting specifies the output format for the mairix +search folder. Set this to @code{mh} if you want to access search results +with @code{nnml}. Otherwise choose @code{maildir}. + +See the man pages for mairix and mairixrc for further options. Now +simply call @code{mairix} to create the index for the first time. + +@node Configuring nnmairix +@subsubsection Configuring nnmairix + +In group mode, type @kbd{G b c} +(@code{nnmairix-create-server-and-default-group}). This will ask you for all +necessary information and create a @code{nnmairix} server as a foreign +server. You will have to specify the following: + +@itemize @bullet + +@item +The @strong{name} of the @code{nnmairix} server - choose whatever you +want. + +@item +The @strong{mail back end} where mairix should store its +searches. Currently @code{nnmaildir}, @code{nnimap} and @code{nnml} are +supported. As explained above, for locally stored mails, this can be an +existing mail back end where you store your mails. However, you can also +create e.g.
a new @code{nnmaildir} server exclusively for +@code{nnmairix} in your secondary select methods (@pxref{Finding the +News}). If you want to use mairix remotely on an @acronym{IMAP} server, +you have to choose the corresponding @code{nnimap} back end here. + +@item +@vindex nnmairix-mairix-search-options +The @strong{command} to call the mairix binary. This will usually just +be @code{mairix}, but you can also choose something like @code{ssh +SERVER mairix} if you want to call mairix remotely, e.g. on your +@acronym{IMAP} server. If you want to add some default options to +mairix, you could do this here, but better use the variable +@code{nnmairix-mairix-search-options} instead. + +@item +The name of the @strong{default search group}. This will be the group +where all temporary mairix searches are stored, i.e. all searches which +are not bound to permanent @code{nnmairix} groups. Choose whatever you +like. + +@item +If the mail back end is @code{nnimap} or @code{nnmaildir}, you will be +asked if you work with @strong{Maildir++}, i.e. with hidden maildir +folders (=beginning with a dot). For example, you have to answer +@samp{yes} here if you work with the Dovecot @acronym{IMAP} +server. Otherwise, you should answer @samp{no} here. + +@end itemize + +@node nnmairix keyboard shortcuts +@subsubsection nnmairix keyboard shortcuts + +In group mode: + +@table @kbd + +@item G b c +@kindex G b c (Group) +@findex nnmairix-create-server-and-default-group +Creates @code{nnmairix} server and default search group for this server +(@code{nnmairix-create-server-and-default-group}). You should have done +this by now (@pxref{Configuring nnmairix}). + +@item G b s +@kindex G b s (Group) +@findex nnmairix-search +Prompts for query which is then sent to the mairix binary. Search +results are put into the default search group which is automatically +displayed (@code{nnmairix-search}). 
+ +@item G b m +@kindex G b m (Group) +@findex nnmairix-widget-search +Allows you to create a mairix search or a permanent group more +comfortably using graphical widgets, similar to a customization +group. Just try it to see how it works (@code{nnmairix-widget-search}). + +@item G b i +@kindex G b i (Group) +@findex nnmairix-search-interactive +Another command for creating a mairix query more comfortably, but uses +only the minibuffer (@code{nnmairix-search-interactive}). + +@item G b g +@kindex G b g (Group) +@findex nnmairix-create-search-group +Creates a permanent group which is associated with a search query +(@code{nnmairix-create-search-group}). The @code{nnmairix} back end +automatically calls mairix when you update this group with @kbd{g} or +@kbd{M-g}. + +@item G b q +@kindex G b q (Group) +@findex nnmairix-group-change-query-this-group +Changes the search query for the @code{nnmairix} group under cursor +(@code{nnmairix-group-change-query-this-group}). + +@item G b t +@kindex G b t (Group) +@findex nnmairix-group-toggle-threads-this-group +Toggles the 'threads' parameter for the @code{nnmairix} group under cursor, +i.e. if you want to see the whole threads of the found messages +(@code{nnmairix-group-toggle-threads-this-group}). + +@item G b u +@kindex G b u (Group) +@findex nnmairix-update-database +@vindex nnmairix-mairix-update-options +Calls mairix binary for updating the database +(@code{nnmairix-update-database}). The default parameters are @code{-F} +and @code{-Q} for making this as fast as possible (see variable +@code{nnmairix-mairix-update-options} for defining these default +options). + +@item G b r +@kindex G b r (Group) +@findex nnmairix-group-toggle-readmarks-this-group +Keep articles in this @code{nnmairix} group always read or unread, or leave the +marks unchanged (@code{nnmairix-group-toggle-readmarks-this-group}).
+ +@item G b d +@kindex G b d (Group) +@findex nnmairix-group-delete-recreate-this-group +Recreate @code{nnmairix} group on the ``real'' mail back end +(@code{nnmairix-group-delete-recreate-this-group}). You can do this if +you always get wrong article counts with a @code{nnmairix} group. + +@item G b a +@kindex G b a (Group) +@findex nnmairix-group-toggle-allowfast-this-group +Toggles the @code{allow-fast} parameters for group under cursor +(@code{nnmairix-group-toggle-allowfast-this-group}). The default +behavior of @code{nnmairix} is to do a mairix search every time you +update or enter the group. With the @code{allow-fast} parameter set, +mairix will only be called when you explicitly update the group, but not +upon entering. This makes entering the group faster, but it may also +lead to dangling symlinks if something changed between updating and +entering the group which is not yet in the mairix database. + +@item G b p +@kindex G b p (Group) +@findex nnmairix-group-toggle-propmarks-this-group +Toggle marks propagation for this group +(@code{nnmairix-group-toggle-propmarks-this-group}). (@pxref{Propagating +marks}). + +@item G b o +@kindex G b o (Group) +@findex nnmairix-propagate-marks +Manually propagate marks (@code{nnmairix-propagate-marks}); needed only when +@code{nnmairix-propagate-marks-upon-close} is set to @code{nil}. + +@end table + +In summary mode: + +@table @kbd + +@item $ m +@kindex $ m (Summary) +@findex nnmairix-widget-search-from-this-article +Allows you to create a mairix query or group based on the current +message using graphical widgets (same as @code{nnmairix-widget-search}) +(@code{nnmairix-widget-search-from-this-article}). + +@item $ g +@kindex $ g (Summary) +@findex nnmairix-create-search-group-from-message +Interactively creates a new search group with query based on the current +message, but uses the minibuffer instead of graphical widgets +(@code{nnmairix-create-search-group-from-message}). 
+ +@item $ t +@kindex $ t (Summary) +@findex nnmairix-search-thread-this-article +Searches thread for the current article +(@code{nnmairix-search-thread-this-article}). This is effectively a +shortcut for calling @code{nnmairix-search} with @samp{m:msgid} of the +current article and enabled threads. + +@item $ f +@kindex $ f (Summary) +@findex nnmairix-search-from-this-article +Searches all messages from sender of the current article +(@code{nnmairix-search-from-this-article}). This is a shortcut for +calling @code{nnmairix-search} with @samp{f:From}. + +@item $ o +@kindex $ o (Summary) +@findex nnmairix-goto-original-article +(Only in @code{nnmairix} groups!) Tries to determine the group this article +originally came from and displays the article in this group, so that +e.g. when replying to this article the correct posting styles/group +parameters are applied (@code{nnmairix-goto-original-article}). This +function will use the registry if available, but can also parse the +article file path as a fallback method. + +@item $ u +@kindex $ u (Summary) +@findex nnmairix-remove-tick-mark-original-article +Remove possibly existing tick mark from original article +(@code{nnmairix-remove-tick-mark-original-article}). (@pxref{nnmairix +tips and tricks}). + +@end table + +@node Propagating marks +@subsubsection Propagating marks + +First off: you really need a patched mairix binary for using the marks +propagation feature efficiently. Otherwise, you would have to update +the mairix database all the time. You can get the patch at + +@uref{http://m61s02.vlinux.de/mairix-maildir-patch.tar} + +You need the mairix v0.21 source code for this patch; everything else +is explained in the accompanying readme file. If you don't want to use +marks propagation, you don't have to apply these patches, but they also +fix some annoyances regarding changing maildir flags, so it might still +be useful to you.
+ +With the patched mairix binary, you can use @code{nnmairix} as an +alternative to mail splitting (@pxref{Fancy Mail Splitting}). For +example, instead of splitting all mails from @samp{david@@foobar.com} +into a group, you can simply create a search group with the query +@samp{f:david@@foobar.com}. This is actually what ``smart folders'' are +all about: simply put everything in one mail folder and dynamically +create searches instead of splitting. This is more flexible, since you +can dynamically change your folders any time you want to. This also +implies that you will usually read your mails in the @code{nnmairix} +groups instead of your ``real'' mail groups. + +There is one problem, though: say you got a new mail from +@samp{david@@foobar.com} - it will now show up in two groups, the +``real'' group (your INBOX, for example) and in the @code{nnmairix} +search group (provided you have updated the mairix database). Now you +enter the @code{nnmairix} group and read the mail. The mail will be +marked as read, but only in the @code{nnmairix} group - in the ``real'' +mail group it will be still shown as unread. + +You could now catch up the mail group (@pxref{Group Data}), but this is +tedious and error prone, since you may overlook mails you don't have +created @code{nnmairix} groups for. Of course, you could first use +@code{nnmairix-goto-original-article} (@pxref{nnmairix keyboard +shortcuts}) and then read the mail in the original group, but that's +even more cumbersome. + +Clearly, the easiest way would be if marks could somehow be +automatically set for the original article. This is exactly what +@emph{marks propagation} is about. + +Marks propagation is deactivated by default. You can activate it for a +certain @code{nnmairix} group with +@code{nnmairix-group-toggle-propmarks-this-group} (bound to @kbd{G b +p}). 
This function will warn you if you try to use it with your default +search group; the reason is that the default search group is used for +temporary searches, and it's easy to accidentally propagate marks from +this group. However, you can ignore this warning if you really want to. + +With marks propagation enabled, all the marks you set in a @code{nnmairix} +group should now be propagated to the original article. For example, +you can now tick an article (by default with @kbd{!}) and this mark should +magically be set for the original article, too. + +A few more remarks which you may or may not want to know: + +@vindex nnmairix-propagate-marks-upon-close +Marks will not be set immediately, but only upon closing a group. This +not only makes marks propagation faster, it also avoids problems with +dangling symlinks when dealing with maildir files (since changing flags +will change the file name). You can also control when to propagate marks +via @code{nnmairix-propagate-marks-upon-close} (see the doc-string for +details). + +Obviously, @code{nnmairix} will have to look up the original group for every +article you want to set marks for. If available, @code{nnmairix} will first use +the registry for determining the original group. The registry is very +fast, hence you should really, really enable the registry when using +marks propagation. If you don't have to worry about RAM and disc space, +set @code{gnus-registry-max-entries} to a large enough value; to be on +the safe side, choose roughly the amount of mails you index with mairix. + +@vindex nnmairix-only-use-registry +If you don't want to use the registry or the registry hasn't seen the +original article yet, @code{nnmairix} will use an additional mairix search +for determining the file path of the article. This, of course, is way +slower than the registry - if you set hundreds or even thousands of +marks this way, it might take some time. 
You can avoid this situation +by setting @code{nnmairix-only-use-registry} to t. + +Maybe you also want to propagate marks the other way round, i.e. if you +tick an article in a "real" mail group, you'd like to have the same +article in a @code{nnmairix} group ticked, too. For several good +reasons, this can only be done efficiently if you use maildir. To +immediately contradict myself, let me mention that it WON'T work with +@code{nnmaildir}, since @code{nnmaildir} stores the marks externally and +not in the file name. Therefore, propagating marks to @code{nnmairix} +groups will usually only work if you use an IMAP server which uses +maildir as its file format. + +@vindex nnmairix-propagate-marks-to-nnmairix-groups +If you work with this setup, just set +@code{nnmairix-propagate-marks-to-nnmairix-groups} to @code{t} and see what +happens. If you don't like what you see, just set it to @code{nil} again. One +problem might be that you get a wrong number of unread articles; this +usually happens when you delete or expire articles in the original +groups. When this happens, you can recreate the @code{nnmairix} group on the +back end using @kbd{G b d}. + +@node nnmairix tips and tricks +@subsubsection nnmairix tips and tricks + +@itemize +@item +Checking Mail + +@findex nnmairix-update-groups +I put all my important mail groups at group level 1. The mairix groups +have group level 5, so they do not get checked at start up (@pxref{Group +Levels}). + +I use the following to check for mails: + +@lisp +(defun my-check-mail-mairix-update (level) + (interactive "P") + ;; if no prefix given, set level=1 + (gnus-group-get-new-news (or level 1)) + (nnmairix-update-groups "mairixsearch" t t) + (gnus-group-list-groups)) + +(define-key gnus-group-mode-map "g" 'my-check-mail-mairix-update) +@end lisp + +Instead of @samp{"mairixsearch"} use the name of your @code{nnmairix} +server. See the doc string for @code{nnmairix-update-groups} for +details. 
+ +@item +Example: search group for ticked articles + +For example, you can create a group for all ticked articles, where the +articles always stay unread: + +Hit @kbd{G b g}, enter group name (e.g. @samp{important}), use +@samp{F:f} as query and do not include threads. + +Now activate marks propagation for this group by using @kbd{G b p}. Then +activate the always-unread feature by using @kbd{G b r} twice. + +So far so good - but how do you remove the tick marks in the @code{nnmairix} +group? There are two options: You may simply use +@code{nnmairix-remove-tick-mark-original-article} (bound to @kbd{$ u}) to remove +tick marks from the original article. The other possibility is to set +@code{nnmairix-propagate-marks-to-nnmairix-groups} to @code{t}, but see the above +comments about this option. If it works for you, the tick marks should +also exist in the @code{nnmairix} group and you can remove them as usual, +e.g. by marking an article as read. + +When you have removed a tick mark from the original article, this +article should vanish from the @code{nnmairix} group after you have updated the +mairix database and updated the group. Fortunately, there is a function +for doing exactly that: @code{nnmairix-update-groups}. See the previous code +snippet and the doc string for details. + +@item +Dealing with auto-subscription of mail groups + +As described before, all @code{nnmairix} groups are in fact stored on +the mail back end in the form @samp{zz_mairix-<NAME>-<NUMBER>}. You can +see them when you enter the back end server in the server buffer. You +should not subscribe these groups! Unfortunately, these groups will +usually get @strong{auto-subscribed} when you use @code{nnmaildir} or +@code{nnml}, i.e. you will suddenly see groups of the form +@samp{zz_mairix*} pop up in your group buffer. If this happens to you, +simply kill these groups with @kbd{C-k}.
For avoiding this, turn off +auto-subscription completely by setting the variable +@code{gnus-auto-subscribed-groups} to @code{nil} (@pxref{Filtering New +Groups}), or if you like to keep this feature use the following kludge +for turning it off for all groups beginning with @samp{zz_}: + +@lisp +(setq gnus-auto-subscribed-groups + "^\\(nnml\\|nnfolder\\|nnmbox\\|nnmh\\|nnbabyl\\|nnmaildir\\).*:\\([^z]\\|z$\\|\\z[^z]\\|zz$\\|zz[^_]\\|zz_$\\).*") +@end lisp + +@end itemize + +@node nnmairix caveats +@subsubsection nnmairix caveats + +@itemize +@item +If you use the Gnus agent (@pxref{Gnus Unplugged}): don't agentize +@code{nnmairix} groups (though I have no idea what happens if you do). + +@item +If you use the Gnus registry: don't use the registry with +@code{nnmairix} groups (put them in +@code{gnus-registry-unfollowed-groups}). Be @strong{extra careful} if +you use @code{gnus-registry-split-fancy-with-parent} - mails which are +split into @code{nnmairix} groups are usually gone for good as soon as +you check the group for new mail (yes, it has happened to me...). + +@item +Therefore: @strong{Never ever} put ``real'' mails into @code{nnmairix} +groups (you shouldn't be able to, anyway). + +@item +mairix does only support us-ascii characters. + +@item +@code{nnmairix} uses a rather brute force method to force Gnus to +completely reread the group on the mail back end after mairix was +called - it simply deletes and re-creates the group on the mail +back end. So far, this has worked for me without any problems, and I +don't see how @code{nnmairix} could delete other mail groups than its +own, but anyway: you really should have a backup of your mail +folders. + +@item +All necessary information is stored in the group parameters +(@pxref{Group Parameters}). This has the advantage that no active file +is needed, but also implies that when you kill a @code{nnmairix} group, +it is gone for good. 
+ +@item +@findex nnmairix-purge-old-groups +If you create and kill a lot of @code{nnmairix} groups, the +``zz_mairix-*'' groups will accumulate on the mail back end server. To +delete old groups which are no longer needed, call +@code{nnmairix-purge-old-groups}. Note that this assumes that you don't +save any ``real'' mail in folders of the form +@code{zz_mairix--}. You can change the prefix of +@code{nnmairix} groups by changing the variable +@code{nnmairix-group-prefix}. + +@item +The following only applies if you @strong{don't} use the mentioned patch +for mairix (@pxref{Propagating marks}): + +A problem can occur when using @code{nnmairix} with maildir folders and +comes with the fact that maildir stores mail flags like @samp{Seen} or +@samp{Replied} by appending chars @samp{S} and @samp{R} to the message +file name, respectively. This implies that currently you would have to +update the mairix database not only when new mail arrives, but also when +mail flags are changing. The same applies to new mails which are indexed +while they are still in the @samp{new} folder but then get moved to +@samp{cur} when Gnus has seen the mail. If you don't update the database +after this has happened, a mairix query can lead to symlinks pointing to +non-existing files. In Gnus, these messages will usually appear with +``(none)'' entries in the header and can't be accessed. If this happens +to you, using @kbd{G b u} and updating the group will usually fix this. + +@end itemize + @node Misc Group Stuff @section Misc Group Stuff diff --git a/lisp/ChangeLog b/lisp/ChangeLog index 3a911766d51..a5174018a68 100644 --- a/lisp/ChangeLog +++ b/lisp/ChangeLog @@ -1,3 +1,7 @@ +2008-06-06 Miles Bader + + * Makefile.in (ELCFILES): Add gnus/nnir.elc.
+ 2008-06-06 Chong Yidong * menu-bar.el (menu-bar-options-menu): Add Menu entry for diff --git a/lisp/Makefile.in b/lisp/Makefile.in index 2d9fe09d711..78ca42b4993 100644 --- a/lisp/Makefile.in +++ b/lisp/Makefile.in @@ -600,6 +600,7 @@ ELCFILES = \ $(lisp)/gnus/nngateway.elc \ $(lisp)/gnus/nnheader.elc \ $(lisp)/gnus/nnimap.elc \ + $(lisp)/gnus/nnir.elc \ $(lisp)/gnus/nnkiboze.elc \ $(lisp)/gnus/nnlistserv.elc \ $(lisp)/gnus/nnmail.elc \ diff --git a/lisp/gnus/ChangeLog b/lisp/gnus/ChangeLog index 7f678b91b9e..8b37e7e99a4 100644 --- a/lisp/gnus/ChangeLog +++ b/lisp/gnus/ChangeLog @@ -1,3 +1,7 @@ +2008-06-05 Reiner Steib + + * nnir.el: New file. + 2008-06-05 Stefan Monnier * gnus-util.el (gnus-read-shell-command): New function. diff --git a/lisp/gnus/nnir.el b/lisp/gnus/nnir.el new file mode 100644 index 00000000000..df83f18c83c --- /dev/null +++ b/lisp/gnus/nnir.el @@ -0,0 +1,1666 @@ +;;; nnir.el --- search mail with various search engines -*- coding: iso-8859-1 -*- + +;; Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, +;; 2005, 2006, 2007, 2008 Free Software Foundation, Inc. + +;; Author: Kai Großjohann +;; Swish-e and Swish++ backends by: +;; Christoph Conrad . +;; IMAP backend by: Simon Josefsson . +;; IMAP search by: Torsten Hilbrich gmx.net> +;; IMAP search improved by Daniel Pittman . +;; nnmaildir support for Swish++ and Namazu backends by: +;; Justus Piater Piater.name> + +;; TODO: Documentation in the Gnus manual + +;; From: Reiner Steib +;; Subject: Re: Including nnir.el +;; Newsgroups: gmane.emacs.gnus.general +;; Message-ID: +;; Date: 2006-06-05 22:49:01 GMT +;; +;; On Sun, Jun 04 2006, Sascha Wilde wrote: +;; +;; > The one thing most hackers like to forget: Documentation. By now the +;; > documentation is only in the comments at the head of the source, I +;; > would use it as basis to cook up some minimal texinfo docs. +;; > +;; > Where in the existing gnus manual would this fit best? 
+ +;; Maybe (info "(gnus)Combined Groups") for a general description. +;; `gnus-group-make-nnir-group' might be described in (info +;; "(gnus)Foreign Groups") as well. + +;; Keywords: news mail searching ir + +;; This file is part of GNU Emacs. + +;; This is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. + +;; GNU Emacs is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GNU Emacs; see the file COPYING. If not, write to the +;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +;; Boston, MA 02110-1301, USA. + +;;; Commentary: + +;; The most recent version of this can always be fetched from the Gnus +;; CVS repository. See http://www.gnus.org/ for more information. + +;; This code is still in the development stage but I'd like other +;; people to have a look at it. Please do not hesitate to contact me +;; with your ideas. + +;; What does it do? Well, it allows you to index your mail using some +;; search engine (freeWAIS-sf, swish-e and others -- see later), +;; then type `G G' in the Group buffer and issue a query to the search +;; engine. You will then get a buffer which shows all articles +;; matching the query, sorted by Retrieval Status Value (score). + +;; When looking at the retrieval result (in the Summary buffer) you +;; can type `G T' (aka M-x gnus-summary-nnir-goto-thread RET) on an +;; article. You will be teleported into the group this article came +;; from, showing the thread this article is part of. (See below for +;; restrictions.) 
+ +;; The Lisp installation is simple: just put this file on your +;; load-path, byte-compile it, and load it from ~/.gnus or something. +;; This will install a new command `G G' in your Group buffer for +;; searching your mail. Note that you also need to configure a number +;; of variables, as described below. + +;; Restrictions: +;; +;; * If you don't use HyREX as your search engine, this expects that +;; you use nnml or another one-file-per-message backend, because the +;; other engines don't support nnfolder. +;; * It can only search the mail backends which are supported by one +;; search engine, because of different query languages. +;; * There are restrictions to the Wais setup. +;; * There are restrictions to the imap setup. +;; * gnus-summary-nnir-goto-thread: Fetches whole group first, before +;; limiting to the right articles. This is much too slow, of +;; course. May issue a query for number of articles to fetch; you +;; must accept the default of all articles at this point or things +;; may break. + +;; The Lisp setup involves setting a few variables and setting up the +;; search engine. You can define the variables in the server definition +;; like this: +;; (setq gnus-secondary-select-methods '( +;; (nnimap "" (nnimap-address "localhost") +;; (nnir-search-engine hyrex) +;; (nnir-hyrex-additional-switches ("-d" "ddl-nnimap.xml")) +;; ))) +;; Or you can define the global ones. The variables set in the mailer- +;; definition will be used first. +;; The variable to set is `nnir-search-engine'. Choose one of the engines +;; listed in `nnir-engines'. (Actually `nnir-engines' is an alist, +;; type `C-h v nnir-engines RET' for more information; this includes +;; examples for setting `nnir-search-engine', too.) +;; +;; The variable nnir-mail-backend isn't used anymore. +;; + +;; You must also set up a search engine. I'll tell you about four of +;; the search engines currently supported: + +;; 1.
freeWAIS-sf +;; +;; As always with freeWAIS-sf, you need a so-called `format file'. I +;; use the following file: +;; +;; ,----- +;; | # Kai's format file for freeWAIS-sf for indexing mails. +;; | # Each mail is in a file, much like the MH format. +;; | +;; | # Document separator should never match -- each file is a document. +;; | record-sep: /^@this regex should never match@$/ +;; | +;; | # Searchable fields specification. +;; | +;; | region: /^[sS]ubject:/ /^[sS]ubject: */ +;; | subject "Subject header" stemming TEXT BOTH +;; | end: /^[^ \t]/ +;; | +;; | region: /^([tT][oO]|[cC][cC]):/ /^([tT][oO]|[cC][cC]): */ +;; | to "To and Cc headers" SOUNDEX BOTH +;; | end: /^[^ \t]/ +;; | +;; | region: /^[fF][rR][oO][mM]:/ /^[fF][rR][oO][mM]: */ +;; | from "From header" SOUNDEX BOTH +;; | end: /^[^ \t]/ +;; | +;; | region: /^$/ +;; | stemming TEXT GLOBAL +;; | end: /^@this regex should never match@$/ +;; `----- +;; +;; 1998-07-22: waisindex would dump core on me for large articles with +;; the above settings. I used /^$/ as the end regex for the global +;; field. That seemed to work okay. + +;; There is a Perl module called `WAIS.pm' which is available from +;; CPAN as well as ls6-ftp.cs.uni-dortmund.de:/pub/wais/Perl. This +;; module comes with a nifty tool called `makedb', which I use for +;; indexing. Here's my `makedb.conf': +;; +;; ,----- +;; | # Config file for makedb +;; | +;; | # Global options +;; | waisindex = /usr/local/bin/waisindex +;; | wais_opt = -stem -t fields +;; | # `-stem' option necessary when `stemming' is specified for the +;; | # global field in the *.fmt file +;; | +;; | # Own variables +;; | homedir = /home/kai +;; | +;; | # The mail database. +;; | database = mail +;; | files = `find $homedir/Mail -name \*[0-9] -print` +;; | dbdir = $homedir/.wais +;; | limit = 100 +;; `----- +;; +;; The Lisp setup involves the `nnir-wais-*' variables. The most +;; difficult to understand variable is probably +;; `nnir-wais-remove-prefix'. 
Here's what it does: the output of +;; `waissearch' basically contains the file name and the (full) +;; directory name. As Gnus works with group names rather than +;; directory names, the directory name is transformed into a group +;; name as follows: first, a prefix is removed from the (full) +;; directory name, then all `/' are replaced with `.'. The variable +;; `nnir-wais-remove-prefix' should contain a regex matching exactly +;; this prefix. It defaults to `$HOME/Mail/' (note the trailing +;; slash). + +;; 2. Namazu +;; +;; The Namazu backend requires you to have one directory containing all +;; index files; this is controlled by the `nnir-namazu-index-directory' +;; variable. To function, the `nnir-namazu-remove-prefix' variable must +;; also be correct; see the documentation for `nnir-wais-remove-prefix' +;; above. +;; +;; It is particularly important not to pass any switches to namazu +;; that will change the output format. Good switches to use include +;; `--sort', `--ascending', `--early' and `--late'. Refer to the Namazu +;; documentation for further information on valid switches. +;; +;; To index my mail with the `mknmz' program I use the following +;; configuration file: +;; +;; ,---- +;; | package conf; # Don't remove this line! +;; | +;; | # Paths which will not be indexed. Don't use `^' or `$' anchors. +;; | $EXCLUDE_PATH = "spam|sent"; +;; | +;; | # Header fields which should be searchable. case-insensitive +;; | $REMAIN_HEADER = "from|date|message-id|subject"; +;; | +;; | # Searchable fields. case-insensitive +;; | $SEARCH_FIELD = "from|date|message-id|subject"; +;; | +;; | # The max length of a word. +;; | $WORD_LENG_MAX = 128; +;; | +;; | # The max length of a field. +;; | $MAX_FIELD_LENGTH = 256; +;; `---- +;; +;; My mail is stored in the directories ~/Mail/mail/, ~/Mail/lists/ and +;; ~/Mail/archive/, so to index them I go to the directory set in +;; `nnir-namazu-index-directory' and issue the following command.
+;; +;; mknmz --mailnews ~/Mail/archive/ ~/Mail/mail/ ~/Mail/lists/ +;; +;; For maximum searching efficiency I have a cron job set to run this +;; command every four hours. + +;; 3. HyREX +;; +;; The HyREX backend requires you to have one directory to which all +;; your relative paths, if you use any, are relative. This directory must be +;; set in the `nnir-hyrex-index-directory' variable, which defaults to +;; your home directory. You must also pass the base, class and +;; directory options or simply your ddl to the `nnir-hyrex-program' by +;; setting the `nnir-hyrex-additional-switches' variable accordingly. +;; To function, the `nnir-hyrex-remove-prefix' variable must also be +;; correct; see the documentation for `nnir-wais-remove-prefix' above. + +;; 4. find-grep +;; +;; The find-grep engine simply runs find(1) to locate eligible +;; articles and searches them with grep(1). This, of course, is much +;; slower than using a proper search engine but OTOH doesn't require +;; maintenance of an index and is still faster than using any built-in +;; means for searching. The method specification of the server to +;; search must include a directory for this engine to work (e.g., +;; `nnml-directory'). The tools must be POSIX compliant. GNU Find +;; prior to version 4.2.12 (4.2.26 on Linux due to incorrect ARG_MAX +;; handling) does not work. +;; ,---- +;; | ;; find-grep configuration for searching the Gnus Cache +;; | +;; | (nnml "cache" +;; | (nnml-get-new-mail nil) +;; | (nnir-search-engine find-grep) +;; | (nnml-directory "~/News/cache/") +;; | (nnml-active-file "~/News/cache/active")) +;; `---- + +;; Developer information: + +;; I have tried to make the code extensible. Basically, it is divided +;; into two layers. The upper layer is somewhat like the `nnvirtual' +;; or `nnkiboze' backends: given a specification of what articles to +;; show from another backend, it creates a group containing exactly +;; those articles.
The lower layer issues a query to a search engine +;; and produces such a specification of what articles to show from the +;; other backend. + +;; The interface between the two layers consists of the single +;; function `nnir-run-query', which just selects the appropriate +;; function for the search engine one is using. The input to +;; `nnir-run-query' is a string, representing the query as input by +;; the user. The output of `nnir-run-query' is supposed to be a +;; vector, each element of which should in turn be a three-element +;; vector. The first element should be full group name of the article, +;; the second element should be the article number, and the third +;; element should be the Retrieval Status Value (RSV) as returned from +;; the search engine. An RSV is the score assigned to the document by +;; the search engine. For Boolean search engines, the +;; RSV is always 1000 (or 1 or 100, or whatever you like). + +;; The sorting order of the articles in the summary buffer created by +;; nnir is based on the order of the articles in the above mentioned +;; vector, so that's where you can do the sorting you'd like. Maybe +;; it would be nice to have a way of displaying the search result +;; sorted differently? + +;; So what do you need to do when you want to add another search +;; engine? You write a function that executes the query. Temporary +;; data from the search engine can be put in `nnir-tmp-buffer'. This +;; function should return the list of articles as a vector, as +;; described above. Then, you need to register this backend in +;; `nnir-engines'. Then, users can choose the backend by setting +;; `nnir-search-engine'. + +;; Todo, or future ideas: + +;; * It should be possible to restrict search to certain groups. +;; +;; * There is currently no error checking. +;; +;; * The summary buffer display is currently really ugly, with all the +;; added information in the subjects. How could I make this +;; prettier? 
+;; +;; * A function which can be called from an nnir summary buffer which +;; teleports you into the group the current article came from and +;; shows you the whole thread this article is part of. +;; Implementation suggestions? +;; (1998-07-24: There is now a preliminary implementation, but +;; it is much too slow and quite fragile.) +;; +;; * Support other mail backends. In particular, probably quite a few +;; people use nnfolder. How would one go about searching nnfolders +;; and producing the right data needed? The group name and the RSV +;; are simple, but what about the article number? +;; - The article number is encoded in the `X-Gnus-Article-Number' +;; header of each mail. +;; - The HyREX engine supports nnfolder. +;; +;; * Support compressed mail files. Probably, just stripping off the +;; `.gz' or `.Z' file name extension is sufficient. +;; +;; * At least for imap, the query is performed twice. +;; + +;; Have you got other ideas? + +;;; Setup Code: + +(require 'nnoo) +(require 'gnus-group) +(require 'gnus-sum) +(require 'message) +(require 'gnus-util) +(eval-and-compile + (require 'cl)) + +(nnoo-declare nnir) +(nnoo-define-basics nnir) + +(gnus-declare-backend "nnir" 'mail) + +(defvar nnir-imap-search-field "TEXT" + "The IMAP search item when doing an nnir search") + +(defvar nnir-imap-search-arguments + '(("Whole message" . "TEXT") + ("Subject" . "SUBJECT") + ("To" . "TO") + ("From" . "FROM") + (nil . 
"HEADER \"%s\"")) + "Mapping from user readable strings to IMAP search items for use in nnir") + +(defvar nnir-imap-search-argument-history () + "The history for querying search options in nnir") + +;;; Developer Extension Variable: + +(defvar nnir-engines + `((wais nnir-run-waissearch + ()) + (imap nnir-run-imap + ((criteria + "Search in: " ; Prompt + ,nnir-imap-search-arguments ; alist for completing + nil ; no filtering + nil ; allow any user input + nil ; initial value + nnir-imap-search-argument-history ; the history to use + ,nnir-imap-search-field ; default + ))) + (swish++ nnir-run-swish++ + ((group . "Group spec: "))) + (swish-e nnir-run-swish-e + ((group . "Group spec: "))) + (namazu nnir-run-namazu + ()) + (hyrex nnir-run-hyrex + ((group . "Group spec: "))) + (find-grep nnir-run-find-grep + ((grep-options . "Grep options: ")))) + "Alist of supported search engines. +Each element in the alist is a three-element list (ENGINE FUNCTION ARGS). +ENGINE is a symbol designating the searching engine. FUNCTION is also +a symbol, giving the function that does the search. The third element +ARGS is a list of cons pairs (PARAM . PROMPT). When issuing a query, +the FUNCTION will issue a query for each of the PARAMs, using PROMPT. + +The value of `nnir-search-engine' must be one of the ENGINE symbols. +For example, use the following line for searching using freeWAIS-sf: + (setq nnir-search-engine 'wais) +Use the following line if you read your mail via IMAP and your IMAP +server supports searching: + (setq nnir-search-engine 'imap) +Note that you have to set additional variables for most backends. For +example, the `wais' backend needs the variables `nnir-wais-program', +`nnir-wais-database' and `nnir-wais-remove-prefix'. + +Add an entry here when adding a new search engine.") + +;;; User Customizable Variables: + +(defgroup nnir nil + "Search nnmh and nnml groups in Gnus with swish-e, freeWAIS-sf, or EWS." + :group 'gnus) + +;; Mail backend. 
+ +;; TODO: +;; If `nil', use server parameters to find out which server to search. CCC +;; +(defcustom nnir-mail-backend '(nnml "") + "*Specifies which backend should be searched. +More precisely, this is used to determine from which backend to fetch the +messages found. + +This must be equal to an existing server, so maybe it is best to use +something like the following: + (setq nnir-mail-backend (nth 0 gnus-secondary-select-methods)) +The above line works fine if the mail backend you want to search is +the first element of gnus-secondary-select-methods (`nth' starts counting +at zero)." + :type '(sexp) + :group 'nnir) + +;; Search engine to use. + +(defcustom nnir-search-engine 'wais + "*The search engine to use. Must be a symbol. +See `nnir-engines' for a list of supported engines, and for example +settings of `nnir-search-engine'." + :type '(sexp) + :group 'nnir) + +;; freeWAIS-sf. + +(defcustom nnir-wais-program "waissearch" + "*Name of waissearch executable." + :type '(string) + :group 'nnir) + +(defcustom nnir-wais-database (expand-file-name "~/.wais/mail") + "*Name of Wais database containing the mail. + +Note that this should be a file name without extension. For example, +if you have a file /home/john/.wais/mail.fmt, use this: + (setq nnir-wais-database \"/home/john/.wais/mail\") +The string given here is passed to `waissearch -d' as-is." + :type '(file) + :group 'nnir) + +(defcustom nnir-wais-remove-prefix (concat (getenv "HOME") "/Mail/") + "*The prefix to remove from each directory name returned by waissearch +in order to get a group name (albeit with / instead of .). This is a +regular expression. + +For example, suppose that Wais returns file names such as +\"/home/john/Mail/mail/misc/42\". For this example, use the following +setting: (setq nnir-wais-remove-prefix \"/home/john/Mail/\") +Note the trailing slash. Removing this prefix gives \"mail/misc/42\". 
+`nnir' knows to remove the \"/42\" and to replace \"/\" with \".\" to +arrive at the correct group name, \"mail.misc\"." + :type '(regexp) + :group 'nnir) + +(defcustom nnir-swish++-configuration-file + (expand-file-name "~/Mail/swish++.conf") + "*Configuration file for swish++." + :type '(file) + :group 'nnir) + +(defcustom nnir-swish++-program "search" + "*Name of swish++ search executable." + :type '(string) + :group 'nnir) + +(defcustom nnir-swish++-additional-switches '() + "*A list of strings, to be given as additional arguments to swish++. + +Note that this should be a list. Ie, do NOT use the following: + (setq nnir-swish++-additional-switches \"-i -w\") ; wrong +Instead, use this: + (setq nnir-swish++-additional-switches '(\"-i\" \"-w\"))" + :type '(repeat (string)) + :group 'nnir) + +(defcustom nnir-swish++-remove-prefix (concat (getenv "HOME") "/Mail/") + "*The prefix to remove from each file name returned by swish++ +in order to get a group name (albeit with / instead of .). This is a +regular expression. + +This variable is very similar to `nnir-wais-remove-prefix', except +that it is for swish++, not Wais." + :type '(regexp) + :group 'nnir) + +;; Swish-E. +;; URL: http://sunsite.berkeley.edu/SWISH-E/ +;; New version: http://www.boe.es/swish-e +;; Variables `nnir-swish-e-index-file', `nnir-swish-e-program' and +;; `nnir-swish-e-additional-switches' + +(make-obsolete-variable 'nnir-swish-e-index-file + 'nnir-swish-e-index-files) +(defcustom nnir-swish-e-index-file + (expand-file-name "~/Mail/index.swish-e") + "*Index file for swish-e. +This could be a server parameter. +It is never consulted once `nnir-swish-e-index-files', which should be +used instead, has been customized." + :type '(file) + :group 'nnir) + +(defcustom nnir-swish-e-index-files + (list nnir-swish-e-index-file) + "*List of index files for swish-e. +This could be a server parameter." 
+ :type '(repeat (file)) + :group 'nnir) + +(defcustom nnir-swish-e-program "swish-e" + "*Name of swish-e search executable. +This cannot be a server parameter." + :type '(string) + :group 'nnir) + +(defcustom nnir-swish-e-additional-switches '() + "*A list of strings, to be given as additional arguments to swish-e. + +Note that this should be a list. Ie, do NOT use the following: + (setq nnir-swish-e-additional-switches \"-i -w\") ; wrong +Instead, use this: + (setq nnir-swish-e-additional-switches '(\"-i\" \"-w\")) + +This could be a server parameter." + :type '(repeat (string)) + :group 'nnir) + +(defcustom nnir-swish-e-remove-prefix (concat (getenv "HOME") "/Mail/") + "*The prefix to remove from each file name returned by swish-e +in order to get a group name (albeit with / instead of .). This is a +regular expression. + +This variable is very similar to `nnir-wais-remove-prefix', except +that it is for swish-e, not Wais. + +This could be a server parameter." + :type '(regexp) + :group 'nnir) + +;; HyREX engine, see + +(defcustom nnir-hyrex-program "nnir-search" + "*Name of the nnir-search executable." + :type '(string) + :group 'nnir) + +(defcustom nnir-hyrex-additional-switches '() + "*A list of strings, to be given as additional arguments for nnir-search. +Note that this should be a list. Ie, do NOT use the following: + (setq nnir-hyrex-additional-switches \"-ddl ddl.xml -c nnir\") ; wrong ! +Instead, use this: + (setq nnir-hyrex-additional-switches '(\"-ddl\" \"ddl.xml\" \"-c\" \"nnir\"))" + :type '(repeat (string)) + :group 'nnir) + +(defcustom nnir-hyrex-index-directory (getenv "HOME") + "*Index directory for HyREX." + :type '(directory) + :group 'nnir) + +(defcustom nnir-hyrex-remove-prefix (concat (getenv "HOME") "/Mail/") + "*The prefix to remove from each file name returned by HyREX +in order to get a group name (albeit with / instead of .). + +For example, suppose that HyREX returns file names such as +\"/home/john/Mail/mail/misc/42\". 
For this example, use the following +setting: (setq nnir-hyrex-remove-prefix \"/home/john/Mail/\") +Note the trailing slash. Removing this prefix gives \"mail/misc/42\". +`nnir' knows to remove the \"/42\" and to replace \"/\" with \".\" to +arrive at the correct group name, \"mail.misc\"." + :type '(directory) + :group 'nnir) + +;; Namazu engine, see + +(defcustom nnir-namazu-program "namazu" + "*Name of Namazu search executable." + :type '(string) + :group 'nnir) + +(defcustom nnir-namazu-index-directory (expand-file-name "~/Mail/namazu/") + "*Index directory for Namazu." + :type '(directory) + :group 'nnir) + +(defcustom nnir-namazu-additional-switches '() + "*A list of strings, to be given as additional arguments to namazu. +The switches `-q', `-a', and `-s' are always used, very few other switches +make any sense in this context. + +Note that this should be a list. Ie, do NOT use the following: + (setq nnir-namazu-additional-switches \"-i -w\") ; wrong +Instead, use this: + (setq nnir-namazu-additional-switches '(\"-i\" \"-w\"))" + :type '(repeat (string)) + :group 'nnir) + +(defcustom nnir-namazu-remove-prefix (concat (getenv "HOME") "/Mail/") + "*The prefix to remove from each file name returned by Namazu +in order to get a group name (albeit with / instead of .). + +This variable is very similar to `nnir-wais-remove-prefix', except +that it is for Namazu, not Wais." + :type '(directory) + :group 'nnir) + +;;; Internal Variables: + +(defvar nnir-current-query nil + "Internal: stores current query (= group name).") + +(defvar nnir-current-server nil + "Internal: stores current server (does it ever change?).") + +(defvar nnir-current-group-marked nil + "Internal: stores current list of process-marked groups.") + +(defvar nnir-artlist nil + "Internal: stores search result.") + +(defvar nnir-tmp-buffer " *nnir*" + "Internal: temporary buffer.") + +;;; Code: + +;; Gnus glue. + +(defun gnus-group-make-nnir-group (extra-parms query) + "Create an nnir group. 
Asks for query." + (interactive "P\nsQuery: ") + (setq nnir-current-query nil + nnir-current-server nil + nnir-current-group-marked nil + nnir-artlist nil) + (let ((parms nil)) + (if extra-parms + (setq parms (nnir-read-parms query)) + (setq parms (list (cons 'query query)))) + (add-to-list 'parms (cons 'unique-id (message-unique-id)) t) + (gnus-group-read-ephemeral-group + (concat "nnir:" (prin1-to-string parms)) '(nnir "") t + (cons (current-buffer) + gnus-current-window-configuration) + nil))) + +(defun nnir-group-mode-hook () + (define-key gnus-group-mode-map (kbd "G G") + 'gnus-group-make-nnir-group)) +(add-hook 'gnus-group-mode-hook 'nnir-group-mode-hook) + +;; Why is this needed? Is this for compatibility with old/new gnusae? Using +;; gnus-group-server instead works for me. -- Justus Piater +(defmacro nnir-group-server (group) + "Return the server for a newsgroup GROUP. +The returned format is as `gnus-server-to-method' needs it. See +`gnus-group-real-prefix' and `gnus-group-real-name'." + `(let ((gname ,group)) + (if (string-match "^\\([^:]+\\):" gname) + (progn + (setq gname (match-string 1 gname)) + (if (string-match "^\\([^+]+\\)\\+\\(.+\\)$" gname) + (format "%s:%s" (match-string 1 gname) (match-string 2 gname)) + (concat gname ":"))) + (format "%s:%s" (car gnus-select-method) (cadr gnus-select-method))))) + +;; Summary mode commands. + +(defun gnus-summary-nnir-goto-thread () + "Only applies to nnir groups. Go to group this article came from +and show thread that contains this article." 
+ (interactive) + (unless (eq 'nnir (car (gnus-find-method-for-group gnus-newsgroup-name))) + (error "Can't execute this command unless in nnir group.")) + (let* ((cur (gnus-summary-article-number)) + (group (nnir-artlist-artitem-group nnir-artlist cur)) + (backend-number (nnir-artlist-artitem-number nnir-artlist cur)) + server backend-group) + (setq server (nnir-group-server group)) + (setq backend-group (gnus-group-real-name group)) + (gnus-group-read-ephemeral-group + backend-group + (gnus-server-to-method server) + t ; activate + (cons (current-buffer) + 'summary) ; window config + nil + (list backend-number)) + (gnus-summary-limit (list backend-number)) + (gnus-summary-refer-thread))) + +(if (fboundp 'eval-after-load) + (eval-after-load "gnus-sum" + '(define-key gnus-summary-goto-map + "T" 'gnus-summary-nnir-goto-thread)) + (add-hook 'gnus-summary-mode-hook + (function (lambda () + (define-key gnus-summary-goto-map + "T" 'gnus-summary-nnir-goto-thread))))) + + + +;; Gnus backend interface functions. + +(deffoo nnir-open-server (server &optional definitions) + ;; Just set the server variables appropriately. + (nnoo-change-server 'nnir server definitions)) + +(deffoo nnir-request-group (group &optional server fast) + "GROUP is the query string." + (nnir-possibly-change-server server) + ;; Check for cache and return that if appropriate. + (if (and (equal group nnir-current-query) + (equal gnus-group-marked nnir-current-group-marked) + (or (null server) + (equal server nnir-current-server))) + nnir-artlist + ;; Cache miss. + (setq nnir-artlist (nnir-run-query group))) + (save-excursion + (set-buffer nntp-server-buffer) + (if (zerop (length nnir-artlist)) + (progn + (setq nnir-current-query nil + nnir-current-server nil + nnir-current-group-marked nil + nnir-artlist nil) + (nnheader-report 'nnir "Search produced empty results.")) + ;; Remember data for cache. 
+ (setq nnir-current-query group) + (when server (setq nnir-current-server server)) + (setq nnir-current-group-marked gnus-group-marked) + (nnheader-insert "211 %d %d %d %s\n" + (nnir-artlist-length nnir-artlist) ; total # + 1 ; first # + (nnir-artlist-length nnir-artlist) ; last # + group)))) ; group name + +(deffoo nnir-retrieve-headers (articles &optional group server fetch-old) + (save-excursion + (let ((artlist (copy-sequence articles)) + art artitem artgroup artno artrsv artfullgroup + novitem novdata foo server) + (while (not (null artlist)) + (setq art (car artlist)) + (or (numberp art) + (nnheader-report + 'nnir + "nnir-retrieve-headers doesn't grok message ids: %s" + art)) + (setq artitem (nnir-artlist-article nnir-artlist art)) + (setq artrsv (nnir-artitem-rsv artitem)) + (setq artfullgroup (nnir-artitem-group artitem)) + (setq artno (nnir-artitem-number artitem)) + (setq artgroup (gnus-group-real-name artfullgroup)) + (setq server (nnir-group-server artfullgroup)) + ;; retrieve NOV or HEAD data for this article, transform into + ;; NOV data and prepend to `novdata' + (set-buffer nntp-server-buffer) + (nnir-possibly-change-server server) + (let ((gnus-override-method + (gnus-server-to-method server))) + (case (setq foo (gnus-retrieve-headers (list artno) artfullgroup nil)) + (nov + (goto-char (point-min)) + (setq novitem (nnheader-parse-nov)) + (unless novitem + (pop-to-buffer nntp-server-buffer) + (error + "nnheader-parse-nov returned nil for article %s in group %s" + artno artfullgroup))) + (headers + (goto-char (point-min)) + (setq novitem (nnheader-parse-head)) + (unless novitem + (pop-to-buffer nntp-server-buffer) + (error + "nnheader-parse-head returned nil for article %s in group %s" + artno artfullgroup))) + (t (error "Unknown header type %s while requesting article %s of group %s" + foo artno artfullgroup)))) + ;; replace article number in original group with article number + ;; in nnir group + (mail-header-set-number novitem art) + 
(mail-header-set-from novitem + (mail-header-from novitem)) + (mail-header-set-subject + novitem + (format "[%d: %s/%d] %s" + artrsv artgroup artno + (mail-header-subject novitem))) + ;;-(mail-header-set-extra novitem nil) + (push novitem novdata) + (setq artlist (cdr artlist))) + (setq novdata (nreverse novdata)) + (set-buffer nntp-server-buffer) (erase-buffer) + (mapc 'nnheader-insert-nov novdata) + 'nov))) + +(deffoo nnir-request-article (article + &optional group server to-buffer) + (if (stringp article) + (nnheader-report + 'nnir + "nnir-request-article doesn't grok message ids: %s" ; ARTICLE must be an nnir article number + article) + (save-excursion + (let* ((artitem (nnir-artlist-article nnir-artlist + article)) + (artfullgroup (nnir-artitem-group artitem)) + (artno (nnir-artitem-number artitem)) + ;; Bug? + ;; Why must we bind nntp-server-buffer here? It won't + ;; work if `buf' is used, say. (Of course, the set-buffer + ;; line below must then be updated, too.) + (nntp-server-buffer (or to-buffer nntp-server-buffer))) + (set-buffer nntp-server-buffer) + (erase-buffer) + (message "Requesting article %d from group %s" + artno artfullgroup) + (gnus-request-article artno artfullgroup nntp-server-buffer) + (cons artfullgroup artno))))) + + +(nnoo-define-skeleton nnir) + + +(defmacro nnir-add-result (dirnam artno score prefix server artlist) + "Ask `nnir-compose-result' to construct a result vector, +and if it is non-nil, add it to artlist." + `(let ((result (nnir-compose-result ,dirnam ,artno ,score ,prefix ,server))) + (when (not (null result)) + (push result ,artlist)))) + +(autoload 'nnmaildir-base-name-to-article-number "nnmaildir") + +;; Helper function currently used by the Swish++ and Namazu backends; +;; perhaps useful for other backends as well +(defun nnir-compose-result (dirnam article score prefix server) + "Extract the group from dirnam, and create a result vector +ready to be added to the list of search results."
+ + ;; remove nnir-*-remove-prefix from beginning of dirnam filename + (when (string-match (concat "^" prefix) dirnam) + (setq dirnam (replace-match "" t t dirnam))) + + (when (file-readable-p (concat prefix dirnam article)) + ;; remove trailing slash and, for nnmaildir, cur/new/tmp + (setq dirnam + (substring dirnam 0 (if (string= server "nnmaildir:") -5 -1))) + + ;; Set group to dirnam without any leading dots or slashes, + ;; and with all subsequent slashes replaced by dots + (let ((group (gnus-replace-in-string + (gnus-replace-in-string dirnam "^[./\\]" "" t) + "[/\\]" "." t))) + + (vector (nnir-group-full-name group server) + (if (string= server "nnmaildir:") + (nnmaildir-base-name-to-article-number + (substring article 0 (string-match ":" article)) + group nil) + (string-to-number article)) + (string-to-number score))))) + +;;; Search Engine Interfaces: + +;; freeWAIS-sf interface. +(defun nnir-run-waissearch (query server &optional group) + "Run the given QUERY against waissearch. Returns a vector of +\[GROUP ARTICLE-NUMBER SCORE] vectors, sorted by descending RSV." + (when group + (error "The freeWAIS-sf backend cannot search specific groups.")) + (save-excursion + (let ((qstring (cdr (assq 'query query))) + (prefix (nnir-read-server-parm 'nnir-wais-remove-prefix server)) + artlist score artno dirnam) + (set-buffer (get-buffer-create nnir-tmp-buffer)) + (erase-buffer) + (message "Doing WAIS query %s..." 
qstring) + (call-process nnir-wais-program + nil ; input from /dev/null + t ; output to current buffer + nil ; don't redisplay + "-d" (nnir-read-server-parm 'nnir-wais-database server) ; database to search + qstring) + (message "Massaging waissearch output...") + ;; remove superfluous lines + (keep-lines "Score:") + ;; extract data from result lines + (goto-char (point-min)) + (while (re-search-forward + "Score: +\\([0-9]+\\).*'\\([0-9]+\\) +\\([^']+\\)/'" nil t) + (setq score (match-string 1) + artno (match-string 2) + dirnam (match-string 3)) + (if (string-match prefix dirnam) (setq dirnam (replace-match "" t t dirnam)) + (nnheader-report 'nnir "Dir name %s doesn't contain prefix %s" + dirnam prefix)) ; no match: keep DIRNAM, never `replace-match' on stale match data + (setq group (substitute ?. ?/ dirnam)) + (push (vector (nnir-group-full-name group server) + (string-to-number artno) + (string-to-number score)) + artlist)) + (message "Massaging waissearch output...done") + (apply 'vector + (sort* artlist + (function (lambda (x y) + (> (nnir-artitem-rsv x) + (nnir-artitem-rsv y))))))))) + +;; IMAP interface. +;; todo: +;; nnir invokes this two (2) times???! +;; we should not use nnimap at all but open our own server connection +;; we should not LIST * but use nnimap-list-pattern from defs +;; send queries as literals +;; handle errors + +(autoload 'nnimap-open-server "nnimap") +(defvar nnimap-server-buffer) ;; nnimap.el +(autoload 'imap-mailbox-select "imap") +(autoload 'imap-search "imap") +(autoload 'imap-quote-specials "imap") + +(defun nnir-run-imap (query srv &optional group-option) + "Run a search against an IMAP back-end server.
+This uses a custom query language parser; see `nnir-imap-make-query' for +details on the language and supported extensions" + (save-excursion + (let ((qstring (cdr (assq 'query query))) + (server (cadr (gnus-server-to-method srv))) + (group (or group-option (gnus-group-group-name))) + (defs (caddr (gnus-server-to-method srv))) + (criteria (or (cdr (assq 'criteria query)) + nnir-imap-search-field)) + artlist buf) + (message "Opening server %s" server) + (condition-case () + (when (nnimap-open-server server defs) ;; xxx + (setq buf nnimap-server-buffer) ;; xxx + (message "Searching %s..." group) + (let ((arts 0) + (mbx (gnus-group-real-name group))) + (when (imap-mailbox-select mbx nil buf) + (mapc + (lambda (artnum) + (push (vector group artnum 1) artlist) + (setq arts (1+ arts))) + (imap-search (nnir-imap-make-query criteria qstring) buf)) + (message "Searching %s... %d matches" mbx arts))) + (message "Searching %s...done" group)) + (quit nil)) + (reverse artlist)))) + +(defun nnir-imap-make-query (criteria qstring) + "Parse the query string and criteria into an appropriate IMAP search +expression, returning the string query to make. + +This implements a little language designed to return the expected results +to an arbitrary query string to the end user. + +The search is always case-insensitive, as defined by RFC2060, and supports +the following features (inspired by the Google search input language): + +Automatic \"and\" queries + If you specify multiple words then they will be treated as an \"and\" + expression intended to match all components. + +Phrase searches + If you wrap your query in double-quotes then it will be treated as a + literal string. + +Negative terms + If you precede a term with \"-\" then it will negate that. + +\"OR\" queries + If you include an upper-case \"OR\" in your search it will cause the + term before it and the term after it to be treated as alternatives. 
+ +In future the following will be added to the language: + * support for date matches + * support for location of text matching within the query + * from/to/etc headers + * additional search terms + * flag based searching + * anything else that the RFC supports, basically." + ;; Walk through the query and turn it into an IMAP query string. + (nnir-imap-query-to-imap criteria (nnir-imap-parse-query qstring))) + + +(defun nnir-imap-query-to-imap (criteria query) + "Turn a s-expression format query into IMAP." + (mapconcat + ;; Turn the expressions into IMAP text + (lambda (item) + (nnir-imap-expr-to-imap criteria item)) + ;; The query, already in s-expr format. + query + ;; Append a space between each expression + " ")) + + +(defun nnir-imap-expr-to-imap (criteria expr) + "Convert EXPR into an IMAP search expression on CRITERIA" + ;; What sort of expression is this, eh? + (cond + ;; Simple string term + ((stringp expr) + (format "%s \"%s\"" criteria (imap-quote-specials expr))) + ;; Trivial term: and + ((eq expr 'and) nil) + ;; Composite term: or expression + ((eq (car-safe expr) 'or) + (format "OR %s %s" + (nnir-imap-expr-to-imap criteria (second expr)) + (nnir-imap-expr-to-imap criteria (third expr)))) + ;; Composite term: just the fax, mam + ((eq (car-safe expr) 'not) + (format "NOT (%s)" (nnir-imap-query-to-imap criteria (rest expr)))) + ;; Composite term: just expand it all. + ((and (not (null expr)) (listp expr)) + (format "(%s)" (nnir-imap-query-to-imap criteria expr))) + ;; Complex value, give up for now. + (t (error "Unhandled input: %S" expr)))) + + +(defun nnir-imap-parse-query (string) + "Turn STRING into an s-expression based query based on the IMAP +query language as defined in `nnir-imap-make-query'. + +This involves turning individual tokens into higher level terms +that the search language can then understand and use." + (with-temp-buffer + ;; Set up the parsing environment. 
+ (insert string) + (goto-char (point-min)) + ;; Now, collect the output terms and return them. + (let (out) + (while (not (nnir-imap-end-of-input)) + (push (nnir-imap-next-expr) out)) + (reverse out)))) + + +(defun nnir-imap-next-expr (&optional count) + "Return the next expression from the current buffer." + (let ((term (nnir-imap-next-term count)) + (next (nnir-imap-peek-symbol))) + ;; Are we looking at an 'or' expression? + (cond + ;; Handle 'expr or expr' + ((eq next 'or) + (list 'or term (nnir-imap-next-expr 2))) + ;; Anything else + (t term)))) + + +(defun nnir-imap-next-term (&optional count) + "Return the next TERM from the current buffer." + (let ((term (nnir-imap-next-symbol count))) + ;; What sort of term is this? + (cond + ;; and -- just ignore it + ((eq term 'and) 'and) + ;; negated term + ((eq term 'not) (list 'not (nnir-imap-next-expr))) + ;; generic term + (t term)))) + + +(defun nnir-imap-peek-symbol () + "Return the next symbol from the current buffer, but don't consume it." + (save-excursion + (nnir-imap-next-symbol))) + +(defun nnir-imap-next-symbol (&optional count) + "Return the next symbol from the current buffer, or nil if we are +at the end of the buffer. If supplied COUNT skips some symbols before +returning the one at the supplied position." + (when (and (numberp count) (> count 1)) + (nnir-imap-next-symbol (1- count))) + (let ((case-fold-search t)) + ;; end of input stream? + (unless (nnir-imap-end-of-input) + ;; No, return the next symbol from the stream. + (cond + ;; negated expression -- return it and advance one char. + ((looking-at "-") (forward-char 1) 'not) + ;; quoted string + ((looking-at "\"") (nnir-imap-delimited-string "\"")) + ;; list expression -- we parse the content and return this as a list. 
+ ((looking-at "(") + (nnir-imap-parse-query (nnir-imap-delimited-string ")"))) + ;; keyword input -- return a symbol version + ((looking-at "\\band\\b") (forward-char 3) 'and) + ((looking-at "\\bor\\b") (forward-char 2) 'or) + ((looking-at "\\bnot\\b") (forward-char 3) 'not) + ;; Simple, boring keyword + (t (let ((start (point)) + (end (if (search-forward-regexp "[[:blank:]]" nil t) + (prog1 + (match-beginning 0) + ;; unskip if we hit a non-blank terminal character. + (when (string-match "[^[:blank:]]" (match-string 0)) + (backward-char 1))) + (goto-char (point-max))))) + (buffer-substring start end))))))) + +(defun nnir-imap-delimited-string (delimiter) + "Return a delimited string from the current buffer." + (let ((start (point)) end) + (forward-char 1) ; skip the first delimiter. + (while (not end) + (unless (search-forward delimiter nil t) + (error "Unmatched delimited input with %s in query" delimiter)) + (let ((here (point))) + (unless (equal (buffer-substring (- here 2) (- here 1)) "\\") + (setq end (point))))) + (buffer-substring (1+ start) (1- end)))) + +(defun nnir-imap-end-of-input () + "Are we at the end of input?" + (skip-chars-forward "[[:blank:]]") + (looking-at "$")) + + +;; Swish++ interface. +;; -cc- Todo +;; Search by +;; - group +;; Sort by +;; - rank (default) +;; - article number +;; - file size +;; - group +(defun nnir-run-swish++ (query server &optional group) + "Run QUERY against swish++. +Returns a vector of (group name, file name) pairs (also vectors, +actually). + +Tested with swish++ 4.7 on GNU/Linux and with swish++ 5.0b2 on +Windows NT 4.0." + + (when group + (error "The swish++ backend cannot search specific groups.")) + + (save-excursion + (let ( (qstring (cdr (assq 'query query))) + (groupspec (cdr (assq 'group query))) + (prefix (nnir-read-server-parm 'nnir-swish++-remove-prefix server)) + artlist + ;; nnml-use-compressed-files might be any string, but probably this + ;; is sufficient. 
Note that we can't only use the value of + ;; nnml-use-compressed-files because old articles might have been + ;; saved with a different value. + (article-pattern (if (string= server "nnmaildir:") + ":[0-9]+" + "^[0-9]+\\(\\.[a-z0-9]+\\)?$")) + score artno dirnam filenam) + + (when (equal "" qstring) + (error "swish++: You didn't enter anything.")) + + (set-buffer (get-buffer-create nnir-tmp-buffer)) + (erase-buffer) + + (if groupspec + (message "Doing swish++ query %s on %s..." qstring groupspec) + (message "Doing swish++ query %s..." qstring)) + + (let* ((cp-list `( ,nnir-swish++-program + nil ; input from /dev/null + t ; output + nil ; don't redisplay + "--config-file" ,(nnir-read-server-parm 'nnir-swish++-configuration-file server) + ,@(nnir-read-server-parm 'nnir-swish++-additional-switches server) + ,qstring ; the query, in swish++ format + )) + (exitstatus + (progn + (message "%s args: %s" nnir-swish++-program + (mapconcat 'identity (cddddr cp-list) " ")) ;; ??? + (apply 'call-process cp-list)))) + (unless (or (null exitstatus) + (zerop exitstatus)) + (nnheader-report 'nnir "Couldn't run swish++: %s" exitstatus) + ;; swish++ failure reason is in this buffer, show it if + ;; the user wants it. + (when (> gnus-verbose 6) + (display-buffer nnir-tmp-buffer)))) + + ;; The results are output in the format of: + ;; V 4.7 Linux + ;; rank relative-path-name file-size file-title + ;; V 5.0b2: + ;; rank relative-path-name file-size topic?? + ;; where rank is an integer from 1 to 100. + (goto-char (point-min)) + (while (re-search-forward + "\\(^[0-9]+\\) \\([^ ]+\\) [0-9]+ \\(.*\\)$" nil t) + (setq score (match-string 1) + filenam (match-string 2) + artno (file-name-nondirectory filenam) + dirnam (file-name-directory filenam)) + + ;; don't match directories + (when (string-match article-pattern artno) + (when (not (null dirnam)) + + ;; maybe limit results to matching groups. 
+ (when (or (not groupspec) + (string-match groupspec dirnam)) + (nnir-add-result dirnam artno score prefix server artlist))))) + + (message "Massaging swish++ output...done") + + ;; Sort by score + (apply 'vector + (sort* artlist + (function (lambda (x y) + (> (nnir-artitem-rsv x) + (nnir-artitem-rsv y))))))))) + +;; Swish-E interface. +(defun nnir-run-swish-e (query server &optional group) + "Run given query against swish-e. +Returns a vector of (group name, file name) pairs (also vectors, +actually). + +Tested with swish-e-2.0.1 on Windows NT 4.0." + + ;; swish-e crashes with empty parameter to "-w" on commandline... + (when group + (error "The swish-e backend cannot search specific groups.")) + + (save-excursion + (let ((qstring (cdr (assq 'query query))) + (prefix + (or (nnir-read-server-parm 'nnir-swish-e-remove-prefix server) + (error "Missing parameter `nnir-swish-e-remove-prefix'"))) + artlist score artno dirnam group ) + + (when (equal "" qstring) + (error "swish-e: You didn't enter anything.")) + + (set-buffer (get-buffer-create nnir-tmp-buffer)) + (erase-buffer) + + (message "Doing swish-e query %s..." query) + (let* ((index-files + (or (nnir-read-server-parm + 'nnir-swish-e-index-files server) + (error "Missing parameter `nnir-swish-e-index-files'"))) + (additional-switches + (nnir-read-server-parm + 'nnir-swish-e-additional-switches server)) + (cp-list `(,nnir-swish-e-program + nil ; input from /dev/null + t ; output + nil ; don't redisplay + "-f" ,@index-files + ,@additional-switches + "-w" + ,qstring ; the query, in swish-e format + )) + (exitstatus + (progn + (message "%s args: %s" nnir-swish-e-program + (mapconcat 'identity (cddddr cp-list) " ")) + (apply 'call-process cp-list)))) + (unless (or (null exitstatus) + (zerop exitstatus)) + (nnheader-report 'nnir "Couldn't run swish-e: %s" exitstatus) + ;; swish-e failure reason is in this buffer, show it if + ;; the user wants it. 
+ (when (> gnus-verbose 6) + (display-buffer nnir-tmp-buffer)))) + + ;; The results are output in the format of: + ;; rank path-name file-title file-size + (goto-char (point-min)) + (while (re-search-forward + "\\(^[0-9]+\\) \\([^ ]+\\) \"\\([^\"]+\\)\" [0-9]+$" nil t) + (setq score (match-string 1) + artno (match-string 3) + dirnam (file-name-directory (match-string 2))) + + ;; don't match directories + (when (string-match "^[0-9]+$" artno) + (when (not (null dirnam)) + + ;; remove nnir-swish-e-remove-prefix from beginning of dirname + (when (string-match (concat "^" prefix) dirnam) + (setq dirnam (replace-match "" t t dirnam))) + + (setq dirnam (substring dirnam 0 -1)) + ;; eliminate all ".", "/", "\" from beginning. Always matches. + (string-match "^[./\\]*\\(.*\\)$" dirnam) + ;; "/" -> "." + (setq group (substitute ?. ?/ (match-string 1 dirnam))) + ;; Windows "\\" -> "." + (setq group (substitute ?. ?\\ group)) + + (push (vector (nnir-group-full-name group server) + (string-to-number artno) + (string-to-number score)) + artlist)))) + + (message "Massaging swish-e output...done") + + ;; Sort by score + (apply 'vector + (sort* artlist + (function (lambda (x y) + (> (nnir-artitem-rsv x) + (nnir-artitem-rsv y))))))))) + +;; HyREX interface +(defun nnir-run-hyrex (query server &optional group) + (save-excursion + (let ((artlist nil) + (groupspec (cdr (assq 'group query))) + (qstring (cdr (assq 'query query))) + (prefix (nnir-read-server-parm 'nnir-hyrex-remove-prefix server)) + score artno dirnam) + (when (and group groupspec) + (error (concat "It does not make sense to use a group spec" + " with process-marked groups."))) + (when group + (setq groupspec (gnus-group-real-name group))) + (when (and group (not (equal group (nnir-group-full-name groupspec server)))) + (message "%s vs. 
%s" group (nnir-group-full-name groupspec server)) + (error "Server with groupspec doesn't match group !")) + (set-buffer (get-buffer-create nnir-tmp-buffer)) + (erase-buffer) + (if groupspec + (message "Doing hyrex-search query %s on %s..." query groupspec) + (message "Doing hyrex-search query %s..." query)) + (let* ((cp-list + `( ,nnir-hyrex-program + nil ; input from /dev/null + t ; output + nil ; don't redisplay + "-i",(nnir-read-server-parm 'nnir-hyrex-index-directory server) ; index directory + ,@(nnir-read-server-parm 'nnir-hyrex-additional-switches server) + ,qstring ; the query, in hyrex-search format + )) + (exitstatus + (progn + (message "%s args: %s" nnir-hyrex-program + (mapconcat 'identity (cddddr cp-list) " ")) + (apply 'call-process cp-list)))) + (unless (or (null exitstatus) + (zerop exitstatus)) + (nnheader-report 'nnir "Couldn't run hyrex-search: %s" exitstatus) + ;; nnir-search failure reason is in this buffer, show it if + ;; the user wants it. + (when (> gnus-verbose 6) + (display-buffer nnir-tmp-buffer)))) ;; FIXME: Dont clear buffer ! + (if groupspec + (message "Doing hyrex-search query \"%s\" on %s...done" qstring groupspec) + (message "Doing hyrex-search query \"%s\"...done" qstring)) + (sit-for 0) + ;; nnir-search returns: + ;; for nnml/nnfolder: "filename mailid weigth" + ;; for nnimap: "group mailid weigth" + (goto-char (point-min)) + (delete-non-matching-lines "^\\S + [0-9]+ [0-9]+$") + ;; HyREX couldn't search directly in groups -- so filter out here. + (when groupspec + (keep-lines groupspec)) + ;; extract data from result lines + (goto-char (point-min)) + (while (re-search-forward + "\\(\\S +\\) \\([0-9]+\\) \\([0-9]+\\)" nil t) + (setq dirnam (match-string 1) + artno (match-string 2) + score (match-string 3)) + (when (string-match prefix dirnam) + (setq dirnam (replace-match "" t t dirnam))) + (push (vector (nnir-group-full-name (substitute ?. 
?/ dirnam) server) + (string-to-number artno) + (string-to-number score)) + artlist)) + (message "Massaging hyrex-search output...done.") + (apply 'vector + (sort* artlist + (function (lambda (x y) + (if (string-lessp (nnir-artitem-group x) + (nnir-artitem-group y)) + t + (< (nnir-artitem-number x) + (nnir-artitem-number y))))))) + ))) + +;; Namazu interface +(defun nnir-run-namazu (query server &optional group) + "Run given query against Namazu. Returns a vector of (group name, file name) +pairs (also vectors, actually). + +Tested with Namazu 2.0.6 on a GNU/Linux system." + (when group + (error "The Namazu backend cannot search specific groups")) + (save-excursion + (let ((article-pattern (if (string= server "nnmaildir:") + ":[0-9]+" + "^[0-9]+$")) + artlist + (qstring (cdr (assq 'query query))) + (prefix (nnir-read-server-parm 'nnir-namazu-remove-prefix server)) + score group article + (process-environment (copy-sequence process-environment))) + (setenv "LC_MESSAGES" "C") + (set-buffer (get-buffer-create nnir-tmp-buffer)) + (erase-buffer) + (let* ((cp-list + `( ,nnir-namazu-program + nil ; input from /dev/null + t ; output + nil ; don't redisplay + "-q" ; don't be verbose + "-a" ; show all matches + "-s" ; use short format + ,@(nnir-read-server-parm 'nnir-namazu-additional-switches server) + ,qstring ; the query, in namazu format + ,(nnir-read-server-parm 'nnir-namazu-index-directory server) ; index directory + )) + (exitstatus + (progn + (message "%s args: %s" nnir-namazu-program + (mapconcat 'identity (cddddr cp-list) " ")) + (apply 'call-process cp-list)))) + (unless (or (null exitstatus) + (zerop exitstatus)) + (nnheader-report 'nnir "Couldn't run namazu: %s" exitstatus) + ;; Namazu failure reason is in this buffer, show it if + ;; the user wants it. + (when (> gnus-verbose 6) + (display-buffer nnir-tmp-buffer)))) + + ;; Namazu output looks something like this: + ;; 2. 
Re: Gnus agent expire broken (score: 55) + ;; /home/henrik/Mail/mail/sent/1310 (4,138 bytes) + + (goto-char (point-min)) + (while (re-search-forward + "^\\([0-9]+\\.\\).*\\((score: \\([0-9]+\\)\\))\n\\([^ ]+\\)" + nil t) + (setq score (match-string 3) + group (file-name-directory (match-string 4)) + article (file-name-nondirectory (match-string 4))) + + ;; make sure article and group is sane + (when (and (string-match article-pattern article) + (not (null group))) + (nnir-add-result group article score prefix server artlist))) + + ;; sort artlist by score + (apply 'vector + (sort* artlist + (function (lambda (x y) + (> (nnir-artitem-rsv x) + (nnir-artitem-rsv y))))))))) + +(defun nnir-run-find-grep (query server &optional group) + "Run find and grep to obtain matching articles." + (let* ((method (gnus-server-to-method server)) + (sym (intern + (concat (symbol-name (car method)) "-directory"))) + (directory (cadr (assoc sym (cddr method)))) + (regexp (cdr (assoc 'query query))) + (grep-options (cdr (assoc 'grep-options query))) + artlist) + (unless directory + (error "No directory found in method specification of server %s" + server)) + (message "Searching %s using find-grep..." (or group server)) + (save-window-excursion + (set-buffer (get-buffer-create nnir-tmp-buffer)) + (erase-buffer) + (if (> gnus-verbose 6) + (pop-to-buffer (current-buffer))) + (cd directory) ; Using relative paths simplifies postprocessing. + (let ((group + (if (not group) + "." + ;; Try accessing the group literally as well as + ;; interpreting dots as directory separators so the + ;; engine works with plain nnml as well as the Gnus + ;; Cache. + (find-if 'file-directory-p + (let ((group (gnus-group-real-name group))) + (list group (gnus-replace-in-string group "\\." 
"/" t))))))) + (unless group + (error "Cannot locate directory for group")) + (save-excursion + (apply + 'call-process "find" nil t + "find" group "-type" "f" "-name" "[0-9]*" "-exec" + "grep" + `("-l" ,@(and grep-options (split-string grep-options "\\s-" t)) + "-e" ,regexp "{}" "+")))) + + ;; Translate relative paths to group names. + (while (not (eobp)) + (let* ((path (split-string + (buffer-substring (point) (line-end-position)) "/" t)) + (art (string-to-number (car (last path))))) + (while (string= "." (car path)) + (setq path (cdr path))) + (let ((group (mapconcat 'identity (subseq path 0 -1) "."))) + (push (vector (nnir-group-full-name group server) art 0) + artlist)) + (forward-line 1))) + (message "Searching %s using find-grep...done" (or group server)) + artlist))) + +;;; Util Code: + +(defun nnir-read-parms (query) + "Reads additional search parameters according to `nnir-engines'." + (let ((parmspec (caddr (assoc nnir-search-engine nnir-engines)))) + (cons (cons 'query query) + (mapcar 'nnir-read-parm parmspec)))) + +(defun nnir-read-parm (parmspec) + "Reads a single search parameter. +`parmspec' is a cons cell, the car is a symbol, the cdr is a prompt." + (let ((sym (car parmspec)) + (prompt (cdr parmspec))) + (if (listp prompt) + (let* ((result (apply 'completing-read prompt)) + (mapping (or (assoc result nnir-imap-search-arguments) + (assoc nil nnir-imap-search-arguments)))) + (cons sym (format (cdr mapping) result))) + (cons sym (read-string prompt))))) + +(defun nnir-run-query (query) + "Invoke appropriate search engine function (see `nnir-engines'). +If some groups were process-marked, run the query for each of the groups +and concat the results." 
+ (let ((q (car (read-from-string query)))) + (if gnus-group-marked + (apply 'vconcat + (mapcar (lambda (x) + (let ((server (nnir-group-server x)) + search-func) + (setq search-func (cadr + (assoc + (nnir-read-server-parm 'nnir-search-engine server) nnir-engines))) + (if search-func + (funcall search-func q server x) + nil))) + gnus-group-marked) + ) + (apply 'vconcat + (mapcar (lambda (x) + (if (and (equal (cadr x) 'ok) (not (equal (cadar x) "-ephemeral"))) + (let ((server (format "%s:%s" (caar x) (cadar x))) + search-func) + (setq search-func (cadr + (assoc + (nnir-read-server-parm 'nnir-search-engine server) nnir-engines))) + (if search-func + (funcall search-func q server nil) + nil)) + nil)) + gnus-opened-servers) + )) + )) + +(defun nnir-read-server-parm (key server) + "Returns the parameter value of for the given server, where server is of +form 'backend:name'." + (let ((method (gnus-server-to-method server))) + (cond ((and method (assq key (cddr method))) + (nth 1 (assq key (cddr method)))) + ((and nnir-mail-backend + (gnus-server-equal method nnir-mail-backend)) + (symbol-value key)) + (t nil)))) +;; (if method +;; (if (assq key (cddr method)) +;; (nth 1 (assq key (cddr method))) +;; (symbol-value key)) +;; (symbol-value key)) +;; )) + +(defun nnir-group-full-name (shortname server) + "For the given group name, return a full Gnus group name. +The Gnus backend/server information is added." + (gnus-group-prefixed-name shortname (gnus-server-to-method server))) + +(defun nnir-possibly-change-server (server) + (unless (and server (nnir-server-opened server)) + (nnir-open-server server))) + + +;; Data type article list. + +(defun nnir-artlist-length (artlist) + "Returns number of articles in artlist." + (length artlist)) + +(defun nnir-artlist-article (artlist n) + "Returns from ARTLIST the Nth artitem (counting starting at 1)." + (elt artlist (1- n))) + +(defun nnir-artitem-group (artitem) + "Returns the group from the ARTITEM." 
+ (elt artitem 0)) + +(defun nnir-artlist-artitem-group (artlist n) + "Returns from ARTLIST the group of the Nth artitem (counting from 1)." + (nnir-artitem-group (nnir-artlist-article artlist n))) + +(defun nnir-artitem-number (artitem) + "Returns the number from the ARTITEM." + (elt artitem 1)) + +(defun nnir-artlist-artitem-number (artlist n) + "Returns from ARTLIST the number of the Nth artitem (counting from 1)." + (nnir-artitem-number (nnir-artlist-article artlist n))) + +(defun nnir-artitem-rsv (artitem) + "Returns the Retrieval Status Value (RSV, score) from the ARTITEM." + (elt artitem 2)) + +(defun nnir-artlist-artitem-rsv (artlist n) + "Returns from ARTLIST the Retrieval Status Value of the Nth artitem +\(counting from 1)." + (nnir-artitem-rsv (nnir-artlist-article artlist n))) + +;; unused? +(defun nnir-artlist-groups (artlist) + "Returns a list of all groups in the given ARTLIST." + (let ((res nil) + (with-dups nil)) + ;; from each artitem, extract group component + (setq with-dups (mapcar 'nnir-artitem-group artlist)) + ;; remove duplicates from above + (mapc (function (lambda (x) (add-to-list 'res x))) + with-dups) + res)) + + +;; The end. +(provide 'nnir) + +;;; arch-tag: 9b3fecf8-4397-4bbb-bf3c-6ac3cbbc6664 -- 2.39.2