From: Lars Ingebrigtsen Date: Sat, 14 Aug 2021 13:20:59 +0000 (+0200) Subject: Add new functions for lax mail address splitting X-Git-Tag: emacs-28.0.90~1511 X-Git-Url: http://git.eshelyaron.com/gitweb/?a=commitdiff_plain;h=e91b574bf8b14c0d8cc33242570bffa8ddc38760;p=emacs.git Add new functions for lax mail address splitting * lisp/emacs-lisp/subr-x.el (string-clean-whitespace): Autoload. * lisp/mail/mail-parse.el (mail-header-parse-addresses-lax) (mail-header-parse-address-lax): New functions. --- diff --git a/etc/NEWS b/etc/NEWS index a321ffd81f1..a3a25436289 100644 --- a/etc/NEWS +++ b/etc/NEWS @@ -2443,6 +2443,15 @@ images are marked. ** Miscellaneous +--- +*** New function 'mail-header-parse-addresses-lax'. +This takes a comma-separated string and returns a list of mail/name +pairs. + +--- +*** New function 'mail-header-parse-address-lax'. +Parse a string as a mail address-like string. + --- *** 'shell-script-mode' now supports 'outline-minor-mode'. The outline headings have lines that start with "###". diff --git a/lisp/emacs-lisp/subr-x.el b/lisp/emacs-lisp/subr-x.el index 468d124c0e2..4204d20249d 100644 --- a/lisp/emacs-lisp/subr-x.el +++ b/lisp/emacs-lisp/subr-x.el @@ -240,6 +240,7 @@ carriage return." (substring string 0 (- (length string) (length suffix))) string)) +;;;###autoload (defun string-clean-whitespace (string) "Clean up whitespace in STRING. All sequences of whitespaces in STRING are collapsed into a diff --git a/lisp/mail/mail-extr.el b/lisp/mail/mail-extr.el index 88fb0866856..24d8311f641 100644 --- a/lisp/mail/mail-extr.el +++ b/lisp/mail/mail-extr.el @@ -707,7 +707,10 @@ This function is primarily meant for when you're displaying the result to the user: Many prettifications are applied to the result returned. If you want to decode an address for further non-display use, you should probably use -`mail-header-parse-address' instead." +`mail-header-parse-address' instead. Also see +`mail-header-parse-address-lax' for a function that's less strict +than `mail-header-parse-address', but does less post-processing +to the results." (let ((canonicalization-buffer (get-buffer-create " *canonical address*")) (extraction-buffer (get-buffer-create " *extract address components*")) value-list) diff --git a/lisp/mail/mail-parse.el b/lisp/mail/mail-parse.el index e72ed828494..212fadf3823 100644 --- a/lisp/mail/mail-parse.el +++ b/lisp/mail/mail-parse.el @@ -71,6 +71,45 @@ (defalias 'mail-decode-encoded-address-region 'rfc2047-decode-address-region) (defalias 'mail-decode-encoded-address-string 'rfc2047-decode-address-string) +(defun mail-header-parse-addresses-lax (string) + "Parse STRING as a comma-separated list of mail addresses. +The return value is a list with mail/name pairs." + (delq nil + (mapcar (lambda (elem) + (or (mail-header-parse-address elem) + (mail-header-parse-address-lax elem))) + (mail-header-parse-addresses string t)))) + +(defun mail-header-parse-address-lax (string) + "Parse STRING as a mail address. +Returns a mail/name pair. + +This function will first try to parse STRING as a +standards-compliant address string, and if that fails, try to use +heuristics to determine the email address and the name in the +string." + (with-temp-buffer + (insert (string-clean-whitespace string)) + ;; Find the bit with the @ and guess that that's the mail. + (goto-char (point-max)) + (when (search-backward "@" nil t) + (if (re-search-backward " " nil t) + (forward-char 1) + (goto-char (point-min))) + (let* ((start (point)) + (mail (buffer-substring + start (or (re-search-forward " " nil t) + (goto-char (point-max)))))) + (delete-region start (point)) + ;; We've now removed the email bit, so the rest of the stuff + ;; has to be the name. + (cons (string-trim mail "[<]+" "[>]+") + (let ((name (string-trim (buffer-string) + "[ \t\n\r(]+" "[ \t\n\r)]+"))) + (if (length= name 0) + nil + name))))))) + (provide 'mail-parse) ;;; mail-parse.el ends here diff --git a/test/lisp/mail/mail-parse-tests.el b/test/lisp/mail/mail-parse-tests.el new file mode 100644 index 00000000000..70de92df45a --- /dev/null +++ b/test/lisp/mail/mail-parse-tests.el @@ -0,0 +1,54 @@ +;;; mail-parse-tests.el --- tests for mail-parse.el -*- lexical-binding: t -*- + +;; Copyright (C) 2021 Free Software Foundation, Inc. + +;; This file is part of GNU Emacs. + +;; GNU Emacs is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. + +;; GNU Emacs is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GNU Emacs. If not, see . + +;;; Commentary: + +;;; Code: + +(require 'ert) +(require 'mail-parse) +(require 'subr-x) + +(ert-deftest test-mail-header-parse-address-lax () + (should (equal (mail-header-parse-address-lax + "Lars Ingebrigtsen ") + '("larsi@gnus.org" . "Lars Ingebrigtsen"))) + (should (equal (mail-header-parse-address-lax + "Lars Ingebrigtsen larsi@gnus.org>") + '("larsi@gnus.org" . "Lars Ingebrigtsen"))) + (should (equal (mail-header-parse-address-lax + "Lars Ingebrigtsen larsi@gnus.org") + '("larsi@gnus.org" . "Lars Ingebrigtsen"))) + (should (equal (mail-header-parse-address-lax + "larsi@gnus.org (Lars Ingebrigtsen)") + '("larsi@gnus.org " . "Lars Ingebrigtsen"))) + (should (equal (mail-header-parse-address-lax "larsi@gnus.org") + '("larsi@gnus.org"))) + (should (equal (mail-header-parse-address-lax "foo") + nil))) + +(ert-deftest test-mail-header-parse-addresses-lax () + (should (equal (mail-header-parse-addresses-lax + "Bob Weiner , Mats Lidell ") + '(("rsw@gnu.org" . "Bob Weiner") + ("matsl@gnu.org" . "Mats Lidell"))))) + +(provide 'mail-parse-tests) + +;;; mail-parse-tests.el ends here