From: Bob Rogers Date: Fri, 25 Feb 2022 12:03:20 +0000 (+0100) Subject: Add new file ietf-drums-date.el X-Git-Tag: emacs-29.0.90~2108 X-Git-Url: http://git.eshelyaron.com/gitweb/?a=commitdiff_plain;h=ca3858563c7ba8ee3caa82fbd2b7c386ea60c0d3;p=emacs.git Add new file ietf-drums-date.el * lisp/mail/ietf-drums-date.el: parse-time-string replacement which is compatible but can be made stricter if desired. * test/lisp/mail/ietf-drums-date-tests.el (added): Add tests for ietf-drums-parse-date-string. * lisp/mail/ietf-drums.el (ietf-drums-parse-date): Use ietf-drums-parse-date-string. --- diff --git a/etc/NEWS b/etc/NEWS index 902d89e62d8..8deb6999789 100644 --- a/etc/NEWS +++ b/etc/NEWS @@ -1158,6 +1158,12 @@ functions. * Lisp Changes in Emacs 29.1 +--- +** New function 'ietf-drums-parse-date-string'. +This function parses RFC5322 (and RFC822) date strings, and should be +used instead of 'parse-time-string' when parsing data that's standards +compliant. + +++ ** New macro 'setopt'. This is like 'setq', but uses 'customize-set-variable' to set the diff --git a/lisp/mail/ietf-drums-date.el b/lisp/mail/ietf-drums-date.el new file mode 100644 index 00000000000..6f64ae73377 --- /dev/null +++ b/lisp/mail/ietf-drums-date.el @@ -0,0 +1,274 @@ +;;; ietf-drums-date.el --- parse time/date for ietf-drums.el -*- lexical-binding: t -*- + +;; Copyright (C) 2022 Free Software Foundation, Inc. + +;; Author: Bob Rogers +;; Keywords: mail, util + +;; This file is part of GNU Emacs. + +;; GNU Emacs is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. + +;; GNU Emacs is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GNU Emacs. If not, see . + +;;; Commentary: + +;; 'ietf-drums-parse-date-string' parses a time and/or date in a +;; string and returns a list of values, just like `decode-time', where +;; unspecified elements in the string are returned as nil (except +;; unspecified DST is returned as -1). `encode-time' may be applied +;; on these values to obtain an internal time value. + +;; Historically, `parse-time-string' was used for this purpose, but it +;; was gradually but imperfectly extended to handle other date +;; formats. 'ietf-drums-parse-date-string' is compatible in that it +;; uses the same return value format and parses the same email date +;; formats by default, but can be made stricter if desired. + +;;; Code: + +(require 'cl-lib) +(require 'parse-time) + +(define-error 'date-parse-error "Date/time parse error" 'error) + +(defconst ietf-drums-date--slot-names + '(second minute hour day month year weekday dst zone) + "Names of return value slots, for better error messages +See the decoded-time defstruct.") + +(defconst ietf-drums-date--slot-ranges + '((0 60) (0 59) (0 23) (1 31) (1 12) (1 9999)) + "Numeric slot ranges, for bounds checking. +Note that RFC5322 explicitly requires that seconds go up to 60, +to allow for leap seconds (see Mills, D., 'Network Time +Protocol', STD 12, RFC 1119, September 1989).") + +(defsubst ietf-drums-date--ignore-char-p (char) + ;; Ignore whitespace and commas. + (memq char '(?\s ?\t ?\r ?\n ?,))) + +(defun ietf-drums-date--tokenize-string (string &optional comment-eof) + "Turn STRING into tokens, separated only by whitespace and commas. +Multiple commas are ignored. Pure digit sequences are turned +into integers. If COMMENT-EOF is true, then a comment as +defined by RFC5322 (strictly, the CFWS production that also +accepts comments) is treated as an end-of-file, and no further +tokens are recognized, otherwise we strip out all comments and +treat them as whitespace (per RFC822)." + (let ((index 0) + (end (length string)) + (list ())) + (cl-flet ((skip-ignored () + ;; Skip ignored characters at index (the scan + ;; position). Skip RFC822 comments in matched parens, + ;; but do not complain about unterminated comments. + (let ((char nil) + (nest 0)) + (while (and (< index end) + (setq char (aref string index)) + (or (> nest 0) + (ietf-drums-date--ignore-char-p char) + (and (not comment-eof) (eql char ?\()))) + (cl-incf index) + ;; FWS bookkeeping. + (cond ((and (eq char ?\\) + (< (1+ index) end)) + ;; Move to the next char but don't check + ;; it to see if it might be a paren. + (cl-incf index)) + ((eq char ?\() (cl-incf nest)) + ((eq char ?\)) (cl-decf nest))))))) + (skip-ignored) ;; Skip leading whitespace. + (while (and (< index end) + (not (and comment-eof + (eq (aref string index) ?\()))) + (let* ((start index) + (char (aref string index)) + (all-digits (<= ?0 char ?9))) + ;; char is valid; look for more valid characters. + (when (and (eq char ?\\) + (< (1+ index) end)) + ;; Escaped character, which might be a "(". If so, we are + ;; correct to include it in the token, even though the + ;; caller is sure to barf. If not, we violate RFC2?822 by + ;; not removing the backslash, but no characters in valid + ;; RFC2?822 dates need escaping anyway, so it shouldn't + ;; matter that this is not done strictly correctly. -- + ;; rgr, 24-Dec-21. + (cl-incf index)) + (while (and (< (cl-incf index) end) + (setq char (aref string index)) + (not (or (ietf-drums-date--ignore-char-p char) + (eq char ?\()))) + (unless (<= ?0 char ?9) + (setq all-digits nil)) + (when (and (eq char ?\\) + (< (1+ index) end)) + ;; Escaped character, see above. + (cl-incf index))) + (push (if all-digits + (cl-parse-integer string :start start :end index) + (substring string start index)) + list) + (skip-ignored))) + (nreverse list)))) + +(defun ietf-drums-parse-date-string (time-string &optional error no-822) + "Parse an RFC5322 or RFC822 date, passed as TIME-STRING. +The optional ERROR parameter causes syntax errors to be flagged +by signalling an instance of the date-parse-error condition. The +optional NO-822 parameter disables the more lax RFC822 syntax, +which is permitted by default. + +The result is a list of (SEC MIN HOUR DAY MON YEAR DOW DST TZ), +which can be accessed as a decoded-time defstruct (q.v.), +e.g. `decoded-time-year' to extract the year, and turned into an +Emacs timestamp by `encode-time'. + +The strict syntax for RFC5322 is as follows: + + [ day-of-week \",\" ] day FWS month-name FWS year FWS time [CFWS] + +where the \"time\" production is: + + 2DIGIT \":\" 2DIGIT [ \":\" 2DIGIT ] FWS ( \"+\" / \"-\" ) 4DIGIT + +and FWS is \"folding white space,\" and CFWS is \"comments and/or +folding white space\", where comments are included in nesting +parentheses and are equivalent to white space. RFC822 also +accepts comments in random places (all of which is handled by +ietf-drums-date--tokenize-string) and two-digit years. For +two-digit years, 50 and up are interpreted as 1950 through 1999 +and 00 through 49 as 200 through 2049. + +We are somewhat more lax in what we accept (specifically, the +hours don't have to be two digits, and the TZ and the comma after +the DOW are optional), but we do insist that the items that are +present do appear in this order. Unspecified/unrecognized +elements in the string are returned as nil (except unspecified +DST is returned as -1)." + (let ((tokens (ietf-drums-date--tokenize-string (downcase time-string) + no-822)) + (time (list nil nil nil nil nil nil nil -1 nil))) + (cl-labels ((set-matched-slot (slot index token) + ;; Assign a slot value from match data if index is + ;; non-nil, else from token, signalling an error if + ;; enabled and it's out of range. + (let ((value (if index + (cl-parse-integer (match-string index token)) + token))) + (when error + (let ((range (nth slot ietf-drums-date--slot-ranges))) + (when (and range + (not (<= (car range) value (cadr range)))) + (signal 'date-parse-error + (list "Slot out of range" + (nth slot ietf-drums-date--slot-names) + token (car range) (cadr range)))))) + (setf (nth slot time) value))) + (set-numeric (slot token) + ;; Only assign the slot if the token is a number. + (cond ((natnump token) + (set-matched-slot slot nil token)) + (error + (signal 'date-parse-error + (list "Not a number" + (nth slot ietf-drums-date--slot-names) + token)))))) + ;; Check for weekday. + (let ((dow (assoc (car tokens) parse-time-weekdays))) + (when dow + ;; Day of the week. + (set-matched-slot 6 nil (cdr dow)) + (pop tokens))) + ;; Day. + (set-numeric 3 (pop tokens)) + ;; Alphabetic month. + (let* ((month (pop tokens)) + (match (assoc month parse-time-months))) + (cond (match + (set-matched-slot 4 nil (cdr match))) + (error + (signal 'date-parse-error + (list "Expected an alphabetic month" month))) + (t + (push month tokens)))) + ;; Year. + (let ((year (pop tokens))) + ;; Check the year for the right number of digits. + (cond ((not (natnump year)) + (when error + (signal 'date-parse-error + (list "Expected a year" year))) + (push year tokens)) + ((>= year 1000) + (set-numeric 5 year)) + ((or no-822 + (>= year 100)) + (when error + (signal 'date-parse-error + (list "Four-digit years are required" year))) + (push year tokens)) + ((>= year 50) + ;; second half of the 20th century. + (set-numeric 5 (+ 1900 year))) + (t + ;; first half of the 21st century. + (set-numeric 5 (+ 2000 year))))) + ;; Time. + (let ((time (pop tokens))) + (cond ((or (null time) (natnump time)) + (when error + (signal 'date-parse-error + (list "Expected a time" time))) + (push time tokens)) + ((string-match + "^\\([0-9][0-9]?\\):\\([0-9][0-9]\\):\\([0-9][0-9]\\)$" + time) + (set-matched-slot 2 1 time) + (set-matched-slot 1 2 time) + (set-matched-slot 0 3 time)) + ((string-match "^\\([0-9][0-9]?\\):\\([0-9][0-9]\\)$" time) + ;; Time without seconds. + (set-matched-slot 2 1 time) + (set-matched-slot 1 2 time) + (set-matched-slot 0 nil 0)) + (error + (signal 'date-parse-error + (list "Expected a time" time))))) + ;; Timezone. + (let* ((zone (pop tokens)) + (match (assoc zone parse-time-zoneinfo))) + (cond (match + (set-matched-slot 8 nil (cadr match)) + (set-matched-slot 7 nil (caddr match))) + ((and (stringp zone) + (string-match "^[-+][0-9][0-9][0-9][0-9]$" zone)) + ;; Numeric time zone. + (set-matched-slot + 8 nil + (* 60 + (+ (cl-parse-integer zone :start 3 :end 5) + (* 60 (cl-parse-integer zone :start 1 :end 3))) + (if (= (aref zone 0) ?-) -1 1)))) + ((and zone error) + (signal 'date-parse-error + (list "Expected a timezone" zone))))) + (when (and tokens error) + (signal 'date-parse-error + (list "Extra token(s)" (car tokens))))) + time)) + +(provide 'ietf-drums-date) + +;;; ietf-drums-date.el ends here diff --git a/lisp/mail/ietf-drums.el b/lisp/mail/ietf-drums.el index 85aa27235fc..d1ad671b160 100644 --- a/lisp/mail/ietf-drums.el +++ b/lisp/mail/ietf-drums.el @@ -294,9 +294,13 @@ a list of address strings." (replace-match " " t t)) (goto-char (point-min))) +(declare-function ietf-drums-parse-date-string "ietf-drums-date" + (time-string &optional error? no-822?)) + (defun ietf-drums-parse-date (string) "Return an Emacs time spec from STRING." - (encode-time (parse-time-string string))) + (require 'ietf-drums-date) + (encode-time (ietf-drums-parse-date-string string))) (defun ietf-drums-narrow-to-header () "Narrow to the header section in the current buffer." diff --git a/test/lisp/mail/ietf-drums-date-tests.el b/test/lisp/mail/ietf-drums-date-tests.el new file mode 100644 index 00000000000..5b798077ff9 --- /dev/null +++ b/test/lisp/mail/ietf-drums-date-tests.el @@ -0,0 +1,190 @@ +;;; ietf-drums-date-tests.el --- Test suite for ietf-drums-date.el -*- lexical-binding:t -*- + +;; Copyright (C) 2022 Free Software Foundation, Inc. + +;; Author: Bob Rogers + +;; This file is part of GNU Emacs. + +;; GNU Emacs is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. + +;; GNU Emacs is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GNU Emacs. If not, see . + +;;; Commentary: + +;;; Code: + +(require 'ert) +(require 'ietf-drums) +(require 'ietf-drums-date) + +(ert-deftest ietf-drums-date-tests () + "Test basic ietf-drums-parse-date-string functionality." + + ;; Test tokenization. + (should (equal (ietf-drums-date--tokenize-string " ") '())) + (should (equal (ietf-drums-date--tokenize-string " a b") '("a" "b"))) + (should (equal (ietf-drums-date--tokenize-string "a bbc dde") + '("a" "bbc" "dde"))) + (should (equal (ietf-drums-date--tokenize-string " , a 27 b,, c 14:32 ") + '("a" 27 "b" "c" "14:32"))) + ;; Some folding whitespace tests. + (should (equal (ietf-drums-date--tokenize-string " a b (end) c" t) + '("a" "b"))) + (should (equal (ietf-drums-date--tokenize-string "(quux)a (foo (bar)) b(baz)") + '("a" "b"))) + (should (equal (ietf-drums-date--tokenize-string "a b\\cde") + ;; Strictly incorrect, but strictly unnecessary syntax. + '("a" "b\\cde"))) + (should (equal (ietf-drums-date--tokenize-string "a b\\ de") + '("a" "b\\ de"))) + (should (equal (ietf-drums-date--tokenize-string "a \\de \\(f") + '("a" "\\de" "\\(f"))) + + ;; Start with some compatible RFC822 dates. + (dolist (case '(("Mon, 22 Feb 2016 19:35:42 +0100" + (42 35 19 22 2 2016 1 -1 3600) + (22219 21758)) + ("22 Feb 2016 19:35:42 +0100" + (42 35 19 22 2 2016 nil -1 3600) + (22219 21758)) + ("Mon, 22 February 2016 19:35:42 +0100" + (42 35 19 22 2 2016 1 -1 3600) + (22219 21758)) + ("Mon, 22 feb 2016 19:35:42 +0100" + (42 35 19 22 2 2016 1 -1 3600) + (22219 21758)) + ("Monday, 22 february 2016 19:35:42 +0100" + (42 35 19 22 2 2016 1 -1 3600) + (22219 21758)) + ("Monday, 22 february 2016 19:35:42 PST" + (42 35 19 22 2 2016 1 nil -28800) + (22219 54158)) + ("Friday, 21 Sep 2018 13:47:58 PDT" + (58 47 13 21 9 2018 5 t -25200) + (23461 22782)) + ("Friday, 21 Sep 2018 13:47:58 EDT" + (58 47 13 21 9 2018 5 t -14400) + (23461 11982)))) + (let* ((input (car case)) + (parsed (cadr case)) + (encoded (caddr case))) + ;; The input should parse the same without RFC822. + (should (equal (ietf-drums-parse-date-string input) parsed)) + (should (equal (ietf-drums-parse-date-string input nil t) parsed)) + ;; Check the encoded date (the official output, though the + ;; decoded-time is easier to debug). + (should (equal (ietf-drums-parse-date input) encoded)))) + + ;; Test a few without timezones. + (dolist (case '(("Mon, 22 Feb 2016 19:35:42" + (42 35 19 22 2 2016 1 -1 nil)) + ("Friday, 21 Sep 2018 13:47:58" + (58 47 13 21 9 2018 5 -1 nil)))) + (let* ((input (car case)) + (parsed (cadr case))) + ;; The input should parse the same without RFC822. + (should (equal (ietf-drums-parse-date-string input) parsed)) + (should (equal (ietf-drums-parse-date-string input nil t) parsed)) + ;; We can't check the encoded date here because it will differ + ;; depending on the TZ of the test environment. + )) + + ;; Two-digit years are not allowed by the "modern" format. + (should (equal (ietf-drums-parse-date-string "22 Feb 16 19:35:42 +0100") + '(42 35 19 22 2 2016 nil -1 3600))) + (should (equal (ietf-drums-parse-date-string "22 Feb 16 19:35:42 +0100" nil t) + '(nil nil nil 22 2 nil nil -1 nil))) + (should (equal (should-error (ietf-drums-parse-date-string + "22 Feb 16 19:35:42 +0100" t t)) + '(date-parse-error "Four-digit years are required" 16))) + (should (equal (ietf-drums-parse-date-string "22 Feb 96 19:35:42 +0100") + '(42 35 19 22 2 1996 nil -1 3600))) + (should (equal (ietf-drums-parse-date-string "22 Feb 96 19:35:42 +0100" nil t) + '(nil nil nil 22 2 nil nil -1 nil))) + (should (equal (should-error (ietf-drums-parse-date-string + "22 Feb 96 19:35:42 +0100" t t)) + '(date-parse-error "Four-digit years are required" 96))) + + ;; Try some dates with comments. + (should (equal (ietf-drums-parse-date-string + "22 Feb (today) 16 19:35:42 +0100") + '(42 35 19 22 2 2016 nil -1 3600))) + (should (equal (ietf-drums-parse-date-string + "22 Feb (today) 16 19:35:42 +0100" nil t) + '(nil nil nil 22 2 nil nil -1 nil))) + (should (equal (should-error (ietf-drums-parse-date-string + "22 Feb (today) 16 19:35:42 +0100" t t)) + '(date-parse-error "Expected a year" nil))) + (should (equal (ietf-drums-parse-date-string + "22 Feb 96 (long ago) 19:35:42 +0100") + '(42 35 19 22 2 1996 nil -1 3600))) + (should (equal (ietf-drums-parse-date-string + "Friday, 21 Sep(comment \\) with \\( parens)18 19:35:42") + '(42 35 19 21 9 2018 5 -1 nil))) + (should (equal (ietf-drums-parse-date-string + "Friday, 21 Sep 18 19:35:42 (unterminated comment") + '(42 35 19 21 9 2018 5 -1 nil))) + + ;; Test some RFC822 error cases + (dolist (test '(("33 1 2022" ("Slot out of range" day 33 1 31)) + ("0 1 2022" ("Slot out of range" day 0 1 31)) + ("1 1 2020 2021" ("Expected an alphabetic month" 1)) + ("1 Jan 2020 2021" ("Expected a time" 2021)) + ("1 Jan 2020 20:21 2000" ("Expected a timezone" 2000)) + ("1 Jan 2020 20:21 +0200 33" ("Extra token(s)" 33)))) + (should (equal (should-error (ietf-drums-parse-date-string (car test) t)) + (cons 'date-parse-error (cadr test))))) + + (dolist (test '(("22 Feb 196" nil ;; bad year + ("Four-digit years are required" 196)) + ("22 Feb 16 19:35:24" t ;; two-digit year + ("Four-digit years are required" 16)) + ("22 Feb 96 19:35:42" t ;; two-digit year + ("Four-digit years are required" 96)) + ("2 Feb 2021 1996" nil + ("Expected a time" 1996)) + ("22 Fub 1996" nil + ("Expected an alphabetic month" "fub")) + ("1 Jan 2020 30" nil + ("Expected a time" 30)) + ("1 Jan 2020 16:47 15:15" nil + ("Expected a timezone" "15:15")) + ("1 Jan 2020 16:47 +0800 -0800" t + ("Extra token(s)" "-0800")) + ;; Range tests + ("32 Dec 2021" nil + ("Slot out of range" day 32 1 31)) + ("0 Dec 2021" nil + ("Slot out of range" day 0 1 31)) + ("3 13 2021" nil + ("Expected an alphabetic month" 13)) + ("3 Dec 0000" t + ("Four-digit years are required" 0)) + ("3 Dec 20021" nil + ("Slot out of range" year 20021 1 9999)) + ("1 Jan 2020 24:21:14" nil + ("Slot out of range" hour "24:21:14" 0 23)) + ("1 Jan 2020 14:60:21" nil + ("Slot out of range" minute "14:60:21" 0 59)) + ("1 Jan 2020 14:21:61" nil + ("Slot out of range" second "14:21:61" 0 60)))) + (should (equal (should-error + (ietf-drums-parse-date-string (car test) t (cadr test))) + (cons 'date-parse-error (caddr test))))) + (should (equal (ietf-drums-parse-date-string + "1 Jan 2020 14:21:60") ;; a leap second! + '(60 21 14 1 1 2020 nil -1 nil)))) + +(provide 'ietf-drums-date-tests) + +;;; ietf-drums-date-tests.el ends here