--- /dev/null
+;;; ietf-drums-date.el --- parse time/date for ietf-drums.el -*- lexical-binding: t -*-
+
+;; Copyright (C) 2022 Free Software Foundation, Inc.
+
+;; Author: Bob Rogers <rogers@rgrjr.com>
+;; Keywords: mail, util
+
+;; This file is part of GNU Emacs.
+
+;; GNU Emacs is free software: you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
+
+;; GNU Emacs is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>.
+
+;;; Commentary:
+
+;; 'ietf-drums-parse-date-string' parses a time and/or date in a
+;; string and returns a list of values, just like `decode-time', where
+;; unspecified elements in the string are returned as nil (except
+;; unspecified DST is returned as -1). `encode-time' may be applied
+;; on these values to obtain an internal time value.
+
+;; Historically, `parse-time-string' was used for this purpose, but it
+;; was gradually but imperfectly extended to handle other date
+;; formats. 'ietf-drums-parse-date-string' is compatible in that it
+;; uses the same return value format and parses the same email date
+;; formats by default, but can be made stricter if desired.
+
+;;; Code:
+
+(require 'cl-lib)
+(require 'parse-time)
+
+(define-error 'date-parse-error "Date/time parse error" 'error)
+
+(defconst ietf-drums-date--slot-names
+ '(second minute hour day month year weekday dst zone)
+ "Names of return value slots, for better error messages
+See the decoded-time defstruct.")
+
+(defconst ietf-drums-date--slot-ranges
+ '((0 60) (0 59) (0 23) (1 31) (1 12) (1 9999))
+ "Numeric slot ranges, for bounds checking.
+Note that RFC5322 explicitly requires that seconds go up to 60,
+to allow for leap seconds (see Mills, D., 'Network Time
+Protocol', STD 12, RFC 1119, September 1989).")
+
+(defsubst ietf-drums-date--ignore-char-p (char)
+ ;; Ignore whitespace and commas.
+ (memq char '(?\s ?\t ?\r ?\n ?,)))
+
+(defun ietf-drums-date--tokenize-string (string &optional comment-eof)
+ "Turn STRING into tokens, separated only by whitespace and commas.
+Multiple commas are ignored. Pure digit sequences are turned
+into integers. If COMMENT-EOF is true, then a comment as
+defined by RFC5322 (strictly, the CFWS production that also
+accepts comments) is treated as an end-of-file, and no further
+tokens are recognized, otherwise we strip out all comments and
+treat them as whitespace (per RFC822)."
+ (let ((index 0)
+ (end (length string))
+ (list ()))
+ (cl-flet ((skip-ignored ()
+ ;; Skip ignored characters at index (the scan
+ ;; position). Skip RFC822 comments in matched parens,
+ ;; but do not complain about unterminated comments.
+ (let ((char nil)
+ (nest 0))
+ (while (and (< index end)
+ (setq char (aref string index))
+ (or (> nest 0)
+ (ietf-drums-date--ignore-char-p char)
+ (and (not comment-eof) (eql char ?\())))
+ (cl-incf index)
+ ;; FWS bookkeeping.
+ (cond ((and (eq char ?\\)
+ (< (1+ index) end))
+ ;; Move to the next char but don't check
+ ;; it to see if it might be a paren.
+ (cl-incf index))
+ ((eq char ?\() (cl-incf nest))
+ ((eq char ?\)) (cl-decf nest)))))))
+ (skip-ignored) ;; Skip leading whitespace.
+ (while (and (< index end)
+ (not (and comment-eof
+ (eq (aref string index) ?\())))
+ (let* ((start index)
+ (char (aref string index))
+ (all-digits (<= ?0 char ?9)))
+ ;; char is valid; look for more valid characters.
+ (when (and (eq char ?\\)
+ (< (1+ index) end))
+ ;; Escaped character, which might be a "(". If so, we are
+ ;; correct to include it in the token, even though the
+ ;; caller is sure to barf. If not, we violate RFC2?822 by
+ ;; not removing the backslash, but no characters in valid
+ ;; RFC2?822 dates need escaping anyway, so it shouldn't
+ ;; matter that this is not done strictly correctly. --
+ ;; rgr, 24-Dec-21.
+ (cl-incf index))
+ (while (and (< (cl-incf index) end)
+ (setq char (aref string index))
+ (not (or (ietf-drums-date--ignore-char-p char)
+ (eq char ?\())))
+ (unless (<= ?0 char ?9)
+ (setq all-digits nil))
+ (when (and (eq char ?\\)
+ (< (1+ index) end))
+ ;; Escaped character, see above.
+ (cl-incf index)))
+ (push (if all-digits
+ (cl-parse-integer string :start start :end index)
+ (substring string start index))
+ list)
+ (skip-ignored)))
+ (nreverse list))))
+
+(defun ietf-drums-parse-date-string (time-string &optional error no-822)
+ "Parse an RFC5322 or RFC822 date, passed as TIME-STRING.
+The optional ERROR parameter causes syntax errors to be flagged
+by signalling an instance of the date-parse-error condition. The
+optional NO-822 parameter disables the more lax RFC822 syntax,
+which is permitted by default.
+
+The result is a list of (SEC MIN HOUR DAY MON YEAR DOW DST TZ),
+which can be accessed as a decoded-time defstruct (q.v.),
+e.g. `decoded-time-year' to extract the year, and turned into an
+Emacs timestamp by `encode-time'.
+
+The strict syntax for RFC5322 is as follows:
+
+ [ day-of-week \",\" ] day FWS month-name FWS year FWS time [CFWS]
+
+where the \"time\" production is:
+
+ 2DIGIT \":\" 2DIGIT [ \":\" 2DIGIT ] FWS ( \"+\" / \"-\" ) 4DIGIT
+
+and FWS is \"folding white space,\" and CFWS is \"comments and/or
+folding white space\", where comments are included in nesting
+parentheses and are equivalent to white space. RFC822 also
+accepts comments in random places (all of which is handled by
+ietf-drums-date--tokenize-string) and two-digit years. For
+two-digit years, 50 and up are interpreted as 1950 through 1999
+and 00 through 49 as 200 through 2049.
+
+We are somewhat more lax in what we accept (specifically, the
+hours don't have to be two digits, and the TZ and the comma after
+the DOW are optional), but we do insist that the items that are
+present do appear in this order. Unspecified/unrecognized
+elements in the string are returned as nil (except unspecified
+DST is returned as -1)."
+ (let ((tokens (ietf-drums-date--tokenize-string (downcase time-string)
+ no-822))
+ (time (list nil nil nil nil nil nil nil -1 nil)))
+ (cl-labels ((set-matched-slot (slot index token)
+ ;; Assign a slot value from match data if index is
+ ;; non-nil, else from token, signalling an error if
+ ;; enabled and it's out of range.
+ (let ((value (if index
+ (cl-parse-integer (match-string index token))
+ token)))
+ (when error
+ (let ((range (nth slot ietf-drums-date--slot-ranges)))
+ (when (and range
+ (not (<= (car range) value (cadr range))))
+ (signal 'date-parse-error
+ (list "Slot out of range"
+ (nth slot ietf-drums-date--slot-names)
+ token (car range) (cadr range))))))
+ (setf (nth slot time) value)))
+ (set-numeric (slot token)
+ ;; Only assign the slot if the token is a number.
+ (cond ((natnump token)
+ (set-matched-slot slot nil token))
+ (error
+ (signal 'date-parse-error
+ (list "Not a number"
+ (nth slot ietf-drums-date--slot-names)
+ token))))))
+ ;; Check for weekday.
+ (let ((dow (assoc (car tokens) parse-time-weekdays)))
+ (when dow
+ ;; Day of the week.
+ (set-matched-slot 6 nil (cdr dow))
+ (pop tokens)))
+ ;; Day.
+ (set-numeric 3 (pop tokens))
+ ;; Alphabetic month.
+ (let* ((month (pop tokens))
+ (match (assoc month parse-time-months)))
+ (cond (match
+ (set-matched-slot 4 nil (cdr match)))
+ (error
+ (signal 'date-parse-error
+ (list "Expected an alphabetic month" month)))
+ (t
+ (push month tokens))))
+ ;; Year.
+ (let ((year (pop tokens)))
+ ;; Check the year for the right number of digits.
+ (cond ((not (natnump year))
+ (when error
+ (signal 'date-parse-error
+ (list "Expected a year" year)))
+ (push year tokens))
+ ((>= year 1000)
+ (set-numeric 5 year))
+ ((or no-822
+ (>= year 100))
+ (when error
+ (signal 'date-parse-error
+ (list "Four-digit years are required" year)))
+ (push year tokens))
+ ((>= year 50)
+ ;; second half of the 20th century.
+ (set-numeric 5 (+ 1900 year)))
+ (t
+ ;; first half of the 21st century.
+ (set-numeric 5 (+ 2000 year)))))
+ ;; Time.
+ (let ((time (pop tokens)))
+ (cond ((or (null time) (natnump time))
+ (when error
+ (signal 'date-parse-error
+ (list "Expected a time" time)))
+ (push time tokens))
+ ((string-match
+ "^\\([0-9][0-9]?\\):\\([0-9][0-9]\\):\\([0-9][0-9]\\)$"
+ time)
+ (set-matched-slot 2 1 time)
+ (set-matched-slot 1 2 time)
+ (set-matched-slot 0 3 time))
+ ((string-match "^\\([0-9][0-9]?\\):\\([0-9][0-9]\\)$" time)
+ ;; Time without seconds.
+ (set-matched-slot 2 1 time)
+ (set-matched-slot 1 2 time)
+ (set-matched-slot 0 nil 0))
+ (error
+ (signal 'date-parse-error
+ (list "Expected a time" time)))))
+ ;; Timezone.
+ (let* ((zone (pop tokens))
+ (match (assoc zone parse-time-zoneinfo)))
+ (cond (match
+ (set-matched-slot 8 nil (cadr match))
+ (set-matched-slot 7 nil (caddr match)))
+ ((and (stringp zone)
+ (string-match "^[-+][0-9][0-9][0-9][0-9]$" zone))
+ ;; Numeric time zone.
+ (set-matched-slot
+ 8 nil
+ (* 60
+ (+ (cl-parse-integer zone :start 3 :end 5)
+ (* 60 (cl-parse-integer zone :start 1 :end 3)))
+ (if (= (aref zone 0) ?-) -1 1))))
+ ((and zone error)
+ (signal 'date-parse-error
+ (list "Expected a timezone" zone)))))
+ (when (and tokens error)
+ (signal 'date-parse-error
+ (list "Extra token(s)" (car tokens)))))
+ time))
+
+(provide 'ietf-drums-date)
+
+;;; ietf-drums-date.el ends here
--- /dev/null
+;;; ietf-drums-date-tests.el --- Test suite for ietf-drums-date.el -*- lexical-binding:t -*-
+
+;; Copyright (C) 2022 Free Software Foundation, Inc.
+
+;; Author: Bob Rogers <rogers@rgrjr.com>
+
+;; This file is part of GNU Emacs.
+
+;; GNU Emacs is free software: you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
+
+;; GNU Emacs is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>.
+
+;;; Commentary:
+
+;;; Code:
+
+(require 'ert)
+(require 'ietf-drums)
+(require 'ietf-drums-date)
+
+(ert-deftest ietf-drums-date-tests ()
+ "Test basic ietf-drums-parse-date-string functionality."
+
+ ;; Test tokenization.
+ (should (equal (ietf-drums-date--tokenize-string " ") '()))
+ (should (equal (ietf-drums-date--tokenize-string " a b") '("a" "b")))
+ (should (equal (ietf-drums-date--tokenize-string "a bbc dde")
+ '("a" "bbc" "dde")))
+ (should (equal (ietf-drums-date--tokenize-string " , a 27 b,, c 14:32 ")
+ '("a" 27 "b" "c" "14:32")))
+ ;; Some folding whitespace tests.
+ (should (equal (ietf-drums-date--tokenize-string " a b (end) c" t)
+ '("a" "b")))
+ (should (equal (ietf-drums-date--tokenize-string "(quux)a (foo (bar)) b(baz)")
+ '("a" "b")))
+ (should (equal (ietf-drums-date--tokenize-string "a b\\cde")
+ ;; Strictly incorrect, but strictly unnecessary syntax.
+ '("a" "b\\cde")))
+ (should (equal (ietf-drums-date--tokenize-string "a b\\ de")
+ '("a" "b\\ de")))
+ (should (equal (ietf-drums-date--tokenize-string "a \\de \\(f")
+ '("a" "\\de" "\\(f")))
+
+ ;; Start with some compatible RFC822 dates.
+ (dolist (case '(("Mon, 22 Feb 2016 19:35:42 +0100"
+ (42 35 19 22 2 2016 1 -1 3600)
+ (22219 21758))
+ ("22 Feb 2016 19:35:42 +0100"
+ (42 35 19 22 2 2016 nil -1 3600)
+ (22219 21758))
+ ("Mon, 22 February 2016 19:35:42 +0100"
+ (42 35 19 22 2 2016 1 -1 3600)
+ (22219 21758))
+ ("Mon, 22 feb 2016 19:35:42 +0100"
+ (42 35 19 22 2 2016 1 -1 3600)
+ (22219 21758))
+ ("Monday, 22 february 2016 19:35:42 +0100"
+ (42 35 19 22 2 2016 1 -1 3600)
+ (22219 21758))
+ ("Monday, 22 february 2016 19:35:42 PST"
+ (42 35 19 22 2 2016 1 nil -28800)
+ (22219 54158))
+ ("Friday, 21 Sep 2018 13:47:58 PDT"
+ (58 47 13 21 9 2018 5 t -25200)
+ (23461 22782))
+ ("Friday, 21 Sep 2018 13:47:58 EDT"
+ (58 47 13 21 9 2018 5 t -14400)
+ (23461 11982))))
+ (let* ((input (car case))
+ (parsed (cadr case))
+ (encoded (caddr case)))
+ ;; The input should parse the same without RFC822.
+ (should (equal (ietf-drums-parse-date-string input) parsed))
+ (should (equal (ietf-drums-parse-date-string input nil t) parsed))
+ ;; Check the encoded date (the official output, though the
+ ;; decoded-time is easier to debug).
+ (should (equal (ietf-drums-parse-date input) encoded))))
+
+ ;; Test a few without timezones.
+ (dolist (case '(("Mon, 22 Feb 2016 19:35:42"
+ (42 35 19 22 2 2016 1 -1 nil))
+ ("Friday, 21 Sep 2018 13:47:58"
+ (58 47 13 21 9 2018 5 -1 nil))))
+ (let* ((input (car case))
+ (parsed (cadr case)))
+ ;; The input should parse the same without RFC822.
+ (should (equal (ietf-drums-parse-date-string input) parsed))
+ (should (equal (ietf-drums-parse-date-string input nil t) parsed))
+ ;; We can't check the encoded date here because it will differ
+ ;; depending on the TZ of the test environment.
+ ))
+
+ ;; Two-digit years are not allowed by the "modern" format.
+ (should (equal (ietf-drums-parse-date-string "22 Feb 16 19:35:42 +0100")
+ '(42 35 19 22 2 2016 nil -1 3600)))
+ (should (equal (ietf-drums-parse-date-string "22 Feb 16 19:35:42 +0100" nil t)
+ '(nil nil nil 22 2 nil nil -1 nil)))
+ (should (equal (should-error (ietf-drums-parse-date-string
+ "22 Feb 16 19:35:42 +0100" t t))
+ '(date-parse-error "Four-digit years are required" 16)))
+ (should (equal (ietf-drums-parse-date-string "22 Feb 96 19:35:42 +0100")
+ '(42 35 19 22 2 1996 nil -1 3600)))
+ (should (equal (ietf-drums-parse-date-string "22 Feb 96 19:35:42 +0100" nil t)
+ '(nil nil nil 22 2 nil nil -1 nil)))
+ (should (equal (should-error (ietf-drums-parse-date-string
+ "22 Feb 96 19:35:42 +0100" t t))
+ '(date-parse-error "Four-digit years are required" 96)))
+
+ ;; Try some dates with comments.
+ (should (equal (ietf-drums-parse-date-string
+ "22 Feb (today) 16 19:35:42 +0100")
+ '(42 35 19 22 2 2016 nil -1 3600)))
+ (should (equal (ietf-drums-parse-date-string
+ "22 Feb (today) 16 19:35:42 +0100" nil t)
+ '(nil nil nil 22 2 nil nil -1 nil)))
+ (should (equal (should-error (ietf-drums-parse-date-string
+ "22 Feb (today) 16 19:35:42 +0100" t t))
+ '(date-parse-error "Expected a year" nil)))
+ (should (equal (ietf-drums-parse-date-string
+ "22 Feb 96 (long ago) 19:35:42 +0100")
+ '(42 35 19 22 2 1996 nil -1 3600)))
+ (should (equal (ietf-drums-parse-date-string
+ "Friday, 21 Sep(comment \\) with \\( parens)18 19:35:42")
+ '(42 35 19 21 9 2018 5 -1 nil)))
+ (should (equal (ietf-drums-parse-date-string
+ "Friday, 21 Sep 18 19:35:42 (unterminated comment")
+ '(42 35 19 21 9 2018 5 -1 nil)))
+
+ ;; Test some RFC822 error cases
+ (dolist (test '(("33 1 2022" ("Slot out of range" day 33 1 31))
+ ("0 1 2022" ("Slot out of range" day 0 1 31))
+ ("1 1 2020 2021" ("Expected an alphabetic month" 1))
+ ("1 Jan 2020 2021" ("Expected a time" 2021))
+ ("1 Jan 2020 20:21 2000" ("Expected a timezone" 2000))
+ ("1 Jan 2020 20:21 +0200 33" ("Extra token(s)" 33))))
+ (should (equal (should-error (ietf-drums-parse-date-string (car test) t))
+ (cons 'date-parse-error (cadr test)))))
+
+ (dolist (test '(("22 Feb 196" nil ;; bad year
+ ("Four-digit years are required" 196))
+ ("22 Feb 16 19:35:24" t ;; two-digit year
+ ("Four-digit years are required" 16))
+ ("22 Feb 96 19:35:42" t ;; two-digit year
+ ("Four-digit years are required" 96))
+ ("2 Feb 2021 1996" nil
+ ("Expected a time" 1996))
+ ("22 Fub 1996" nil
+ ("Expected an alphabetic month" "fub"))
+ ("1 Jan 2020 30" nil
+ ("Expected a time" 30))
+ ("1 Jan 2020 16:47 15:15" nil
+ ("Expected a timezone" "15:15"))
+ ("1 Jan 2020 16:47 +0800 -0800" t
+ ("Extra token(s)" "-0800"))
+ ;; Range tests
+ ("32 Dec 2021" nil
+ ("Slot out of range" day 32 1 31))
+ ("0 Dec 2021" nil
+ ("Slot out of range" day 0 1 31))
+ ("3 13 2021" nil
+ ("Expected an alphabetic month" 13))
+ ("3 Dec 0000" t
+ ("Four-digit years are required" 0))
+ ("3 Dec 20021" nil
+ ("Slot out of range" year 20021 1 9999))
+ ("1 Jan 2020 24:21:14" nil
+ ("Slot out of range" hour "24:21:14" 0 23))
+ ("1 Jan 2020 14:60:21" nil
+ ("Slot out of range" minute "14:60:21" 0 59))
+ ("1 Jan 2020 14:21:61" nil
+ ("Slot out of range" second "14:21:61" 0 60))))
+ (should (equal (should-error
+ (ietf-drums-parse-date-string (car test) t (cadr test)))
+ (cons 'date-parse-error (caddr test)))))
+ (should (equal (ietf-drums-parse-date-string
+ "1 Jan 2020 14:21:60") ;; a leap second!
+ '(60 21 14 1 1 2020 nil -1 nil))))
+
+(provide 'ietf-drums-date-tests)
+
+;;; ietf-drums-date-tests.el ends here