From 33647b6d63fe3062361a23a901664a379c07097f Mon Sep 17 00:00:00 2001 From: Andreas Schwab Date: Tue, 25 Oct 2022 18:22:21 +0200 Subject: [PATCH] Ignore non-base64 junk when decoding MIME * src/fns.c (Fbase64_decode_region): Add optional argument IGNORE-INVALID and pass down to base64_decode_1. (Fbase64_decode_string): Likewise. (base64_decode_1): Add argument IGNORE_INVALID. * doc/lispref/text.texi (Base 64): Document them. * lisp/gnus/mm-bodies.el (mm-decode-content-transfer-encoding): Ignore any junk when decoding base64. --- doc/lispref/text.texi | 8 ++++++-- lisp/gnus/mm-bodies.el | 20 ++----------------- src/fns.c | 44 +++++++++++++++++++++++++----------------- 3 files changed, 34 insertions(+), 38 deletions(-) diff --git a/doc/lispref/text.texi b/doc/lispref/text.texi index 509ce56725d..d1010db515d 100644 --- a/doc/lispref/text.texi +++ b/doc/lispref/text.texi @@ -4876,7 +4876,7 @@ If the optional argument @var{no-pad} is non-@code{nil} then this function doesn't generate the padding. @end defun -@deffn Command base64-decode-region beg end &optional base64url +@deffn Command base64-decode-region beg end &optional base64url ignore-invalid This function converts the region from @var{beg} to @var{end} from base 64 code into the corresponding decoded text. It returns the length of the decoded text. @@ -4885,9 +4885,11 @@ The decoding functions ignore newline characters in the encoded text. If optional argument @var{base64url} is non-@code{nil}, then padding is optional, and the URL variant of base 64 encoding is used. +If optional argument @var{ignore-invalid} is non-@code{nil}, then any +unrecognized characters are ignored. @end deffn -@defun base64-decode-string string &optional base64url +@defun base64-decode-string string &optional base64url ignore-invalid This function converts the string @var{string} from base 64 code into the corresponding decoded text. It returns a unibyte string containing the decoded text. 
@@ -4897,6 +4899,8 @@ The decoding functions ignore newline characters in the encoded text. If optional argument @var{base64url} is non-@code{nil}, then padding is optional, and the URL variant of base 64 encoding is used. +If optional argument @var{ignore-invalid} is non-@code{nil}, then any +unrecognized characters are ignored. @end defun @node Checksum/Hash diff --git a/lisp/gnus/mm-bodies.el b/lisp/gnus/mm-bodies.el index 9045966df5a..44ce1c9485d 100644 --- a/lisp/gnus/mm-bodies.el +++ b/lisp/gnus/mm-bodies.el @@ -189,24 +189,8 @@ If TYPE is `text/plain' CRLF->LF translation may occur." (quoted-printable-decode-region (point-min) (point-max)) t) ((eq encoding 'base64) - (base64-decode-region - (point-min) - (save-excursion - ;; Some mailers insert whitespace junk at the end which - ;; base64-decode-region dislikes. - (goto-char (point-min)) - (while (re-search-forward "^[\t ]*\r?\n" nil t) - (delete-region (match-beginning 0) (match-end 0))) - ;; Also ignore junk which could have been added by - ;; mailing list software by finding the final line with - ;; base64 text. - (goto-char (point-max)) - (beginning-of-line) - (while (and (not (mm-base64-line-p)) - (not (bobp))) - (forward-line -1)) - (forward-line 1) - (point)))) + ;; MIME says to ignore any non-base64 junk + (base64-decode-region (point-min) (point-max) nil t)) ((memq encoding '(nil 7bit 8bit binary)) ;; Do nothing. 
t) diff --git a/src/fns.c b/src/fns.c index 940fb680fc3..c35f40357b7 100644 --- a/src/fns.c +++ b/src/fns.c @@ -3661,7 +3661,7 @@ static signed char const base64_char_to_value[2][UCHAR_MAX] = static ptrdiff_t base64_encode_1 (const char *, char *, ptrdiff_t, bool, bool, bool, bool); static ptrdiff_t base64_decode_1 (const char *, char *, ptrdiff_t, bool, - bool, ptrdiff_t *); + bool, bool, ptrdiff_t *); static Lisp_Object base64_encode_region_1 (Lisp_Object, Lisp_Object, bool, bool, bool); @@ -3924,7 +3924,7 @@ base64_encode_1 (const char *from, char *to, ptrdiff_t length, DEFUN ("base64-decode-region", Fbase64_decode_region, Sbase64_decode_region, - 2, 3, "r", + 2, 4, "r", doc: /* Base64-decode the region between BEG and END. Return the length of the decoded data. @@ -3935,8 +3935,11 @@ system. If the region can't be decoded, signal an error and don't modify the buffer. Optional third argument BASE64URL determines whether to use the URL variant -of the base 64 encoding, as defined in RFC 4648. */) - (Lisp_Object beg, Lisp_Object end, Lisp_Object base64url) +of the base 64 encoding, as defined in RFC 4648. +If optional fourth argument IGNORE-INVALID is non-nil invalid characters +are ignored instead of signaling an error. */) + (Lisp_Object beg, Lisp_Object end, Lisp_Object base64url, + Lisp_Object ignore_invalid) { ptrdiff_t ibeg, iend, length, allength; char *decoded; @@ -3962,7 +3965,8 @@ of the base 64 encoding, as defined in RFC 4648. */) move_gap_both (XFIXNAT (beg), ibeg); decoded_length = base64_decode_1 ((char *) BYTE_POS_ADDR (ibeg), decoded, length, !NILP (base64url), - multibyte, &inserted_chars); + multibyte, !NILP (ignore_invalid), + &inserted_chars); if (decoded_length > allength) emacs_abort (); @@ -3995,11 +3999,13 @@ of the base 64 encoding, as defined in RFC 4648. */) } DEFUN ("base64-decode-string", Fbase64_decode_string, Sbase64_decode_string, - 1, 2, 0, + 1, 3, 0, doc: /* Base64-decode STRING and return the result as a string. 
Optional argument BASE64URL determines whether to use the URL variant of -the base 64 encoding, as defined in RFC 4648. */) - (Lisp_Object string, Lisp_Object base64url) +the base 64 encoding, as defined in RFC 4648. +If optional third argument IGNORE-INVALID is non-nil invalid characters are +ignored instead of signaling an error. */) + (Lisp_Object string, Lisp_Object base64url, Lisp_Object ignore_invalid) { char *decoded; ptrdiff_t length, decoded_length; @@ -4015,7 +4021,8 @@ the base 64 encoding, as defined in RFC 4648. */) /* The decoded result should be unibyte. */ ptrdiff_t decoded_chars; decoded_length = base64_decode_1 (SSDATA (string), decoded, length, - !NILP (base64url), 0, &decoded_chars); + !NILP (base64url), false, + !NILP (ignore_invalid), &decoded_chars); if (decoded_length > length) emacs_abort (); else if (decoded_length >= 0) @@ -4032,12 +4039,13 @@ the base 64 encoding, as defined in RFC 4648. */) /* Base64-decode the data at FROM of LENGTH bytes into TO. If MULTIBYTE, the decoded result should be in multibyte - form. Store the number of produced characters in *NCHARS_RETURN. */ + form. If IGNORE_INVALID, ignore invalid base64 characters. + Store the number of produced characters in *NCHARS_RETURN. 
*/ static ptrdiff_t base64_decode_1 (const char *from, char *to, ptrdiff_t length, - bool base64url, - bool multibyte, ptrdiff_t *nchars_return) + bool base64url, bool multibyte, bool ignore_invalid, + ptrdiff_t *nchars_return) { char const *f = from; char const *flim = from + length; @@ -4063,7 +4071,7 @@ base64_decode_1 (const char *from, char *to, ptrdiff_t length, c = *f++; v1 = b64_char_to_value[c]; } - while (v1 < 0); + while (v1 < 0 || (v1 == 0 && ignore_invalid)); if (v1 == 0) return -1; @@ -4078,7 +4086,7 @@ base64_decode_1 (const char *from, char *to, ptrdiff_t length, c = *f++; v1 = b64_char_to_value[c]; } - while (v1 < 0); + while (v1 < 0 || (v1 == 0 && ignore_invalid)); if (v1 == 0) return -1; @@ -4097,7 +4105,7 @@ base64_decode_1 (const char *from, char *to, ptrdiff_t length, { if (f == flim) { - if (!base64url) + if (!base64url && !ignore_invalid) return -1; *nchars_return = nchars; return e - to; @@ -4105,7 +4113,7 @@ base64_decode_1 (const char *from, char *to, ptrdiff_t length, c = *f++; v1 = b64_char_to_value[c]; } - while (v1 < 0); + while (v1 < 0 || (v1 == 0 && ignore_invalid)); if (c == '=') { @@ -4139,7 +4147,7 @@ base64_decode_1 (const char *from, char *to, ptrdiff_t length, { if (f == flim) { - if (!base64url) + if (!base64url && !ignore_invalid) return -1; *nchars_return = nchars; return e - to; @@ -4147,7 +4155,7 @@ base64_decode_1 (const char *from, char *to, ptrdiff_t length, c = *f++; v1 = b64_char_to_value[c]; } - while (v1 < 0); + while (v1 < 0 || (v1 == 0 && ignore_invalid)); if (c == '=') continue; -- 2.39.5