From 2dc5f17c3ecf6864fdcb8ebae73c02a8d04c415a Mon Sep 17 00:00:00 2001 From: Eli Zaretskii Date: Thu, 27 Jul 2023 11:36:00 +0300 Subject: [PATCH] Support Posix-standard pax extended header in tar files * lisp/tar-mode.el (pax-extended-attribute-record-regexp) (tar-attr-vector): New variables. (pax-gid-index, pax-gname-index, pax-linkpath-index) (pax-mtime-index, pax-path-index, pax-size-index, pax-uid-index) (pax-uname-index): New constants. (pax-header-gid, pax-header-gname, pax-header-linkpath) (pax-header-mtime, pax-header-path, pax-header-size) (pax-header-uid, pax-header-uname): New accessors to pax header. (pax-decode-string, tar-parse-pax-extended-header): New functions. (tar-header-block-tokenize): Recognize and handle Posix-standard pax extended header, and use its attributes instead of those in the standard tar header. (Bug#64686) --- lisp/tar-mode.el | 260 ++++++++++++++++++++++++++++++++++++----------- 1 file changed, 199 insertions(+), 61 deletions(-) diff --git a/lisp/tar-mode.el b/lisp/tar-mode.el index c9206028e94..4e9843123b0 100644 --- a/lisp/tar-mode.el +++ b/lisp/tar-mode.el @@ -215,6 +215,99 @@ Preserve the modified states of the buffers and set `tar-data-swapped'." "Round S up to the next multiple of 512." (ash (ash (+ s 511) -9) 9)) +;; Reference: +;; https://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13_02 +(defconst pax-extended-attribute-record-regexp + ;; We omit attributes that are "reserved" by Posix, since no + ;; processing has been defined for them. + "\\([0-9]+\\) \\(gid\\|gname\\|hdrcharset\\|linkpath\\|mtime\\|path\\|size\\|uid\\|uname\\)=" + "Regular expression for looking up extended attributes in a +Posix-standard pax extended header of a tar file. +Only attributes that `tar-mode' can grok are mentioned.") + +(defconst pax-gid-index 0) +(defconst pax-gname-index 1) +(defconst pax-linkpath-index 2) +(defconst pax-mtime-index 3) +(defconst pax-path-index 4) +(defconst pax-size-index 5) +(defconst pax-uid-index 6) +(defconst pax-uname-index 7) +(defsubst pax-header-gid (attr-vec) + (aref attr-vec pax-gid-index)) +(defsubst pax-header-gname (attr-vec) + (aref attr-vec pax-gname-index)) +(defsubst pax-header-linkpath (attr-vec) + (aref attr-vec pax-linkpath-index)) +(defsubst pax-header-mtime (attr-vec) + (aref attr-vec pax-mtime-index)) +(defsubst pax-header-path (attr-vec) + (aref attr-vec pax-path-index)) +(defsubst pax-header-size (attr-vec) + (aref attr-vec pax-size-index)) +(defsubst pax-header-uid (attr-vec) + (aref attr-vec pax-uid-index)) +(defsubst pax-header-uname (attr-vec) + (aref attr-vec pax-uid-index)) + +(defsubst pax-decode-string (str coding) + (if str + (decode-coding-string str coding) + str)) + +(defvar tar-attr-vector '[nil nil nil nil nil nil nil nil]) +(defun tar-parse-pax-extended-header (pos) + "Parse a pax external header of a Posix-format tar file." + (let ((end (+ pos 512)) + (result tar-attr-vector) + (coding 'utf-8-unix) + attr value record-len value-len) + (dotimes (i 8) + (aset result i nil)) + (goto-char pos) + (while (and (< pos end) + (re-search-forward pax-extended-attribute-record-regexp + end 'move)) + (setq record-len (string-to-number (match-string 1)) + attr (match-string 2) + value-len (- record-len + (length (match-string 1)) + 1 + (length (match-string 2)) + 2) + value (buffer-substring (point) (+ (point) value-len))) + (setq pos (goto-char (+ (point) value-len 1))) + (cond + ((equal attr "gid") + (aset result pax-gid-index value)) + ((equal attr "gname") + (aset result pax-gname-index value)) + ((equal attr "linkpath") + (aset result pax-linkpath-index value)) + ((equal attr "mtime") + (aset result pax-mtime-index (string-to-number value))) + ((equal attr "path") + (aset result pax-path-index value)) + ((equal attr "size") + (aset result pax-size-index value)) + ((equal attr "uid") + (aset result pax-uid-index value)) + ((equal attr "uname") + (aset result pax-uname-index value)) + ((equal attr "hdrcharset") + (setq coding (if (equal value "BINARY") 'no-conversion 'utf-8-unix)))) + (setq pos (+ pos (skip-chars-forward "\000")))) + ;; Decode string-valued attributes. + (aset result pax-gname-index + (pax-decode-string (aref result pax-gname-index) coding)) + (aset result pax-linkpath-index + (pax-decode-string (aref result pax-linkpath-index) coding)) + (aset result pax-path-index + (pax-decode-string (aref result pax-path-index) coding)) + (aset result pax-uname-index + (pax-decode-string (aref result pax-uname-index) coding)) + result)) + (defun tar-header-block-tokenize (pos coding &optional disable-slash) "Return a `tar-header' structure. This is a list of name, mode, uid, gid, size, @@ -271,67 +364,112 @@ of the file header. This is used for \"old GNU\" Tar format." (if (and (null link-p) (null disable-slash) (string-match "/\\'" name)) (setq link-p 5)) ; directory - (if (and (equal name "././@LongLink") - ;; Supposedly @LongLink is only used for GNUTAR - ;; format (i.e. "ustar ") but some POSIX Tar files - ;; (with "ustar\0") have been seen using it as well. - (member magic-str '("ustar " "ustar\0"))) - ;; This is a GNU Tar long-file-name header. - (let* ((size (tar-parse-octal-integer - string tar-size-offset tar-time-offset)) - ;; The long name is in the next 512-byte block. - ;; We've already moved POS there, when we computed - ;; STRING above. - (name (decode-coding-string - ;; -1 so as to strip the terminating 0 byte. - (buffer-substring pos (+ pos size -1)) coding)) - ;; Tokenize the header of the _real_ file entry, - ;; which is further 512 bytes into the archive. - (descriptor (tar-header-block-tokenize - (+ pos (tar-roundup-512 size)) coding - ;; Don't intuit directories from - ;; the trailing slash, because the - ;; truncated name might by chance end - ;; in a slash. - 'ignore-trailing-slash))) - ;; Fix the descriptor of the real file entry by using - ;; the information from the long name entry. - (cond - ((eq link-p (- ?L ?0)) ;GNUTYPE_LONGNAME. - (setf (tar-header-name descriptor) name)) - ((eq link-p (- ?K ?0)) ;GNUTYPE_LONGLINK. - (setf (tar-header-link-name descriptor) name)) - (t - (message "Unrecognized GNU Tar @LongLink format"))) - ;; Fix the "link-type" attribute, based on the long name. - (if (and (null (tar-header-link-type descriptor)) - (string-match "/\\'" name)) - (setf (tar-header-link-type descriptor) 5)) ; directory - (setf (tar-header-header-start descriptor) - (copy-marker (- pos 512) t)) - descriptor) - - (make-tar-header - (copy-marker pos nil) - name - (tar-parse-octal-integer string tar-mode-offset tar-uid-offset) - (tar-parse-octal-integer string tar-uid-offset tar-gid-offset) - (tar-parse-octal-integer string tar-gid-offset tar-size-offset) - (tar-parse-octal-integer string tar-size-offset tar-time-offset) - (tar-parse-octal-integer string tar-time-offset tar-chk-offset) - (tar-parse-octal-integer string tar-chk-offset tar-linkp-offset) - link-p - linkname - uname-valid-p - (when uname-valid-p - (decode-coding-string - (substring string tar-uname-offset uname-end) coding)) - (when uname-valid-p - (decode-coding-string - (substring string tar-gname-offset gname-end) coding)) - (tar-parse-octal-integer string tar-dmaj-offset tar-dmin-offset) - (tar-parse-octal-integer string tar-dmin-offset tar-prefix-offset) - )))))) + (if (member magic-str '("ustar " "ustar\0")) + (if (equal name "././@LongLink") + ;; Supposedly @LongLink is only used for GNUTAR + ;; format (i.e. "ustar ") but some POSIX Tar files + ;; (with "ustar\0") have been seen using it as well. + ;; This is a GNU Tar long-file-name header. + (let* ((size (tar-parse-octal-integer + string tar-size-offset tar-time-offset)) + ;; The long name is in the next 512-byte block. + ;; We've already moved POS there, when we + ;; computed STRING above. + (name (decode-coding-string + ;; -1 so as to strip the terminating 0 byte. + (buffer-substring pos (+ pos size -1)) coding)) + ;; Tokenize the header of the _real_ file entry, + ;; which is further 512 bytes into the archive. + (descriptor (tar-header-block-tokenize + (+ pos (tar-roundup-512 size)) coding + ;; Don't intuit directories from + ;; the trailing slash, because the + ;; truncated name might by chance end + ;; in a slash. + 'ignore-trailing-slash))) + ;; Fix the descriptor of the real file entry by using + ;; the information from the long name entry. + (cond + ((eq link-p (- ?L ?0)) ;GNUTYPE_LONGNAME. + (setf (tar-header-name descriptor) name)) + ((eq link-p (- ?K ?0)) ;GNUTYPE_LONGLINK. + (setf (tar-header-link-name descriptor) name)) + (t + (message "Unrecognized GNU Tar @LongLink format"))) + ;; Fix the "link-type" attribute, based on the long name. + (if (and (null (tar-header-link-type descriptor)) + (string-match "/\\'" name)) + (setf (tar-header-link-type descriptor) 5)) ; directory + (setf (tar-header-header-start descriptor) + (copy-marker (- pos 512) t)) + descriptor) + ;; Posix pax extended header. FIXME: support ?g as well. + (if (eq link-p (- ?x ?0)) + ;; Get whatever attributes are in the extended header, + (let* ((pax-attrs (tar-parse-pax-extended-header pos)) + (gid (pax-header-gid pax-attrs)) + (gname (pax-header-gname pax-attrs)) + (linkpath (pax-header-linkpath pax-attrs)) + (mtime (pax-header-mtime pax-attrs)) + (path (pax-header-path pax-attrs)) + (size (pax-header-size pax-attrs)) + (uid (pax-header-uid pax-attrs)) + (uname (pax-header-uname pax-attrs)) + ;; Tokenize the header of the _real_ file entry, + ;; which is further 512 bytes into the archive. + (descriptor + (tar-header-block-tokenize (+ pos 512) coding + 'ignore-trailing-slash))) + ;; Fix the descriptor of the real file entry by + ;; overriding some of the fields with the information + ;; from the extended header. + (if gid + (setf (tar-header-gid descriptor) gid)) + (if gname + (setf (tar-header-gname descriptor) gname)) + (if linkpath + (setf (tar-header-link-name descriptor) linkpath)) + (if mtime + (setf (tar-header-date descriptor) mtime)) + (if path + (setf (tar-header-name descriptor) path)) + (if size + (setf (tar-header-size descriptor) size)) + (if uid + (setf (tar-header-uid descriptor) uid)) + (if uname + (setf (tar-header-uname descriptor) uname)) + descriptor) + + (make-tar-header + (copy-marker pos nil) + name + (tar-parse-octal-integer string tar-mode-offset + tar-uid-offset) + (tar-parse-octal-integer string tar-uid-offset + tar-gid-offset) + (tar-parse-octal-integer string tar-gid-offset + tar-size-offset) + (tar-parse-octal-integer string tar-size-offset + tar-time-offset) + (tar-parse-octal-integer string tar-time-offset + tar-chk-offset) + (tar-parse-octal-integer string tar-chk-offset + tar-linkp-offset) + link-p + linkname + uname-valid-p + (when uname-valid-p + (decode-coding-string + (substring string tar-uname-offset uname-end) coding)) + (when uname-valid-p + (decode-coding-string + (substring string tar-gname-offset gname-end) coding)) + (tar-parse-octal-integer string tar-dmaj-offset + tar-dmin-offset) + (tar-parse-octal-integer string tar-dmin-offset + tar-prefix-offset) + )))))))) ;; Pseudo-field. (defun tar-header-data-end (descriptor) -- 2.39.2