]> git.eshelyaron.com Git - emacs.git/commitdiff
Support Posix-standard pax extended header in tar files
authorEli Zaretskii <eliz@gnu.org>
Thu, 27 Jul 2023 08:36:00 +0000 (11:36 +0300)
committerEli Zaretskii <eliz@gnu.org>
Thu, 27 Jul 2023 08:36:00 +0000 (11:36 +0300)
* lisp/tar-mode.el (pax-extended-attribute-record-regexp)
(tar-attr-vector): New variables.
(pax-gid-index, pax-gname-index, pax-linkpath-index)
(pax-mtime-index, pax-path-index, pax-size-index, pax-uid-index)
(pax-uname-index): New constants.
(pax-header-gid, pax-header-gname, pax-header-linkpath)
(pax-header-mtime, pax-header-path, pax-header-size)
(pax-header-uid, pax-header-uname): New accessors to pax header.
(pax-decode-string, tar-parse-pax-extended-header): New functions.
(tar-header-block-tokenize): Recognize and handle Posix-standard
pax extended header, and use its attributes instead of those in
the standard tar header.  (Bug#64686)

lisp/tar-mode.el

index c9206028e943a7be1b78a803275b78e32abedff4..4e9843123b03c512a303769edd3065b56389e423 100644 (file)
@@ -215,6 +215,99 @@ Preserve the modified states of the buffers and set `tar-data-swapped'."
   "Round S up to the next multiple of 512."
   (ash (ash (+ s 511) -9) 9))
 
+;; Reference:
+;; https://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13_02
+(defconst pax-extended-attribute-record-regexp
+  ;; We omit attributes that are "reserved" by Posix, since no
+  ;; processing has been defined for them.
+  "\\([0-9]+\\) \\(gid\\|gname\\|hdrcharset\\|linkpath\\|mtime\\|path\\|size\\|uid\\|uname\\)="
+  "Regular expression for looking up extended attributes in a
+Posix-standard pax extended header of a tar file.
+Only attributes that `tar-mode' can grok are mentioned.")
+
+(defconst pax-gid-index 0)
+(defconst pax-gname-index 1)
+(defconst pax-linkpath-index 2)
+(defconst pax-mtime-index 3)
+(defconst pax-path-index 4)
+(defconst pax-size-index 5)
+(defconst pax-uid-index 6)
+(defconst pax-uname-index 7)
+(defsubst pax-header-gid (attr-vec)
+  (aref attr-vec pax-gid-index))
+(defsubst pax-header-gname (attr-vec)
+  (aref attr-vec pax-gname-index))
+(defsubst pax-header-linkpath (attr-vec)
+  (aref attr-vec pax-linkpath-index))
+(defsubst pax-header-mtime (attr-vec)
+  (aref attr-vec pax-mtime-index))
+(defsubst pax-header-path (attr-vec)
+  (aref attr-vec pax-path-index))
+(defsubst pax-header-size (attr-vec)
+  (aref attr-vec pax-size-index))
+(defsubst pax-header-uid (attr-vec)
+  (aref attr-vec pax-uid-index))
+(defsubst pax-header-uname (attr-vec)
+  (aref attr-vec pax-uid-index))
+
+(defsubst pax-decode-string (str coding)
+  (if str
+      (decode-coding-string str coding)
+    str))
+
+(defvar tar-attr-vector '[nil nil nil nil nil nil nil nil])
+(defun tar-parse-pax-extended-header (pos)
+  "Parse a pax external header of a Posix-format tar file."
+  (let ((end (+ pos 512))
+        (result tar-attr-vector)
+        (coding 'utf-8-unix)
+        attr value record-len value-len)
+    (dotimes (i 8)
+      (aset result i nil))
+    (goto-char pos)
+    (while (and (< pos end)
+                (re-search-forward pax-extended-attribute-record-regexp
+                                   end 'move))
+      (setq record-len (string-to-number (match-string 1))
+            attr (match-string 2)
+            value-len (- record-len
+                         (length (match-string 1))
+                         1
+                         (length (match-string 2))
+                         2)
+            value (buffer-substring (point) (+ (point) value-len)))
+      (setq pos (goto-char (+ (point) value-len 1)))
+      (cond
+       ((equal attr "gid")
+        (aset result pax-gid-index value))
+       ((equal attr "gname")
+        (aset result pax-gname-index value))
+       ((equal attr "linkpath")
+        (aset result pax-linkpath-index value))
+       ((equal attr "mtime")
+        (aset result pax-mtime-index (string-to-number value)))
+       ((equal attr "path")
+        (aset result pax-path-index value))
+       ((equal attr "size")
+        (aset result pax-size-index value))
+       ((equal attr "uid")
+        (aset result pax-uid-index value))
+       ((equal attr "uname")
+        (aset result pax-uname-index value))
+       ((equal attr "hdrcharset")
+        (setq coding (if (equal value "BINARY") 'no-conversion 'utf-8-unix))))
+      (setq pos (+ pos (skip-chars-forward "\000"))))
+    ;; Decode string-valued attributes.
+    (aset result pax-gname-index
+          (pax-decode-string (aref result pax-gname-index) coding))
+    (aset result pax-linkpath-index
+          (pax-decode-string (aref result pax-linkpath-index) coding))
+    (aset result pax-path-index
+          (pax-decode-string (aref result pax-path-index) coding))
+    (aset result pax-uname-index
+          (pax-decode-string (aref result pax-uname-index) coding))
+    result))
+
 (defun tar-header-block-tokenize (pos coding &optional disable-slash)
   "Return a `tar-header' structure.
 This is a list of name, mode, uid, gid, size,
@@ -271,67 +364,112 @@ of the file header.  This is used for \"old GNU\" Tar format."
         (if (and (null link-p) (null disable-slash) (string-match "/\\'" name))
             (setq link-p 5))            ; directory
 
-        (if (and (equal name "././@LongLink")
-                 ;; Supposedly @LongLink is only used for GNUTAR
-                 ;; format (i.e. "ustar ") but some POSIX Tar files
-                 ;; (with "ustar\0") have been seen using it as well.
-                 (member magic-str '("ustar " "ustar\0")))
-            ;; This is a GNU Tar long-file-name header.
-            (let* ((size (tar-parse-octal-integer
-                          string tar-size-offset tar-time-offset))
-                   ;; The long name is in the next 512-byte block.
-                   ;; We've already moved POS there, when we computed
-                   ;; STRING above.
-                  (name (decode-coding-string
-                          ;; -1 so as to strip the terminating 0 byte.
-                         (buffer-substring pos (+ pos size -1)) coding))
-                   ;; Tokenize the header of the _real_ file entry,
-                   ;; which is further 512 bytes into the archive.
-                   (descriptor (tar-header-block-tokenize
-                                (+ pos (tar-roundup-512 size)) coding
-                                ;; Don't intuit directories from
-                                ;; the trailing slash, because the
-                                ;; truncated name might by chance end
-                                ;; in a slash.
-                               'ignore-trailing-slash)))
-              ;; Fix the descriptor of the real file entry by using
-              ;; the information from the long name entry.
-              (cond
-               ((eq link-p (- ?L ?0))      ;GNUTYPE_LONGNAME.
-                (setf (tar-header-name descriptor) name))
-               ((eq link-p (- ?K ?0))      ;GNUTYPE_LONGLINK.
-                (setf (tar-header-link-name descriptor) name))
-               (t
-                (message "Unrecognized GNU Tar @LongLink format")))
-              ;; Fix the "link-type" attribute, based on the long name.
-              (if (and (null (tar-header-link-type descriptor))
-                       (string-match "/\\'" name))
-                  (setf (tar-header-link-type descriptor) 5)) ; directory
-              (setf (tar-header-header-start descriptor)
-                    (copy-marker (- pos 512) t))
-              descriptor)
-
-          (make-tar-header
-           (copy-marker pos nil)
-           name
-           (tar-parse-octal-integer string tar-mode-offset tar-uid-offset)
-           (tar-parse-octal-integer string tar-uid-offset tar-gid-offset)
-           (tar-parse-octal-integer string tar-gid-offset tar-size-offset)
-           (tar-parse-octal-integer string tar-size-offset tar-time-offset)
-           (tar-parse-octal-integer string tar-time-offset tar-chk-offset)
-           (tar-parse-octal-integer string tar-chk-offset tar-linkp-offset)
-           link-p
-           linkname
-           uname-valid-p
-           (when uname-valid-p
-             (decode-coding-string
-              (substring string tar-uname-offset uname-end) coding))
-           (when uname-valid-p
-             (decode-coding-string
-              (substring string tar-gname-offset gname-end) coding))
-           (tar-parse-octal-integer string tar-dmaj-offset tar-dmin-offset)
-           (tar-parse-octal-integer string tar-dmin-offset tar-prefix-offset)
-           ))))))
+        (if (member magic-str '("ustar " "ustar\0"))
+            (if (equal name "././@LongLink")
+                ;; Supposedly @LongLink is only used for GNUTAR
+                ;; format (i.e. "ustar ") but some POSIX Tar files
+                ;; (with "ustar\0") have been seen using it as well.
+                ;; This is a GNU Tar long-file-name header.
+                (let* ((size (tar-parse-octal-integer
+                              string tar-size-offset tar-time-offset))
+                       ;; The long name is in the next 512-byte block.
+                       ;; We've already moved POS there, when we
+                       ;; computed STRING above.
+                      (name (decode-coding-string
+                              ;; -1 so as to strip the terminating 0 byte.
+                             (buffer-substring pos (+ pos size -1)) coding))
+                       ;; Tokenize the header of the _real_ file entry,
+                       ;; which is further 512 bytes into the archive.
+                       (descriptor (tar-header-block-tokenize
+                                    (+ pos (tar-roundup-512 size)) coding
+                                    ;; Don't intuit directories from
+                                    ;; the trailing slash, because the
+                                    ;; truncated name might by chance end
+                                    ;; in a slash.
+                                   'ignore-trailing-slash)))
+                  ;; Fix the descriptor of the real file entry by using
+                  ;; the information from the long name entry.
+                  (cond
+                   ((eq link-p (- ?L ?0))      ;GNUTYPE_LONGNAME.
+                    (setf (tar-header-name descriptor) name))
+                   ((eq link-p (- ?K ?0))      ;GNUTYPE_LONGLINK.
+                    (setf (tar-header-link-name descriptor) name))
+                   (t
+                    (message "Unrecognized GNU Tar @LongLink format")))
+                  ;; Fix the "link-type" attribute, based on the long name.
+                  (if (and (null (tar-header-link-type descriptor))
+                           (string-match "/\\'" name))
+                      (setf (tar-header-link-type descriptor) 5)) ; directory
+                  (setf (tar-header-header-start descriptor)
+                        (copy-marker (- pos 512) t))
+                  descriptor)
+              ;; Posix pax extended header.  FIXME: support ?g as well.
+              (if (eq link-p (- ?x ?0))
+                  ;;      Get whatever attributes are in the extended header,
+                  (let* ((pax-attrs (tar-parse-pax-extended-header pos))
+                         (gid (pax-header-gid pax-attrs))
+                         (gname (pax-header-gname pax-attrs))
+                         (linkpath (pax-header-linkpath pax-attrs))
+                         (mtime (pax-header-mtime pax-attrs))
+                         (path (pax-header-path pax-attrs))
+                         (size (pax-header-size pax-attrs))
+                         (uid (pax-header-uid pax-attrs))
+                         (uname (pax-header-uname pax-attrs))
+                         ;; Tokenize the header of the _real_ file entry,
+                         ;; which is further 512 bytes into the archive.
+                         (descriptor
+                          (tar-header-block-tokenize (+ pos 512) coding
+                                                     'ignore-trailing-slash)))
+                    ;; Fix the descriptor of the real file entry by
+                    ;; overriding some of the fields with the information
+                    ;; from the extended header.
+                    (if gid
+                        (setf (tar-header-gid descriptor) gid))
+                    (if gname
+                        (setf (tar-header-gname descriptor) gname))
+                    (if linkpath
+                        (setf (tar-header-link-name descriptor) linkpath))
+                    (if mtime
+                        (setf (tar-header-date descriptor) mtime))
+                    (if path
+                        (setf (tar-header-name descriptor) path))
+                    (if size
+                        (setf (tar-header-size descriptor) size))
+                    (if uid
+                        (setf (tar-header-uid descriptor) uid))
+                    (if uname
+                        (setf (tar-header-uname descriptor) uname))
+                    descriptor)
+
+                (make-tar-header
+                 (copy-marker pos nil)
+                 name
+                 (tar-parse-octal-integer string tar-mode-offset
+                                          tar-uid-offset)
+                 (tar-parse-octal-integer string tar-uid-offset
+                                          tar-gid-offset)
+                 (tar-parse-octal-integer string tar-gid-offset
+                                          tar-size-offset)
+                 (tar-parse-octal-integer string tar-size-offset
+                                          tar-time-offset)
+                 (tar-parse-octal-integer string tar-time-offset
+                                          tar-chk-offset)
+                 (tar-parse-octal-integer string tar-chk-offset
+                                          tar-linkp-offset)
+                 link-p
+                 linkname
+                 uname-valid-p
+                 (when uname-valid-p
+                   (decode-coding-string
+                    (substring string tar-uname-offset uname-end) coding))
+                 (when uname-valid-p
+                   (decode-coding-string
+                    (substring string tar-gname-offset gname-end) coding))
+                 (tar-parse-octal-integer string tar-dmaj-offset
+                                          tar-dmin-offset)
+                 (tar-parse-octal-integer string tar-dmin-offset
+                                          tar-prefix-offset)
+                 ))))))))
 
 ;; Pseudo-field.
 (defun tar-header-data-end (descriptor)