From 682138a3544a2d7de457c88712e738938568f908 Mon Sep 17 00:00:00 2001 From: Valentin Samir Date: Wed, 19 Feb 2020 16:40:03 +0100 Subject: [PATCH 1/2] bpo-39688: tarfile: compute next header offset using pax size for sparse file In case of a sparse file, the tarinfo.size attribute is set to the sparse file expanded size (pax attribute GNU.sparse.size or GNU.sparse.realsize) and does not correspond to the actual size of the data block. The size of the data block is specified by the size pax header if present or by the ustar size header. Moreover, for GNU sparse 1.0 files, the data block starts at the beginning of the sparse mapping and not after the sparse mapping and so the offset should be computed from here. --- Lib/tarfile.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/Lib/tarfile.py b/Lib/tarfile.py index e2b60532f693d4b..60c4eb97bb6860c 100755 --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -1292,17 +1292,22 @@ def _proc_pax(self, tarfile): if self.type in (XHDTYPE, SOLARIS_XHDTYPE): # Patch the TarInfo object with the extended header info. next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors) - next.offset = self.offset if "size" in pax_headers: # If the extended header replaces the size field, # we need to recalculate the offset where the next # header starts. 
- offset = next.offset_data + offset = next.offset + BLOCKSIZE if next.isreg() or next.type not in SUPPORTED_TYPES: - offset += next._block(next.size) + try: + size = PAX_NUMBER_FIELDS["size"](pax_headers["size"]) + except ValueError: + size = 0 + offset += next._block(size) tarfile.offset = offset + next.offset = self.offset + return next def _proc_gnusparse_00(self, next, pax_headers, buf): From e3fb592e79a01098d5d7f2b154773bb6aaa8a791 Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Wed, 19 Feb 2020 16:35:54 +0000 Subject: [PATCH 2/2] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20blu?= =?UTF-8?q?rb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../next/Library/2020-02-19-16-35-52.bpo-39688.EPD_zn.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2020-02-19-16-35-52.bpo-39688.EPD_zn.rst diff --git a/Misc/NEWS.d/next/Library/2020-02-19-16-35-52.bpo-39688.EPD_zn.rst b/Misc/NEWS.d/next/Library/2020-02-19-16-35-52.bpo-39688.EPD_zn.rst new file mode 100644 index 000000000000000..5ac73115e562fe6 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2020-02-19-16-35-52.bpo-39688.EPD_zn.rst @@ -0,0 +1,2 @@ +Compute next header offset using pax size for sparse file instead of the +sparse real size (expanded size). \ No newline at end of file