Skip to content

Commit b0517a1

Browse files
bpo-45863: tarfile: don't zero out header fields unnecessarily (GH-29693)
Numeric fields of type float, notably mtime, can't be represented exactly in the ustar header, so the pax header is used. But it is helpful to set them to the nearest int (i.e. second rather than nanosecond precision mtimes) in the ustar header as well, for the benefit of unarchivers that don't understand the pax header.

Add test for tarfile.TarInfo.create_pax_header to confirm correct behaviour.

(cherry picked from commit bf2d44f)

Co-authored-by: Joshua Root <[email protected]>
1 parent bde3765 commit b0517a1

File tree

3 files changed

+71
-6
lines changed

3 files changed

+71
-6
lines changed

Lib/tarfile.py

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -887,15 +887,24 @@ def create_pax_header(self, info, encoding):
887887
# Test number fields for values that exceed the field limit or values
888888
# that like to be stored as float.
889889
for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
890-
if name in pax_headers:
891-
# The pax header has priority. Avoid overflow.
892-
info[name] = 0
893-
continue
890+
needs_pax = False
894891

895892
val = info[name]
896-
if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float):
897-
pax_headers[name] = str(val)
893+
val_is_float = isinstance(val, float)
894+
val_int = round(val) if val_is_float else val
895+
if not 0 <= val_int < 8 ** (digits - 1):
896+
# Avoid overflow.
898897
info[name] = 0
898+
needs_pax = True
899+
elif val_is_float:
900+
# Put rounded value in ustar header, and full
901+
# precision value in pax header.
902+
info[name] = val_int
903+
needs_pax = True
904+
905+
# The existing pax header has priority.
906+
if needs_pax and name not in pax_headers:
907+
pax_headers[name] = str(val)
899908

900909
# Create a pax extended header if necessary.
901910
if pax_headers:

Lib/test/test_tarfile.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1898,6 +1898,61 @@ def test_pax_extended_header(self):
18981898
finally:
18991899
tar.close()
19001900

1901+
def test_create_pax_header(self):
1902+
# The ustar header should contain values that can be
1903+
# represented reasonably, even if a better (e.g. higher
1904+
# precision) version is set in the pax header.
1905+
# Issue #45863
1906+
1907+
# values that should be kept
1908+
t = tarfile.TarInfo()
1909+
t.name = "foo"
1910+
t.mtime = 1000.1
1911+
t.size = 100
1912+
t.uid = 123
1913+
t.gid = 124
1914+
info = t.get_info()
1915+
header = t.create_pax_header(info, encoding="iso8859-1")
1916+
self.assertEqual(info['name'], "foo")
1917+
# mtime should be rounded to nearest second
1918+
self.assertIsInstance(info['mtime'], int)
1919+
self.assertEqual(info['mtime'], 1000)
1920+
self.assertEqual(info['size'], 100)
1921+
self.assertEqual(info['uid'], 123)
1922+
self.assertEqual(info['gid'], 124)
1923+
self.assertEqual(header,
1924+
b'././@PaxHeader' + bytes(86) \
1925+
+ b'0000000\x000000000\x000000000\x0000000000020\x0000000000000\x00010205\x00 x' \
1926+
+ bytes(100) + b'ustar\x0000'+ bytes(247) \
1927+
+ b'16 mtime=1000.1\n' + bytes(496) + b'foo' + bytes(97) \
1928+
+ b'0000644\x000000173\x000000174\x0000000000144\x0000000001750\x00006516\x00 0' \
1929+
+ bytes(100) + b'ustar\x0000' + bytes(247))
1930+
1931+
# values that should be changed
1932+
t = tarfile.TarInfo()
1933+
t.name = "foo\u3374" # can't be represented in ascii
1934+
t.mtime = 10**10 # too big
1935+
t.size = 10**10 # too big
1936+
t.uid = 8**8 # too big
1937+
t.gid = 8**8+1 # too big
1938+
info = t.get_info()
1939+
header = t.create_pax_header(info, encoding="iso8859-1")
1940+
# name is kept as-is in info but should be added to pax header
1941+
self.assertEqual(info['name'], "foo\u3374")
1942+
self.assertEqual(info['mtime'], 0)
1943+
self.assertEqual(info['size'], 0)
1944+
self.assertEqual(info['uid'], 0)
1945+
self.assertEqual(info['gid'], 0)
1946+
self.assertEqual(header,
1947+
b'././@PaxHeader' + bytes(86) \
1948+
+ b'0000000\x000000000\x000000000\x0000000000130\x0000000000000\x00010207\x00 x' \
1949+
+ bytes(100) + b'ustar\x0000' + bytes(247) \
1950+
+ b'15 path=foo\xe3\x8d\xb4\n16 uid=16777216\n' \
1951+
+ b'16 gid=16777217\n20 size=10000000000\n' \
1952+
+ b'21 mtime=10000000000\n'+ bytes(424) + b'foo?' + bytes(96) \
1953+
+ b'0000644\x000000000\x000000000\x0000000000000\x0000000000000\x00006540\x00 0' \
1954+
+ bytes(100) + b'ustar\x0000' + bytes(247))
1955+
19011956

19021957
class UnicodeTest:
19031958

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
When the :mod:`tarfile` module creates a pax format archive, it will put an integer representation of timestamps in the ustar header (if possible) for the benefit of older unarchivers, in addition to the existing full-precision timestamps in the pax extended header.

0 commit comments

Comments
 (0)