Skip to content

bpo-36993: Improve error reporting for zipfiles with bad zip64 extra … #14656

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Oct 29, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
222 changes: 222 additions & 0 deletions Lib/test/test_zipfile.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import contextlib
import importlib.util
import io
import itertools
import os
import pathlib
import posixpath
Expand Down Expand Up @@ -821,6 +822,227 @@ def test_append(self):
zinfo = zipfp.getinfo("strfile")
self.assertEqual(zinfo.extra, extra)

def make_zip64_file(
self, file_size_64_set=False, file_size_extra=False,
compress_size_64_set=False, compress_size_extra=False,
header_offset_64_set=False, header_offset_extra=False,
):
"""Generate bytes sequence for a zip with (incomplete) zip64 data.

The actual values (not the zip 64 0xffffffff values) stored in the file
are:
file_size: 8
compress_size: 8
header_offset: 0
"""
actual_size = 8
actual_header_offset = 0
local_zip64_fields = []
central_zip64_fields = []

file_size = actual_size
if file_size_64_set:
file_size = 0xffffffff
if file_size_extra:
local_zip64_fields.append(actual_size)
central_zip64_fields.append(actual_size)
file_size = struct.pack("<L", file_size)

compress_size = actual_size
if compress_size_64_set:
compress_size = 0xffffffff
if compress_size_extra:
local_zip64_fields.append(actual_size)
central_zip64_fields.append(actual_size)
compress_size = struct.pack("<L", compress_size)

header_offset = actual_header_offset
if header_offset_64_set:
header_offset = 0xffffffff
if header_offset_extra:
central_zip64_fields.append(actual_header_offset)
header_offset = struct.pack("<L", header_offset)

local_extra = struct.pack(
'<HH' + 'Q'*len(local_zip64_fields),
0x0001,
8*len(local_zip64_fields),
*local_zip64_fields
)

central_extra = struct.pack(
'<HH' + 'Q'*len(central_zip64_fields),
0x0001,
8*len(central_zip64_fields),
*central_zip64_fields
)

central_dir_size = struct.pack('<Q', 58 + 8 * len(central_zip64_fields))
offset_to_central_dir = struct.pack('<Q', 50 + 8 * len(local_zip64_fields))

local_extra_length = struct.pack("<H", 4 + 8 * len(local_zip64_fields))
central_extra_length = struct.pack("<H", 4 + 8 * len(central_zip64_fields))

filename = b"test.txt"
content = b"test1234"
filename_length = struct.pack("<H", len(filename))
zip64_contents = (
# Local file header
b"PK\x03\x04\x14\x00\x00\x00\x00\x00\x00\x00!\x00\x9e%\xf5\xaf"
+ compress_size
+ file_size
+ filename_length
+ local_extra_length
+ filename
+ local_extra
+ content
# Central directory:
+ b"PK\x01\x02-\x03-\x00\x00\x00\x00\x00\x00\x00!\x00\x9e%\xf5\xaf"
+ compress_size
+ file_size
+ filename_length
+ central_extra_length
+ b"\x00\x00\x00\x00\x00\x00\x00\x00\x80\x01"
+ header_offset
+ filename
+ central_extra
# Zip64 end of central directory
+ b"PK\x06\x06,\x00\x00\x00\x00\x00\x00\x00-\x00-"
+ b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00"
+ b"\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00"
+ central_dir_size
+ offset_to_central_dir
# Zip64 end of central directory locator
+ b"PK\x06\x07\x00\x00\x00\x00l\x00\x00\x00\x00\x00\x00\x00\x01"
+ b"\x00\x00\x00"
# end of central directory
+ b"PK\x05\x06\x00\x00\x00\x00\x01\x00\x01\x00:\x00\x00\x002\x00"
+ b"\x00\x00\x00\x00"
)
return zip64_contents

def test_bad_zip64_extra(self):
"""Missing zip64 extra records raises an exception.

There are 4 fields that the zip64 format handles (the disk number is
not used in this module and so is ignored here). According to the zip
spec:
The order of the fields in the zip64 extended
information record is fixed, but the fields MUST
only appear if the corresponding Local or Central
directory record field is set to 0xFFFF or 0xFFFFFFFF.

If the zip64 extra content doesn't contain enough entries for the
number of fields marked with 0xFFFF or 0xFFFFFFFF, we raise an error.
This test mismatches the length of the zip64 extra field and the number
of fields set to indicate the presence of zip64 data.
"""
# zip64 file size present, no fields in extra, expecting one, equals
# missing file size.
missing_file_size_extra = self.make_zip64_file(
file_size_64_set=True,
)
with self.assertRaises(zipfile.BadZipFile) as e:
zipfile.ZipFile(io.BytesIO(missing_file_size_extra))
self.assertIn('file size', str(e.exception).lower())

# zip64 file size present, zip64 compress size present, one field in
# extra, expecting two, equals missing compress size.
missing_compress_size_extra = self.make_zip64_file(
file_size_64_set=True,
file_size_extra=True,
compress_size_64_set=True,
)
with self.assertRaises(zipfile.BadZipFile) as e:
zipfile.ZipFile(io.BytesIO(missing_compress_size_extra))
self.assertIn('compress size', str(e.exception).lower())

# zip64 compress size present, no fields in extra, expecting one,
# equals missing compress size.
missing_compress_size_extra = self.make_zip64_file(
compress_size_64_set=True,
)
with self.assertRaises(zipfile.BadZipFile) as e:
zipfile.ZipFile(io.BytesIO(missing_compress_size_extra))
self.assertIn('compress size', str(e.exception).lower())

# zip64 file size present, zip64 compress size present, zip64 header
# offset present, two fields in extra, expecting three, equals missing
# header offset
missing_header_offset_extra = self.make_zip64_file(
file_size_64_set=True,
file_size_extra=True,
compress_size_64_set=True,
compress_size_extra=True,
header_offset_64_set=True,
)
with self.assertRaises(zipfile.BadZipFile) as e:
zipfile.ZipFile(io.BytesIO(missing_header_offset_extra))
self.assertIn('header offset', str(e.exception).lower())

# zip64 compress size present, zip64 header offset present, one field
# in extra, expecting two, equals missing header offset
missing_header_offset_extra = self.make_zip64_file(
file_size_64_set=False,
compress_size_64_set=True,
compress_size_extra=True,
header_offset_64_set=True,
)
with self.assertRaises(zipfile.BadZipFile) as e:
zipfile.ZipFile(io.BytesIO(missing_header_offset_extra))
self.assertIn('header offset', str(e.exception).lower())

# zip64 file size present, zip64 header offset present, one field in
# extra, expecting two, equals missing header offset
missing_header_offset_extra = self.make_zip64_file(
file_size_64_set=True,
file_size_extra=True,
compress_size_64_set=False,
header_offset_64_set=True,
)
with self.assertRaises(zipfile.BadZipFile) as e:
zipfile.ZipFile(io.BytesIO(missing_header_offset_extra))
self.assertIn('header offset', str(e.exception).lower())

# zip64 header offset present, no fields in extra, expecting one,
# equals missing header offset
missing_header_offset_extra = self.make_zip64_file(
file_size_64_set=False,
compress_size_64_set=False,
header_offset_64_set=True,
)
with self.assertRaises(zipfile.BadZipFile) as e:
zipfile.ZipFile(io.BytesIO(missing_header_offset_extra))
self.assertIn('header offset', str(e.exception).lower())

def test_generated_valid_zip64_extra(self):
# These values are what is set in the make_zip64_file method.
expected_file_size = 8
expected_compress_size = 8
expected_header_offset = 0
expected_content = b"test1234"

# Loop through the various valid combinations of zip64 masks
# present and extra fields present.
params = (
{"file_size_64_set": True, "file_size_extra": True},
{"compress_size_64_set": True, "compress_size_extra": True},
{"header_offset_64_set": True, "header_offset_extra": True},
)

for r in range(1, len(params) + 1):
for combo in itertools.combinations(params, r):
kwargs = {}
for c in combo:
kwargs.update(c)
with zipfile.ZipFile(io.BytesIO(self.make_zip64_file(**kwargs))) as zf:
zinfo = zf.infolist()[0]
self.assertEqual(zinfo.file_size, expected_file_size)
self.assertEqual(zinfo.compress_size, expected_compress_size)
self.assertEqual(zinfo.header_offset, expected_header_offset)
self.assertEqual(zf.read(zinfo), expected_content)


@requires_zlib
class DeflateTestZip64InSmallFiles(AbstractTestZip64InSmallFiles,
unittest.TestCase):
Expand Down
12 changes: 12 additions & 0 deletions Lib/zipfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -479,14 +479,26 @@ def _decodeExtra(self):

# ZIP64 extension (large files and/or large archives)
if self.file_size in (0xffffffffffffffff, 0xffffffff):
if len(counts) <= idx:
raise BadZipFile(
"Corrupt zip64 extra field. File size not found."
)
self.file_size = counts[idx]
idx += 1

if self.compress_size == 0xFFFFFFFF:
if len(counts) <= idx:
raise BadZipFile(
"Corrupt zip64 extra field. Compress size not found."
)
self.compress_size = counts[idx]
idx += 1

if self.header_offset == 0xffffffff:
if len(counts) <= idx:
raise BadZipFile(
"Corrupt zip64 extra field. Header offset not found."
)
old = self.header_offset
self.header_offset = counts[idx]
idx+=1
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Improve error reporting for corrupt zip files with bad zip64 extra data. Patch
by Daniel Hillier.