Skip to content

Commit da6ce58

Browse files
danifusserhiy-storchaka
authored andcommitted
bpo-36993: Improve error reporting for zipfiles with bad zip64 extra data. (GH-14656)
1 parent 4c155f7 commit da6ce58

File tree

3 files changed

+236
-0
lines changed

3 files changed

+236
-0
lines changed

Lib/test/test_zipfile.py

Lines changed: 222 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import contextlib
22
import importlib.util
33
import io
4+
import itertools
45
import os
56
import pathlib
67
import posixpath
@@ -824,6 +825,227 @@ def test_append(self):
824825
zinfo = zipfp.getinfo("strfile")
825826
self.assertEqual(zinfo.extra, extra)
826827

828+
def make_zip64_file(
829+
self, file_size_64_set=False, file_size_extra=False,
830+
compress_size_64_set=False, compress_size_extra=False,
831+
header_offset_64_set=False, header_offset_extra=False,
832+
):
833+
"""Generate bytes sequence for a zip with (incomplete) zip64 data.
834+
835+
The actual values (not the zip 64 0xffffffff values) stored in the file
836+
are:
837+
file_size: 8
838+
compress_size: 8
839+
header_offset: 0
840+
"""
841+
actual_size = 8
842+
actual_header_offset = 0
843+
local_zip64_fields = []
844+
central_zip64_fields = []
845+
846+
file_size = actual_size
847+
if file_size_64_set:
848+
file_size = 0xffffffff
849+
if file_size_extra:
850+
local_zip64_fields.append(actual_size)
851+
central_zip64_fields.append(actual_size)
852+
file_size = struct.pack("<L", file_size)
853+
854+
compress_size = actual_size
855+
if compress_size_64_set:
856+
compress_size = 0xffffffff
857+
if compress_size_extra:
858+
local_zip64_fields.append(actual_size)
859+
central_zip64_fields.append(actual_size)
860+
compress_size = struct.pack("<L", compress_size)
861+
862+
header_offset = actual_header_offset
863+
if header_offset_64_set:
864+
header_offset = 0xffffffff
865+
if header_offset_extra:
866+
central_zip64_fields.append(actual_header_offset)
867+
header_offset = struct.pack("<L", header_offset)
868+
869+
local_extra = struct.pack(
870+
'<HH' + 'Q'*len(local_zip64_fields),
871+
0x0001,
872+
8*len(local_zip64_fields),
873+
*local_zip64_fields
874+
)
875+
876+
central_extra = struct.pack(
877+
'<HH' + 'Q'*len(central_zip64_fields),
878+
0x0001,
879+
8*len(central_zip64_fields),
880+
*central_zip64_fields
881+
)
882+
883+
central_dir_size = struct.pack('<Q', 58 + 8 * len(central_zip64_fields))
884+
offset_to_central_dir = struct.pack('<Q', 50 + 8 * len(local_zip64_fields))
885+
886+
local_extra_length = struct.pack("<H", 4 + 8 * len(local_zip64_fields))
887+
central_extra_length = struct.pack("<H", 4 + 8 * len(central_zip64_fields))
888+
889+
filename = b"test.txt"
890+
content = b"test1234"
891+
filename_length = struct.pack("<H", len(filename))
892+
zip64_contents = (
893+
# Local file header
894+
b"PK\x03\x04\x14\x00\x00\x00\x00\x00\x00\x00!\x00\x9e%\xf5\xaf"
895+
+ compress_size
896+
+ file_size
897+
+ filename_length
898+
+ local_extra_length
899+
+ filename
900+
+ local_extra
901+
+ content
902+
# Central directory:
903+
+ b"PK\x01\x02-\x03-\x00\x00\x00\x00\x00\x00\x00!\x00\x9e%\xf5\xaf"
904+
+ compress_size
905+
+ file_size
906+
+ filename_length
907+
+ central_extra_length
908+
+ b"\x00\x00\x00\x00\x00\x00\x00\x00\x80\x01"
909+
+ header_offset
910+
+ filename
911+
+ central_extra
912+
# Zip64 end of central directory
913+
+ b"PK\x06\x06,\x00\x00\x00\x00\x00\x00\x00-\x00-"
914+
+ b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00"
915+
+ b"\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00"
916+
+ central_dir_size
917+
+ offset_to_central_dir
918+
# Zip64 end of central directory locator
919+
+ b"PK\x06\x07\x00\x00\x00\x00l\x00\x00\x00\x00\x00\x00\x00\x01"
920+
+ b"\x00\x00\x00"
921+
# end of central directory
922+
+ b"PK\x05\x06\x00\x00\x00\x00\x01\x00\x01\x00:\x00\x00\x002\x00"
923+
+ b"\x00\x00\x00\x00"
924+
)
925+
return zip64_contents
926+
927+
def test_bad_zip64_extra(self):
928+
"""Missing zip64 extra records raises an exception.
929+
930+
There are 4 fields that the zip64 format handles (the disk number is
931+
not used in this module and so is ignored here). According to the zip
932+
spec:
933+
The order of the fields in the zip64 extended
934+
information record is fixed, but the fields MUST
935+
only appear if the corresponding Local or Central
936+
directory record field is set to 0xFFFF or 0xFFFFFFFF.
937+
938+
If the zip64 extra content doesn't contain enough entries for the
939+
number of fields marked with 0xFFFF or 0xFFFFFFFF, we raise an error.
940+
This test mismatches the length of the zip64 extra field and the number
941+
of fields set to indicate the presence of zip64 data.
942+
"""
943+
# zip64 file size present, no fields in extra, expecting one, equals
944+
# missing file size.
945+
missing_file_size_extra = self.make_zip64_file(
946+
file_size_64_set=True,
947+
)
948+
with self.assertRaises(zipfile.BadZipFile) as e:
949+
zipfile.ZipFile(io.BytesIO(missing_file_size_extra))
950+
self.assertIn('file size', str(e.exception).lower())
951+
952+
# zip64 file size present, zip64 compress size present, one field in
953+
# extra, expecting two, equals missing compress size.
954+
missing_compress_size_extra = self.make_zip64_file(
955+
file_size_64_set=True,
956+
file_size_extra=True,
957+
compress_size_64_set=True,
958+
)
959+
with self.assertRaises(zipfile.BadZipFile) as e:
960+
zipfile.ZipFile(io.BytesIO(missing_compress_size_extra))
961+
self.assertIn('compress size', str(e.exception).lower())
962+
963+
# zip64 compress size present, no fields in extra, expecting one,
964+
# equals missing compress size.
965+
missing_compress_size_extra = self.make_zip64_file(
966+
compress_size_64_set=True,
967+
)
968+
with self.assertRaises(zipfile.BadZipFile) as e:
969+
zipfile.ZipFile(io.BytesIO(missing_compress_size_extra))
970+
self.assertIn('compress size', str(e.exception).lower())
971+
972+
# zip64 file size present, zip64 compress size present, zip64 header
973+
# offset present, two fields in extra, expecting three, equals missing
974+
# header offset
975+
missing_header_offset_extra = self.make_zip64_file(
976+
file_size_64_set=True,
977+
file_size_extra=True,
978+
compress_size_64_set=True,
979+
compress_size_extra=True,
980+
header_offset_64_set=True,
981+
)
982+
with self.assertRaises(zipfile.BadZipFile) as e:
983+
zipfile.ZipFile(io.BytesIO(missing_header_offset_extra))
984+
self.assertIn('header offset', str(e.exception).lower())
985+
986+
# zip64 compress size present, zip64 header offset present, one field
987+
# in extra, expecting two, equals missing header offset
988+
missing_header_offset_extra = self.make_zip64_file(
989+
file_size_64_set=False,
990+
compress_size_64_set=True,
991+
compress_size_extra=True,
992+
header_offset_64_set=True,
993+
)
994+
with self.assertRaises(zipfile.BadZipFile) as e:
995+
zipfile.ZipFile(io.BytesIO(missing_header_offset_extra))
996+
self.assertIn('header offset', str(e.exception).lower())
997+
998+
# zip64 file size present, zip64 header offset present, one field in
999+
# extra, expecting two, equals missing header offset
1000+
missing_header_offset_extra = self.make_zip64_file(
1001+
file_size_64_set=True,
1002+
file_size_extra=True,
1003+
compress_size_64_set=False,
1004+
header_offset_64_set=True,
1005+
)
1006+
with self.assertRaises(zipfile.BadZipFile) as e:
1007+
zipfile.ZipFile(io.BytesIO(missing_header_offset_extra))
1008+
self.assertIn('header offset', str(e.exception).lower())
1009+
1010+
# zip64 header offset present, no fields in extra, expecting one,
1011+
# equals missing header offset
1012+
missing_header_offset_extra = self.make_zip64_file(
1013+
file_size_64_set=False,
1014+
compress_size_64_set=False,
1015+
header_offset_64_set=True,
1016+
)
1017+
with self.assertRaises(zipfile.BadZipFile) as e:
1018+
zipfile.ZipFile(io.BytesIO(missing_header_offset_extra))
1019+
self.assertIn('header offset', str(e.exception).lower())
1020+
1021+
def test_generated_valid_zip64_extra(self):
1022+
# These values are what is set in the make_zip64_file method.
1023+
expected_file_size = 8
1024+
expected_compress_size = 8
1025+
expected_header_offset = 0
1026+
expected_content = b"test1234"
1027+
1028+
# Loop through the various valid combinations of zip64 masks
1029+
# present and extra fields present.
1030+
params = (
1031+
{"file_size_64_set": True, "file_size_extra": True},
1032+
{"compress_size_64_set": True, "compress_size_extra": True},
1033+
{"header_offset_64_set": True, "header_offset_extra": True},
1034+
)
1035+
1036+
for r in range(1, len(params) + 1):
1037+
for combo in itertools.combinations(params, r):
1038+
kwargs = {}
1039+
for c in combo:
1040+
kwargs.update(c)
1041+
with zipfile.ZipFile(io.BytesIO(self.make_zip64_file(**kwargs))) as zf:
1042+
zinfo = zf.infolist()[0]
1043+
self.assertEqual(zinfo.file_size, expected_file_size)
1044+
self.assertEqual(zinfo.compress_size, expected_compress_size)
1045+
self.assertEqual(zinfo.header_offset, expected_header_offset)
1046+
self.assertEqual(zf.read(zinfo), expected_content)
1047+
1048+
8271049
@requires_zlib
8281050
class DeflateTestZip64InSmallFiles(AbstractTestZip64InSmallFiles,
8291051
unittest.TestCase):

Lib/zipfile.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -480,14 +480,26 @@ def _decodeExtra(self):
480480

481481
# ZIP64 extension (large files and/or large archives)
482482
if self.file_size in (0xffffffffffffffff, 0xffffffff):
483+
if len(counts) <= idx:
484+
raise BadZipFile(
485+
"Corrupt zip64 extra field. File size not found."
486+
)
483487
self.file_size = counts[idx]
484488
idx += 1
485489

486490
if self.compress_size == 0xFFFFFFFF:
491+
if len(counts) <= idx:
492+
raise BadZipFile(
493+
"Corrupt zip64 extra field. Compress size not found."
494+
)
487495
self.compress_size = counts[idx]
488496
idx += 1
489497

490498
if self.header_offset == 0xffffffff:
499+
if len(counts) <= idx:
500+
raise BadZipFile(
501+
"Corrupt zip64 extra field. Header offset not found."
502+
)
491503
old = self.header_offset
492504
self.header_offset = counts[idx]
493505
idx+=1
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Improve error reporting for corrupt zip files with bad zip64 extra data. Patch
2+
by Daniel Hillier.

0 commit comments

Comments
 (0)