Skip to content

Commit 06e5225

Browse files
bpo-10030: Sped up reading encrypted ZIP files by 2 times. (#550)
1 parent d4edfc9 commit 06e5225

File tree

2 files changed

+57
-57
lines changed

2 files changed

+57
-57
lines changed

Lib/zipfile.py

Lines changed: 55 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -509,65 +509,63 @@ def is_dir(self):
509509
return self.filename[-1] == '/'
510510

511511

512-
class _ZipDecrypter:
513-
"""Class to handle decryption of files stored within a ZIP archive.
512+
# ZIP encryption uses the CRC32 one-byte primitive for scrambling some
513+
# internal keys. We noticed that a direct implementation is faster than
514+
# relying on binascii.crc32().
515+
516+
_crctable = None
517+
def _gen_crc(crc):
518+
for j in range(8):
519+
if crc & 1:
520+
crc = (crc >> 1) ^ 0xEDB88320
521+
else:
522+
crc >>= 1
523+
return crc
524+
525+
# ZIP supports a password-based form of encryption. Even though known
526+
# plaintext attacks have been found against it, it is still useful
527+
# to be able to get data out of such a file.
528+
#
529+
# Usage:
530+
# zd = _ZipDecrypter(mypwd)
531+
# plain_bytes = zd(cypher_bytes)
532+
533+
def _ZipDecrypter(pwd):
534+
key0 = 305419896
535+
key1 = 591751049
536+
key2 = 878082192
537+
538+
global _crctable
539+
if _crctable is None:
540+
_crctable = list(map(_gen_crc, range(256)))
541+
crctable = _crctable
542+
543+
def crc32(ch, crc):
544+
"""Compute the CRC32 primitive on one byte."""
545+
return (crc >> 8) ^ crctable[(crc ^ ch) & 0xFF]
514546

515-
ZIP supports a password-based form of encryption. Even though known
516-
plaintext attacks have been found against it, it is still useful
517-
to be able to get data out of such a file.
547+
def update_keys(c):
548+
nonlocal key0, key1, key2
549+
key0 = crc32(c, key0)
550+
key1 = (key1 + (key0 & 0xFF)) & 0xFFFFFFFF
551+
key1 = (key1 * 134775813 + 1) & 0xFFFFFFFF
552+
key2 = crc32(key1 >> 24, key2)
518553

519-
Usage:
520-
zd = _ZipDecrypter(mypwd)
521-
plain_char = zd(cypher_char)
522-
plain_text = map(zd, cypher_text)
523-
"""
554+
for p in pwd:
555+
update_keys(p)
524556

525-
def _GenerateCRCTable():
526-
"""Generate a CRC-32 table.
557+
def decrypter(data):
558+
"""Decrypt a bytes object."""
559+
result = bytearray()
560+
append = result.append
561+
for c in data:
562+
k = key2 | 2
563+
c ^= ((k * (k^1)) >> 8) & 0xFF
564+
update_keys(c)
565+
append(c)
566+
return bytes(result)
527567

528-
ZIP encryption uses the CRC32 one-byte primitive for scrambling some
529-
internal keys. We noticed that a direct implementation is faster than
530-
relying on binascii.crc32().
531-
"""
532-
poly = 0xedb88320
533-
table = [0] * 256
534-
for i in range(256):
535-
crc = i
536-
for j in range(8):
537-
if crc & 1:
538-
crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
539-
else:
540-
crc = ((crc >> 1) & 0x7FFFFFFF)
541-
table[i] = crc
542-
return table
543-
crctable = None
544-
545-
def _crc32(self, ch, crc):
546-
"""Compute the CRC32 primitive on one byte."""
547-
return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ch) & 0xff]
548-
549-
def __init__(self, pwd):
550-
if _ZipDecrypter.crctable is None:
551-
_ZipDecrypter.crctable = _ZipDecrypter._GenerateCRCTable()
552-
self.key0 = 305419896
553-
self.key1 = 591751049
554-
self.key2 = 878082192
555-
for p in pwd:
556-
self._UpdateKeys(p)
557-
558-
def _UpdateKeys(self, c):
559-
self.key0 = self._crc32(c, self.key0)
560-
self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
561-
self.key1 = (self.key1 * 134775813 + 1) & 4294967295
562-
self.key2 = self._crc32((self.key1 >> 24) & 255, self.key2)
563-
564-
def __call__(self, c):
565-
"""Decrypt a single character."""
566-
assert isinstance(c, int)
567-
k = self.key2 | 2
568-
c = c ^ (((k * (k^1)) >> 8) & 255)
569-
self._UpdateKeys(c)
570-
return c
568+
return decrypter
571569

572570

573571
class LZMACompressor:
@@ -953,7 +951,7 @@ def _read2(self, n):
953951
raise EOFError
954952

955953
if self._decrypter is not None:
956-
data = bytes(map(self._decrypter, data))
954+
data = self._decrypter(data)
957955
return data
958956

959957
def close(self):
@@ -1411,7 +1409,7 @@ def open(self, name, mode="r", pwd=None, *, force_zip64=False):
14111409
# or the MSB of the file time depending on the header type
14121410
# and is used to check the correctness of the password.
14131411
header = zef_file.read(12)
1414-
h = list(map(zd, header[0:12]))
1412+
h = zd(header[0:12])
14151413
if zinfo.flag_bits & 0x8:
14161414
# compare against the file type from extended local headers
14171415
check_byte = (zinfo._raw_time >> 8) & 0xff

Misc/NEWS

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -301,6 +301,8 @@ Extension Modules
301301
Library
302302
-------
303303

304+
- bpo-10030: Sped up reading encrypted ZIP files by 2 times.
305+
304306
- bpo-29204: Element.getiterator() and the html parameter of XMLParser() were
305307
deprecated only in the documentation (since Python 3.2 and 3.4 correspondingly).
306308
Now using them emits a deprecation warning.

0 commit comments

Comments
 (0)