-
-
Notifications
You must be signed in to change notification settings - Fork 32.3k
bpo-10030: Sped up reading encrypted ZIP files by 2 times. #550
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
137d48d
aeaa0e3
7e335c9
42629de
55d084b
559b6fe
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -509,65 +509,63 @@ def is_dir(self): | |
return self.filename[-1] == '/' | ||
|
||
|
||
class _ZipDecrypter: | ||
"""Class to handle decryption of files stored within a ZIP archive. | ||
# ZIP encryption uses the CRC32 one-byte primitive for scrambling some | ||
# internal keys. We noticed that a direct implementation is faster than | ||
# relying on binascii.crc32(). | ||
|
||
_crctable = None | ||
def _gen_crc(crc): | ||
for j in range(8): | ||
if crc & 1: | ||
crc = (crc >> 1) ^ 0xEDB88320 | ||
else: | ||
crc >>= 1 | ||
return crc | ||
|
||
# ZIP supports a password-based form of encryption. Even though known | ||
# plaintext attacks have been found against it, it is still useful | ||
# to be able to get data out of such a file. | ||
# | ||
# Usage: | ||
# zd = _ZipDecrypter(mypwd) | ||
# plain_bytes = zd(cypher_bytes) | ||
|
||
def _ZipDecrypter(pwd): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I suggest to rename the parameter to "password". There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The name "pwd" is used for parameters of public API. It would be better to keep it consistent. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oh ok, in this case it's fine :-) |
||
key0 = 305419896 | ||
key1 = 591751049 | ||
key2 = 878082192 | ||
|
||
global _crctable | ||
if _crctable is None: | ||
_crctable = list(map(_gen_crc, range(256))) | ||
crctable = _crctable | ||
|
||
def crc32(ch, crc): | ||
"""Compute the CRC32 primitive on one byte.""" | ||
return (crc >> 8) ^ crctable[(crc ^ ch) & 0xFF] | ||
|
||
ZIP supports a password-based form of encryption. Even though known | ||
plaintext attacks have been found against it, it is still useful | ||
to be able to get data out of such a file. | ||
def update_keys(c): | ||
nonlocal key0, key1, key2 | ||
key0 = crc32(c, key0) | ||
key1 = (key1 + (key0 & 0xFF)) & 0xFFFFFFFF | ||
key1 = (key1 * 134775813 + 1) & 0xFFFFFFFF | ||
key2 = crc32(key1 >> 24, key2) | ||
|
||
Usage: | ||
zd = _ZipDecrypter(mypwd) | ||
plain_char = zd(cypher_char) | ||
plain_text = map(zd, cypher_text) | ||
""" | ||
for p in pwd: | ||
update_keys(p) | ||
|
||
def _GenerateCRCTable(): | ||
"""Generate a CRC-32 table. | ||
def decrypter(data): | ||
"""Decrypt a bytes object.""" | ||
result = bytearray() | ||
append = result.append | ||
for c in data: | ||
k = key2 | 2 | ||
c ^= ((k * (k^1)) >> 8) & 0xFF | ||
update_keys(c) | ||
append(c) | ||
return bytes(result) | ||
|
||
ZIP encryption uses the CRC32 one-byte primitive for scrambling some | ||
internal keys. We noticed that a direct implementation is faster than | ||
relying on binascii.crc32(). | ||
""" | ||
poly = 0xedb88320 | ||
table = [0] * 256 | ||
for i in range(256): | ||
crc = i | ||
for j in range(8): | ||
if crc & 1: | ||
crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly | ||
else: | ||
crc = ((crc >> 1) & 0x7FFFFFFF) | ||
table[i] = crc | ||
return table | ||
crctable = None | ||
|
||
def _crc32(self, ch, crc): | ||
"""Compute the CRC32 primitive on one byte.""" | ||
return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ch) & 0xff] | ||
|
||
def __init__(self, pwd): | ||
if _ZipDecrypter.crctable is None: | ||
_ZipDecrypter.crctable = _ZipDecrypter._GenerateCRCTable() | ||
self.key0 = 305419896 | ||
self.key1 = 591751049 | ||
self.key2 = 878082192 | ||
for p in pwd: | ||
self._UpdateKeys(p) | ||
|
||
def _UpdateKeys(self, c): | ||
self.key0 = self._crc32(c, self.key0) | ||
self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295 | ||
self.key1 = (self.key1 * 134775813 + 1) & 4294967295 | ||
self.key2 = self._crc32((self.key1 >> 24) & 255, self.key2) | ||
|
||
def __call__(self, c): | ||
"""Decrypt a single character.""" | ||
assert isinstance(c, int) | ||
k = self.key2 | 2 | ||
c = c ^ (((k * (k^1)) >> 8) & 255) | ||
self._UpdateKeys(c) | ||
return c | ||
return decrypter | ||
|
||
|
||
class LZMACompressor: | ||
|
@@ -953,7 +951,7 @@ def _read2(self, n): | |
raise EOFError | ||
|
||
if self._decrypter is not None: | ||
data = bytes(map(self._decrypter, data)) | ||
data = self._decrypter(data) | ||
return data | ||
|
||
def close(self): | ||
|
@@ -1411,7 +1409,7 @@ def open(self, name, mode="r", pwd=None, *, force_zip64=False): | |
# or the MSB of the file time depending on the header type | ||
# and is used to check the correctness of the password. | ||
header = zef_file.read(12) | ||
h = list(map(zd, header[0:12])) | ||
h = zd(header[0:12]) | ||
if zinfo.flag_bits & 0x8: | ||
# compare against the file type from extended local headers | ||
check_byte = (zinfo._raw_time >> 8) & 0xff | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
IMHO it's worth it to add this warning, maybe just as a note, in zipfile documentation.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
zipfile
doesn't support creating encrypted ZIP archives. It just supports reading encrypted ZIP archives created by other programs. Thus this warning doesn't have relation to the use of thezipfile
module. It has relation to the cause why encrypting is not implemented. It is useful forzipfile
developers, not users.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ok. Anyway, it's unrelated to your change. I started a thread on security-SIG:
https://mail.python.org/pipermail/security-sig/2017-March/000238.html