Skip to content

Commit 12c45ef

Browse files
[3.7] bpo-39389: gzip: fix compression level metadata (GH-18077) (GH-18101)
bpo-39389: gzip: fix compression level metadata (GH-18077).
As described in RFC 1952, section 2.3.1, the XFL (eXtra FLags) byte of a gzip member header should indicate whether the DEFLATE algorithm was tuned for speed or for compression ratio. Prior to this patch, archives emitted by the `gzip` module always indicated maximum compression (XFL = 2), regardless of the compression level actually requested.
(Cherry picked from commit eab3b3f.)
Co-authored-by: William Chargin <[email protected]>
1 parent 3eff46f commit 12c45ef

File tree

3 files changed

+36
-3
lines changed

3 files changed

+36
-3
lines changed

Lib/gzip.py

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,11 @@
1717

1818
READ, WRITE = 1, 2
1919

20+
_COMPRESS_LEVEL_FAST = 1
21+
_COMPRESS_LEVEL_TRADEOFF = 6
22+
_COMPRESS_LEVEL_BEST = 9
23+
24+
2025
def open(filename, mode="rb", compresslevel=9,
2126
encoding=None, errors=None, newline=None):
2227
"""Open a gzip-compressed file in binary or text mode.
@@ -191,7 +196,7 @@ def __init__(self, filename=None, mode=None,
191196
self.fileobj = fileobj
192197

193198
if self.mode == WRITE:
194-
self._write_gzip_header()
199+
self._write_gzip_header(compresslevel)
195200

196201
@property
197202
def filename(self):
@@ -218,7 +223,7 @@ def _init_write(self, filename):
218223
self.bufsize = 0
219224
self.offset = 0 # Current file offset for seek(), tell(), etc
220225

221-
def _write_gzip_header(self):
226+
def _write_gzip_header(self, compresslevel):
222227
self.fileobj.write(b'\037\213') # magic header
223228
self.fileobj.write(b'\010') # compression method
224229
try:
@@ -239,7 +244,13 @@ def _write_gzip_header(self):
239244
if mtime is None:
240245
mtime = time.time()
241246
write32u(self.fileobj, int(mtime))
242-
self.fileobj.write(b'\002')
247+
if compresslevel == _COMPRESS_LEVEL_BEST:
248+
xfl = b'\002'
249+
elif compresslevel == _COMPRESS_LEVEL_FAST:
250+
xfl = b'\004'
251+
else:
252+
xfl = b'\000'
253+
self.fileobj.write(xfl)
243254
self.fileobj.write(b'\377')
244255
if fname:
245256
self.fileobj.write(fname + b'\000')

Lib/test/test_gzip.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -358,6 +358,26 @@ def test_metadata(self):
358358
isizeBytes = fRead.read(4)
359359
self.assertEqual(isizeBytes, struct.pack('<i', len(data1)))
360360

361+
def test_compresslevel_metadata(self):
362+
# see RFC 1952: http://www.faqs.org/rfcs/rfc1952.html
363+
# specifically, discussion of XFL in section 2.3.1
364+
cases = [
365+
('fast', 1, b'\x04'),
366+
('best', 9, b'\x02'),
367+
('tradeoff', 6, b'\x00'),
368+
]
369+
xflOffset = 8
370+
371+
for (name, level, expectedXflByte) in cases:
372+
with self.subTest(name):
373+
fWrite = gzip.GzipFile(self.filename, 'w', compresslevel=level)
374+
with fWrite:
375+
fWrite.write(data1)
376+
with open(self.filename, 'rb') as fRead:
377+
fRead.seek(xflOffset)
378+
xflByte = fRead.read(1)
379+
self.assertEqual(xflByte, expectedXflByte)
380+
361381
def test_with_open(self):
362382
# GzipFile supports the context management protocol
363383
with gzip.GzipFile(self.filename, "wb") as f:
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Write accurate compression level metadata in :mod:`gzip` archives, rather
2+
than always signaling maximum compression.

0 commit comments

Comments
 (0)