Skip to content

Commit eab3b3f

Browse files
wchargin authored and serhiy-storchaka committed
bpo-39389: gzip: fix compression level metadata (GH-18077)
As described in RFC 1952, section 2.3.1, the XFL (eXtra FLags) byte of a gzip member header should indicate whether the DEFLATE algorithm was tuned for speed or compression ratio. Prior to this patch, archives emitted by the `gzip` module always indicated maximum compression.
1 parent 85ead4f commit eab3b3f

File tree

3 files changed

+31
-3
lines changed

3 files changed

+31
-3
lines changed

Lib/gzip.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -209,7 +209,7 @@ def __init__(self, filename=None, mode=None,
209209
self.fileobj = fileobj
210210

211211
if self.mode == WRITE:
212-
self._write_gzip_header()
212+
self._write_gzip_header(compresslevel)
213213

214214
@property
215215
def filename(self):
@@ -236,7 +236,7 @@ def _init_write(self, filename):
236236
self.bufsize = 0
237237
self.offset = 0 # Current file offset for seek(), tell(), etc
238238

239-
def _write_gzip_header(self):
239+
def _write_gzip_header(self, compresslevel):
240240
self.fileobj.write(b'\037\213') # magic header
241241
self.fileobj.write(b'\010') # compression method
242242
try:
@@ -257,7 +257,13 @@ def _write_gzip_header(self):
257257
if mtime is None:
258258
mtime = time.time()
259259
write32u(self.fileobj, int(mtime))
260-
self.fileobj.write(b'\002')
260+
if compresslevel == _COMPRESS_LEVEL_BEST:
261+
xfl = b'\002'
262+
elif compresslevel == _COMPRESS_LEVEL_FAST:
263+
xfl = b'\004'
264+
else:
265+
xfl = b'\000'
266+
self.fileobj.write(xfl)
261267
self.fileobj.write(b'\377')
262268
if fname:
263269
self.fileobj.write(fname + b'\000')

Lib/test/test_gzip.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -358,6 +358,26 @@ def test_metadata(self):
358358
isizeBytes = fRead.read(4)
359359
self.assertEqual(isizeBytes, struct.pack('<i', len(data1)))
360360

361+
def test_compresslevel_metadata(self):
362+
# see RFC 1952: http://www.faqs.org/rfcs/rfc1952.html
363+
# specifically, discussion of XFL in section 2.3.1
364+
cases = [
365+
('fast', 1, b'\x04'),
366+
('best', 9, b'\x02'),
367+
('tradeoff', 6, b'\x00'),
368+
]
369+
xflOffset = 8
370+
371+
for (name, level, expectedXflByte) in cases:
372+
with self.subTest(name):
373+
fWrite = gzip.GzipFile(self.filename, 'w', compresslevel=level)
374+
with fWrite:
375+
fWrite.write(data1)
376+
with open(self.filename, 'rb') as fRead:
377+
fRead.seek(xflOffset)
378+
xflByte = fRead.read(1)
379+
self.assertEqual(xflByte, expectedXflByte)
380+
361381
def test_with_open(self):
362382
# GzipFile supports the context management protocol
363383
with gzip.GzipFile(self.filename, "wb") as f:
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Write accurate compression level metadata in :mod:`gzip` archives, rather
2+
than always signaling maximum compression.

0 commit comments

Comments
 (0)