Skip to content

Commit 22bcc07

Browse files
miss-islingtonMa Lin
andauthored
bpo-41486: zlib uses an UINT32_MAX sliding window for the output buffer (GH-26143)
* zlib uses an UINT32_MAX sliding window for the output buffer. These functions have an initial output buffer size parameter: - zlib.decompress(data, /, wbits=MAX_WBITS, bufsize=DEF_BUF_SIZE) - zlib.Decompress.flush([length]) If the initial size > UINT32_MAX, use an UINT32_MAX sliding window instead of clamping to UINT32_MAX. This speeds up the case where the initial size equals the actual size. This fixes a memory consumption and copying performance regression in earlier 3.10 beta releases if someone used an output buffer larger than 4GiB with zlib.decompress. Reviewed-by: Gregory P. Smith (cherry picked from commit a9a69bb) Co-authored-by: Ma Lin <[email protected]>
1 parent 68330b6 commit 22bcc07

File tree

2 files changed

+117
-30
lines changed

2 files changed

+117
-30
lines changed
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Fix a memory consumption and copying performance regression in earlier 3.10
2+
beta releases if someone used an output buffer larger than 4GiB with
3+
zlib.decompress on input data that expands that large.

Modules/zlibmodule.c

Lines changed: 114 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -30,28 +30,6 @@ OutputBuffer_InitAndGrow(_BlocksOutputBuffer *buffer, Py_ssize_t max_length,
3030
return allocated;
3131
}
3232

33-
/* On success, return value >= 0
34-
On failure, return -1 */
35-
static inline Py_ssize_t
36-
OutputBuffer_InitWithSize(_BlocksOutputBuffer *buffer, Py_ssize_t init_size,
37-
Bytef **next_out, uint32_t *avail_out)
38-
{
39-
Py_ssize_t allocated;
40-
41-
if (init_size >= 0 && // ensure (size_t) cast is safe
42-
(size_t)init_size > UINT32_MAX)
43-
{
44-
/* In 32-bit build, never reach this conditional branch.
45-
The maximum block size accepted by zlib is UINT32_MAX. */
46-
init_size = UINT32_MAX;
47-
}
48-
49-
allocated = _BlocksOutputBuffer_InitWithSize(
50-
buffer, init_size, (void**) next_out);
51-
*avail_out = (uint32_t) allocated;
52-
return allocated;
53-
}
54-
5533
/* On success, return value >= 0
5634
On failure, return -1 */
5735
static inline Py_ssize_t
@@ -84,6 +62,106 @@ OutputBuffer_OnError(_BlocksOutputBuffer *buffer)
8462
_BlocksOutputBuffer_OnError(buffer);
8563
}
8664

65+
/* The max buffer size accepted by zlib is UINT32_MAX, the initial buffer size
66+
`init_size` may > it in 64-bit build. These wrapper functions maintain an
67+
UINT32_MAX sliding window for the first block:
68+
1. OutputBuffer_WindowInitWithSize()
69+
2. OutputBuffer_WindowGrow()
70+
3. OutputBuffer_WindowFinish()
71+
4. OutputBuffer_WindowOnError()
72+
73+
==== is the sliding window:
74+
1. ====------
75+
^ next_posi, left_bytes is 6
76+
2. ----====--
77+
^ next_posi, left_bytes is 2
78+
3. --------==
79+
^ next_posi, left_bytes is 0 */
80+
typedef struct {
81+
Py_ssize_t left_bytes;
82+
Bytef *next_posi;
83+
} _Uint32Window;
84+
85+
/* Initialize the buffer with an inital buffer size.
86+
87+
On success, return value >= 0
88+
On failure, return value < 0 */
89+
static inline Py_ssize_t
90+
OutputBuffer_WindowInitWithSize(_BlocksOutputBuffer *buffer, _Uint32Window *window,
91+
Py_ssize_t init_size,
92+
Bytef **next_out, uint32_t *avail_out)
93+
{
94+
Py_ssize_t allocated = _BlocksOutputBuffer_InitWithSize(
95+
buffer, init_size, (void**) next_out);
96+
97+
if (allocated >= 0) {
98+
// the UINT32_MAX sliding window
99+
Py_ssize_t window_size = Py_MIN((size_t)allocated, UINT32_MAX);
100+
*avail_out = (uint32_t) window_size;
101+
102+
window->left_bytes = allocated - window_size;
103+
window->next_posi = *next_out + window_size;
104+
}
105+
return allocated;
106+
}
107+
108+
/* Grow the buffer.
109+
110+
On success, return value >= 0
111+
On failure, return value < 0 */
112+
static inline Py_ssize_t
113+
OutputBuffer_WindowGrow(_BlocksOutputBuffer *buffer, _Uint32Window *window,
114+
Bytef **next_out, uint32_t *avail_out)
115+
{
116+
Py_ssize_t allocated;
117+
118+
/* ensure no gaps in the data.
119+
if inlined, this check could be optimized away.*/
120+
if (*avail_out != 0) {
121+
PyErr_SetString(PyExc_SystemError,
122+
"*avail_out != 0 in OutputBuffer_WindowGrow().");
123+
return -1;
124+
}
125+
126+
// slide the UINT32_MAX sliding window
127+
if (window->left_bytes > 0) {
128+
Py_ssize_t window_size = Py_MIN((size_t)window->left_bytes, UINT32_MAX);
129+
130+
*next_out = window->next_posi;
131+
*avail_out = (uint32_t) window_size;
132+
133+
window->left_bytes -= window_size;
134+
window->next_posi += window_size;
135+
136+
return window_size;
137+
}
138+
assert(window->left_bytes == 0);
139+
140+
// only the first block may > UINT32_MAX
141+
allocated = _BlocksOutputBuffer_Grow(
142+
buffer, (void**) next_out, (Py_ssize_t) *avail_out);
143+
*avail_out = (uint32_t) allocated;
144+
return allocated;
145+
}
146+
147+
/* Finish the buffer.
148+
149+
On success, return a bytes object
150+
On failure, return NULL */
151+
static inline PyObject *
152+
OutputBuffer_WindowFinish(_BlocksOutputBuffer *buffer, _Uint32Window *window,
153+
uint32_t avail_out)
154+
{
155+
Py_ssize_t real_avail_out = (Py_ssize_t) avail_out + window->left_bytes;
156+
return _BlocksOutputBuffer_Finish(buffer, real_avail_out);
157+
}
158+
159+
static inline void
160+
OutputBuffer_WindowOnError(_BlocksOutputBuffer *buffer, _Uint32Window *window)
161+
{
162+
_BlocksOutputBuffer_OnError(buffer);
163+
}
164+
87165

88166
#define ENTER_ZLIB(obj) do { \
89167
if (!PyThread_acquire_lock((obj)->lock, 0)) { \
@@ -344,6 +422,7 @@ zlib_decompress_impl(PyObject *module, Py_buffer *data, int wbits,
344422
int err, flush;
345423
z_stream zst;
346424
_BlocksOutputBuffer buffer = {.list = NULL};
425+
_Uint32Window window; // output buffer's UINT32_MAX sliding window
347426

348427
zlibstate *state = get_zlib_state(module);
349428

@@ -354,7 +433,8 @@ zlib_decompress_impl(PyObject *module, Py_buffer *data, int wbits,
354433
bufsize = 1;
355434
}
356435

357-
if (OutputBuffer_InitWithSize(&buffer, bufsize, &zst.next_out, &zst.avail_out) < 0) {
436+
if (OutputBuffer_WindowInitWithSize(&buffer, &window, bufsize,
437+
&zst.next_out, &zst.avail_out) < 0) {
358438
goto error;
359439
}
360440

@@ -387,7 +467,8 @@ zlib_decompress_impl(PyObject *module, Py_buffer *data, int wbits,
387467

388468
do {
389469
if (zst.avail_out == 0) {
390-
if (OutputBuffer_Grow(&buffer, &zst.next_out, &zst.avail_out) < 0) {
470+
if (OutputBuffer_WindowGrow(&buffer, &window,
471+
&zst.next_out, &zst.avail_out) < 0) {
391472
inflateEnd(&zst);
392473
goto error;
393474
}
@@ -430,13 +511,13 @@ zlib_decompress_impl(PyObject *module, Py_buffer *data, int wbits,
430511
goto error;
431512
}
432513

433-
RetVal = OutputBuffer_Finish(&buffer, zst.avail_out);
514+
RetVal = OutputBuffer_WindowFinish(&buffer, &window, zst.avail_out);
434515
if (RetVal != NULL) {
435516
return RetVal;
436517
}
437518

438519
error:
439-
OutputBuffer_OnError(&buffer);
520+
OutputBuffer_WindowOnError(&buffer, &window);
440521
return NULL;
441522
}
442523

@@ -1171,6 +1252,7 @@ zlib_Decompress_flush_impl(compobject *self, PyTypeObject *cls,
11711252
PyObject *RetVal;
11721253
Py_ssize_t ibuflen;
11731254
_BlocksOutputBuffer buffer = {.list = NULL};
1255+
_Uint32Window window; // output buffer's UINT32_MAX sliding window
11741256

11751257
PyObject *module = PyType_GetModule(cls);
11761258
if (module == NULL) {
@@ -1193,7 +1275,8 @@ zlib_Decompress_flush_impl(compobject *self, PyTypeObject *cls,
11931275
self->zst.next_in = data.buf;
11941276
ibuflen = data.len;
11951277

1196-
if (OutputBuffer_InitWithSize(&buffer, length, &self->zst.next_out, &self->zst.avail_out) < 0) {
1278+
if (OutputBuffer_WindowInitWithSize(&buffer, &window, length,
1279+
&self->zst.next_out, &self->zst.avail_out) < 0) {
11971280
goto abort;
11981281
}
11991282

@@ -1203,7 +1286,8 @@ zlib_Decompress_flush_impl(compobject *self, PyTypeObject *cls,
12031286

12041287
do {
12051288
if (self->zst.avail_out == 0) {
1206-
if (OutputBuffer_Grow(&buffer, &self->zst.next_out, &self->zst.avail_out) < 0) {
1289+
if (OutputBuffer_WindowGrow(&buffer, &window,
1290+
&self->zst.next_out, &self->zst.avail_out) < 0) {
12071291
goto abort;
12081292
}
12091293
}
@@ -1248,13 +1332,13 @@ zlib_Decompress_flush_impl(compobject *self, PyTypeObject *cls,
12481332
}
12491333
}
12501334

1251-
RetVal = OutputBuffer_Finish(&buffer, self->zst.avail_out);
1335+
RetVal = OutputBuffer_WindowFinish(&buffer, &window, self->zst.avail_out);
12521336
if (RetVal != NULL) {
12531337
goto success;
12541338
}
12551339

12561340
abort:
1257-
OutputBuffer_OnError(&buffer);
1341+
OutputBuffer_WindowOnError(&buffer, &window);
12581342
RetVal = NULL;
12591343
success:
12601344
PyBuffer_Release(&data);

0 commit comments

Comments
 (0)