Skip to content

Commit e61ca22

Browse files
authored
gh-95865: Further reduce quote_from_bytes memory consumption on large input values (#96860)
Based on Dennis Sweeney's chunking idea.
1 parent 04f4977 commit e61ca22

File tree

3 files changed: 16 additions (+16) and 1 deletion (-1).

Lib/test/test_urlparse.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -985,6 +985,10 @@ def test_quote_from_bytes(self):
985985
self.assertEqual(result, 'archaeological%20arcana')
986986
result = urllib.parse.quote_from_bytes(b'')
987987
self.assertEqual(result, '')
988+
result = urllib.parse.quote_from_bytes(b'A'*10_000)
989+
self.assertEqual(result, 'A'*10_000)
990+
result = urllib.parse.quote_from_bytes(b'z\x01/ '*253_183)
991+
self.assertEqual(result, 'z%01/%20'*253_183)
988992

989993
def test_unquote_to_bytes(self):
990994
result = urllib.parse.unquote_to_bytes('abc%20def')

Lib/urllib/parse.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929

3030
from collections import namedtuple
3131
import functools
32+
import math
3233
import re
3334
import types
3435
import warnings
@@ -906,7 +907,14 @@ def quote_from_bytes(bs, safe='/'):
906907
if not bs.rstrip(_ALWAYS_SAFE_BYTES + safe):
907908
return bs.decode()
908909
quoter = _byte_quoter_factory(safe)
909-
return ''.join(map(quoter, bs))
910+
if (bs_len := len(bs)) < 200_000:
911+
return ''.join(map(quoter, bs))
912+
else:
913+
# This saves memory - https://github.com/python/cpython/issues/95865
914+
chunk_size = math.isqrt(bs_len)
915+
chunks = [''.join(map(quoter, bs[i:i+chunk_size]))
916+
for i in range(0, bs_len, chunk_size)]
917+
return ''.join(chunks)
910918

911919
def urlencode(query, doseq=False, safe='', encoding=None, errors=None,
912920
quote_via=quote_plus):
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Reduce :func:`urllib.parse.quote_from_bytes` memory use on large values.
2+
3+
Contributed by Dennis Sweeney.

0 commit comments

Comments
 (0)