Skip to content

Commit 6de373d

Browse files
[3.12] gh-120380: fix Python implementation of pickle.Pickler for bytes and bytearray objects in protocol version 5. (GH-120422) (GH-120833)
gh-120380: fix Python implementation of `pickle.Pickler` for `bytes` and `bytearray` objects in protocol version 5. (GH-120422)

(cherry picked from commit 7595e67)

Co-authored-by: Bénédikt Tran <[email protected]>
1 parent 41e1ac6 commit 6de373d

File tree

3 files changed

+81 -21 lines changed

Lib/pickle.py

Lines changed: 35 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -780,14 +780,10 @@ def save_float(self, obj):
780780
self.write(FLOAT + repr(obj).encode("ascii") + b'\n')
781781
dispatch[float] = save_float
782782

783-
def save_bytes(self, obj):
784-
if self.proto < 3:
785-
if not obj: # bytes object is empty
786-
self.save_reduce(bytes, (), obj=obj)
787-
else:
788-
self.save_reduce(codecs.encode,
789-
(str(obj, 'latin1'), 'latin1'), obj=obj)
790-
return
783+
def _save_bytes_no_memo(self, obj):
784+
# helper for writing bytes objects for protocol >= 3
785+
# without memoizing them
786+
assert self.proto >= 3
791787
n = len(obj)
792788
if n <= 0xff:
793789
self.write(SHORT_BINBYTES + pack("<B", n) + obj)
@@ -797,21 +793,37 @@ def save_bytes(self, obj):
797793
self._write_large_bytes(BINBYTES + pack("<I", n), obj)
798794
else:
799795
self.write(BINBYTES + pack("<I", n) + obj)
796+
797+
def save_bytes(self, obj):
798+
if self.proto < 3:
799+
if not obj: # bytes object is empty
800+
self.save_reduce(bytes, (), obj=obj)
801+
else:
802+
self.save_reduce(codecs.encode,
803+
(str(obj, 'latin1'), 'latin1'), obj=obj)
804+
return
805+
self._save_bytes_no_memo(obj)
800806
self.memoize(obj)
801807
dispatch[bytes] = save_bytes
802808

809+
def _save_bytearray_no_memo(self, obj):
810+
# helper for writing bytearray objects for protocol >= 5
811+
# without memoizing them
812+
assert self.proto >= 5
813+
n = len(obj)
814+
if n >= self.framer._FRAME_SIZE_TARGET:
815+
self._write_large_bytes(BYTEARRAY8 + pack("<Q", n), obj)
816+
else:
817+
self.write(BYTEARRAY8 + pack("<Q", n) + obj)
818+
803819
def save_bytearray(self, obj):
804820
if self.proto < 5:
805821
if not obj: # bytearray is empty
806822
self.save_reduce(bytearray, (), obj=obj)
807823
else:
808824
self.save_reduce(bytearray, (bytes(obj),), obj=obj)
809825
return
810-
n = len(obj)
811-
if n >= self.framer._FRAME_SIZE_TARGET:
812-
self._write_large_bytes(BYTEARRAY8 + pack("<Q", n), obj)
813-
else:
814-
self.write(BYTEARRAY8 + pack("<Q", n) + obj)
826+
self._save_bytearray_no_memo(obj)
815827
self.memoize(obj)
816828
dispatch[bytearray] = save_bytearray
817829

@@ -830,10 +842,18 @@ def save_picklebuffer(self, obj):
830842
if in_band:
831843
# Write data in-band
832844
# XXX The C implementation avoids a copy here
845+
buf = m.tobytes()
846+
in_memo = id(buf) in self.memo
833847
if m.readonly:
834-
self.save_bytes(m.tobytes())
848+
if in_memo:
849+
self._save_bytes_no_memo(buf)
850+
else:
851+
self.save_bytes(buf)
835852
else:
836-
self.save_bytearray(m.tobytes())
853+
if in_memo:
854+
self._save_bytearray_no_memo(buf)
855+
else:
856+
self.save_bytearray(buf)
837857
else:
838858
# Write data out-of-band
839859
self.write(NEXT_BUFFER)

Lib/test/pickletester.py

Lines changed: 43 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1845,6 +1845,25 @@ def test_bytes(self):
18451845
p = self.dumps(s, proto)
18461846
self.assert_is_copy(s, self.loads(p))
18471847

1848+
def test_bytes_memoization(self):
1849+
for proto in protocols:
1850+
for array_type in [bytes, ZeroCopyBytes]:
1851+
for s in b'', b'xyz', b'xyz'*100:
1852+
with self.subTest(proto=proto, array_type=array_type, s=s, independent=False):
1853+
b = array_type(s)
1854+
p = self.dumps((b, b), proto)
1855+
x, y = self.loads(p)
1856+
self.assertIs(x, y)
1857+
self.assert_is_copy((b, b), (x, y))
1858+
1859+
with self.subTest(proto=proto, array_type=array_type, s=s, independent=True):
1860+
b1, b2 = array_type(s), array_type(s)
1861+
p = self.dumps((b1, b2), proto)
1862+
# Note that (b1, b2) = self.loads(p) might have identical
1863+
# components, i.e., b1 is b2, but this is not always the
1864+
# case if the content is large (equality still holds).
1865+
self.assert_is_copy((b1, b2), self.loads(p))
1866+
18481867
def test_bytearray(self):
18491868
for proto in protocols:
18501869
for s in b'', b'xyz', b'xyz'*100:
@@ -1864,13 +1883,31 @@ def test_bytearray(self):
18641883
self.assertNotIn(b'bytearray', p)
18651884
self.assertTrue(opcode_in_pickle(pickle.BYTEARRAY8, p))
18661885

1867-
def test_bytearray_memoization_bug(self):
1886+
def test_bytearray_memoization(self):
18681887
for proto in protocols:
1869-
for s in b'', b'xyz', b'xyz'*100:
1870-
b = bytearray(s)
1871-
p = self.dumps((b, b), proto)
1872-
b1, b2 = self.loads(p)
1873-
self.assertIs(b1, b2)
1888+
for array_type in [bytearray, ZeroCopyBytearray]:
1889+
for s in b'', b'xyz', b'xyz'*100:
1890+
with self.subTest(proto=proto, array_type=array_type, s=s, independent=False):
1891+
b = array_type(s)
1892+
p = self.dumps((b, b), proto)
1893+
b1, b2 = self.loads(p)
1894+
self.assertIs(b1, b2)
1895+
1896+
with self.subTest(proto=proto, array_type=array_type, s=s, independent=True):
1897+
b1a, b2a = array_type(s), array_type(s)
1898+
# Unlike bytes, equal but independent bytearray objects are
1899+
# never identical.
1900+
self.assertIsNot(b1a, b2a)
1901+
1902+
p = self.dumps((b1a, b2a), proto)
1903+
b1b, b2b = self.loads(p)
1904+
self.assertIsNot(b1b, b2b)
1905+
1906+
self.assertIsNot(b1a, b1b)
1907+
self.assert_is_copy(b1a, b1b)
1908+
1909+
self.assertIsNot(b2a, b2b)
1910+
self.assert_is_copy(b2a, b2b)
18741911

18751912
def test_ints(self):
18761913
for proto in protocols:
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Fix Python implementation of :class:`pickle.Pickler` for :class:`bytes` and
2+
:class:`bytearray` objects when using protocol version 5. Patch by Bénédikt
3+
Tran.

0 commit comments

Comments
 (0)