@@ -2037,6 +2037,7 @@ def test_setitems_on_non_dicts(self):
2037
2037
2038
2038
# Exercise framing (proto >= 4) for significant workloads
2039
2039
2040
+ FRAME_SIZE_MIN = 4
2040
2041
FRAME_SIZE_TARGET = 64 * 1024
2041
2042
2042
2043
def check_frame_opcodes (self , pickled ):
@@ -2047,36 +2048,43 @@ def check_frame_opcodes(self, pickled):
2047
2048
framed by default and are therefore considered a frame by themselves in
2048
2049
the following consistency check.
2049
2050
"""
2050
- last_arg = last_pos = last_frame_opcode_size = None
2051
- frameless_opcode_sizes = {
2052
- 'BINBYTES' : 5 ,
2053
- 'BINUNICODE' : 5 ,
2054
- 'BINBYTES8' : 9 ,
2055
- 'BINUNICODE8' : 9 ,
2056
- }
2051
+ frame_end = frameless_start = None
2052
+ frameless_opcodes = {'BINBYTES' , 'BINUNICODE' , 'BINBYTES8' , 'BINUNICODE8' }
2057
2053
for op , arg , pos in pickletools .genops (pickled ):
2058
- if op .name in frameless_opcode_sizes :
2059
- if len (arg ) > self .FRAME_SIZE_TARGET :
2060
- frame_opcode_size = frameless_opcode_sizes [op .name ]
2061
- arg = len (arg )
2062
- else :
2063
- continue
2064
- elif op .name == 'FRAME' :
2065
- frame_opcode_size = 9
2066
- else :
2067
- continue
2068
-
2069
- if last_pos is not None :
2070
- # The previous frame's size should be equal to the number
2071
- # of bytes up to the current frame.
2072
- frame_size = pos - last_pos - last_frame_opcode_size
2073
- self .assertEqual (frame_size , last_arg )
2074
- last_arg , last_pos = arg , pos
2075
- last_frame_opcode_size = frame_opcode_size
2076
- # The last frame's size should be equal to the number of bytes up
2077
- # to the pickle's end.
2078
- frame_size = len (pickled ) - last_pos - last_frame_opcode_size
2079
- self .assertEqual (frame_size , last_arg )
2054
+ if frame_end is not None :
2055
+ self .assertLessEqual (pos , frame_end )
2056
+ if pos == frame_end :
2057
+ frame_end = None
2058
+
2059
+ if frame_end is not None : # framed
2060
+ self .assertNotEqual (op .name , 'FRAME' )
2061
+ if op .name in frameless_opcodes :
2062
+ # Only short bytes and str objects should be written
2063
+ # in a frame
2064
+ self .assertLessEqual (len (arg ), self .FRAME_SIZE_TARGET )
2065
+
2066
+ else : # not framed
2067
+ if (op .name == 'FRAME' or
2068
+ (op .name in frameless_opcodes and
2069
+ len (arg ) > self .FRAME_SIZE_TARGET )):
2070
+ # Frame or large bytes or str object
2071
+ if frameless_start is not None :
2072
+ # Only short data should be written outside of a frame
2073
+ self .assertLess (pos - frameless_start ,
2074
+ self .FRAME_SIZE_MIN )
2075
+ frameless_start = None
2076
+ elif frameless_start is None and op .name != 'PROTO' :
2077
+ frameless_start = pos
2078
+
2079
+ if op .name == 'FRAME' :
2080
+ self .assertGreaterEqual (arg , self .FRAME_SIZE_MIN )
2081
+ frame_end = pos + 9 + arg
2082
+
2083
+ pos = len (pickled )
2084
+ if frame_end is not None :
2085
+ self .assertEqual (frame_end , pos )
2086
+ elif frameless_start is not None :
2087
+ self .assertLess (pos - frameless_start , self .FRAME_SIZE_MIN )
2080
2088
2081
2089
def test_framing_many_objects (self ):
2082
2090
obj = list (range (10 ** 5 ))
@@ -2095,7 +2103,8 @@ def test_framing_many_objects(self):
2095
2103
2096
2104
def test_framing_large_objects (self ):
2097
2105
N = 1024 * 1024
2098
- obj = [b'x' * N , b'y' * N , 'z' * N ]
2106
+ small_items = [[i ] for i in range (10 )]
2107
+ obj = [b'x' * N , * small_items , b'y' * N , 'z' * N ]
2099
2108
for proto in range (4 , pickle .HIGHEST_PROTOCOL + 1 ):
2100
2109
for fast in [False , True ]:
2101
2110
with self .subTest (proto = proto , fast = fast ):
@@ -2119,12 +2128,9 @@ def test_framing_large_objects(self):
2119
2128
# Perform full equality check if the lengths match.
2120
2129
self .assertEqual (obj , unpickled )
2121
2130
n_frames = count_opcode (pickle .FRAME , pickled )
2122
- if not fast :
2123
- # One frame per memoize for each large object.
2124
- self .assertGreaterEqual (n_frames , len (obj ))
2125
- else :
2126
- # One frame at the beginning and one at the end.
2127
- self .assertGreaterEqual (n_frames , 2 )
2131
+ # A single frame for small objects between
2132
+ # first two large objects.
2133
+ self .assertEqual (n_frames , 1 )
2128
2134
self .check_frame_opcodes (pickled )
2129
2135
2130
2136
def test_optional_frames (self ):
@@ -2152,7 +2158,9 @@ def remove_frames(pickled, keep_frame=None):
2152
2158
2153
2159
frame_size = self .FRAME_SIZE_TARGET
2154
2160
num_frames = 20
2155
- obj = [bytes ([i ]) * frame_size for i in range (num_frames )]
2161
+ # Large byte objects (dict values) interleaved with small objects
2162
+ # (dict keys)
2163
+ obj = {i : bytes ([i ]) * frame_size for i in range (num_frames )}
2156
2164
2157
2165
for proto in range (4 , pickle .HIGHEST_PROTOCOL + 1 ):
2158
2166
pickled = self .dumps (obj , proto )
0 commit comments