Skip to content

Commit c8f1715

Browse files
tkren and pitrou authored
bpo-38891: avoid quadratic item access performance of ShareableList (GH-18996)
Avoid linear runtime of ShareableList.__getitem__ and ShareableList.__setitem__ by storing running allocated bytes in ShareableList._allocated_bytes instead of the number of bytes for a particular stored item. Co-authored-by: Antoine Pitrou <[email protected]>
1 parent 1ac6e37 commit c8f1715

File tree

2 files changed

+48
-30
lines changed

2 files changed

+48
-30
lines changed

Lib/multiprocessing/shared_memory.py

Lines changed: 45 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -252,6 +252,15 @@ class ShareableList:
252252
packing format for any storable value must require no more than 8
253253
characters to describe its format."""
254254

255+
# The shared memory area is organized as follows:
256+
# - 8 bytes: number of items (N) as a 64-bit integer
257+
# - (N + 1) * 8 bytes: offsets of each element from the start of the
258+
# data area
259+
# - K bytes: the data area storing item values (with encoding and size
260+
# depending on their respective types)
261+
# - N * 8 bytes: `struct` format string for each element
262+
# - N bytes: index into _back_transforms_mapping for each element
263+
# (for reconstructing the corresponding Python value)
255264
_types_mapping = {
256265
int: "q",
257266
float: "d",
@@ -283,7 +292,8 @@ def _extract_recreation_code(value):
283292
return 3 # NoneType
284293

285294
def __init__(self, sequence=None, *, name=None):
286-
if sequence is not None:
295+
if name is None or sequence is not None:
296+
sequence = sequence or ()
287297
_formats = [
288298
self._types_mapping[type(item)]
289299
if not isinstance(item, (str, bytes))
@@ -294,10 +304,14 @@ def __init__(self, sequence=None, *, name=None):
294304
]
295305
self._list_len = len(_formats)
296306
assert sum(len(fmt) <= 8 for fmt in _formats) == self._list_len
297-
self._allocated_bytes = tuple(
298-
self._alignment if fmt[-1] != "s" else int(fmt[:-1])
299-
for fmt in _formats
300-
)
307+
offset = 0
308+
# The offsets of each list element into the shared memory's
309+
# data area (0 meaning the start of the data area, not the start
310+
# of the shared memory area).
311+
self._allocated_offsets = [0]
312+
for fmt in _formats:
313+
offset += self._alignment if fmt[-1] != "s" else int(fmt[:-1])
314+
self._allocated_offsets.append(offset)
301315
_recreation_codes = [
302316
self._extract_recreation_code(item) for item in sequence
303317
]
@@ -308,13 +322,9 @@ def __init__(self, sequence=None, *, name=None):
308322
self._format_back_transform_codes
309323
)
310324

325+
self.shm = SharedMemory(name, create=True, size=requested_size)
311326
else:
312-
requested_size = 8 # Some platforms require > 0.
313-
314-
if name is not None and sequence is None:
315327
self.shm = SharedMemory(name)
316-
else:
317-
self.shm = SharedMemory(name, create=True, size=requested_size)
318328

319329
if sequence is not None:
320330
_enc = _encoding
@@ -323,7 +333,7 @@ def __init__(self, sequence=None, *, name=None):
323333
self.shm.buf,
324334
0,
325335
self._list_len,
326-
*(self._allocated_bytes)
336+
*(self._allocated_offsets)
327337
)
328338
struct.pack_into(
329339
"".join(_formats),
@@ -346,10 +356,12 @@ def __init__(self, sequence=None, *, name=None):
346356

347357
else:
348358
self._list_len = len(self) # Obtains size from offset 0 in buffer.
349-
self._allocated_bytes = struct.unpack_from(
350-
self._format_size_metainfo,
351-
self.shm.buf,
352-
1 * 8
359+
self._allocated_offsets = list(
360+
struct.unpack_from(
361+
self._format_size_metainfo,
362+
self.shm.buf,
363+
1 * 8
364+
)
353365
)
354366

355367
def _get_packing_format(self, position):
@@ -371,7 +383,6 @@ def _get_packing_format(self, position):
371383
def _get_back_transform(self, position):
372384
"Gets the back transformation function for a single value."
373385

374-
position = position if position >= 0 else position + self._list_len
375386
if (position >= self._list_len) or (self._list_len < 0):
376387
raise IndexError("Requested position out of range.")
377388

@@ -388,7 +399,6 @@ def _set_packing_format_and_transform(self, position, fmt_as_str, value):
388399
"""Sets the packing format and back transformation code for a
389400
single value in the list at the specified position."""
390401

391-
position = position if position >= 0 else position + self._list_len
392402
if (position >= self._list_len) or (self._list_len < 0):
393403
raise IndexError("Requested position out of range.")
394404

@@ -408,9 +418,9 @@ def _set_packing_format_and_transform(self, position, fmt_as_str, value):
408418
)
409419

410420
def __getitem__(self, position):
421+
position = position if position >= 0 else position + self._list_len
411422
try:
412-
offset = self._offset_data_start \
413-
+ sum(self._allocated_bytes[:position])
423+
offset = self._offset_data_start + self._allocated_offsets[position]
414424
(v,) = struct.unpack_from(
415425
self._get_packing_format(position),
416426
self.shm.buf,
@@ -425,23 +435,26 @@ def __getitem__(self, position):
425435
return v
426436

427437
def __setitem__(self, position, value):
438+
position = position if position >= 0 else position + self._list_len
428439
try:
429-
offset = self._offset_data_start \
430-
+ sum(self._allocated_bytes[:position])
440+
item_offset = self._allocated_offsets[position]
441+
offset = self._offset_data_start + item_offset
431442
current_format = self._get_packing_format(position)
432443
except IndexError:
433444
raise IndexError("assignment index out of range")
434445

435446
if not isinstance(value, (str, bytes)):
436447
new_format = self._types_mapping[type(value)]
437448
else:
438-
if len(value) > self._allocated_bytes[position]:
449+
allocated_length = self._allocated_offsets[position + 1] - item_offset
450+
451+
if len(value) > allocated_length:
439452
raise ValueError("exceeds available storage for existing str")
440453
if current_format[-1] == "s":
441454
new_format = current_format
442455
else:
443456
new_format = self._types_mapping[str] % (
444-
self._allocated_bytes[position],
457+
allocated_length,
445458
)
446459

447460
self._set_packing_format_and_transform(
@@ -463,33 +476,35 @@ def __repr__(self):
463476

464477
@property
465478
def format(self):
466-
"The struct packing format used by all currently stored values."
479+
"The struct packing format used by all currently stored items."
467480
return "".join(
468481
self._get_packing_format(i) for i in range(self._list_len)
469482
)
470483

471484
@property
472485
def _format_size_metainfo(self):
473-
"The struct packing format used for metainfo on storage sizes."
474-
return f"{self._list_len}q"
486+
"The struct packing format used for the items' storage offsets."
487+
return "q" * (self._list_len + 1)
475488

476489
@property
477490
def _format_packing_metainfo(self):
478-
"The struct packing format used for the values' packing formats."
491+
"The struct packing format used for the items' packing formats."
479492
return "8s" * self._list_len
480493

481494
@property
482495
def _format_back_transform_codes(self):
483-
"The struct packing format used for the values' back transforms."
496+
"The struct packing format used for the items' back transforms."
484497
return "b" * self._list_len
485498

486499
@property
487500
def _offset_data_start(self):
488-
return (self._list_len + 1) * 8 # 8 bytes per "q"
501+
# - 8 bytes for the list length
502+
# - (N + 1) * 8 bytes for the element offsets
503+
return (self._list_len + 2) * 8
489504

490505
@property
491506
def _offset_packing_formats(self):
492-
return self._offset_data_start + sum(self._allocated_bytes)
507+
return self._offset_data_start + self._allocated_offsets[-1]
493508

494509
@property
495510
def _offset_back_transform_codes(self):
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Fix linear runtime behaviour of the `__getitem__` and `__setitem__` methods in
2+
:class:`multiprocessing.shared_memory.ShareableList`. This avoids quadratic
3+
performance when iterating a `ShareableList`. Patch by Thomas Krennwallner.

0 commit comments

Comments
 (0)