Skip to content

Commit 7d95e40

Browse files
committed
Implement PEP 412: Key-sharing dictionaries (closes #13903)
Patch from Mark Shannon.
1 parent 80d07f8 commit 7d95e40

File tree

12 files changed

+1358
-909
lines changed

12 files changed

+1358
-909
lines changed

Include/dictobject.h

Lines changed: 18 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -13,78 +13,20 @@ extern "C" {
1313
tuning dictionaries, and several ideas for possible optimizations.
1414
*/
1515

16-
/*
17-
There are three kinds of slots in the table:
18-
19-
1. Unused. me_key == me_value == NULL
20-
Does not hold an active (key, value) pair now and never did. Unused can
21-
transition to Active upon key insertion. This is the only case in which
22-
me_key is NULL, and is each slot's initial state.
23-
24-
2. Active. me_key != NULL and me_key != dummy and me_value != NULL
25-
Holds an active (key, value) pair. Active can transition to Dummy upon
26-
key deletion. This is the only case in which me_value != NULL.
27-
28-
3. Dummy. me_key == dummy and me_value == NULL
29-
Previously held an active (key, value) pair, but that was deleted and an
30-
active pair has not yet overwritten the slot. Dummy can transition to
31-
Active upon key insertion. Dummy slots cannot be made Unused again
32-
(cannot have me_key set to NULL), else the probe sequence in case of
33-
collision would have no way to know they were once active.
34-
35-
Note: .popitem() abuses the me_hash field of an Unused or Dummy slot to
36-
hold a search finger. The me_hash field of Unused or Dummy slots has no
37-
meaning otherwise.
38-
*/
39-
40-
/* PyDict_MINSIZE is the minimum size of a dictionary. This many slots are
41-
* allocated directly in the dict object (in the ma_smalltable member).
42-
* It must be a power of 2, and at least 4. 8 allows dicts with no more
43-
* than 5 active entries to live in ma_smalltable (and so avoid an
44-
* additional malloc); instrumentation suggested this suffices for the
45-
* majority of dicts (consisting mostly of usually-small instance dicts and
46-
* usually-small dicts created to pass keyword arguments).
47-
*/
4816
#ifndef Py_LIMITED_API
49-
#define PyDict_MINSIZE 8
5017

18+
typedef struct _dictkeysobject PyDictKeysObject;
19+
20+
/* The ma_values pointer is NULL for a combined table
21+
* or points to an array of PyObject* for a split table
22+
*/
5123
typedef struct {
52-
/* Cached hash code of me_key. */
53-
Py_hash_t me_hash;
54-
PyObject *me_key;
55-
PyObject *me_value;
56-
} PyDictEntry;
57-
58-
/*
59-
To ensure the lookup algorithm terminates, there must be at least one Unused
60-
slot (NULL key) in the table.
61-
The value ma_fill is the number of non-NULL keys (sum of Active and Dummy);
62-
ma_used is the number of non-NULL, non-dummy keys (== the number of non-NULL
63-
values == the number of Active items).
64-
To avoid slowing down lookups on a near-full table, we resize the table when
65-
it's two-thirds full.
66-
*/
67-
typedef struct _dictobject PyDictObject;
68-
struct _dictobject {
6924
PyObject_HEAD
70-
Py_ssize_t ma_fill; /* # Active + # Dummy */
71-
Py_ssize_t ma_used; /* # Active */
72-
73-
/* The table contains ma_mask + 1 slots, and that's a power of 2.
74-
* We store the mask instead of the size because the mask is more
75-
* frequently needed.
76-
*/
77-
Py_ssize_t ma_mask;
78-
79-
/* ma_table points to ma_smalltable for small tables, else to
80-
* additional malloc'ed memory. ma_table is never NULL! This rule
81-
* saves repeated runtime null-tests in the workhorse getitem and
82-
* setitem calls.
83-
*/
84-
PyDictEntry *ma_table;
85-
PyDictEntry *(*ma_lookup)(PyDictObject *mp, PyObject *key, Py_hash_t hash);
86-
PyDictEntry ma_smalltable[PyDict_MINSIZE];
87-
};
25+
Py_ssize_t ma_used;
26+
PyDictKeysObject *ma_keys;
27+
PyObject **ma_values;
28+
} PyDictObject;
29+
8830
#endif /* Py_LIMITED_API */
8931

9032
PyAPI_DATA(PyTypeObject) PyDict_Type;
@@ -117,6 +59,8 @@ PyAPI_FUNC(void) PyDict_Clear(PyObject *mp);
11759
PyAPI_FUNC(int) PyDict_Next(
11860
PyObject *mp, Py_ssize_t *pos, PyObject **key, PyObject **value);
11961
#ifndef Py_LIMITED_API
62+
PyDictKeysObject *_PyDict_NewKeysForClass(void);
63+
PyAPI_FUNC(PyObject *) PyObject_GenericGetDict(PyObject *, void *);
12064
PyAPI_FUNC(int) _PyDict_Next(
12165
PyObject *mp, Py_ssize_t *pos, PyObject **key, PyObject **value, Py_hash_t *hash);
12266
#endif
@@ -131,6 +75,7 @@ PyAPI_FUNC(int) _PyDict_Contains(PyObject *mp, PyObject *key, Py_hash_t hash);
13175
PyAPI_FUNC(PyObject *) _PyDict_NewPresized(Py_ssize_t minused);
13276
PyAPI_FUNC(void) _PyDict_MaybeUntrack(PyObject *mp);
13377
PyAPI_FUNC(int) _PyDict_HasOnlyStringKeys(PyObject *mp);
78+
#define _PyDict_HasSplitTable(d) ((d)->ma_values != NULL)
13479

13580
PyAPI_FUNC(int) PyDict_ClearFreeList(void);
13681
#endif
@@ -162,6 +107,11 @@ PyAPI_FUNC(int) PyDict_SetItemString(PyObject *dp, const char *key, PyObject *it
162107
PyAPI_FUNC(int) _PyDict_SetItemId(PyObject *dp, struct _Py_Identifier *key, PyObject *item);
163108
PyAPI_FUNC(int) PyDict_DelItemString(PyObject *dp, const char *key);
164109

110+
#ifndef Py_LIMITED_API
111+
int _PyObjectDict_SetItem(PyTypeObject *tp, PyObject **dictptr, PyObject *name, PyObject *value);
112+
PyObject *_PyDict_LoadGlobal(PyDictObject *, PyDictObject *, PyObject *);
113+
#endif
114+
165115
#ifdef __cplusplus
166116
}
167117
#endif

Include/object.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -449,6 +449,7 @@ typedef struct _heaptypeobject {
449449
see add_operators() in typeobject.c . */
450450
PyBufferProcs as_buffer;
451451
PyObject *ht_name, *ht_slots, *ht_qualname;
452+
struct _dictkeysobject *ht_cached_keys;
452453
/* here are optional user slots, followed by the members. */
453454
} PyHeapTypeObject;
454455

@@ -517,7 +518,6 @@ PyAPI_FUNC(PyObject *) _PyObject_NextNotImplemented(PyObject *);
517518
PyAPI_FUNC(PyObject *) PyObject_GenericGetAttr(PyObject *, PyObject *);
518519
PyAPI_FUNC(int) PyObject_GenericSetAttr(PyObject *,
519520
PyObject *, PyObject *);
520-
PyAPI_FUNC(PyObject *) PyObject_GenericGetDict(PyObject *, void *);
521521
PyAPI_FUNC(int) PyObject_GenericSetDict(PyObject *, PyObject *, void *);
522522
PyAPI_FUNC(Py_hash_t) PyObject_Hash(PyObject *);
523523
PyAPI_FUNC(Py_hash_t) PyObject_HashNotImplemented(PyObject *);

Lib/test/test_dict.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -321,6 +321,27 @@ def __eq__(self, other):
321321
self.assertEqual(hashed2.hash_count, 1)
322322
self.assertEqual(hashed1.eq_count + hashed2.eq_count, 1)
323323

324+
def test_setitem_atomic_at_resize(self):
325+
class Hashed(object):
326+
def __init__(self):
327+
self.hash_count = 0
328+
self.eq_count = 0
329+
def __hash__(self):
330+
self.hash_count += 1
331+
return 42
332+
def __eq__(self, other):
333+
self.eq_count += 1
334+
return id(self) == id(other)
335+
hashed1 = Hashed()
336+
# 5 items
337+
y = {hashed1: 5, 0: 0, 1: 1, 2: 2, 3: 3}
338+
hashed2 = Hashed()
339+
# 6th item forces a resize
340+
y[hashed2] = []
341+
self.assertEqual(hashed1.hash_count, 1)
342+
self.assertEqual(hashed2.hash_count, 1)
343+
self.assertEqual(hashed1.eq_count + hashed2.eq_count, 1)
344+
324345
def test_popitem(self):
325346
# dict.popitem()
326347
for copymode in -1, +1:

Lib/test/test_pprint.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,8 @@ def test_subclassing(self):
219219
others.should.not.be: like.this}"""
220220
self.assertEqual(DottedPrettyPrinter().pformat(o), exp)
221221

222+
@unittest.expectedFailure
223+
#See http://bugs.python.org/issue13907
222224
@test.support.cpython_only
223225
def test_set_reprs(self):
224226
# This test creates a complex arrangement of frozensets and
@@ -241,10 +243,12 @@ def test_set_reprs(self):
241243
# Consequently, this test is fragile and
242244
# implementation-dependent. Small changes to Python's sort
243245
# algorithm cause the test to fail when it should pass.
246+
# XXX Or changes to the dictionary implementation...
244247

245248
self.assertEqual(pprint.pformat(set()), 'set()')
246249
self.assertEqual(pprint.pformat(set(range(3))), '{0, 1, 2}')
247250
self.assertEqual(pprint.pformat(frozenset()), 'frozenset()')
251+
248252
self.assertEqual(pprint.pformat(frozenset(range(3))), 'frozenset({0, 1, 2})')
249253
cube_repr_tgt = """\
250254
{frozenset(): frozenset({frozenset({2}), frozenset({0}), frozenset({1})}),

Lib/test/test_sys.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -687,9 +687,9 @@ def inner():
687687
# method-wrapper (descriptor object)
688688
check({}.__iter__, size(h + '2P'))
689689
# dict
690-
check({}, size(h + '3P2P' + 8*'P2P'))
690+
check({}, size(h + '3P' + '4P' + 8*'P2P'))
691691
longdict = {1:1, 2:2, 3:3, 4:4, 5:5, 6:6, 7:7, 8:8}
692-
check(longdict, size(h + '3P2P' + 8*'P2P') + 16*size('P2P'))
692+
check(longdict, size(h + '3P' + '4P') + 16*size('P2P'))
693693
# dictionary-keyiterator
694694
check({}.keys(), size(h + 'P'))
695695
# dictionary-valueiterator
@@ -831,7 +831,7 @@ def delx(self): del self.__x
831831
# type
832832
# (PyTypeObject + PyNumberMethods + PyMappingMethods +
833833
# PySequenceMethods + PyBufferProcs)
834-
s = size(vh + 'P2P15Pl4PP9PP11PI') + size('16Pi17P 3P 10P 2P 3P')
834+
s = size(vh + 'P2P15Pl4PP9PP11PIP') + size('16Pi17P 3P 10P 2P 3P')
835835
check(int, s)
836836
# class
837837
class newstyleclass(object): pass

Misc/NEWS

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,10 @@ What's New in Python 3.3.0 Alpha 3?
1010
Core and Builtins
1111
-----------------
1212

13+
- Issue #13903: Implement PEP 412. Individual dictionary instances can now share
14+
their keys with other dictionaries. Classes take advantage of this to share
15+
their instance dictionary keys for improved memory and performance.
16+
1317
- Issue #14630: Fix a memory access bug for instances of a subclass of int
1418
with value 0.
1519

0 commit comments

Comments
 (0)