Skip to content

Commit 03642df

Browse files
authored
dict: Internal cleanup (GH-31641)
* Change empty_keys from a split table to a combined table. * Use unicode_get_hash() where possible.
1 parent 20a1c8e commit 03642df

File tree

1 file changed

+44
-66
lines changed

1 file changed

+44
-66
lines changed

Objects/dictobject.c

Lines changed: 44 additions & 66 deletions
Original file line number | Diff line number | Diff line change
@@ -454,24 +454,14 @@ static PyDictKeysObject empty_keys_struct = {
454454
1, /* dk_refcnt */
455455
0, /* dk_log2_size */
456456
0, /* dk_log2_index_bytes */
457-
DICT_KEYS_SPLIT, /* dk_kind */
457+
DICT_KEYS_UNICODE, /* dk_kind */
458458
1, /* dk_version */
459459
0, /* dk_usable (immutable) */
460460
0, /* dk_nentries */
461461
{DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY,
462462
DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY}, /* dk_indices */
463463
};
464464

465-
466-
struct {
467-
uint8_t prefix[sizeof(PyObject *)];
468-
PyDictValues values;
469-
} empty_values_struct = {
470-
{ [sizeof(PyObject *)-1] = sizeof(PyObject *) },
471-
{{NULL}}
472-
};
473-
#define empty_values (&empty_values_struct.values)
474-
475465
#define Py_EMPTY_KEYS &empty_keys_struct
476466

477467
/* Uncomment to check the dict content in _PyDict_CheckConsistency() */
@@ -495,7 +485,6 @@ get_index_from_order(PyDictObject *mp, Py_ssize_t i)
495485
static void
496486
dump_entries(PyDictKeysObject *dk)
497487
{
498-
int kind = dk->dk_kind;
499488
for (Py_ssize_t i = 0; i < dk->dk_nentries; i++) {
500489
if (DK_IS_UNICODE(dk)) {
501490
PyDictUnicodeEntry *ep = &DK_UNICODE_ENTRIES(dk)[i];
@@ -531,7 +520,7 @@ _PyDict_CheckConsistency(PyObject *op, int check_content)
531520
if (!splitted) {
532521
/* combined table */
533522
CHECK(keys->dk_kind != DICT_KEYS_SPLIT);
534-
CHECK(keys->dk_refcnt == 1);
523+
CHECK(keys->dk_refcnt == 1 || keys == Py_EMPTY_KEYS);
535524
}
536525
else {
537526
CHECK(keys->dk_kind == DICT_KEYS_SPLIT);
@@ -688,7 +677,8 @@ free_keys_object(PyDictKeysObject *keys)
688677
// free_keys_object() must not be called after _PyDict_Fini()
689678
assert(state->keys_numfree != -1);
690679
#endif
691-
if (DK_LOG_SIZE(keys) == PyDict_LOG_MINSIZE && state->keys_numfree < PyDict_MAXFREELIST
680+
if (DK_LOG_SIZE(keys) == PyDict_LOG_MINSIZE
681+
&& state->keys_numfree < PyDict_MAXFREELIST
692682
&& DK_IS_UNICODE(keys)) {
693683
state->keys_free_list[state->keys_numfree++] = keys;
694684
return;
@@ -845,7 +835,7 @@ PyObject *
845835
PyDict_New(void)
846836
{
847837
dictkeys_incref(Py_EMPTY_KEYS);
848-
return new_dict(Py_EMPTY_KEYS, empty_values, 0, 0);
838+
return new_dict(Py_EMPTY_KEYS, NULL, 0, 0);
849839
}
850840

851841
/* Search index of hash table from offset of entry table */
@@ -1478,9 +1468,7 @@ dictresize(PyDictObject *mp, uint8_t log2_newsize, int unicode)
14781468
}
14791469
dictkeys_decref(oldkeys);
14801470
mp->ma_values = NULL;
1481-
if (oldvalues != empty_values) {
1482-
free_values(oldvalues);
1483-
}
1471+
free_values(oldvalues);
14841472
}
14851473
else { // oldkeys is combined.
14861474
if (oldkeys->dk_kind == DICT_KEYS_GENERAL) {
@@ -1506,7 +1494,7 @@ dictresize(PyDictObject *mp, uint8_t log2_newsize, int unicode)
15061494
if (unicode) { // combined unicode -> combined unicode
15071495
PyDictUnicodeEntry *newentries = DK_UNICODE_ENTRIES(mp->ma_keys);
15081496
if (oldkeys->dk_nentries == numentries && mp->ma_keys->dk_kind == DICT_KEYS_UNICODE) {
1509-
memcpy(newentries, oldentries, numentries * sizeof(PyDictUnicodeEntry));
1497+
memcpy(newentries, oldentries, numentries * sizeof(PyDictUnicodeEntry));
15101498
}
15111499
else {
15121500
PyDictUnicodeEntry *ep = oldentries;
@@ -1533,27 +1521,31 @@ dictresize(PyDictObject *mp, uint8_t log2_newsize, int unicode)
15331521
}
15341522
}
15351523

1536-
assert(oldkeys->dk_kind != DICT_KEYS_SPLIT);
1537-
assert(oldkeys->dk_refcnt == 1);
1524+
// We cannot use free_keys_object() here because the keys' references
1525+
// have already been moved.
1526+
if (oldkeys != Py_EMPTY_KEYS) {
1527+
assert(oldkeys->dk_kind != DICT_KEYS_SPLIT);
1528+
assert(oldkeys->dk_refcnt == 1);
15381529
#ifdef Py_REF_DEBUG
1539-
_Py_RefTotal--;
1530+
_Py_RefTotal--;
15401531
#endif
15411532
#if PyDict_MAXFREELIST > 0
1542-
struct _Py_dict_state *state = get_dict_state();
1533+
struct _Py_dict_state *state = get_dict_state();
15431534
#ifdef Py_DEBUG
1544-
// dictresize() must not be called after _PyDict_Fini()
1545-
assert(state->keys_numfree != -1);
1535+
// dictresize() must not be called after _PyDict_Fini()
1536+
assert(state->keys_numfree != -1);
15461537
#endif
1547-
if (DK_LOG_SIZE(oldkeys) == PyDict_LOG_MINSIZE &&
1548-
DK_IS_UNICODE(oldkeys) &&
1549-
state->keys_numfree < PyDict_MAXFREELIST)
1550-
{
1551-
state->keys_free_list[state->keys_numfree++] = oldkeys;
1552-
}
1553-
else
1538+
if (DK_LOG_SIZE(oldkeys) == PyDict_LOG_MINSIZE &&
1539+
DK_IS_UNICODE(oldkeys) &&
1540+
state->keys_numfree < PyDict_MAXFREELIST)
1541+
{
1542+
state->keys_free_list[state->keys_numfree++] = oldkeys;
1543+
}
1544+
else
15541545
#endif
1555-
{
1556-
PyObject_Free(oldkeys);
1546+
{
1547+
PyObject_Free(oldkeys);
1548+
}
15571549
}
15581550
}
15591551

@@ -1844,9 +1836,7 @@ _PyDict_LoadGlobal(PyDictObject *globals, PyDictObject *builtins, PyObject *key)
18441836
Py_hash_t hash;
18451837
PyObject *value;
18461838

1847-
if (!PyUnicode_CheckExact(key) ||
1848-
(hash = ((PyASCIIObject *) key)->hash) == -1)
1849-
{
1839+
if (!PyUnicode_CheckExact(key) || (hash = unicode_get_hash(key)) == -1) {
18501840
hash = PyObject_Hash(key);
18511841
if (hash == -1)
18521842
return NULL;
@@ -1873,9 +1863,7 @@ _PyDict_SetItem_Take2(PyDictObject *mp, PyObject *key, PyObject *value)
18731863
assert(value);
18741864
assert(PyDict_Check(mp));
18751865
Py_hash_t hash;
1876-
if (!PyUnicode_CheckExact(key) ||
1877-
(hash = ((PyASCIIObject *) key)->hash) == -1)
1878-
{
1866+
if (!PyUnicode_CheckExact(key) || (hash = unicode_get_hash(key)) == -1) {
18791867
hash = PyObject_Hash(key);
18801868
if (hash == -1) {
18811869
Py_DECREF(key);
@@ -1998,8 +1986,7 @@ PyDict_DelItem(PyObject *op, PyObject *key)
19981986
{
19991987
Py_hash_t hash;
20001988
assert(key);
2001-
if (!PyUnicode_CheckExact(key) ||
2002-
(hash = ((PyASCIIObject *) key)->hash) == -1) {
1989+
if (!PyUnicode_CheckExact(key) || (hash = unicode_get_hash(key)) == -1) {
20031990
hash = PyObject_Hash(key);
20041991
if (hash == -1)
20051992
return -1;
@@ -2091,12 +2078,13 @@ PyDict_Clear(PyObject *op)
20912078
mp = ((PyDictObject *)op);
20922079
oldkeys = mp->ma_keys;
20932080
oldvalues = mp->ma_values;
2094-
if (oldvalues == empty_values)
2081+
if (oldkeys == Py_EMPTY_KEYS) {
20952082
return;
2083+
}
20962084
/* Empty the dict... */
20972085
dictkeys_incref(Py_EMPTY_KEYS);
20982086
mp->ma_keys = Py_EMPTY_KEYS;
2099-
mp->ma_values = empty_values;
2087+
mp->ma_values = NULL;
21002088
mp->ma_used = 0;
21012089
mp->ma_version_tag = DICT_NEXT_VERSION();
21022090
/* ...then clear the keys and values */
@@ -2257,8 +2245,7 @@ _PyDict_Pop(PyObject *dict, PyObject *key, PyObject *deflt)
22572245
_PyErr_SetKeyError(key);
22582246
return NULL;
22592247
}
2260-
if (!PyUnicode_CheckExact(key) ||
2261-
(hash = ((PyASCIIObject *) key)->hash) == -1) {
2248+
if (!PyUnicode_CheckExact(key) || (hash = unicode_get_hash(key)) == -1) {
22622249
hash = PyObject_Hash(key);
22632250
if (hash == -1)
22642251
return NULL;
@@ -2372,16 +2359,14 @@ dict_dealloc(PyDictObject *mp)
23722359
PyObject_GC_UnTrack(mp);
23732360
Py_TRASHCAN_BEGIN(mp, dict_dealloc)
23742361
if (values != NULL) {
2375-
if (values != empty_values) {
2376-
for (i = 0, n = mp->ma_keys->dk_nentries; i < n; i++) {
2377-
Py_XDECREF(values->values[i]);
2378-
}
2379-
free_values(values);
2362+
for (i = 0, n = mp->ma_keys->dk_nentries; i < n; i++) {
2363+
Py_XDECREF(values->values[i]);
23802364
}
2365+
free_values(values);
23812366
dictkeys_decref(keys);
23822367
}
23832368
else if (keys != NULL) {
2384-
assert(keys->dk_refcnt == 1);
2369+
assert(keys->dk_refcnt == 1 || keys == Py_EMPTY_KEYS);
23852370
dictkeys_decref(keys);
23862371
}
23872372
#if PyDict_MAXFREELIST > 0
@@ -2498,8 +2483,7 @@ dict_subscript(PyDictObject *mp, PyObject *key)
24982483
Py_hash_t hash;
24992484
PyObject *value;
25002485

2501-
if (!PyUnicode_CheckExact(key) ||
2502-
(hash = ((PyASCIIObject *) key)->hash) == -1) {
2486+
if (!PyUnicode_CheckExact(key) || (hash = unicode_get_hash(key)) == -1) {
25032487
hash = PyObject_Hash(key);
25042488
if (hash == -1)
25052489
return NULL;
@@ -2862,9 +2846,7 @@ dict_merge(PyObject *a, PyObject *b, int override)
28622846
dictkeys_decref(mp->ma_keys);
28632847
mp->ma_keys = keys;
28642848
if (mp->ma_values != NULL) {
2865-
if (mp->ma_values != empty_values) {
2866-
free_values(mp->ma_values);
2867-
}
2849+
free_values(mp->ma_values);
28682850
mp->ma_values = NULL;
28692851
}
28702852

@@ -3257,8 +3239,7 @@ dict___contains__(PyDictObject *self, PyObject *key)
32573239
Py_ssize_t ix;
32583240
PyObject *value;
32593241

3260-
if (!PyUnicode_CheckExact(key) ||
3261-
(hash = ((PyASCIIObject *) key)->hash) == -1) {
3242+
if (!PyUnicode_CheckExact(key) || (hash = unicode_get_hash(key)) == -1) {
32623243
hash = PyObject_Hash(key);
32633244
if (hash == -1)
32643245
return NULL;
@@ -3289,8 +3270,7 @@ dict_get_impl(PyDictObject *self, PyObject *key, PyObject *default_value)
32893270
Py_hash_t hash;
32903271
Py_ssize_t ix;
32913272

3292-
if (!PyUnicode_CheckExact(key) ||
3293-
(hash = ((PyASCIIObject *) key)->hash) == -1) {
3273+
if (!PyUnicode_CheckExact(key) || (hash = unicode_get_hash(key)) == -1) {
32943274
hash = PyObject_Hash(key);
32953275
if (hash == -1)
32963276
return NULL;
@@ -3317,8 +3297,7 @@ PyDict_SetDefault(PyObject *d, PyObject *key, PyObject *defaultobj)
33173297
return NULL;
33183298
}
33193299

3320-
if (!PyUnicode_CheckExact(key) ||
3321-
(hash = ((PyASCIIObject *) key)->hash) == -1) {
3300+
if (!PyUnicode_CheckExact(key) || (hash = unicode_get_hash(key)) == -1) {
33223301
hash = PyObject_Hash(key);
33233302
if (hash == -1)
33243303
return NULL;
@@ -3707,8 +3686,7 @@ PyDict_Contains(PyObject *op, PyObject *key)
37073686
PyDictObject *mp = (PyDictObject *)op;
37083687
PyObject *value;
37093688

3710-
if (!PyUnicode_CheckExact(key) ||
3711-
(hash = ((PyASCIIObject *) key)->hash) == -1) {
3689+
if (!PyUnicode_CheckExact(key) || (hash = unicode_get_hash(key)) == -1) {
37123690
hash = PyObject_Hash(key);
37133691
if (hash == -1)
37143692
return -1;
@@ -3780,7 +3758,7 @@ dict_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
37803758
d->ma_version_tag = DICT_NEXT_VERSION();
37813759
dictkeys_incref(Py_EMPTY_KEYS);
37823760
d->ma_keys = Py_EMPTY_KEYS;
3783-
d->ma_values = empty_values;
3761+
d->ma_values = NULL;
37843762
ASSERT_CONSISTENT(d);
37853763

37863764
if (type != &PyDict_Type) {

0 commit comments

Comments
 (0)