Skip to content

Commit 93035c4

Browse files
committed
Issue #23119: Simplify setobject by inlining the special case for unicode equality testing.
1 parent bbd3aa8 commit 93035c4

File tree

3 files changed

+13
-73
lines changed

3 files changed

+13
-73
lines changed

Include/setobject.h

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -35,7 +35,7 @@ Invariants for frozensets:
3535
3636
*/
3737

38-
typedef struct _setobject {
38+
typedef struct {
3939
PyObject_HEAD
4040

4141
Py_ssize_t fill; /* Number active and dummy entries*/
@@ -53,7 +53,6 @@ typedef struct _setobject {
5353
* runtime null-tests.
5454
*/
5555
setentry *table;
56-
setentry *(*lookup)(struct _setobject *so, PyObject *key, Py_hash_t hash);
5756
Py_hash_t hash; /* Only used by frozenset objects */
5857
setentry smalltable[PySet_MINSIZE];
5958

Lib/test/test_sys.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -994,7 +994,7 @@ def delx(self): del self.__x
994994
# frozenset
995995
PySet_MINSIZE = 8
996996
samples = [[], range(10), range(50)]
997-
s = size('3n2P' + PySet_MINSIZE*'nP' + '2nP')
997+
s = size('3nP' + PySet_MINSIZE*'nP' + '2nP')
998998
for sample in samples:
999999
minused = len(sample)
10001000
if minused == 0: tmp = 1

Objects/setobject.c

Lines changed: 11 additions & 70 deletions
Original file line number | Diff line number | Diff line change
@@ -69,6 +69,10 @@ set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash)
6969
PyObject *startkey = entry->key;
7070
if (startkey == key)
7171
return entry;
72+
if (PyUnicode_CheckExact(startkey)
73+
&& PyUnicode_CheckExact(key)
74+
&& unicode_eq(startkey, key))
75+
return entry;
7276
Py_INCREF(startkey);
7377
cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
7478
Py_DECREF(startkey);
@@ -90,6 +94,10 @@ set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash)
9094
PyObject *startkey = entry->key;
9195
if (startkey == key)
9296
return entry;
97+
if (PyUnicode_CheckExact(startkey)
98+
&& PyUnicode_CheckExact(key)
99+
&& unicode_eq(startkey, key))
100+
return entry;
93101
Py_INCREF(startkey);
94102
cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
95103
Py_DECREF(startkey);
@@ -115,68 +123,6 @@ set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash)
115123
return freeslot == NULL ? entry : freeslot;
116124
}
117125

118-
/*
119-
* Hacked up version of set_lookkey which can assume keys are always unicode;
120-
* This means we can always use unicode_eq directly and not have to check to
121-
* see if the comparison altered the table.
122-
*/
123-
static setentry *
124-
set_lookkey_unicode(PySetObject *so, PyObject *key, Py_hash_t hash)
125-
{
126-
setentry *table = so->table;
127-
setentry *freeslot = NULL;
128-
setentry *entry;
129-
size_t perturb = hash;
130-
size_t mask = so->mask;
131-
size_t i = (size_t)hash;
132-
size_t j;
133-
134-
/* Make sure this function doesn't have to handle non-unicode keys,
135-
including subclasses of str; e.g., one reason to subclass
136-
strings is to override __eq__, and for speed we don't cater to
137-
that here. */
138-
if (!PyUnicode_CheckExact(key)) { /* unlikely */
139-
so->lookup = set_lookkey;
140-
return set_lookkey(so, key, hash);
141-
}
142-
143-
entry = &table[i & mask];
144-
if (entry->key == NULL)
145-
return entry;
146-
147-
while (1) {
148-
if (entry->hash == hash
149-
&& (entry->key == key
150-
|| (entry->key != dummy /* unlikely */
151-
&& unicode_eq(entry->key, key)))) /* likely */
152-
return entry;
153-
if (entry->key == dummy && freeslot == NULL)
154-
freeslot = entry;
155-
156-
for (j = 1 ; j <= LINEAR_PROBES ; j++) {
157-
entry = &table[(i + j) & mask];
158-
if (entry->key == NULL)
159-
goto found_null;
160-
if (entry->hash == hash
161-
&& (entry->key == key
162-
|| (entry->key != dummy /* unlikely */
163-
&& unicode_eq(entry->key, key)))) /* likely */
164-
return entry;
165-
if (entry->key == dummy && freeslot == NULL)
166-
freeslot = entry;
167-
}
168-
169-
perturb >>= PERTURB_SHIFT;
170-
i = i * 5 + 1 + perturb;
171-
172-
entry = &table[i & mask];
173-
if (entry->key == NULL)
174-
goto found_null;
175-
}
176-
found_null:
177-
return freeslot == NULL ? entry : freeslot;
178-
}
179-
180126
/*
181127
Internal routine used by set_table_resize() to insert an item which is
182128
known to be absent from the set. This routine also assumes that
@@ -225,8 +171,7 @@ set_insert_key(PySetObject *so, PyObject *key, Py_hash_t hash)
225171
{
226172
setentry *entry;
227173

228-
assert(so->lookup != NULL);
229-
entry = so->lookup(so, key, hash);
174+
entry = set_lookkey(so, key, hash);
230175
if (entry == NULL)
231176
return -1;
232177
if (entry->key == NULL) {
@@ -385,7 +330,7 @@ set_discard_entry(PySetObject *so, setentry *oldentry)
385330
setentry *entry;
386331
PyObject *old_key;
387332

388-
entry = (so->lookup)(so, oldentry->key, oldentry->hash);
333+
entry = set_lookkey(so, oldentry->key, oldentry->hash);
389334
if (entry == NULL)
390335
return -1;
391336
if (entry->key == NULL || entry->key == dummy)
@@ -631,7 +576,7 @@ set_contains_entry(PySetObject *so, setentry *entry)
631576
PyObject *key;
632577
setentry *lu_entry;
633578

634-
lu_entry = (so->lookup)(so, entry->key, entry->hash);
579+
lu_entry = set_lookkey(so, entry->key, entry->hash);
635580
if (lu_entry == NULL)
636581
return -1;
637582
key = lu_entry->key;
@@ -994,7 +939,6 @@ make_new_set(PyTypeObject *type, PyObject *iterable)
994939
so->used = 0;
995940
so->mask = PySet_MINSIZE - 1;
996941
so->table = so->smalltable;
997-
so->lookup = set_lookkey_unicode;
998942
so->hash = -1;
999943
so->finger = 0;
1000944
so->weakreflist = NULL;
@@ -1095,7 +1039,6 @@ set_swap_bodies(PySetObject *a, PySetObject *b)
10951039
{
10961040
Py_ssize_t t;
10971041
setentry *u;
1098-
setentry *(*f)(PySetObject *so, PyObject *key, Py_ssize_t hash);
10991042
setentry tab[PySet_MINSIZE];
11001043
Py_hash_t h;
11011044

@@ -1111,8 +1054,6 @@ set_swap_bodies(PySetObject *a, PySetObject *b)
11111054
a->table = a->smalltable;
11121055
b->table = u;
11131056

1114-
f = a->lookup; a->lookup = b->lookup; b->lookup = f;
1115-
11161057
if (a->table == a->smalltable || b->table == b->smalltable) {
11171058
memcpy(tab, a->smalltable, sizeof(tab));
11181059
memcpy(a->smalltable, b->smalltable, sizeof(tab));

0 commit comments

Comments
 (0)