Skip to content

gh-121795: Improve performance of set membership testing from set arguments #121796

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Jul 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions Lib/test/test_set.py
Original file line number Diff line number Diff line change
Expand Up @@ -635,6 +635,16 @@ def __le__(self, some_set):
myset >= myobj
self.assertTrue(myobj.le_called)

# A mutable set used as the probe must match the equal frozenset stored in
# myset, for the `in` operator as well as for discard() and remove().
def test_set_membership(self):
myfrozenset = frozenset(range(3))
myset = {myfrozenset, "abc", 1}
# set(range(3)) equals the stored frozenset; set(range(1)) matches nothing.
self.assertIn(set(range(3)), myset)
self.assertNotIn(set(range(1)), myset)
# discard() with an equal set removes the frozenset element.
myset.discard(set(range(3)))
self.assertEqual(myset, {"abc", 1})
# remove() raises KeyError for absent keys, including the one just discarded.
self.assertRaises(KeyError, myset.remove, set(range(1)))
self.assertRaises(KeyError, myset.remove, set(range(3)))


class SetSubclass(set):
pass
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Improve performance of set membership testing, ``set.remove()`` and ``set.discard()`` when the argument is a set.
59 changes: 36 additions & 23 deletions Objects/setobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -709,18 +709,20 @@ _shuffle_bits(Py_uhash_t h)
large primes with "interesting bit patterns" and that passed tests
for good collision statistics on a variety of problematic datasets
including powersets and graph structures (such as David Eppstein's
graph recipes in Lib/test/test_set.py) */
graph recipes in Lib/test/test_set.py).

This hash algorithm can be used on either a frozenset or a set.
When it is used on a set, it computes the hash value of the equivalent
frozenset without creating a new frozenset object. */

static Py_hash_t
frozenset_hash(PyObject *self)
frozenset_hash_impl(PyObject *self)
{
assert(PyAnySet_Check(self));
PySetObject *so = (PySetObject *)self;
Py_uhash_t hash = 0;
setentry *entry;

if (so->hash != -1)
return so->hash;

/* Xor-in shuffled bits from every entry's hash field because xor is
commutative and a frozenset hash should be independent of order.

Expand Down Expand Up @@ -753,6 +755,20 @@ frozenset_hash(PyObject *self)
if (hash == (Py_uhash_t)-1)
hash = 590923713UL;

return (Py_hash_t)hash;
}

/* Return the hash of `self`, computing it at most once per object.
 *
 * so->hash holds the cached value; -1 there means "not computed yet".
 * On a cache miss the value is obtained from frozenset_hash_impl(),
 * stored in so->hash, and returned.
 */
static Py_hash_t
frozenset_hash(PyObject *self)
{
    PySetObject *so = (PySetObject *)self;

    if (so->hash != -1) {
        return so->hash;
    }

    /* Keep the value in the signed type that frozenset_hash_impl() returns
       and that this function returns: routing it through Py_uhash_t would
       add an unsigned-to-signed conversion whose result is
       implementation-defined for values above PY_SSIZE_T_MAX. */
    Py_hash_t hash = frozenset_hash_impl(self);
    so->hash = hash;
    return hash;
}
Expand Down Expand Up @@ -2137,19 +2153,18 @@ set_add_impl(PySetObject *so, PyObject *key)
/* Membership test for `key` in `so`; a negative result signals an error.
 *
 * The plain lookup through set_contains_key() is tried first.  The fallback
 * below runs only when that lookup fails with a TypeError and `key` is
 * itself a set (presumably because a mutable set is unhashable -- confirm).
 *
 * NOTE(review): this listing comes from a rendered diff and appears to carry
 * both the removed fallback (copy `key` into a temporary frozenset and look
 * that up) and the added one (hash `key` in place with frozenset_hash_impl()
 * and probe with set_contains_entry()).  Confirm against the real file which
 * lines survive before relying on the statement order shown here. */
static int
set_contains_lock_held(PySetObject *so, PyObject *key)
{
PyObject *tmpkey;
int rv;

rv = set_contains_key(so, key);
if (rv < 0) {
/* Any failure other than "set key raised TypeError" is propagated as-is. */
if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))
return -1;
PyErr_Clear();
tmpkey = make_new_set(&PyFrozenSet_Type, key);
if (tmpkey == NULL)
return -1;
rv = set_contains_key(so, tmpkey);
Py_DECREF(tmpkey);
/* Hash the set's contents without building a frozenset; the critical
   section on `key` is held only while the hash is computed. */
Py_hash_t hash;
Py_BEGIN_CRITICAL_SECTION(key);
hash = frozenset_hash_impl(key);
Py_END_CRITICAL_SECTION();
rv = set_contains_entry(so, key, hash);
}
return rv;
}
Expand Down Expand Up @@ -2203,19 +2218,18 @@ static PyObject *
set_remove_impl(PySetObject *so, PyObject *key)
/*[clinic end generated code: output=0b9134a2a2200363 input=893e1cb1df98227a]*/
{
PyObject *tmpkey;
int rv;

rv = set_discard_key(so, key);
if (rv < 0) {
if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))
return NULL;
PyErr_Clear();
tmpkey = make_new_set(&PyFrozenSet_Type, key);
if (tmpkey == NULL)
return NULL;
rv = set_discard_key(so, tmpkey);
Py_DECREF(tmpkey);
Py_hash_t hash;
Py_BEGIN_CRITICAL_SECTION(key);
hash = frozenset_hash_impl(key);
Py_END_CRITICAL_SECTION();
rv = set_discard_entry(so, key, hash);
if (rv < 0)
return NULL;
}
Expand Down Expand Up @@ -2244,19 +2258,18 @@ static PyObject *
set_discard_impl(PySetObject *so, PyObject *key)
/*[clinic end generated code: output=eec3b687bf32759e input=861cb7fb69b4def0]*/
{
PyObject *tmpkey;
int rv;

rv = set_discard_key(so, key);
if (rv < 0) {
if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))
return NULL;
PyErr_Clear();
tmpkey = make_new_set(&PyFrozenSet_Type, key);
if (tmpkey == NULL)
return NULL;
rv = set_discard_key(so, tmpkey);
Py_DECREF(tmpkey);
Py_hash_t hash;
Py_BEGIN_CRITICAL_SECTION(key);
hash = frozenset_hash_impl(key);
Py_END_CRITICAL_SECTION();
rv = set_discard_entry(so, key, hash);
if (rv < 0)
return NULL;
}
Expand Down
Loading