Skip to content

bpo-27945: Fixed various segfaults with dict. #1657

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
May 20, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 85 additions & 0 deletions Lib/test/test_dict.py
Original file line number Diff line number Diff line change
Expand Up @@ -1085,6 +1085,91 @@ def test_free_after_iterating(self):
support.check_free_after_iterating(self, lambda d: iter(d.values()), dict)
support.check_free_after_iterating(self, lambda d: iter(d.items()), dict)

def test_equal_operator_modifying_operand(self):
# test fix for seg fault reported in issue 27945 part 3.
class X():
def __del__(self):
dict_b.clear()

def __eq__(self, other):
dict_a.clear()
return True

def __hash__(self):
return 13

dict_a = {X(): 0}
dict_b = {X(): X()}
self.assertTrue(dict_a == dict_b)

def test_fromkeys_operator_modifying_dict_operand(self):
# test fix for seg fault reported in issue 27945 part 4a.
class X(int):
def __hash__(self):
return 13

def __eq__(self, other):
if len(d) > 1:
d.clear()
return False

d = {} # this is required to exist so that d can be constructed!
d = {X(1): 1, X(2): 2}
try:
dict.fromkeys(d) # shouldn't crash
except RuntimeError: # implementation defined
pass

def test_fromkeys_operator_modifying_set_operand(self):
# test fix for seg fault reported in issue 27945 part 4b.
class X(int):
def __hash__(self):
return 13

def __eq__(self, other):
if len(d) > 1:
d.clear()
return False

d = {} # this is required to exist so that d can be constructed!
d = {X(1), X(2)}
try:
dict.fromkeys(d) # shouldn't crash
except RuntimeError: # implementation defined
pass

def test_dictitems_contains_use_after_free(self):
class X:
def __eq__(self, other):
d.clear()
return NotImplemented

d = {0: set()}
(0, X()) in d.items()

def test_init_use_after_free(self):
class X:
def __hash__(self):
pair[:] = []
return 13

pair = [X(), 123]
dict([pair])

def test_oob_indexing_dictiter_iternextitem(self):
class X(int):
def __del__(self):
d.clear()

d = {i: X(i) for i in range(8)}

def iter_and_mutate():
for result in d.items():
if result[0] == 2:
d[2] = None # free d[2] --> X(2).__del__ was called

self.assertRaises(RuntimeError, iter_and_mutate)


class CAPITest(unittest.TestCase):

Expand Down
1 change: 1 addition & 0 deletions Misc/ACKS
Original file line number Diff line number Diff line change
Expand Up @@ -546,6 +546,7 @@ Tim Graham
Kim Gräsman
Nathaniel Gray
Eddy De Greef
Duane Griffin
Grant Griffin
Andrea Griffini
Duncan Grisby
Expand Down
4 changes: 4 additions & 0 deletions Misc/NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@ What's New in Python 3.7.0 alpha 1?
Core and Builtins
-----------------

- bpo-27945: Fixed various segfaults with dict when input collections are
mutated during searching, inserting or comparing. Based on patches by
Duane Griffin and Tim Mitchell.

- bpo-25794: Fixed type.__setattr__() and type.__delattr__() for
non-interned attribute names. Based on patch by Eryk Sun.

Expand Down
68 changes: 43 additions & 25 deletions Objects/dictobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -1107,18 +1107,18 @@ insertdict(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject *value)
PyDictKeyEntry *ep;
Py_ssize_t hashpos, ix;

Py_INCREF(key);
Py_INCREF(value);
if (mp->ma_values != NULL && !PyUnicode_CheckExact(key)) {
if (insertion_resize(mp) < 0)
return -1;
goto Fail;
}

ix = mp->ma_keys->dk_lookup(mp, key, hash, &old_value, &hashpos);
if (ix == DKIX_ERROR) {
return -1;
}
if (ix == DKIX_ERROR)
goto Fail;

assert(PyUnicode_CheckExact(key) || mp->ma_keys->dk_lookup == lookdict);
Py_INCREF(value);
MAINTAIN_TRACKING(mp, key, value);

/* When insertion order is different from shared key, we can't share
Expand All @@ -1127,10 +1127,8 @@ insertdict(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject *value)
if (_PyDict_HasSplitTable(mp) &&
((ix >= 0 && old_value == NULL && mp->ma_used != ix) ||
(ix == DKIX_EMPTY && mp->ma_used != mp->ma_keys->dk_nentries))) {
if (insertion_resize(mp) < 0) {
Py_DECREF(value);
return -1;
}
if (insertion_resize(mp) < 0)
goto Fail;
hashpos = find_empty_slot(mp->ma_keys, key, hash);
ix = DKIX_EMPTY;
}
Expand All @@ -1140,15 +1138,12 @@ insertdict(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject *value)
assert(old_value == NULL);
if (mp->ma_keys->dk_usable <= 0) {
/* Need to resize. */
if (insertion_resize(mp) < 0) {
Py_DECREF(value);
return -1;
}
if (insertion_resize(mp) < 0)
goto Fail;
hashpos = find_empty_slot(mp->ma_keys, key, hash);
}
ep = &DK_ENTRIES(mp->ma_keys)[mp->ma_keys->dk_nentries];
dk_set_index(mp->ma_keys, hashpos, mp->ma_keys->dk_nentries);
Py_INCREF(key);
ep->me_key = key;
ep->me_hash = hash;
if (mp->ma_values) {
Expand Down Expand Up @@ -1183,7 +1178,13 @@ insertdict(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject *value)
mp->ma_version_tag = DICT_NEXT_VERSION();
Py_XDECREF(old_value); /* which **CAN** re-enter (see issue #22653) */
assert(_PyDict_CheckConsistency(mp));
Py_DECREF(key);
return 0;

Fail:
Py_DECREF(value);
Py_DECREF(key);
return -1;
}

/*
Expand Down Expand Up @@ -2419,11 +2420,18 @@ PyDict_MergeFromSeq2(PyObject *d, PyObject *seq2, int override)
/* Update/merge with this (key, value) pair. */
key = PySequence_Fast_GET_ITEM(fast, 0);
value = PySequence_Fast_GET_ITEM(fast, 1);
Py_INCREF(key);
Py_INCREF(value);
if (override || PyDict_GetItem(d, key) == NULL) {
int status = PyDict_SetItem(d, key, value);
if (status < 0)
if (status < 0) {
Py_DECREF(key);
Py_DECREF(value);
goto Fail;
}
}
Py_DECREF(key);
Py_DECREF(value);
Py_DECREF(fast);
Py_DECREF(item);
}
Expand Down Expand Up @@ -2720,14 +2728,15 @@ dict_equal(PyDictObject *a, PyDictObject *b)
Py_INCREF(key);
/* reuse the known hash value */
b->ma_keys->dk_lookup(b, key, ep->me_hash, &bval, NULL);
Py_DECREF(key);
if (bval == NULL) {
Py_DECREF(key);
Py_DECREF(aval);
if (PyErr_Occurred())
return -1;
return 0;
}
cmp = PyObject_RichCompareBool(aval, bval, Py_EQ);
Py_DECREF(key);
Py_DECREF(aval);
if (cmp <= 0) /* error or not equal */
return cmp;
Expand Down Expand Up @@ -3612,7 +3621,7 @@ PyTypeObject PyDictIterValue_Type = {
static PyObject *
dictiter_iternextitem(dictiterobject *di)
{
PyObject *key, *value, *result = di->di_result;
PyObject *key, *value, *result;
Py_ssize_t i;
PyDictObject *d = di->di_dict;

Expand Down Expand Up @@ -3650,20 +3659,25 @@ dictiter_iternextitem(dictiterobject *di)
}
di->di_pos = i+1;
di->len--;
if (result->ob_refcnt == 1) {
Py_INCREF(key);
Py_INCREF(value);
result = di->di_result;
if (Py_REFCNT(result) == 1) {
PyObject *oldkey = PyTuple_GET_ITEM(result, 0);
PyObject *oldvalue = PyTuple_GET_ITEM(result, 1);
PyTuple_SET_ITEM(result, 0, key); /* steals reference */
PyTuple_SET_ITEM(result, 1, value); /* steals reference */
Py_INCREF(result);
Py_DECREF(PyTuple_GET_ITEM(result, 0));
Py_DECREF(PyTuple_GET_ITEM(result, 1));
Py_DECREF(oldkey);
Py_DECREF(oldvalue);
}
else {
result = PyTuple_New(2);
if (result == NULL)
return NULL;
PyTuple_SET_ITEM(result, 0, key); /* steals reference */
PyTuple_SET_ITEM(result, 1, value); /* steals reference */
}
Py_INCREF(key);
Py_INCREF(value);
PyTuple_SET_ITEM(result, 0, key); /* steals reference */
PyTuple_SET_ITEM(result, 1, value); /* steals reference */
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@serhiy-storchaka , just my curious, simply moving the two Py_INCREF forward is also enough here right? So any advantage here or just personal style?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm... You are right! Py_INCREF(result) prevents reentering in that branch or modifying the tuple by other code. Shame on me that I missed such simple solution!

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But on other hand, Py_DECREF() can trigger the garbage collecting. The garbage collector can see the tuple with references to freed objects and expose it to user code via gc.get_objects() or gc.get_referrers(). Doing some operations with this tuple (repr() or hash()) can cause a crash. We already encountered with similar issues.

So yes, there are reasons for writing the code in that way.

Copy link
Member

@zhangyangyu zhangyangyu May 20, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ahh, nice point! Stupid me! I didn't recognise it's just like Py_SETREF(PyTuple_GET_ITEM(result, 0), key). But it seems to mean that in your case, users are possible to see a mutable tuple, which smells no good.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

At that point there is only one owner of the tuple. From user's point of view there is no difference between mutating this tuple or deallocating it and allocating a new tuple at the same memory address.

return result;

fail:
Expand Down Expand Up @@ -4156,6 +4170,7 @@ dictitems_iter(_PyDictViewObject *dv)
static int
dictitems_contains(_PyDictViewObject *dv, PyObject *obj)
{
int result;
PyObject *key, *value, *found;
if (dv->dv_dict == NULL)
return 0;
Expand All @@ -4169,7 +4184,10 @@ dictitems_contains(_PyDictViewObject *dv, PyObject *obj)
return -1;
return 0;
}
return PyObject_RichCompareBool(value, found, Py_EQ);
Py_INCREF(found);
result = PyObject_RichCompareBool(value, found, Py_EQ);
Py_DECREF(found);
return result;
}

static PySequenceMethods dictitems_as_sequence = {
Expand Down