Skip to content

Commit 60463e8

Browse files
authored
bpo-42536: GC track recycled tuples (GH-23623) (GH-23651)
Several built-in and standard library types now ensure that their internal result tuples are always tracked by the garbage collector: - collections.OrderedDict.items - dict.items - enumerate - functools.reduce - itertools.combinations - itertools.combinations_with_replacement - itertools.permutations - itertools.product - itertools.zip_longest - zip Previously, they could have become untracked by a prior garbage collection. (cherry picked from commit 226a012)
1 parent e9a6dcd commit 60463e8

File tree

12 files changed

+192
-0
lines changed

12 files changed

+192
-0
lines changed

Lib/test/test_builtin.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import collections
77
import decimal
88
import fractions
9+
import gc
910
import io
1011
import locale
1112
import os
@@ -1606,6 +1607,18 @@ def __iter__(self):
16061607

16071608
self.assertIs(cm.exception, exception)
16081609

1610+
@support.cpython_only
1611+
def test_zip_result_gc(self):
1612+
# bpo-42536: zip's tuple-reuse speed trick breaks the GC's assumptions
1613+
# about what can be untracked. Make sure we re-track result tuples
1614+
# whenever we reuse them.
1615+
it = zip([[]])
1616+
gc.collect()
1617+
# That GC collection probably untracked the recycled internal result
1618+
# tuple, which is initialized to (None,). Make sure it's re-tracked when
1619+
# it's mutated and returned from __next__:
1620+
self.assertTrue(gc.is_tracked(next(it)))
1621+
16091622
def test_format(self):
16101623
# Test the basic machinery of the format() builtin. Don't test
16111624
# the specifics of the various formatters

Lib/test/test_dict.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1422,6 +1422,25 @@ def items(self):
14221422
d = CustomReversedDict(pairs)
14231423
self.assertEqual(pairs[::-1], list(dict(d).items()))
14241424

1425+
@support.cpython_only
1426+
def test_dict_items_result_gc(self):
1427+
# bpo-42536: dict.items's tuple-reuse speed trick breaks the GC's
1428+
# assumptions about what can be untracked. Make sure we re-track result
1429+
# tuples whenever we reuse them.
1430+
it = iter({None: []}.items())
1431+
gc.collect()
1432+
# That GC collection probably untracked the recycled internal result
1433+
# tuple, which is initialized to (None, None). Make sure it's re-tracked
1434+
# when it's mutated and returned from __next__:
1435+
self.assertTrue(gc.is_tracked(next(it)))
1436+
1437+
@support.cpython_only
1438+
def test_dict_items_result_gc(self):
1439+
# Same as test_dict_items_result_gc above, but reversed.
1440+
it = reversed({None: []}.items())
1441+
gc.collect()
1442+
self.assertTrue(gc.is_tracked(next(it)))
1443+
14251444

14261445
class CAPITest(unittest.TestCase):
14271446

Lib/test/test_enumerate.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import operator
33
import sys
44
import pickle
5+
import gc
56

67
from test import support
78

@@ -134,6 +135,18 @@ def test_tuple_reuse(self):
134135
self.assertEqual(len(set(map(id, list(enumerate(self.seq))))), len(self.seq))
135136
self.assertEqual(len(set(map(id, enumerate(self.seq)))), min(1,len(self.seq)))
136137

138+
@support.cpython_only
139+
def test_enumerate_result_gc(self):
140+
# bpo-42536: enumerate's tuple-reuse speed trick breaks the GC's
141+
# assumptions about what can be untracked. Make sure we re-track result
142+
# tuples whenever we reuse them.
143+
it = self.enum([[]])
144+
gc.collect()
145+
# That GC collection probably untracked the recycled internal result
146+
# tuple, which is initialized to (None, None). Make sure it's re-tracked
147+
# when it's mutated and returned from __next__:
148+
self.assertTrue(gc.is_tracked(next(it)))
149+
137150
class MyEnum(enumerate):
138151
pass
139152

Lib/test/test_itertools.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212
import sys
1313
import struct
1414
import threading
15+
import gc
16+
1517
maxsize = support.MAX_Py_ssize_t
1618
minsize = -maxsize-1
1719

@@ -1554,6 +1556,51 @@ def test_StopIteration(self):
15541556
self.assertRaises(StopIteration, next, f(lambda x:x, []))
15551557
self.assertRaises(StopIteration, next, f(lambda x:x, StopNow()))
15561558

1559+
@support.cpython_only
1560+
def test_combinations_result_gc(self):
1561+
# bpo-42536: combinations's tuple-reuse speed trick breaks the GC's
1562+
# assumptions about what can be untracked. Make sure we re-track result
1563+
# tuples whenever we reuse them.
1564+
it = combinations([None, []], 1)
1565+
next(it)
1566+
gc.collect()
1567+
# That GC collection probably untracked the recycled internal result
1568+
# tuple, which has the value (None,). Make sure it's re-tracked when
1569+
# it's mutated and returned from __next__:
1570+
self.assertTrue(gc.is_tracked(next(it)))
1571+
1572+
@support.cpython_only
1573+
def test_combinations_with_replacement_result_gc(self):
1574+
# Ditto for combinations_with_replacement.
1575+
it = combinations_with_replacement([None, []], 1)
1576+
next(it)
1577+
gc.collect()
1578+
self.assertTrue(gc.is_tracked(next(it)))
1579+
1580+
@support.cpython_only
1581+
def test_permutations_result_gc(self):
1582+
# Ditto for permutations.
1583+
it = permutations([None, []], 1)
1584+
next(it)
1585+
gc.collect()
1586+
self.assertTrue(gc.is_tracked(next(it)))
1587+
1588+
@support.cpython_only
1589+
def test_product_result_gc(self):
1590+
# Ditto for product.
1591+
it = product([None, []])
1592+
next(it)
1593+
gc.collect()
1594+
self.assertTrue(gc.is_tracked(next(it)))
1595+
1596+
@support.cpython_only
1597+
def test_zip_longest_result_gc(self):
1598+
# Ditto for zip_longest.
1599+
it = zip_longest([[]])
1600+
gc.collect()
1601+
self.assertTrue(gc.is_tracked(next(it)))
1602+
1603+
15571604
class TestExamples(unittest.TestCase):
15581605

15591606
def test_accumulate(self):

Lib/test/test_ordered_dict.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -697,6 +697,17 @@ def test_merge_operator(self):
697697
with self.assertRaises(ValueError):
698698
a |= "BAD"
699699

700+
@support.cpython_only
701+
def test_ordered_dict_items_result_gc(self):
702+
# bpo-42536: OrderedDict.items's tuple-reuse speed trick breaks the GC's
703+
# assumptions about what can be untracked. Make sure we re-track result
704+
# tuples whenever we reuse them.
705+
it = iter(self.OrderedDict({None: []}).items())
706+
gc.collect()
707+
# That GC collection probably untracked the recycled internal result
708+
# tuple, which is initialized to (None, None). Make sure it's re-tracked
709+
# when it's mutated and returned from __next__:
710+
self.assertTrue(gc.is_tracked(next(it)))
700711

701712
class PurePythonOrderedDictTests(OrderedDictTests, unittest.TestCase):
702713

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
Several built-in and standard library types now ensure that their internal
2+
result tuples are always tracked by the :term:`garbage collector
3+
<garbage collection>`:
4+
5+
- :meth:`collections.OrderedDict.items() <collections.OrderedDict>`
6+
7+
- :meth:`dict.items`
8+
9+
- :func:`enumerate`
10+
11+
- :func:`functools.reduce`
12+
13+
- :func:`itertools.combinations`
14+
15+
- :func:`itertools.combinations_with_replacement`
16+
17+
- :func:`itertools.permutations`
18+
19+
- :func:`itertools.product`
20+
21+
- :func:`itertools.zip_longest`
22+
23+
- :func:`zip`
24+
25+
Previously, they could have become untracked by a prior garbage collection.
26+
Patch by Brandt Bucher.

Modules/_functoolsmodule.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
#include "Python.h"
2+
#include "pycore_object.h" // _PyObject_GC_TRACK
23
#include "pycore_pystate.h" // _PyThreadState_GET()
34
#include "pycore_tupleobject.h"
45
#include "structmember.h" // PyMemberDef
@@ -673,6 +674,11 @@ functools_reduce(PyObject *self, PyObject *args)
673674
if ((result = PyObject_Call(func, args, NULL)) == NULL) {
674675
goto Fail;
675676
}
677+
// bpo-42536: The GC may have untracked this args tuple. Since we're
678+
// recycling it, make sure it's tracked again:
679+
if (!_PyObject_GC_IS_TRACKED(args)) {
680+
_PyObject_GC_TRACK(args);
681+
}
676682
}
677683
}
678684

Modules/itertoolsmodule.c

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#define PY_SSIZE_T_CLEAN
33
#include "Python.h"
44
#include "pycore_tupleobject.h"
5+
#include "pycore_object.h" // _PyObject_GC_TRACK()
56
#include <stddef.h> // offsetof()
67

78
/* Itertools module written and maintained
@@ -2245,6 +2246,11 @@ product_next(productobject *lz)
22452246
lz->result = result;
22462247
Py_DECREF(old_result);
22472248
}
2249+
// bpo-42536: The GC may have untracked this result tuple. Since we're
2250+
// recycling it, make sure it's tracked again:
2251+
else if (!_PyObject_GC_IS_TRACKED(result)) {
2252+
_PyObject_GC_TRACK(result);
2253+
}
22482254
/* Now, we've got the only copy so we can update it in-place */
22492255
assert (npools==0 || Py_REFCNT(result) == 1);
22502256

@@ -2568,6 +2574,11 @@ combinations_next(combinationsobject *co)
25682574
co->result = result;
25692575
Py_DECREF(old_result);
25702576
}
2577+
// bpo-42536: The GC may have untracked this result tuple. Since we're
2578+
// recycling it, make sure it's tracked again:
2579+
else if (!_PyObject_GC_IS_TRACKED(result)) {
2580+
_PyObject_GC_TRACK(result);
2581+
}
25712582
/* Now, we've got the only copy so we can update it in-place
25722583
* CPython's empty tuple is a singleton and cached in
25732584
* PyTuple's freelist.
@@ -2902,6 +2913,11 @@ cwr_next(cwrobject *co)
29022913
co->result = result;
29032914
Py_DECREF(old_result);
29042915
}
2916+
// bpo-42536: The GC may have untracked this result tuple. Since we're
2917+
// recycling it, make sure it's tracked again:
2918+
else if (!_PyObject_GC_IS_TRACKED(result)) {
2919+
_PyObject_GC_TRACK(result);
2920+
}
29052921
/* Now, we've got the only copy so we can update it in-place CPython's
29062922
empty tuple is a singleton and cached in PyTuple's freelist. */
29072923
assert(r == 0 || Py_REFCNT(result) == 1);
@@ -3246,6 +3262,11 @@ permutations_next(permutationsobject *po)
32463262
po->result = result;
32473263
Py_DECREF(old_result);
32483264
}
3265+
// bpo-42536: The GC may have untracked this result tuple. Since we're
3266+
// recycling it, make sure it's tracked again:
3267+
else if (!_PyObject_GC_IS_TRACKED(result)) {
3268+
_PyObject_GC_TRACK(result);
3269+
}
32493270
/* Now, we've got the only copy so we can update it in-place */
32503271
assert(r == 0 || Py_REFCNT(result) == 1);
32513272

@@ -4515,6 +4536,11 @@ zip_longest_next(ziplongestobject *lz)
45154536
PyTuple_SET_ITEM(result, i, item);
45164537
Py_DECREF(olditem);
45174538
}
4539+
// bpo-42536: The GC may have untracked this result tuple. Since we're
4540+
// recycling it, make sure it's tracked again:
4541+
if (!_PyObject_GC_IS_TRACKED(result)) {
4542+
_PyObject_GC_TRACK(result);
4543+
}
45184544
} else {
45194545
result = PyTuple_New(tuplesize);
45204546
if (result == NULL)

Objects/dictobject.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3861,6 +3861,11 @@ dictiter_iternextitem(dictiterobject *di)
38613861
Py_INCREF(result);
38623862
Py_DECREF(oldkey);
38633863
Py_DECREF(oldvalue);
3864+
// bpo-42536: The GC may have untracked this result tuple. Since we're
3865+
// recycling it, make sure it's tracked again:
3866+
if (!_PyObject_GC_IS_TRACKED(result)) {
3867+
_PyObject_GC_TRACK(result);
3868+
}
38643869
}
38653870
else {
38663871
result = PyTuple_New(2);
@@ -3976,6 +3981,11 @@ dictreviter_iternext(dictiterobject *di)
39763981
Py_INCREF(result);
39773982
Py_DECREF(oldkey);
39783983
Py_DECREF(oldvalue);
3984+
// bpo-42536: The GC may have untracked this result tuple. Since
3985+
// we're recycling it, make sure it's tracked again:
3986+
if (!_PyObject_GC_IS_TRACKED(result)) {
3987+
_PyObject_GC_TRACK(result);
3988+
}
39793989
}
39803990
else {
39813991
result = PyTuple_New(2);

Objects/enumobject.c

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
/* enumerate object */
22

33
#include "Python.h"
4+
#include "pycore_object.h" // _PyObject_GC_TRACK()
45

56
#include "clinic/enumobject.c.h"
67

@@ -130,6 +131,11 @@ enum_next_long(enumobject *en, PyObject* next_item)
130131
PyTuple_SET_ITEM(result, 1, next_item);
131132
Py_DECREF(old_index);
132133
Py_DECREF(old_item);
134+
// bpo-42536: The GC may have untracked this result tuple. Since we're
135+
// recycling it, make sure it's tracked again:
136+
if (!_PyObject_GC_IS_TRACKED(result)) {
137+
_PyObject_GC_TRACK(result);
138+
}
133139
return result;
134140
}
135141
result = PyTuple_New(2);
@@ -175,6 +181,11 @@ enum_next(enumobject *en)
175181
PyTuple_SET_ITEM(result, 1, next_item);
176182
Py_DECREF(old_index);
177183
Py_DECREF(old_item);
184+
// bpo-42536: The GC may have untracked this result tuple. Since we're
185+
// recycling it, make sure it's tracked again:
186+
if (!_PyObject_GC_IS_TRACKED(result)) {
187+
_PyObject_GC_TRACK(result);
188+
}
178189
return result;
179190
}
180191
result = PyTuple_New(2);

Objects/odictobject.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1817,6 +1817,11 @@ odictiter_iternext(odictiterobject *di)
18171817
Py_INCREF(result);
18181818
Py_DECREF(PyTuple_GET_ITEM(result, 0)); /* borrowed */
18191819
Py_DECREF(PyTuple_GET_ITEM(result, 1)); /* borrowed */
1820+
// bpo-42536: The GC may have untracked this result tuple. Since we're
1821+
// recycling it, make sure it's tracked again:
1822+
if (!_PyObject_GC_IS_TRACKED(result)) {
1823+
_PyObject_GC_TRACK(result);
1824+
}
18201825
}
18211826
else {
18221827
result = PyTuple_New(2);

Python/bltinmodule.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2619,6 +2619,11 @@ zip_next(zipobject *lz)
26192619
PyTuple_SET_ITEM(result, i, item);
26202620
Py_DECREF(olditem);
26212621
}
2622+
// bpo-42536: The GC may have untracked this result tuple. Since we're
2623+
// recycling it, make sure it's tracked again:
2624+
if (!_PyObject_GC_IS_TRACKED(result)) {
2625+
_PyObject_GC_TRACK(result);
2626+
}
26222627
} else {
26232628
result = PyTuple_New(tuplesize);
26242629
if (result == NULL)

0 commit comments

Comments
 (0)