Skip to content

Commit 7c79798

Browse files
authored
bpo-42536: GC track recycled tuples (GH-23623) (GH-23652)
Several built-in and standard library types now ensure that their internal result tuples are always tracked by the garbage collector:
- collections.OrderedDict.items
- dict.items
- enumerate
- functools.reduce
- itertools.combinations
- itertools.combinations_with_replacement
- itertools.permutations
- itertools.product
- itertools.zip_longest
- zip
Previously, they could have become untracked by a prior garbage collection.
(cherry picked from commit 226a012)
1 parent ca52aa3 commit 7c79798

File tree

12 files changed

+194
-1
lines changed

12 files changed

+194
-1
lines changed

Lib/test/test_builtin.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import collections
77
import decimal
88
import fractions
9+
import gc
910
import io
1011
import locale
1112
import os
@@ -27,7 +28,7 @@
2728
from operator import neg
2829
from test.support import (
2930
EnvironmentVarGuard, TESTFN, check_warnings, swap_attr, unlink,
30-
maybe_get_event_loop_policy)
31+
maybe_get_event_loop_policy, cpython_only)
3132
from test.support.script_helper import assert_python_ok
3233
from unittest.mock import MagicMock, patch
3334
try:
@@ -1573,6 +1574,18 @@ def __iter__(self):
15731574

15741575
self.assertIs(cm.exception, exception)
15751576

1577+
@cpython_only
1578+
def test_zip_result_gc(self):
1579+
# bpo-42536: zip's tuple-reuse speed trick breaks the GC's assumptions
1580+
# about what can be untracked. Make sure we re-track result tuples
1581+
# whenever we reuse them.
1582+
it = zip([[]])
1583+
gc.collect()
1584+
# That GC collection probably untracked the recycled internal result
1585+
# tuple, which is initialized to (None,). Make sure it's re-tracked when
1586+
# it's mutated and returned from __next__:
1587+
self.assertTrue(gc.is_tracked(next(it)))
1588+
15761589
def test_format(self):
15771590
# Test the basic machinery of the format() builtin. Don't test
15781591
# the specifics of the various formatters

Lib/test/test_dict.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1377,6 +1377,25 @@ def items(self):
13771377
d = CustomReversedDict(pairs)
13781378
self.assertEqual(pairs[::-1], list(dict(d).items()))
13791379

1380+
@support.cpython_only
1381+
def test_dict_items_result_gc(self):
1382+
# bpo-42536: dict.items's tuple-reuse speed trick breaks the GC's
1383+
# assumptions about what can be untracked. Make sure we re-track result
1384+
# tuples whenever we reuse them.
1385+
it = iter({None: []}.items())
1386+
gc.collect()
1387+
# That GC collection probably untracked the recycled internal result
1388+
# tuple, which is initialized to (None, None). Make sure it's re-tracked
1389+
# when it's mutated and returned from __next__:
1390+
self.assertTrue(gc.is_tracked(next(it)))
1391+
1392+
@support.cpython_only
1393+
def test_dict_items_result_gc(self):
1394+
# Same as test_dict_items_result_gc above, but reversed.
1395+
it = reversed({None: []}.items())
1396+
gc.collect()
1397+
self.assertTrue(gc.is_tracked(next(it)))
1398+
13801399

13811400
class CAPITest(unittest.TestCase):
13821401

Lib/test/test_enumerate.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import operator
33
import sys
44
import pickle
5+
import gc
56

67
from test import support
78

@@ -134,6 +135,18 @@ def test_tuple_reuse(self):
134135
self.assertEqual(len(set(map(id, list(enumerate(self.seq))))), len(self.seq))
135136
self.assertEqual(len(set(map(id, enumerate(self.seq)))), min(1,len(self.seq)))
136137

138+
@support.cpython_only
139+
def test_enumerate_result_gc(self):
140+
# bpo-42536: enumerate's tuple-reuse speed trick breaks the GC's
141+
# assumptions about what can be untracked. Make sure we re-track result
142+
# tuples whenever we reuse them.
143+
it = self.enum([[]])
144+
gc.collect()
145+
# That GC collection probably untracked the recycled internal result
146+
# tuple, which is initialized to (None, None). Make sure it's re-tracked
147+
# when it's mutated and returned from __next__:
148+
self.assertTrue(gc.is_tracked(next(it)))
149+
137150
class MyEnum(enumerate):
138151
pass
139152

Lib/test/test_itertools.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212
import sys
1313
import struct
1414
import threading
15+
import gc
16+
1517
maxsize = support.MAX_Py_ssize_t
1618
minsize = -maxsize-1
1719

@@ -1554,6 +1556,51 @@ def test_StopIteration(self):
15541556
self.assertRaises(StopIteration, next, f(lambda x:x, []))
15551557
self.assertRaises(StopIteration, next, f(lambda x:x, StopNow()))
15561558

1559+
@support.cpython_only
1560+
def test_combinations_result_gc(self):
1561+
# bpo-42536: combinations's tuple-reuse speed trick breaks the GC's
1562+
# assumptions about what can be untracked. Make sure we re-track result
1563+
# tuples whenever we reuse them.
1564+
it = combinations([None, []], 1)
1565+
next(it)
1566+
gc.collect()
1567+
# That GC collection probably untracked the recycled internal result
1568+
# tuple, which has the value (None,). Make sure it's re-tracked when
1569+
# it's mutated and returned from __next__:
1570+
self.assertTrue(gc.is_tracked(next(it)))
1571+
1572+
@support.cpython_only
1573+
def test_combinations_with_replacement_result_gc(self):
1574+
# Ditto for combinations_with_replacement.
1575+
it = combinations_with_replacement([None, []], 1)
1576+
next(it)
1577+
gc.collect()
1578+
self.assertTrue(gc.is_tracked(next(it)))
1579+
1580+
@support.cpython_only
1581+
def test_permutations_result_gc(self):
1582+
# Ditto for permutations.
1583+
it = permutations([None, []], 1)
1584+
next(it)
1585+
gc.collect()
1586+
self.assertTrue(gc.is_tracked(next(it)))
1587+
1588+
@support.cpython_only
1589+
def test_product_result_gc(self):
1590+
# Ditto for product.
1591+
it = product([None, []])
1592+
next(it)
1593+
gc.collect()
1594+
self.assertTrue(gc.is_tracked(next(it)))
1595+
1596+
@support.cpython_only
1597+
def test_zip_longest_result_gc(self):
1598+
# Ditto for zip_longest.
1599+
it = zip_longest([[]])
1600+
gc.collect()
1601+
self.assertTrue(gc.is_tracked(next(it)))
1602+
1603+
15571604
class TestExamples(unittest.TestCase):
15581605

15591606
def test_accumulate(self):

Lib/test/test_ordered_dict.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -654,6 +654,17 @@ def test_free_after_iterating(self):
654654
support.check_free_after_iterating(self, lambda d: iter(d.values()), self.OrderedDict)
655655
support.check_free_after_iterating(self, lambda d: iter(d.items()), self.OrderedDict)
656656

657+
@support.cpython_only
658+
def test_ordered_dict_items_result_gc(self):
659+
# bpo-42536: OrderedDict.items's tuple-reuse speed trick breaks the GC's
660+
# assumptions about what can be untracked. Make sure we re-track result
661+
# tuples whenever we reuse them.
662+
it = iter(self.OrderedDict({None: []}).items())
663+
gc.collect()
664+
# That GC collection probably untracked the recycled internal result
665+
# tuple, which is initialized to (None, None). Make sure it's re-tracked
666+
# when it's mutated and returned from __next__:
667+
self.assertTrue(gc.is_tracked(next(it)))
657668

658669
class PurePythonOrderedDictTests(OrderedDictTests, unittest.TestCase):
659670

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
Several built-in and standard library types now ensure that their internal
2+
result tuples are always tracked by the :term:`garbage collector
3+
<garbage collection>`:
4+
5+
- :meth:`collections.OrderedDict.items() <collections.OrderedDict>`
6+
7+
- :meth:`dict.items`
8+
9+
- :func:`enumerate`
10+
11+
- :func:`functools.reduce`
12+
13+
- :func:`itertools.combinations`
14+
15+
- :func:`itertools.combinations_with_replacement`
16+
17+
- :func:`itertools.permutations`
18+
19+
- :func:`itertools.product`
20+
21+
- :func:`itertools.zip_longest`
22+
23+
- :func:`zip`
24+
25+
Previously, they could have become untracked by a prior garbage collection.
26+
Patch by Brandt Bucher.

Modules/_functoolsmodule.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#include "pycore_pystate.h"
44
#include "pycore_tupleobject.h"
55
#include "structmember.h"
6+
#include "pycore_object.h" // _PyObject_GC_TRACK
67

78
/* _functools module written and maintained
89
by Hye-Shik Chang <perky@FreeBSD.org>
@@ -633,6 +634,11 @@ functools_reduce(PyObject *self, PyObject *args)
633634
if ((result = PyObject_Call(func, args, NULL)) == NULL) {
634635
goto Fail;
635636
}
637+
// bpo-42536: The GC may have untracked this args tuple. Since we're
638+
// recycling it, make sure it's tracked again:
639+
if (!_PyObject_GC_IS_TRACKED(args)) {
640+
_PyObject_GC_TRACK(args);
641+
}
636642
}
637643
}
638644

Modules/itertoolsmodule.c

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#include "Python.h"
44
#include "pycore_tupleobject.h"
55
#include "structmember.h"
6+
#include "pycore_object.h" // _PyObject_GC_TRACK()
67

78
/* Itertools module written and maintained
89
by Raymond D. Hettinger <python@rcn.com>
@@ -2255,6 +2256,11 @@ product_next(productobject *lz)
22552256
lz->result = result;
22562257
Py_DECREF(old_result);
22572258
}
2259+
// bpo-42536: The GC may have untracked this result tuple. Since we're
2260+
// recycling it, make sure it's tracked again:
2261+
else if (!_PyObject_GC_IS_TRACKED(result)) {
2262+
_PyObject_GC_TRACK(result);
2263+
}
22582264
/* Now, we've got the only copy so we can update it in-place */
22592265
assert (npools==0 || Py_REFCNT(result) == 1);
22602266

@@ -2580,6 +2586,11 @@ combinations_next(combinationsobject *co)
25802586
co->result = result;
25812587
Py_DECREF(old_result);
25822588
}
2589+
// bpo-42536: The GC may have untracked this result tuple. Since we're
2590+
// recycling it, make sure it's tracked again:
2591+
else if (!_PyObject_GC_IS_TRACKED(result)) {
2592+
_PyObject_GC_TRACK(result);
2593+
}
25832594
/* Now, we've got the only copy so we can update it in-place
25842595
* CPython's empty tuple is a singleton and cached in
25852596
* PyTuple's freelist.
@@ -2916,6 +2927,11 @@ cwr_next(cwrobject *co)
29162927
co->result = result;
29172928
Py_DECREF(old_result);
29182929
}
2930+
// bpo-42536: The GC may have untracked this result tuple. Since we're
2931+
// recycling it, make sure it's tracked again:
2932+
else if (!_PyObject_GC_IS_TRACKED(result)) {
2933+
_PyObject_GC_TRACK(result);
2934+
}
29192935
/* Now, we've got the only copy so we can update it in-place CPython's
29202936
empty tuple is a singleton and cached in PyTuple's freelist. */
29212937
assert(r == 0 || Py_REFCNT(result) == 1);
@@ -3259,6 +3275,11 @@ permutations_next(permutationsobject *po)
32593275
po->result = result;
32603276
Py_DECREF(old_result);
32613277
}
3278+
// bpo-42536: The GC may have untracked this result tuple. Since we're
3279+
// recycling it, make sure it's tracked again:
3280+
else if (!_PyObject_GC_IS_TRACKED(result)) {
3281+
_PyObject_GC_TRACK(result);
3282+
}
32623283
/* Now, we've got the only copy so we can update it in-place */
32633284
assert(r == 0 || Py_REFCNT(result) == 1);
32643285

@@ -4536,6 +4557,11 @@ zip_longest_next(ziplongestobject *lz)
45364557
PyTuple_SET_ITEM(result, i, item);
45374558
Py_DECREF(olditem);
45384559
}
4560+
// bpo-42536: The GC may have untracked this result tuple. Since we're
4561+
// recycling it, make sure it's tracked again:
4562+
if (!_PyObject_GC_IS_TRACKED(result)) {
4563+
_PyObject_GC_TRACK(result);
4564+
}
45394565
} else {
45404566
result = PyTuple_New(tuplesize);
45414567
if (result == NULL)

Objects/dictobject.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3769,6 +3769,11 @@ dictiter_iternextitem(dictiterobject *di)
37693769
Py_INCREF(result);
37703770
Py_DECREF(oldkey);
37713771
Py_DECREF(oldvalue);
3772+
// bpo-42536: The GC may have untracked this result tuple. Since we're
3773+
// recycling it, make sure it's tracked again:
3774+
if (!_PyObject_GC_IS_TRACKED(result)) {
3775+
_PyObject_GC_TRACK(result);
3776+
}
37723777
}
37733778
else {
37743779
result = PyTuple_New(2);
@@ -3884,6 +3889,11 @@ dictreviter_iternext(dictiterobject *di)
38843889
Py_INCREF(result);
38853890
Py_DECREF(oldkey);
38863891
Py_DECREF(oldvalue);
3892+
// bpo-42536: The GC may have untracked this result tuple. Since
3893+
// we're recycling it, make sure it's tracked again:
3894+
if (!_PyObject_GC_IS_TRACKED(result)) {
3895+
_PyObject_GC_TRACK(result);
3896+
}
38873897
}
38883898
else {
38893899
result = PyTuple_New(2);

Objects/enumobject.c

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
/* enumerate object */
22

33
#include "Python.h"
4+
#include "pycore_object.h" // _PyObject_GC_TRACK()
45

56
#include "clinic/enumobject.c.h"
67

@@ -130,6 +131,11 @@ enum_next_long(enumobject *en, PyObject* next_item)
130131
PyTuple_SET_ITEM(result, 1, next_item);
131132
Py_DECREF(old_index);
132133
Py_DECREF(old_item);
134+
// bpo-42536: The GC may have untracked this result tuple. Since we're
135+
// recycling it, make sure it's tracked again:
136+
if (!_PyObject_GC_IS_TRACKED(result)) {
137+
_PyObject_GC_TRACK(result);
138+
}
133139
return result;
134140
}
135141
result = PyTuple_New(2);
@@ -175,6 +181,11 @@ enum_next(enumobject *en)
175181
PyTuple_SET_ITEM(result, 1, next_item);
176182
Py_DECREF(old_index);
177183
Py_DECREF(old_item);
184+
// bpo-42536: The GC may have untracked this result tuple. Since we're
185+
// recycling it, make sure it's tracked again:
186+
if (!_PyObject_GC_IS_TRACKED(result)) {
187+
_PyObject_GC_TRACK(result);
188+
}
178189
return result;
179190
}
180191
result = PyTuple_New(2);

Objects/odictobject.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1766,6 +1766,11 @@ odictiter_iternext(odictiterobject *di)
17661766
Py_INCREF(result);
17671767
Py_DECREF(PyTuple_GET_ITEM(result, 0)); /* borrowed */
17681768
Py_DECREF(PyTuple_GET_ITEM(result, 1)); /* borrowed */
1769+
// bpo-42536: The GC may have untracked this result tuple. Since we're
1770+
// recycling it, make sure it's tracked again:
1771+
if (!_PyObject_GC_IS_TRACKED(result)) {
1772+
_PyObject_GC_TRACK(result);
1773+
}
17691774
}
17701775
else {
17711776
result = PyTuple_New(2);

Python/bltinmodule.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
#include <ctype.h>
55
#include "ast.h"
66
#undef Yield /* undefine macro conflicting with <winbase.h> */
7+
#include "pycore_object.h" // _PyObject_GC_TRACK()
78
#include "pycore_pystate.h"
89
#include "pycore_tupleobject.h"
910

@@ -2618,6 +2619,11 @@ zip_next(zipobject *lz)
26182619
PyTuple_SET_ITEM(result, i, item);
26192620
Py_DECREF(olditem);
26202621
}
2622+
// bpo-42536: The GC may have untracked this result tuple. Since we're
2623+
// recycling it, make sure it's tracked again:
2624+
if (!_PyObject_GC_IS_TRACKED(result)) {
2625+
_PyObject_GC_TRACK(result);
2626+
}
26212627
} else {
26222628
result = PyTuple_New(tuplesize);
26232629
if (result == NULL)

0 commit comments

Comments
 (0)