Skip to content

Commit 4cde149

Browse files
committed
Incremental GC
1 parent f19b93f commit 4cde149

File tree

12 files changed

+602
-387
lines changed

12 files changed

+602
-387
lines changed

Doc/whatsnew/3.13.rst

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,10 @@ Important deprecations, removals or restrictions:
8585
New Features
8686
============
8787

88+
* The cyclic garbage collector is now incremental.
89+
This means that maximum pause times are reduced,
90+
by an order of magnitude or more for larger heaps.
91+
8892
Improved Error Messages
8993
-----------------------
9094

@@ -94,6 +98,13 @@ Improved Error Messages
9498
variables. See also :ref:`using-on-controlling-color`.
9599
(Contributed by Pablo Galindo Salgado in :gh:`112730`.)
96100

101+
Incremental Garbage Collection
102+
------------------------------
103+
104+
* The cyclic garbage collector is now incremental.
105+
This means that maximum pause times are reduced
106+
by an order of magnitude or more for larger heaps.
107+
97108
Other Language Changes
98109
======================
99110

@@ -225,6 +236,28 @@ fractions
225236
sign handling, minimum width and grouping. (Contributed by Mark Dickinson
226237
in :gh:`111320`)
227238

239+
gc
240+
--
241+
* The cyclic garbage collector is now incremental, which changes the meanings
242+
of the results of :meth:`gc.get_threshold` and :meth:`gc.set_threshold` as
243+
well as :meth:`gc.get_count` and :meth:`gc.get_stats`.
244+
* :meth:`gc.get_threshold` returns a three-tuple for backwards compatibility,
245+
the first value is the threshold for young collections, as before, the second
246+
value determines the rate at which the old collection is scanned; the
247+
default is 10 and higher values mean that the old collection is scanned more slowly.
248+
The third value is meaningless and is always zero.
249+
* :meth:`gc.set_threshold` ignores any items after the second.
250+
* :meth:`gc.get_count` and :meth:`gc.get_stats`.
251+
These functions return the same format of results as before.
252+
The only difference is that instead of the results referring to
253+
the young, aging and old generations, the results refer to the
254+
young generation and the aging and collecting spaces of the old generation.
255+
In summary, code that attempted to manipulate the behavior of the cycle GC may
256+
not work as well as intended, but it is very unlikely to be harmful.
257+
All other code will work just fine.
258+
Users should avoid calling :meth:`gc.collect` unless their workload is episodic,
259+
but that has always been the case to some extent.
260+
228261
glob
229262
----
230263

Include/internal/pycore_gc.h

Lines changed: 27 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -57,11 +57,15 @@ static inline int _PyObject_GC_MAY_BE_TRACKED(PyObject *obj) {
5757

5858
/* Bit flags for _gc_prev */
5959
/* Bit 0 is set when tp_finalize is called */
60-
#define _PyGC_PREV_MASK_FINALIZED (1)
60+
#define _PyGC_PREV_MASK_FINALIZED 1
6161
/* Bit 1 is set when the object is in generation which is GCed currently. */
62-
#define _PyGC_PREV_MASK_COLLECTING (2)
62+
#define _PyGC_PREV_MASK_COLLECTING 2
63+
64+
/* Bit 0 is set if the object belongs to old space 1 */
65+
#define _PyGC_NEXT_MASK_OLD_SPACE_1 1
66+
6367
/* The (N-2) most significant bits contain the real address. */
64-
#define _PyGC_PREV_SHIFT (2)
68+
#define _PyGC_PREV_SHIFT 2
6569
#define _PyGC_PREV_MASK (((uintptr_t) -1) << _PyGC_PREV_SHIFT)
6670

6771
/* set for debugging information */
@@ -87,18 +91,21 @@ typedef enum {
8791
// Lowest bit of _gc_next is used for flags only in GC.
8892
// But it is always 0 for normal code.
8993
static inline PyGC_Head* _PyGCHead_NEXT(PyGC_Head *gc) {
90-
uintptr_t next = gc->_gc_next;
94+
uintptr_t next = gc->_gc_next & _PyGC_PREV_MASK;
9195
return (PyGC_Head*)next;
9296
}
9397
static inline void _PyGCHead_SET_NEXT(PyGC_Head *gc, PyGC_Head *next) {
94-
gc->_gc_next = (uintptr_t)next;
98+
uintptr_t unext = (uintptr_t)next;
99+
assert((unext & ~_PyGC_PREV_MASK) == 0);
100+
gc->_gc_next = (gc->_gc_next & ~_PyGC_PREV_MASK) | unext;
95101
}
96102

97103
// Lowest two bits of _gc_prev is used for _PyGC_PREV_MASK_* flags.
98104
static inline PyGC_Head* _PyGCHead_PREV(PyGC_Head *gc) {
99105
uintptr_t prev = (gc->_gc_prev & _PyGC_PREV_MASK);
100106
return (PyGC_Head*)prev;
101107
}
108+
102109
static inline void _PyGCHead_SET_PREV(PyGC_Head *gc, PyGC_Head *prev) {
103110
uintptr_t uprev = (uintptr_t)prev;
104111
assert((uprev & ~_PyGC_PREV_MASK) == 0);
@@ -175,6 +182,13 @@ struct gc_generation {
175182
generations */
176183
};
177184

185+
struct gc_collection_stats {
186+
/* number of collected objects */
187+
Py_ssize_t collected;
188+
/* total number of uncollectable objects (put into gc.garbage) */
189+
Py_ssize_t uncollectable;
190+
};
191+
178192
/* Running stats per generation */
179193
struct gc_generation_stats {
180194
/* total number of collections */
@@ -196,8 +210,8 @@ struct _gc_runtime_state {
196210
int enabled;
197211
int debug;
198212
/* linked lists of container objects */
199-
struct gc_generation generations[NUM_GENERATIONS];
200-
PyGC_Head *generation0;
213+
struct gc_generation young;
214+
struct gc_generation old[2];
201215
/* a permanent generation which won't be collected */
202216
struct gc_generation permanent_generation;
203217
struct gc_generation_stats generation_stats[NUM_GENERATIONS];
@@ -207,25 +221,17 @@ struct _gc_runtime_state {
207221
PyObject *garbage;
208222
/* a list of callbacks to be invoked when collection is performed */
209223
PyObject *callbacks;
210-
/* This is the number of objects that survived the last full
211-
collection. It approximates the number of long lived objects
212-
tracked by the GC.
213-
214-
(by "full collection", we mean a collection of the oldest
215-
generation). */
216-
Py_ssize_t long_lived_total;
217-
/* This is the number of objects that survived all "non-full"
218-
collections, and are awaiting to undergo a full collection for
219-
the first time. */
220-
Py_ssize_t long_lived_pending;
224+
225+
Py_ssize_t work_to_do;
226+
/* Which of the old spaces is the visited space */
227+
int visited_space;
221228
};
222229

223230

224231
extern void _PyGC_InitState(struct _gc_runtime_state *);
225232

226-
extern Py_ssize_t _PyGC_Collect(PyThreadState *tstate, int generation,
227-
_PyGC_Reason reason);
228-
extern Py_ssize_t _PyGC_CollectNoFail(PyThreadState *tstate);
233+
extern Py_ssize_t _PyGC_Collect(PyThreadState *tstate, int generation, _PyGC_Reason reason);
234+
extern void _PyGC_CollectNoFail(PyThreadState *tstate);
229235

230236
/* Freeze objects tracked by the GC and ignore them in future collections. */
231237
extern void _PyGC_Freeze(PyInterpreterState *interp);

Include/internal/pycore_object.h

Lines changed: 3 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -125,19 +125,7 @@ static inline void _Py_RefcntAdd(PyObject* op, Py_ssize_t n)
125125
}
126126
#define _Py_RefcntAdd(op, n) _Py_RefcntAdd(_PyObject_CAST(op), n)
127127

128-
static inline void _Py_SetImmortal(PyObject *op)
129-
{
130-
if (op) {
131-
#ifdef Py_GIL_DISABLED
132-
op->ob_tid = _Py_UNOWNED_TID;
133-
op->ob_ref_local = _Py_IMMORTAL_REFCNT_LOCAL;
134-
op->ob_ref_shared = 0;
135-
#else
136-
op->ob_refcnt = _Py_IMMORTAL_REFCNT;
137-
#endif
138-
}
139-
}
140-
#define _Py_SetImmortal(op) _Py_SetImmortal(_PyObject_CAST(op))
128+
extern void _Py_SetImmortal(PyObject *op);
141129

142130
// Makes an immortal object mortal again with the specified refcnt. Should only
143131
// be used during runtime finalization.
@@ -323,11 +311,12 @@ static inline void _PyObject_GC_TRACK(
323311
filename, lineno, __func__);
324312

325313
PyInterpreterState *interp = _PyInterpreterState_GET();
326-
PyGC_Head *generation0 = interp->gc.generation0;
314+
PyGC_Head *generation0 = &interp->gc.young.head;
327315
PyGC_Head *last = (PyGC_Head*)(generation0->_gc_prev);
328316
_PyGCHead_SET_NEXT(last, gc);
329317
_PyGCHead_SET_PREV(gc, last);
330318
_PyGCHead_SET_NEXT(gc, generation0);
319+
assert((gc->_gc_next & _PyGC_NEXT_MASK_OLD_SPACE_1) == 0);
331320
generation0->_gc_prev = (uintptr_t)gc;
332321
}
333322

Include/internal/pycore_runtime_init.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -157,12 +157,12 @@ extern PyTypeObject _PyExc_MemoryError;
157157
}, \
158158
.gc = { \
159159
.enabled = 1, \
160-
.generations = { \
161-
/* .head is set in _PyGC_InitState(). */ \
162-
{ .threshold = 700, }, \
163-
{ .threshold = 10, }, \
160+
.young = { .threshold = 2000, }, \
161+
.old = { \
164162
{ .threshold = 10, }, \
163+
{ .threshold = 0, }, \
165164
}, \
165+
.work_to_do = -5000, \
166166
}, \
167167
.object_state = _py_object_state_INIT(INTERP), \
168168
.dtoa = _dtoa_state_INIT(&(INTERP)), \

Lib/test/test_gc.py

Lines changed: 2 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -383,19 +383,11 @@ def test_collect_generations(self):
383383
# each call to collect(N)
384384
x = []
385385
gc.collect(0)
386-
# x is now in gen 1
386+
# x is now in the old gen
387387
a, b, c = gc.get_count()
388-
gc.collect(1)
389-
# x is now in gen 2
390-
d, e, f = gc.get_count()
391-
gc.collect(2)
392-
# x is now in gen 3
393-
g, h, i = gc.get_count()
394-
# We don't check a, d, g since their exact values depends on
388+
# We don't check a since its exact value depends on
395389
# internal implementation details of the interpreter.
396390
self.assertEqual((b, c), (1, 0))
397-
self.assertEqual((e, f), (0, 1))
398-
self.assertEqual((h, i), (0, 0))
399391

400392
def test_trashcan(self):
401393
class Ouch:
@@ -837,16 +829,6 @@ def test_get_objects(self):
837829
self.assertFalse(
838830
any(l is element for element in gc.get_objects(generation=2))
839831
)
840-
gc.collect(generation=1)
841-
self.assertFalse(
842-
any(l is element for element in gc.get_objects(generation=0))
843-
)
844-
self.assertFalse(
845-
any(l is element for element in gc.get_objects(generation=1))
846-
)
847-
self.assertTrue(
848-
any(l is element for element in gc.get_objects(generation=2))
849-
)
850832
gc.collect(generation=2)
851833
self.assertFalse(
852834
any(l is element for element in gc.get_objects(generation=0))
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
Implements an incremental cyclic garbage collector. By collecting the old
2+
generation in increments, there is no need for a full heap scan. This can
3+
hugely reduce maximum pause time for programs with large heaps.
4+
5+
Reduces the number of generations from three to two. The old generation is
6+
split into two spaces, "aging" and "collecting".
7+
8+
Collection happens in two steps:: * First, the young generation is scanned
9+
and the survivors moved to the end of the aging space. * Then objects are
10+
taken from the collecting space, at such a rate that all cycles are
11+
collected eventually. Those objects are then scanned and the survivors
12+
moved to the end of the aging space. When the collecting space becomes
13+
empty, the two spaces are swapped.

Modules/gcmodule.c

Lines changed: 10 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -144,15 +144,12 @@ static PyObject *
144144
gc_set_threshold(PyObject *self, PyObject *args)
145145
{
146146
GCState *gcstate = get_gc_state();
147+
int ignore;
147148
if (!PyArg_ParseTuple(args, "i|ii:set_threshold",
148-
&gcstate->generations[0].threshold,
149-
&gcstate->generations[1].threshold,
150-
&gcstate->generations[2].threshold))
149+
&gcstate->young.threshold,
150+
&gcstate->old[0].threshold,
151+
&ignore))
151152
return NULL;
152-
for (int i = 3; i < NUM_GENERATIONS; i++) {
153-
/* generations higher than 2 get the same threshold */
154-
gcstate->generations[i].threshold = gcstate->generations[2].threshold;
155-
}
156153
Py_RETURN_NONE;
157154
}
158155

@@ -168,9 +165,9 @@ gc_get_threshold_impl(PyObject *module)
168165
{
169166
GCState *gcstate = get_gc_state();
170167
return Py_BuildValue("(iii)",
171-
gcstate->generations[0].threshold,
172-
gcstate->generations[1].threshold,
173-
gcstate->generations[2].threshold);
168+
gcstate->young.threshold,
169+
gcstate->old[0].threshold,
170+
0);
174171
}
175172

176173
/*[clinic input]
@@ -185,9 +182,9 @@ gc_get_count_impl(PyObject *module)
185182
{
186183
GCState *gcstate = get_gc_state();
187184
return Py_BuildValue("(iii)",
188-
gcstate->generations[0].count,
189-
gcstate->generations[1].count,
190-
gcstate->generations[2].count);
185+
gcstate->young.count,
186+
gcstate->old[gcstate->visited_space].count,
187+
gcstate->old[gcstate->visited_space^1].count);
191188
}
192189

193190
PyDoc_STRVAR(gc_get_referrers__doc__,

Objects/object.c

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2387,6 +2387,21 @@ _Py_NewReferenceNoTotal(PyObject *op)
23872387
new_reference(op);
23882388
}
23892389

2390+
void
2391+
_Py_SetImmortal(PyObject *op)
2392+
{
2393+
if (PyObject_IS_GC(op) && _PyObject_GC_IS_TRACKED(op)) {
2394+
_PyObject_GC_UNTRACK(op);
2395+
}
2396+
#ifdef Py_GIL_DISABLED
2397+
op->ob_tid = _Py_UNOWNED_TID;
2398+
op->ob_ref_local = _Py_IMMORTAL_REFCNT_LOCAL;
2399+
op->ob_ref_shared = 0;
2400+
#else
2401+
op->ob_refcnt = _Py_IMMORTAL_REFCNT;
2402+
#endif
2403+
}
2404+
23902405
void
23912406
_Py_ResurrectReference(PyObject *op)
23922407
{

Objects/structseq.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -603,6 +603,9 @@ _PyStructSequence_InitBuiltinWithFlags(PyInterpreterState *interp,
603603
PyStructSequence_Desc *desc,
604604
unsigned long tp_flags)
605605
{
606+
if (Py_TYPE(type) == NULL) {
607+
Py_SET_TYPE(type, &PyType_Type);
608+
}
606609
Py_ssize_t n_unnamed_members;
607610
Py_ssize_t n_members = count_members(desc, &n_unnamed_members);
608611
PyMemberDef *members = NULL;
@@ -618,7 +621,7 @@ _PyStructSequence_InitBuiltinWithFlags(PyInterpreterState *interp,
618621
}
619622
initialize_static_fields(type, desc, members, tp_flags);
620623

621-
_Py_SetImmortal(type);
624+
_Py_SetImmortal((PyObject *)type);
622625
}
623626
#ifndef NDEBUG
624627
else {

0 commit comments

Comments
 (0)