Skip to content

Commit 52ff880

Browse files
committed
Merge remote-tracking branch 'upstream/main' into rename-Py_NOGIL
2 parents 7713093 + 97c4c06 commit 52ff880

25 files changed

+715
-14
lines changed

Include/cpython/pystate.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,13 @@ struct _ts {
149149

150150
struct _py_trashcan trash;
151151

152+
/* Tagged pointer to top-most critical section, or zero if there is no
153+
* active critical section. Critical sections are only used in
154+
* `--disable-gil` builds (i.e., when Py_NOGIL is defined to 1). In the
155+
* default build, this field is always zero.
156+
*/
157+
uintptr_t critical_section;
158+
152159
/* Called when a thread state is deleted normally, but not when it
153160
* is destroyed after fork().
154161
* Pain: to prevent rare but fatal shutdown errors (issue 18808),
Lines changed: 242 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,242 @@
1+
#ifndef Py_INTERNAL_CRITICAL_SECTION_H
2+
#define Py_INTERNAL_CRITICAL_SECTION_H
3+
4+
#ifndef Py_BUILD_CORE
5+
# error "this header requires Py_BUILD_CORE define"
6+
#endif
7+
8+
#include "pycore_lock.h" // PyMutex
9+
#include "pycore_pystate.h" // _PyThreadState_GET()
10+
#include <stdint.h>
11+
12+
#ifdef __cplusplus
13+
extern "C" {
14+
#endif
15+
16+
// Implementation of Python critical sections
17+
//
18+
// Conceptually, critical sections are a deadlock avoidance layer on top of
19+
// per-object locks. These helpers, in combination with those locks, replace
20+
// our usage of the global interpreter lock to provide thread-safety for
21+
// otherwise thread-unsafe objects, such as dict.
22+
//
23+
// NOTE: These APIs are no-ops in non-free-threaded builds.
24+
//
25+
// Straightforward per-object locking could introduce deadlocks that were not
26+
// present when running with the GIL. Threads may hold locks for multiple
27+
// objects simultaneously because Python operations can nest. If threads were
28+
// to acquire the same locks in different orders, they would deadlock.
29+
//
30+
// One way to avoid deadlocks is to allow threads to hold only the lock (or
31+
// locks) for a single operation at a time (typically a single lock, but some
32+
// operations involve two locks). When a thread begins a nested operation it
33+
// could suspend the locks for any outer operation: before beginning the nested
34+
// operation, the locks for the outer operation are released and when the
35+
// nested operation completes, the locks for the outer operation are
36+
// reacquired.
37+
//
38+
// To improve performance, this API uses a variation of the above scheme.
39+
// Instead of immediately suspending locks any time a nested operation begins,
40+
// locks are only suspended if the thread would block. This reduces the number
41+
// of lock acquisitions and releases for nested operations, while still
42+
// avoiding deadlocks.
43+
//
44+
// Additionally, the locks for any active operation are suspended around
45+
// other potentially blocking operations, such as I/O. This is because the
46+
// interaction between locks and blocking operations can lead to deadlocks in
47+
// the same way as the interaction between multiple locks.
48+
//
49+
// Each thread's critical sections and their corresponding locks are tracked in
50+
// a stack in `PyThreadState.critical_section`. When a thread calls
51+
// `_PyThreadState_Detach()`, such as before a blocking I/O operation or when
52+
// waiting to acquire a lock, the thread suspends all of its active critical
53+
// sections, temporarily releasing the associated locks. When the thread calls
54+
// `_PyThreadState_Attach()`, it resumes the top-most (i.e., most recent)
55+
// critical section by reacquiring the associated lock or locks. See
56+
// `_PyCriticalSection_Resume()`.
57+
//
58+
// NOTE: Only the top-most critical section is guaranteed to be active.
59+
// Operations that need to lock two objects at once must use
60+
// `Py_BEGIN_CRITICAL_SECTION2()`. You *CANNOT* use nested critical sections
61+
// to lock more than one object at once, because the inner critical section
62+
// may suspend the outer critical sections. This API does not provide a way
63+
// to lock more than two objects at once (though it could be added later
64+
// if actually needed).
65+
//
66+
// NOTE: Critical sections implicitly behave like reentrant locks because
67+
// attempting to acquire the same lock will suspend any outer (earlier)
68+
// critical sections. However, they are less efficient for this use case than
69+
// purposefully designed reentrant locks.
70+
//
71+
// Example usage:
72+
// Py_BEGIN_CRITICAL_SECTION(op);
73+
// ...
74+
// Py_END_CRITICAL_SECTION();
75+
//
76+
// To lock two objects at once:
77+
// Py_BEGIN_CRITICAL_SECTION2(op1, op2);
78+
// ...
79+
// Py_END_CRITICAL_SECTION2();
80+
81+
82+
// Tagged pointers to critical sections use the two least significant bits to
83+
// mark if the pointed-to critical section is inactive and whether it is a
84+
// _PyCriticalSection2 object.
85+
#define _Py_CRITICAL_SECTION_INACTIVE 0x1
86+
#define _Py_CRITICAL_SECTION_TWO_MUTEXES 0x2
87+
#define _Py_CRITICAL_SECTION_MASK 0x3
88+
89+
// Critical-section macros.
//
// Each BEGIN macro opens a block scope and each END macro closes it, so a
// BEGIN/END pair must always be lexically balanced within one function.
// The scope is opened in BOTH build configurations: if the default-build
// variants expanded to nothing, a declaration placed between BEGIN and END
// would be visible after END in the default build but not in the Py_NOGIL
// build, letting code compile in one configuration and fail in the other.
#ifdef Py_NOGIL
// Lock the per-object mutex of `op` for the duration of the block.
# define Py_BEGIN_CRITICAL_SECTION(op)                                  \
    {                                                                   \
        _PyCriticalSection _cs;                                         \
        _PyCriticalSection_Begin(&_cs, &_PyObject_CAST(op)->ob_mutex)

# define Py_END_CRITICAL_SECTION()                                      \
        _PyCriticalSection_End(&_cs);                                   \
    }

// Lock the per-object mutexes of both `a` and `b` for the duration of the
// block.
# define Py_BEGIN_CRITICAL_SECTION2(a, b)                               \
    {                                                                   \
        _PyCriticalSection2 _cs2;                                       \
        _PyCriticalSection2_Begin(&_cs2, &_PyObject_CAST(a)->ob_mutex, &_PyObject_CAST(b)->ob_mutex)

# define Py_END_CRITICAL_SECTION2()                                     \
        _PyCriticalSection2_End(&_cs2);                                 \
    }
#else  /* !Py_NOGIL */
// With the GIL no locking is performed and the arguments are not evaluated,
// but the macros still open and close a scope so that both builds accept
// exactly the same code.
# define Py_BEGIN_CRITICAL_SECTION(op) {
# define Py_END_CRITICAL_SECTION() }
# define Py_BEGIN_CRITICAL_SECTION2(a, b) {
# define Py_END_CRITICAL_SECTION2() }
#endif  /* !Py_NOGIL */
114+
115+
typedef struct {
116+
// Tagged pointer to an outer active critical section (or 0).
117+
// The two least-significant-bits indicate whether the pointed-to critical
118+
// section is inactive and whether it is a _PyCriticalSection2 object.
119+
uintptr_t prev;
120+
121+
// Mutex used to protect critical section
122+
PyMutex *mutex;
123+
} _PyCriticalSection;
124+
125+
// A critical section protected by two mutexes. Use
126+
// _PyCriticalSection2_Begin and _PyCriticalSection2_End.
127+
typedef struct {
128+
_PyCriticalSection base;
129+
130+
PyMutex *mutex2;
131+
} _PyCriticalSection2;
132+
133+
static inline int
134+
_PyCriticalSection_IsActive(uintptr_t tag)
135+
{
136+
return tag != 0 && (tag & _Py_CRITICAL_SECTION_INACTIVE) == 0;
137+
}
138+
139+
// Resumes the top-most critical section.
140+
PyAPI_FUNC(void)
141+
_PyCriticalSection_Resume(PyThreadState *tstate);
142+
143+
// (private) slow path for locking the mutex
144+
PyAPI_FUNC(void)
145+
_PyCriticalSection_BeginSlow(_PyCriticalSection *c, PyMutex *m);
146+
147+
PyAPI_FUNC(void)
148+
_PyCriticalSection2_BeginSlow(_PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2,
149+
int is_m1_locked);
150+
151+
static inline void
152+
_PyCriticalSection_Begin(_PyCriticalSection *c, PyMutex *m)
153+
{
154+
if (PyMutex_LockFast(&m->v)) {
155+
PyThreadState *tstate = _PyThreadState_GET();
156+
c->mutex = m;
157+
c->prev = tstate->critical_section;
158+
tstate->critical_section = (uintptr_t)c;
159+
}
160+
else {
161+
_PyCriticalSection_BeginSlow(c, m);
162+
}
163+
}
164+
165+
// Removes the top-most critical section from the thread's stack of critical
166+
// sections. If the new top-most critical section is inactive, then it is
167+
// resumed.
168+
static inline void
169+
_PyCriticalSection_Pop(_PyCriticalSection *c)
170+
{
171+
PyThreadState *tstate = _PyThreadState_GET();
172+
uintptr_t prev = c->prev;
173+
tstate->critical_section = prev;
174+
175+
if ((prev & _Py_CRITICAL_SECTION_INACTIVE) != 0) {
176+
_PyCriticalSection_Resume(tstate);
177+
}
178+
}
179+
180+
static inline void
181+
_PyCriticalSection_End(_PyCriticalSection *c)
182+
{
183+
PyMutex_Unlock(c->mutex);
184+
_PyCriticalSection_Pop(c);
185+
}
186+
187+
static inline void
188+
_PyCriticalSection2_Begin(_PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2)
189+
{
190+
if (m1 == m2) {
191+
// If the two mutex arguments are the same, treat this as a critical
192+
// section with a single mutex.
193+
c->mutex2 = NULL;
194+
_PyCriticalSection_Begin(&c->base, m1);
195+
return;
196+
}
197+
198+
if ((uintptr_t)m2 < (uintptr_t)m1) {
199+
// Sort the mutexes so that the lower address is locked first.
200+
// The exact order does not matter, but we need to acquire the mutexes
201+
// in a consistent order to avoid lock ordering deadlocks.
202+
PyMutex *tmp = m1;
203+
m1 = m2;
204+
m2 = tmp;
205+
}
206+
207+
if (PyMutex_LockFast(&m1->v)) {
208+
if (PyMutex_LockFast(&m2->v)) {
209+
PyThreadState *tstate = _PyThreadState_GET();
210+
c->base.mutex = m1;
211+
c->mutex2 = m2;
212+
c->base.prev = tstate->critical_section;
213+
214+
uintptr_t p = (uintptr_t)c | _Py_CRITICAL_SECTION_TWO_MUTEXES;
215+
tstate->critical_section = p;
216+
}
217+
else {
218+
_PyCriticalSection2_BeginSlow(c, m1, m2, 1);
219+
}
220+
}
221+
else {
222+
_PyCriticalSection2_BeginSlow(c, m1, m2, 0);
223+
}
224+
}
225+
226+
static inline void
227+
_PyCriticalSection2_End(_PyCriticalSection2 *c)
228+
{
229+
if (c->mutex2) {
230+
PyMutex_Unlock(c->mutex2);
231+
}
232+
PyMutex_Unlock(c->base.mutex);
233+
_PyCriticalSection_Pop(&c->base);
234+
}
235+
236+
PyAPI_FUNC(void)
237+
_PyCriticalSection_SuspendAll(PyThreadState *tstate);
238+
239+
#ifdef __cplusplus
240+
}
241+
#endif
242+
#endif /* !Py_INTERNAL_CRITICAL_SECTION_H */

Include/internal/pycore_lock.h

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,16 @@ extern "C" {
3232
// PyMutex_Lock(&m);
3333
// ...
3434
// PyMutex_Unlock(&m);
35-
typedef struct _PyMutex {
36-
uint8_t v;
37-
} PyMutex;
35+
36+
// NOTE: In Py_NOGIL builds, `struct _PyMutex` is defined in Include/object.h.
37+
// The Py_NOGIL builds need the definition in Include/object.h for the
38+
// `ob_mutex` field in PyObject. For the default (non-free-threaded) build,
39+
// we define the struct here to avoid exposing it in the public API.
40+
#ifndef Py_NOGIL
41+
struct _PyMutex { uint8_t v; };
42+
#endif
43+
44+
typedef struct _PyMutex PyMutex;
3845

3946
#define _Py_UNLOCKED 0
4047
#define _Py_LOCKED 1
@@ -46,6 +53,13 @@ PyAPI_FUNC(void) _PyMutex_LockSlow(PyMutex *m);
4653
// (private) slow path for unlocking the mutex
4754
PyAPI_FUNC(void) _PyMutex_UnlockSlow(PyMutex *m);
4855

56+
static inline int
57+
PyMutex_LockFast(uint8_t *lock_bits)
58+
{
59+
uint8_t expected = _Py_UNLOCKED;
60+
return _Py_atomic_compare_exchange_uint8(lock_bits, &expected, _Py_LOCKED);
61+
}
62+
4963
// Locks the mutex.
5064
//
5165
// If the mutex is currently locked, the calling thread will be parked until

Include/object.h

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@ check by comparing the reference count field to the immortality reference count.
119119
{ \
120120
0, \
121121
0, \
122-
0, \
122+
{ 0 }, \
123123
0, \
124124
_Py_IMMORTAL_REFCNT_LOCAL, \
125125
0, \
@@ -204,10 +204,14 @@ struct _object {
204204
// Create a shared field from a refcnt and desired flags
205205
#define _Py_REF_SHARED(refcnt, flags) (((refcnt) << _Py_REF_SHARED_SHIFT) + (flags))
206206

207+
// NOTE: In non-free-threaded builds, `struct _PyMutex` is defined in
208+
// pycore_lock.h. See pycore_lock.h for more details.
209+
struct _PyMutex { uint8_t v; };
210+
207211
struct _object {
208212
uintptr_t ob_tid; // thread id (or zero)
209213
uint16_t _padding;
210-
uint8_t ob_mutex; // per-object lock
214+
struct _PyMutex ob_mutex; // per-object lock
211215
uint8_t ob_gc_bits; // gc-related state
212216
uint32_t ob_ref_local; // local reference count
213217
Py_ssize_t ob_ref_shared; // shared (atomic) reference count

Lib/test/test_listcomps.py

Lines changed: 43 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import doctest
22
import textwrap
3+
import types
34
import unittest
45

56

@@ -92,7 +93,8 @@
9293

9394

9495
class ListComprehensionTest(unittest.TestCase):
95-
def _check_in_scopes(self, code, outputs=None, ns=None, scopes=None, raises=()):
96+
def _check_in_scopes(self, code, outputs=None, ns=None, scopes=None, raises=(),
97+
exec_func=exec):
9698
code = textwrap.dedent(code)
9799
scopes = scopes or ["module", "class", "function"]
98100
for scope in scopes:
@@ -119,7 +121,7 @@ def get_output(moddict, name):
119121
return moddict[name]
120122
newns = ns.copy() if ns else {}
121123
try:
122-
exec(newcode, newns)
124+
exec_func(newcode, newns)
123125
except raises as e:
124126
# We care about e.g. NameError vs UnboundLocalError
125127
self.assertIs(type(e), raises)
@@ -613,6 +615,45 @@ def test_frame_locals(self):
613615
import sys
614616
self._check_in_scopes(code, {"val": 0}, ns={"sys": sys})
615617

618+
def _recursive_replace(self, maybe_code):
619+
if not isinstance(maybe_code, types.CodeType):
620+
return maybe_code
621+
return maybe_code.replace(co_consts=tuple(
622+
self._recursive_replace(c) for c in maybe_code.co_consts
623+
))
624+
625+
def _replacing_exec(self, code_string, ns):
626+
co = compile(code_string, "<string>", "exec")
627+
co = self._recursive_replace(co)
628+
exec(co, ns)
629+
630+
def test_code_replace(self):
631+
code = """
632+
x = 3
633+
[x for x in (1, 2)]
634+
dir()
635+
y = [x]
636+
"""
637+
self._check_in_scopes(code, {"y": [3], "x": 3})
638+
self._check_in_scopes(code, {"y": [3], "x": 3}, exec_func=self._replacing_exec)
639+
640+
def test_code_replace_extended_arg(self):
641+
num_names = 300
642+
assignments = "; ".join(f"x{i} = {i}" for i in range(num_names))
643+
name_list = ", ".join(f"x{i}" for i in range(num_names))
644+
expected = {
645+
"y": list(range(num_names)),
646+
**{f"x{i}": i for i in range(num_names)}
647+
}
648+
code = f"""
649+
{assignments}
650+
[({name_list}) for {name_list} in (range(300),)]
651+
dir()
652+
y = [{name_list}]
653+
"""
654+
self._check_in_scopes(code, expected)
655+
self._check_in_scopes(code, expected, exec_func=self._replacing_exec)
656+
616657

617658
__test__ = {'doctests' : doctests}
618659

0 commit comments

Comments
 (0)