@@ -205,6 +205,16 @@ static void recreate_gil(struct _gil_runtime_state *gil)
205
205
}
206
206
#endif
207
207
208
/* Release the GIL: clear gil->locked under gil->mutex and signal gil->cond
   so one waiting thread can wake up.

   Factored out so it can be shared by drop_gil() and _PyEval_DisableGIL()
   (the latter must release the GIL without the extra bookkeeping that
   drop_gil() performs). The store to gil->locked happens while gil->mutex
   is held; waiters re-check it under the same mutex. */
static void
drop_gil_impl(struct _gil_runtime_state *gil)
{
    MUTEX_LOCK(gil->mutex);
    _Py_ANNOTATE_RWLOCK_RELEASED(&gil->locked, /*is_write=*/1);
    _Py_atomic_store_int_relaxed(&gil->locked, 0);
    COND_SIGNAL(gil->cond);
    MUTEX_UNLOCK(gil->mutex);
}
208
218
static void
209
219
drop_gil (PyInterpreterState * interp , PyThreadState * tstate )
210
220
{
@@ -220,7 +230,7 @@ drop_gil(PyInterpreterState *interp, PyThreadState *tstate)
220
230
221
231
struct _gil_runtime_state * gil = ceval -> gil ;
222
232
#ifdef Py_GIL_DISABLED
223
- if (!gil -> enabled ) {
233
+ if (!_Py_atomic_load_int_relaxed ( & gil -> enabled ) ) {
224
234
return ;
225
235
}
226
236
#endif
@@ -236,11 +246,7 @@ drop_gil(PyInterpreterState *interp, PyThreadState *tstate)
236
246
_Py_atomic_store_ptr_relaxed (& gil -> last_holder , tstate );
237
247
}
238
248
239
- MUTEX_LOCK (gil -> mutex );
240
- _Py_ANNOTATE_RWLOCK_RELEASED (& gil -> locked , /*is_write=*/ 1 );
241
- _Py_atomic_store_int_relaxed (& gil -> locked , 0 );
242
- COND_SIGNAL (gil -> cond );
243
- MUTEX_UNLOCK (gil -> mutex );
249
+ drop_gil_impl (gil );
244
250
245
251
#ifdef FORCE_SWITCHING
246
252
/* We check tstate first in case we might be releasing the GIL for
@@ -275,8 +281,10 @@ drop_gil(PyInterpreterState *interp, PyThreadState *tstate)
275
281
276
282
The function saves errno at entry and restores its value at exit.
277
283
278
- tstate must be non-NULL. */
279
- static void
284
+ tstate must be non-NULL.
285
+
286
+ Returns 1 if the GIL was acquired, or 0 if not. */
287
+ static int
280
288
take_gil (PyThreadState * tstate )
281
289
{
282
290
int err = errno ;
@@ -300,8 +308,8 @@ take_gil(PyThreadState *tstate)
300
308
PyInterpreterState * interp = tstate -> interp ;
301
309
struct _gil_runtime_state * gil = interp -> ceval .gil ;
302
310
#ifdef Py_GIL_DISABLED
303
- if (!gil -> enabled ) {
304
- return ;
311
+ if (!_Py_atomic_load_int_relaxed ( & gil -> enabled ) ) {
312
+ return 0 ;
305
313
}
306
314
#endif
307
315
@@ -346,6 +354,17 @@ take_gil(PyThreadState *tstate)
346
354
}
347
355
}
348
356
357
+ #ifdef Py_GIL_DISABLED
358
+ if (!_Py_atomic_load_int_relaxed (& gil -> enabled )) {
359
+ // Another thread disabled the GIL between our check above and
360
+ // now. Don't take the GIL, signal any other waiting threads, and
361
+ // return 0.
362
+ COND_SIGNAL (gil -> cond );
363
+ MUTEX_UNLOCK (gil -> mutex );
364
+ return 0 ;
365
+ }
366
+ #endif
367
+
349
368
#ifdef FORCE_SWITCHING
350
369
/* This mutex must be taken before modifying gil->last_holder:
351
370
see drop_gil(). */
@@ -387,6 +406,7 @@ take_gil(PyThreadState *tstate)
387
406
MUTEX_UNLOCK (gil -> mutex );
388
407
389
408
errno = err ;
409
+ return 1 ;
390
410
}
391
411
392
412
void _PyEval_SetSwitchInterval (unsigned long microseconds )
@@ -451,7 +471,8 @@ init_own_gil(PyInterpreterState *interp, struct _gil_runtime_state *gil)
451
471
{
452
472
assert (!gil_created (gil ));
453
473
#ifdef Py_GIL_DISABLED
454
- gil -> enabled = _PyInterpreterState_GetConfig (interp )-> enable_gil == _PyConfig_GIL_ENABLE ;
474
+ const PyConfig * config = _PyInterpreterState_GetConfig (interp );
475
+ gil -> enabled = config -> enable_gil == _PyConfig_GIL_ENABLE ? INT_MAX : 0 ;
455
476
#endif
456
477
create_gil (gil );
457
478
assert (gil_created (gil ));
@@ -545,11 +566,11 @@ PyEval_ReleaseLock(void)
545
566
drop_gil (tstate -> interp , tstate );
546
567
}
547
568
548
- void
569
/* Acquire the GIL for tstate (which must be non-NULL).

   Returns take_gil()'s result: 1 if the GIL was acquired, 0 if it was not
   (in the Py_GIL_DISABLED build, when the GIL is currently disabled). */
int
_PyEval_AcquireLock(PyThreadState *tstate)
{
    _Py_EnsureTstateNotNULL(tstate);
    return take_gil(tstate);
}
554
575
555
576
void
@@ -1011,6 +1032,117 @@ _PyEval_InitState(PyInterpreterState *interp)
1011
1032
_gil_initialize (& interp -> _gil );
1012
1033
}
1013
1034
1035
+ #ifdef Py_GIL_DISABLED
1036
/* Transiently request that the GIL be enabled, incrementing the gil->enabled
   counter (INT_MAX is reserved as the "permanently enabled" sentinel).

   Returns 1 if this call is the one that actually turned the GIL on (the
   counter went from 0 to nonzero while the world was stopped), 0 otherwise:
   the user pinned the GIL state via PyConfig.enable_gil, the GIL is already
   permanently enabled, or it was already transiently enabled. */
int
_PyEval_EnableGILTransient(PyThreadState *tstate)
{
    const PyConfig *config = _PyInterpreterState_GetConfig(tstate->interp);
    if (config->enable_gil != _PyConfig_GIL_DEFAULT) {
        // The user explicitly chose a GIL mode; never override it here.
        return 0;
    }
    struct _gil_runtime_state *gil = tstate->interp->ceval.gil;

    int enabled = _Py_atomic_load_int_relaxed(&gil->enabled);
    if (enabled == INT_MAX) {
        // The GIL is already enabled permanently.
        return 0;
    }
    if (enabled == INT_MAX - 1) {
        // Incrementing once more would collide with the INT_MAX sentinel.
        Py_FatalError("Too many transient requests to enable the GIL");
    }
    if (enabled > 0) {
        // If enabled is nonzero, we know we hold the GIL. This means that no
        // other threads are attached, and nobody else can be concurrently
        // mutating it.
        _Py_atomic_store_int_relaxed(&gil->enabled, enabled + 1);
        return 0;
    }

    // Enabling the GIL changes what it means to be an "attached" thread. To
    // safely make this transition, we:
    // 1. Detach the current thread.
    // 2. Stop the world to detach (and suspend) all other threads.
    // 3. Enable the GIL, if nobody else did between our check above and when
    //    our stop-the-world begins.
    // 4. Start the world.
    // 5. Attach the current thread. Other threads may attach and hold the GIL
    //    before this thread, which is harmless.
    _PyThreadState_Detach(tstate);

    // This could be an interpreter-local stop-the-world in situations where we
    // know that this interpreter's GIL is not shared, and that it won't become
    // shared before the stop-the-world begins. For now, we always stop all
    // interpreters for simplicity.
    _PyEval_StopTheWorldAll(&_PyRuntime);

    // Re-read under stop-the-world: another thread may have enabled the GIL
    // between our unsynchronized check above and this point.
    enabled = _Py_atomic_load_int_relaxed(&gil->enabled);
    int this_thread_enabled = enabled == 0;
    _Py_atomic_store_int_relaxed(&gil->enabled, enabled + 1);

    _PyEval_StartTheWorldAll(&_PyRuntime);
    _PyThreadState_Attach(tstate);

    return this_thread_enabled;
}
1087
+
1088
+ int
1089
+ _PyEval_EnableGILPermanent (PyThreadState * tstate )
1090
+ {
1091
+ const PyConfig * config = _PyInterpreterState_GetConfig (tstate -> interp );
1092
+ if (config -> enable_gil != _PyConfig_GIL_DEFAULT ) {
1093
+ return 0 ;
1094
+ }
1095
+
1096
+ struct _gil_runtime_state * gil = tstate -> interp -> ceval .gil ;
1097
+ assert (current_thread_holds_gil (gil , tstate ));
1098
+
1099
+ int enabled = _Py_atomic_load_int_relaxed (& gil -> enabled );
1100
+ if (enabled == INT_MAX ) {
1101
+ return 0 ;
1102
+ }
1103
+
1104
+ _Py_atomic_store_int_relaxed (& gil -> enabled , INT_MAX );
1105
+ return 1 ;
1106
+ }
1107
+
1108
/* Undo one transient request to enable the GIL by decrementing gil->enabled.
   The calling thread must hold the GIL.

   Returns 1 if this call actually disabled the GIL (the counter reached 0
   and the GIL was dropped), 0 otherwise: the GIL is permanently enabled,
   other transient requests remain outstanding, or the user pinned the GIL
   state via PyConfig.enable_gil. */
int
_PyEval_DisableGIL(PyThreadState *tstate)
{
    const PyConfig *config = _PyInterpreterState_GetConfig(tstate->interp);
    if (config->enable_gil != _PyConfig_GIL_DEFAULT) {
        // The user explicitly chose a GIL mode; never override it here.
        return 0;
    }

    struct _gil_runtime_state *gil = tstate->interp->ceval.gil;
    assert(current_thread_holds_gil(gil, tstate));

    int enabled = _Py_atomic_load_int_relaxed(&gil->enabled);
    if (enabled == INT_MAX) {
        // Permanently enabled (sentinel); transient disables don't apply.
        return 0;
    }

    assert(enabled >= 1);
    enabled--;

    // Disabling the GIL is much simpler than enabling it, since we know we are
    // the only attached thread. Other threads may start free-threading as soon
    // as this store is complete, if it sets gil->enabled to 0.
    _Py_atomic_store_int_relaxed(&gil->enabled, enabled);

    if (enabled == 0) {
        // We're attached, so we know the GIL will remain disabled until at
        // least the next time we detach, which must be after this function
        // returns.
        //
        // Drop the GIL, which will wake up any threads waiting in take_gil()
        // and let them resume execution without the GIL.
        drop_gil_impl(gil);
        return 1;
    }
    return 0;
}
1144
+ #endif
1145
+
1014
1146
1015
1147
/* Do periodic things, like check for signals and async I/0.
1016
1148
* We need to do reasonably frequently, but not too frequently.
0 commit comments