From 9419609290597e4ba75bc764f72a3f3cca34505c Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Sun, 21 Dec 2025 22:43:04 -0800 Subject: [PATCH] POC: Add compat mutex for free-threaded Python builds In free-threaded Python (Py_GIL_DISABLED), the GIL no longer provides mutual exclusion. Existing code that assumes gil_scoped_acquire provides mutual exclusion would have data races. This adds a global compatibility mutex that restores the safety guarantee: - Acquired when gil_scoped_acquire is constructed (if not already held) - Released when gil_scoped_release is constructed (if held) - Ownership is tracked per-thread to handle the main thread case KNOWN LIMITATION: The global mutex conflicts with per-interpreter GILs (Py_MOD_PER_INTERPRETER_GIL_SUPPORTED). The "Per-Subinterpreter GIL" test deadlocks with this change. Future work could use per-interpreter mutexes. This is a proof-of-concept for discussion. --- include/pybind11/gil.h | 32 +++++++ include/pybind11/gil_simple.h | 88 ++++++++++++++++++- tests/test_with_catch/test_subinterpreter.cpp | 6 ++ 3 files changed, 123 insertions(+), 3 deletions(-) diff --git a/include/pybind11/gil.h b/include/pybind11/gil.h index 9e799b3cf7..1b84f75b3d 100644 --- a/include/pybind11/gil.h +++ b/include/pybind11/gil.h @@ -97,6 +97,13 @@ class gil_scoped_acquire { } inc_ref(); +# ifdef Py_GIL_DISABLED + if (!detail::compat_mutex_held_by_this_thread()) { + detail::get_compat_mutex().lock(); + detail::compat_mutex_held_by_this_thread() = true; + acquired_compat_mutex_ = true; + } +# endif } gil_scoped_acquire(const gil_scoped_acquire &) = delete; @@ -141,6 +148,12 @@ class gil_scoped_acquire { PYBIND11_NOINLINE void disarm() { active = false; } PYBIND11_NOINLINE ~gil_scoped_acquire() { +# ifdef Py_GIL_DISABLED + if (acquired_compat_mutex_) { + detail::compat_mutex_held_by_this_thread() = false; + detail::get_compat_mutex().unlock(); + } +# endif dec_ref(); if (release) { PyEval_SaveThread(); @@ -151,6 +164,9 @@ class gil_scoped_acquire { PyThreadState *tstate = nullptr; bool release = true; bool active = true; +# ifdef Py_GIL_DISABLED + bool acquired_compat_mutex_ = false; +# endif }; class gil_scoped_release { @@ -162,6 +178,13 @@ class gil_scoped_release { // `internals.tstate` for subsequent `gil_scoped_acquire` calls. Otherwise, an // initialization race could occur as multiple threads try `gil_scoped_acquire`. auto &internals = detail::get_internals(); +# ifdef Py_GIL_DISABLED + if (detail::compat_mutex_held_by_this_thread()) { + detail::compat_mutex_held_by_this_thread() = false; + detail::get_compat_mutex().unlock(); + released_compat_mutex_ = true; + } +# endif // NOLINTNEXTLINE(cppcoreguidelines-prefer-member-initializer) tstate = PyEval_SaveThread(); if (disassoc) { @@ -186,6 +209,12 @@ class gil_scoped_release { // `PyEval_RestoreThread()` should not be called if runtime is finalizing if (active) { PyEval_RestoreThread(tstate); +# ifdef Py_GIL_DISABLED + if (released_compat_mutex_) { + detail::get_compat_mutex().lock(); + detail::compat_mutex_held_by_this_thread() = true; + } +# endif } if (disassoc) { detail::get_internals().tstate = tstate; @@ -196,6 +225,9 @@ class gil_scoped_release { PyThreadState *tstate; bool disassoc; bool active = true; +# ifdef Py_GIL_DISABLED + bool released_compat_mutex_ = false; +# endif }; PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE) diff --git a/include/pybind11/gil_simple.h b/include/pybind11/gil_simple.h index 9ff428ad84..edc0d610bd 100644 --- a/include/pybind11/gil_simple.h +++ b/include/pybind11/gil_simple.h @@ -7,31 +7,113 @@ #include "detail/common.h" #include +#ifdef Py_GIL_DISABLED +# include +#endif PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE) +#ifdef Py_GIL_DISABLED +namespace detail { + +// Compatibility mutex for free-threaded Python builds. +// In traditional Python, the GIL provides mutual exclusion for code that acquires it. +// In free-threaded Python, there is no GIL, so existing code that assumes mutual exclusion +// after gil_scoped_acquire would have data races. This mutex restores that safety guarantee. +// +// This is intentionally a global mutex (not per-interpreter) for simplicity. The performance +// cost is acceptable as a safe default; code that needs maximum parallelism can be migrated +// to use explicit locking or the lighter-weight scoped_ensure_thread_state helper. +inline std::mutex &get_compat_mutex() { + static std::mutex mtx; + return mtx; +} + +// Thread-local flag to track whether this thread holds the compat mutex. +// This is needed because the main thread starts with Python initialized (holding the "GIL") +// but we don't lock the compat mutex at that point. We only want to lock/unlock when +// transitioning via gil_scoped_acquire/release. +inline bool &compat_mutex_held_by_this_thread() { + static thread_local bool held = false; + return held; +} + +inline void acquire_compat_mutex() { + if (!compat_mutex_held_by_this_thread()) { + get_compat_mutex().lock(); + compat_mutex_held_by_this_thread() = true; + } +} + +inline void release_compat_mutex() { + if (compat_mutex_held_by_this_thread()) { + compat_mutex_held_by_this_thread() = false; + get_compat_mutex().unlock(); + } +} + +} // namespace detail +#endif + class gil_scoped_acquire_simple { PyGILState_STATE state; +#ifdef Py_GIL_DISABLED + bool acquired_compat_mutex_ = false; +#endif public: - gil_scoped_acquire_simple() : state{PyGILState_Ensure()} {} + gil_scoped_acquire_simple() : state{PyGILState_Ensure()} { +#ifdef Py_GIL_DISABLED + if (!detail::compat_mutex_held_by_this_thread()) { + detail::get_compat_mutex().lock(); + detail::compat_mutex_held_by_this_thread() = true; + acquired_compat_mutex_ = true; + } +#endif + } gil_scoped_acquire_simple(const gil_scoped_acquire_simple &) = delete; gil_scoped_acquire_simple &operator=(const gil_scoped_acquire_simple &) = delete; - ~gil_scoped_acquire_simple() { PyGILState_Release(state); } + ~gil_scoped_acquire_simple() { +#ifdef Py_GIL_DISABLED + if (acquired_compat_mutex_) { + detail::compat_mutex_held_by_this_thread() = false; + detail::get_compat_mutex().unlock(); + } +#endif + PyGILState_Release(state); + } }; class gil_scoped_release_simple { PyThreadState *state; +#ifdef Py_GIL_DISABLED + bool released_compat_mutex_ = false; +#endif public: // PRECONDITION: The GIL must be held when this constructor is called. gil_scoped_release_simple() { assert(PyGILState_Check()); +#ifdef Py_GIL_DISABLED + if (detail::compat_mutex_held_by_this_thread()) { + detail::compat_mutex_held_by_this_thread() = false; + detail::get_compat_mutex().unlock(); + released_compat_mutex_ = true; + } +#endif state = PyEval_SaveThread(); } gil_scoped_release_simple(const gil_scoped_release_simple &) = delete; gil_scoped_release_simple &operator=(const gil_scoped_release_simple &) = delete; - ~gil_scoped_release_simple() { PyEval_RestoreThread(state); } + ~gil_scoped_release_simple() { + PyEval_RestoreThread(state); +#ifdef Py_GIL_DISABLED + if (released_compat_mutex_) { + detail::get_compat_mutex().lock(); + detail::compat_mutex_held_by_this_thread() = true; + } +#endif + } }; PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE) diff --git a/tests/test_with_catch/test_subinterpreter.cpp b/tests/test_with_catch/test_subinterpreter.cpp index 26e0597582..ed70bccd33 100644 --- a/tests/test_with_catch/test_subinterpreter.cpp +++ b/tests/test_with_catch/test_subinterpreter.cpp @@ -310,6 +310,12 @@ TEST_CASE("Multiple Subinterpreters") { # ifdef Py_MOD_PER_INTERPRETER_GIL_SUPPORTED TEST_CASE("Per-Subinterpreter GIL") { + // Test is skipped on free-threaded Python because the pybind11 compat mutex + // (which restores GIL-like mutual exclusion) conflicts with per-interpreter GILs. +# ifdef Py_GIL_DISABLED + PYBIND11_CATCH2_SKIP_IF(true, "Skipped: compat mutex conflicts with per-interpreter GILs"); +# endif + auto main_int = py::module_::import("external_module").attr("internals_at")().cast();