diff --git a/include/pybind11/detail/internals.h b/include/pybind11/detail/internals.h index 858de67525..95c1f2dfc1 100644 --- a/include/pybind11/detail/internals.h +++ b/include/pybind11/detail/internals.h @@ -420,8 +420,8 @@ inline PyThreadState *get_thread_state_unchecked() { /// We use this counter to figure out if there are or have been multiple subinterpreters active at /// any point. This must never decrease while any interpreter may be running in any thread! -inline std::atomic &get_num_interpreters_seen() { - static std::atomic counter(0); +inline std::atomic &get_num_interpreters_seen() { + static std::atomic counter(0); return counter; } @@ -550,6 +550,91 @@ inline object get_python_state_dict() { return state_dict; } +// Get or create per-storage capsule in the current interpreter's state dict. +// - The storage is interpreter-dependent: different interpreters will have different storage. +// This is important when using multiple-interpreters, to avoid sharing unshareable objects +// between interpreters. +// - There is one storage per `key` in an interpreter and it is accessible between all extensions +// in the same interpreter. +// - The life span of the storage is tied to the interpreter: it will be kept alive until the +// interpreter shuts down. +// +// Use test-and-set pattern with `PyDict_SetDefault` for thread-safe concurrent access. +// WARNING: There can be multiple threads creating the storage at the same time, while only one +// will succeed in inserting its capsule into the dict. Therefore, the deleter will be +// used to clean up the storage of the unused capsules. +// +// Returns: pair of (pointer to storage, bool indicating if newly created). +// The bool follows std::map::insert convention: true = created, false = existed. +template +std::pair atomic_get_or_create_in_state_dict(const char *key, + bool clear_destructor = false) { + error_scope err_scope; // preserve any existing Python error states + + auto state_dict = reinterpret_borrow(get_python_state_dict()); + PyObject *capsule_obj = nullptr; + bool created = false; + + // Try to get existing storage (fast path). + capsule_obj = dict_getitemstring(state_dict.ptr(), key); + if (capsule_obj == nullptr) { + if (PyErr_Occurred()) { + throw error_already_set(); + } + // Storage doesn't exist yet, create a new one. + // Use unique_ptr for exception safety: if capsule creation throws, the storage is + // automatically deleted. + auto storage_ptr = std::unique_ptr(new Payload{}); + // Create capsule with destructor to clean up when the interpreter shuts down. + auto new_capsule = capsule( + storage_ptr.get(), + // The destructor will be called when the capsule is GC'ed. + // - If our capsule is inserted into the dict below, it will be kept alive until + // interpreter shutdown, so the destructor will be called at that time. + // - If our capsule is NOT inserted (another thread inserted first), it will be + // destructed when going out of scope here, so the destructor will be called + // immediately, which will also free the storage. + /*destructor=*/[](void *ptr) -> void { delete static_cast(ptr); }); + // At this point, the capsule object is created successfully. + // Release the unique_ptr and let the capsule object own the storage to avoid double-free. + (void) storage_ptr.release(); + + // Use `PyDict_SetDefault` for atomic test-and-set: + // - If key doesn't exist, inserts our capsule and returns it. + // - If key exists (another thread inserted first), returns the existing value. + // This is thread-safe because `PyDict_SetDefault` will hold a lock on the dict. + // + // NOTE: Here we use `PyDict_SetDefault` instead of `PyDict_SetDefaultRef` because the + // capsule is kept alive until interpreter shutdown, so we do not need to handle + // incref and decref here. + capsule_obj = dict_setdefaultstring(state_dict.ptr(), key, new_capsule.ptr()); + if (capsule_obj == nullptr) { + throw error_already_set(); + } + created = (capsule_obj == new_capsule.ptr()); + if (clear_destructor && created) { + // Our capsule was inserted. + // Remove the destructor to leak the storage on interpreter shutdown. + if (PyCapsule_SetDestructor(capsule_obj, nullptr) < 0) { + throw error_already_set(); + } + } + // - If key already existed, our `new_capsule` is not inserted, it will be destructed when + // going out of scope here, which will also free the storage. + // - Otherwise, our `new_capsule` is now in the dict, and it owns the storage and the state + // dict will incref it. + } + + // Get the storage pointer from the capsule. + void *raw_ptr = PyCapsule_GetPointer(capsule_obj, /*name=*/nullptr); + if (!raw_ptr) { + raise_from(PyExc_SystemError, + "pybind11::detail::atomic_get_or_create_in_state_dict() FAILED"); + throw error_already_set(); + } + return std::pair(static_cast(raw_ptr), created); +} + template class internals_pp_manager { public: @@ -622,26 +707,18 @@ class internals_pp_manager { : holder_id_(id), on_fetch_(on_fetch) {} std::unique_ptr *get_or_create_pp_in_state_dict() { - error_scope err_scope; - dict state_dict = get_python_state_dict(); - auto internals_obj - = reinterpret_steal(dict_getitemstringref(state_dict.ptr(), holder_id_)); - std::unique_ptr *pp = nullptr; - if (internals_obj) { - void *raw_ptr = PyCapsule_GetPointer(internals_obj.ptr(), /*name=*/nullptr); - if (!raw_ptr) { - raise_from(PyExc_SystemError, - "pybind11::detail::internals_pp_manager::get_pp_from_dict() FAILED"); - throw error_already_set(); - } - pp = reinterpret_cast *>(raw_ptr); - if (on_fetch_ && pp) { - on_fetch_(pp->get()); - } - } else { - pp = new std::unique_ptr; - // NOLINTNEXTLINE(bugprone-casting-through-void) - state_dict[holder_id_] = capsule(reinterpret_cast(pp)); + // The `unique_ptr` output is leaked on interpreter shutdown. Once an + // instance is created, it will never be deleted until the process exits (compare to + // interpreter shutdown in multiple-interpreter scenarios). + // Because we cannot guarantee the order of destruction of capsules in the interpreter + // state dict, leaking avoids potential use-after-free issues during interpreter shutdown. + auto result = atomic_get_or_create_in_state_dict>( + holder_id_, /*clear_destructor=*/true); + auto *pp = result.first; + bool created = result.second; + // Only call on_fetch_ when fetching existing internals, not when creating new ones. + if (!created && on_fetch_ && pp) { + on_fetch_(pp->get()); } return pp; } @@ -660,6 +737,8 @@ class internals_pp_manager { char const *holder_id_ = nullptr; on_fetch_function *on_fetch_ = nullptr; + // Pointer-to-pointer to the singleton internals for the first seen interpreter (may not be the + // main interpreter) std::unique_ptr *internals_singleton_pp_; }; diff --git a/include/pybind11/gil_safe_call_once.h b/include/pybind11/gil_safe_call_once.h index 2abd8fc326..3c8ed84dfc 100644 --- a/include/pybind11/gil_safe_call_once.h +++ b/include/pybind11/gil_safe_call_once.h @@ -3,17 +3,31 @@ #pragma once #include "detail/common.h" +#include "detail/internals.h" #include "gil.h" #include #include -#ifdef Py_GIL_DISABLED +#if defined(Py_GIL_DISABLED) || defined(PYBIND11_HAS_SUBINTERPRETER_SUPPORT) # include #endif +#ifdef PYBIND11_HAS_SUBINTERPRETER_SUPPORT +# include +# include +# include +#endif PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE) +PYBIND11_NAMESPACE_BEGIN(detail) +#if defined(Py_GIL_DISABLED) || defined(PYBIND11_HAS_SUBINTERPRETER_SUPPORT) +using atomic_bool = std::atomic_bool; +#else +using atomic_bool = bool; +#endif +PYBIND11_NAMESPACE_END(detail) + // Use the `gil_safe_call_once_and_store` class below instead of the naive // // static auto imported_obj = py::module_::import("module_name"); // BAD, DO NOT USE! @@ -48,12 +62,23 @@ PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE) // functions, which is usually the case. // // For in-depth background, see docs/advanced/deadlock.md +#ifndef PYBIND11_HAS_SUBINTERPRETER_SUPPORT +// Subinterpreter support is disabled. +// In this case, we can store the result globally, because there is only a single interpreter. +// +// The life span of the stored result is the entire process lifetime. It is leaked on process +// termination to avoid destructor calls after the Python interpreter was finalized. template class gil_safe_call_once_and_store { public: // PRECONDITION: The GIL must be held when `call_once_and_store_result()` is called. + // + // NOTE: The second parameter (finalize callback) is intentionally unused when subinterpreter + // support is disabled. In that case, storage is process-global and intentionally leaked to + // avoid calling destructors after the Python interpreter has been finalized. template - gil_safe_call_once_and_store &call_once_and_store_result(Callable &&fn) { + gil_safe_call_once_and_store &call_once_and_store_result(Callable &&fn, + void (*)(T &) /*unused*/ = nullptr) { if (!is_initialized_) { // This read is guarded by the GIL. // Multiple threads may enter here, because the GIL is released in the next line and // CPython API calls in the `fn()` call below may release and reacquire the GIL. @@ -74,29 +99,175 @@ class gil_safe_call_once_and_store { T &get_stored() { assert(is_initialized_); PYBIND11_WARNING_PUSH -#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ < 5 +# if !defined(__clang__) && defined(__GNUC__) && __GNUC__ < 5 // Needed for gcc 4.8.5 PYBIND11_WARNING_DISABLE_GCC("-Wstrict-aliasing") -#endif +# endif return *reinterpret_cast(storage_); PYBIND11_WARNING_POP } constexpr gil_safe_call_once_and_store() = default; + // The instance is a global static, so its destructor runs when the process + // is terminating. Therefore, do nothing here because the Python interpreter + // may have been finalized already. PYBIND11_DTOR_CONSTEXPR ~gil_safe_call_once_and_store() = default; + // Disable copy and move operations. + gil_safe_call_once_and_store(const gil_safe_call_once_and_store &) = delete; + gil_safe_call_once_and_store(gil_safe_call_once_and_store &&) = delete; + gil_safe_call_once_and_store &operator=(const gil_safe_call_once_and_store &) = delete; + gil_safe_call_once_and_store &operator=(gil_safe_call_once_and_store &&) = delete; + private: + // The global static storage (per-process) when subinterpreter support is disabled. alignas(T) char storage_[sizeof(T)] = {}; std::once_flag once_flag_; -#ifdef Py_GIL_DISABLED - std::atomic_bool -#else - bool -#endif - is_initialized_{false}; + // The `is_initialized_`-`storage_` pair is very similar to `std::optional`, // but the latter does not have the triviality properties of former, // therefore `std::optional` is not a viable alternative here. + detail::atomic_bool is_initialized_{false}; }; +#else +// Subinterpreter support is enabled. +// In this case, we should store the result per-interpreter instead of globally, because each +// subinterpreter has its own separate state. The cached result may not shareable across +// interpreters (e.g., imported modules and their members). + +PYBIND11_NAMESPACE_BEGIN(detail) + +template +struct call_once_storage { + alignas(T) char storage[sizeof(T)] = {}; + std::once_flag once_flag; + void (*finalize)(T &) = nullptr; + std::atomic_bool is_initialized{false}; + + call_once_storage() = default; + ~call_once_storage() { + if (is_initialized) { + if (finalize != nullptr) { + finalize(*reinterpret_cast(storage)); + } else { + reinterpret_cast(storage)->~T(); + } + } + } + call_once_storage(const call_once_storage &) = delete; + call_once_storage(call_once_storage &&) = delete; + call_once_storage &operator=(const call_once_storage &) = delete; + call_once_storage &operator=(call_once_storage &&) = delete; +}; + +PYBIND11_NAMESPACE_END(detail) + +// Prefix for storage keys in the interpreter state dict. +# define PYBIND11_CALL_ONCE_STORAGE_KEY_PREFIX PYBIND11_INTERNALS_ID "_call_once_storage__" + +// The life span of the stored result is the entire interpreter lifetime. An additional +// `finalize_fn` can be provided to clean up the stored result when the interpreter is destroyed. +template +class gil_safe_call_once_and_store { +public: + // PRECONDITION: The GIL must be held when `call_once_and_store_result()` is called. + template + gil_safe_call_once_and_store &call_once_and_store_result(Callable &&fn, + void (*finalize_fn)(T &) = nullptr) { + if (!is_last_storage_valid()) { + // Multiple threads may enter here, because the GIL is released in the next line and + // CPython API calls in the `fn()` call below may release and reacquire the GIL. + gil_scoped_release gil_rel; // Needed to establish lock ordering. + // There can be multiple threads going through here. + storage_type *value = nullptr; + { + gil_scoped_acquire gil_acq; // Restore lock ordering. + // This function is thread-safe under free-threading. + value = get_or_create_storage_in_state_dict(); + } + assert(value != nullptr); + std::call_once(value->once_flag, [&] { + // Only one thread will ever enter here. + gil_scoped_acquire gil_acq; + // fn may release, but will reacquire, the GIL. + ::new (value->storage) T(fn()); + value->finalize = finalize_fn; + value->is_initialized = true; + last_storage_ptr_ = reinterpret_cast(value->storage); + is_initialized_by_at_least_one_interpreter_ = true; + }); + // All threads will observe `is_initialized_by_at_least_one_interpreter_` as true here. + } + // Intentionally not returning `T &` to ensure the calling code is self-documenting. + return *this; + } + + // This must only be called after `call_once_and_store_result()` was called. + T &get_stored() { + T *result = last_storage_ptr_; + if (!is_last_storage_valid()) { + gil_scoped_acquire gil_acq; + auto *value = get_or_create_storage_in_state_dict(); + result = last_storage_ptr_ = reinterpret_cast(value->storage); + } + assert(result != nullptr); + return *result; + } + + constexpr gil_safe_call_once_and_store() = default; + // The instance is a global static, so its destructor runs when the process + // is terminating. Therefore, do nothing here because the Python interpreter + // may have been finalized already. + PYBIND11_DTOR_CONSTEXPR ~gil_safe_call_once_and_store() = default; + + // Disable copy and move operations because the memory address is used as key. + gil_safe_call_once_and_store(const gil_safe_call_once_and_store &) = delete; + gil_safe_call_once_and_store(gil_safe_call_once_and_store &&) = delete; + gil_safe_call_once_and_store &operator=(const gil_safe_call_once_and_store &) = delete; + gil_safe_call_once_and_store &operator=(gil_safe_call_once_and_store &&) = delete; + +private: + using storage_type = detail::call_once_storage; + + // Indicator of fast path for single-interpreter case. + bool is_last_storage_valid() const { + return is_initialized_by_at_least_one_interpreter_ + && detail::get_num_interpreters_seen() == 1; + } + + // Get the unique key for this storage instance in the interpreter's state dict. + // The return type should not be `py::str` because PyObject is interpreter-dependent. + std::string get_storage_key() const { + // The instance is expected to be global static, so using its address as unique identifier. + // The typical usage is like: + // + // PYBIND11_CONSTINIT static gil_safe_call_once_and_store storage; + // + return PYBIND11_CALL_ONCE_STORAGE_KEY_PREFIX + + std::to_string(reinterpret_cast(this)); + } + + // Get or create per-storage capsule in the current interpreter's state dict. + // The storage is interpreter-dependent and will not be shared across interpreters. + storage_type *get_or_create_storage_in_state_dict() { + return detail::atomic_get_or_create_in_state_dict(get_storage_key().c_str()) + .first; + } + + // No storage needed when subinterpreter support is enabled. + // The actual storage is stored in the per-interpreter state dict via + // `get_or_create_storage_in_state_dict()`. + + // Fast local cache to avoid repeated lookups when there are no multiple interpreters. + // This is only valid if there is a single interpreter. Otherwise, it is not used. + // WARNING: We cannot use thread local cache similar to `internals_pp_manager::internals_p_tls` + // because the thread local storage cannot be explicitly invalidated when interpreters + // are destroyed (unlike `internals_pp_manager` which has explicit hooks for that). + T *last_storage_ptr_ = nullptr; + // This flag is true if the value has been initialized by any interpreter (may not be the + // current one). + detail::atomic_bool is_initialized_by_at_least_one_interpreter_{false}; +}; +#endif PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE) diff --git a/include/pybind11/pytypes.h b/include/pybind11/pytypes.h index 0ab0b73e1f..eb02946a86 100644 --- a/include/pybind11/pytypes.h +++ b/include/pybind11/pytypes.h @@ -997,8 +997,10 @@ inline PyObject *dict_getitem(PyObject *v, PyObject *key) { return rv; } +// PyDict_GetItemStringRef was added in Python 3.13.0a1. +// See also: https://github.com/python/pythoncapi-compat/blob/main/pythoncapi_compat.h inline PyObject *dict_getitemstringref(PyObject *v, const char *key) { -#if PY_VERSION_HEX >= 0x030D0000 +#if PY_VERSION_HEX >= 0x030D00A1 PyObject *rv = nullptr; if (PyDict_GetItemStringRef(v, key, &rv) < 0) { throw error_already_set(); @@ -1014,6 +1016,46 @@ inline PyObject *dict_getitemstringref(PyObject *v, const char *key) { #endif } +inline PyObject *dict_setdefaultstring(PyObject *v, const char *key, PyObject *defaultobj) { + PyObject *kv = PyUnicode_FromString(key); + if (kv == nullptr) { + throw error_already_set(); + } + + PyObject *rv = PyDict_SetDefault(v, kv, defaultobj); + Py_DECREF(kv); + if (rv == nullptr) { + throw error_already_set(); + } + return rv; +} + +// PyDict_SetDefaultRef was added in Python 3.13.0a4. +// See also: https://github.com/python/pythoncapi-compat/blob/main/pythoncapi_compat.h +inline PyObject *dict_setdefaultstringref(PyObject *v, const char *key, PyObject *defaultobj) { +#if PY_VERSION_HEX >= 0x030D00A4 + PyObject *kv = PyUnicode_FromString(key); + if (kv == nullptr) { + throw error_already_set(); + } + + PyObject *rv = nullptr; + if (PyDict_SetDefaultRef(v, kv, defaultobj, &rv) < 0) { + Py_DECREF(kv); + throw error_already_set(); + } + Py_DECREF(kv); + return rv; +#else + PyObject *rv = dict_setdefaultstring(v, key, defaultobj); + if (rv == nullptr || PyErr_Occurred()) { + throw error_already_set(); + } + Py_XINCREF(rv); + return rv; +#endif +} + // Helper aliases/functions to support implicit casting of values given to python // accessors/methods. When given a pyobject, this simply returns the pyobject as-is; for other C++ // type, the value goes through pybind11::cast(obj) to convert it to an `object`. diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 47ba4aa863..3d618662f9 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -578,24 +578,32 @@ add_custom_target( USES_TERMINAL) if(NOT PYBIND11_CUDA_TESTS) - # This module doesn't get mixed with other test modules because those aren't subinterpreter safe. - pybind11_add_module(mod_per_interpreter_gil THIN_LTO mod_per_interpreter_gil.cpp) - pybind11_add_module(mod_shared_interpreter_gil THIN_LTO mod_shared_interpreter_gil.cpp) - set_target_properties(mod_per_interpreter_gil PROPERTIES LIBRARY_OUTPUT_DIRECTORY - "$<1:${CMAKE_CURRENT_BINARY_DIR}>") - set_target_properties(mod_shared_interpreter_gil PROPERTIES LIBRARY_OUTPUT_DIRECTORY - "$<1:${CMAKE_CURRENT_BINARY_DIR}>") + set(PYBIND11_MULTIPLE_INTERPRETERS_TEST_MODULES + mod_per_interpreter_gil mod_shared_interpreter_gil mod_per_interpreter_gil_with_singleton) + + # These modules don't get mixed with other test modules because those aren't subinterpreter safe. + foreach(mod IN LISTS PYBIND11_MULTIPLE_INTERPRETERS_TEST_MODULES) + pybind11_add_module("${mod}" THIN_LTO "${mod}.cpp") + pybind11_enable_warnings("${mod}") + endforeach() + + # Put the built modules next to `pybind11_tests.so` so that the test scripts can find them. + get_target_property(pybind11_tests_output_directory pybind11_tests LIBRARY_OUTPUT_DIRECTORY) + foreach(mod IN LISTS PYBIND11_MULTIPLE_INTERPRETERS_TEST_MODULES) + set_target_properties("${mod}" PROPERTIES LIBRARY_OUTPUT_DIRECTORY + "${pybind11_tests_output_directory}") + endforeach() + if(PYBIND11_TEST_SMART_HOLDER) - target_compile_definitions( - mod_per_interpreter_gil - PUBLIC -DPYBIND11_RUN_TESTING_WITH_SMART_HOLDER_AS_DEFAULT_BUT_NEVER_USE_IN_PRODUCTION_PLEASE - ) - target_compile_definitions( - mod_shared_interpreter_gil - PUBLIC -DPYBIND11_RUN_TESTING_WITH_SMART_HOLDER_AS_DEFAULT_BUT_NEVER_USE_IN_PRODUCTION_PLEASE - ) + foreach(mod IN LISTS PYBIND11_MULTIPLE_INTERPRETERS_TEST_MODULES) + target_compile_definitions( + "${mod}" + PUBLIC + -DPYBIND11_RUN_TESTING_WITH_SMART_HOLDER_AS_DEFAULT_BUT_NEVER_USE_IN_PRODUCTION_PLEASE) + endforeach() endif() - add_dependencies(pytest mod_per_interpreter_gil mod_shared_interpreter_gil) + + add_dependencies(pytest ${PYBIND11_MULTIPLE_INTERPRETERS_TEST_MODULES}) endif() if(PYBIND11_TEST_OVERRIDE) diff --git a/tests/mod_per_interpreter_gil_with_singleton.cpp b/tests/mod_per_interpreter_gil_with_singleton.cpp new file mode 100644 index 0000000000..874b93c77f --- /dev/null +++ b/tests/mod_per_interpreter_gil_with_singleton.cpp @@ -0,0 +1,141 @@ +#include +#include + +#include + +namespace py = pybind11; + +#ifdef PYBIND11_HAS_NATIVE_ENUM +# include +#endif + +// A singleton class that holds references to certain Python objects +// This singleton is per-interpreter using gil_safe_call_once_and_store +class MySingleton { +public: + MySingleton() = default; + ~MySingleton() = default; + MySingleton(const MySingleton &) = delete; + MySingleton &operator=(const MySingleton &) = delete; + MySingleton(MySingleton &&) = default; + MySingleton &operator=(MySingleton &&) = default; + + static MySingleton &get_instance() { + PYBIND11_CONSTINIT static py::gil_safe_call_once_and_store storage; + return storage + .call_once_and_store_result([]() -> MySingleton { + MySingleton instance{}; + + auto emplace = [&instance](const py::handle &obj) -> void { + obj.inc_ref(); // Ensure the object is not GC'd while interpreter is alive + instance.objects.emplace_back(obj); + }; + + // Example objects to store in the singleton + emplace(py::type::handle_of(py::none())); // static type + emplace(py::type::handle_of(py::tuple())); // static type + emplace(py::type::handle_of(py::list())); // static type + emplace(py::type::handle_of(py::dict())); // static type + emplace(py::module_::import("collections").attr("OrderedDict")); // static type + emplace(py::module_::import("collections").attr("defaultdict")); // heap type + emplace(py::module_::import("collections").attr("deque")); // heap type + + assert(instance.objects.size() == 7); + return instance; + }) + .get_stored(); + } + + std::vector &get_objects() { return objects; } + + static void init() { + // Ensure the singleton is created + auto &instance = get_instance(); + (void) instance; // suppress unused variable warning + assert(instance.objects.size() == 7); + // Register cleanup at interpreter exit + py::module_::import("atexit").attr("register")(py::cpp_function(&MySingleton::clear)); + } + + static void clear() { + auto &instance = get_instance(); + (void) instance; // suppress unused variable warning + assert(instance.objects.size() == 7); + for (const auto &obj : instance.objects) { + obj.dec_ref(); + } + instance.objects.clear(); + } + +private: + std::vector objects; +}; + +class MyClass { +public: + explicit MyClass(py::ssize_t v) : value(v) {} + py::ssize_t get_value() const { return value; } + +private: + py::ssize_t value; +}; + +class MyGlobalError : public std::runtime_error { +public: + using std::runtime_error::runtime_error; +}; + +class MyLocalError : public std::runtime_error { +public: + using std::runtime_error::runtime_error; +}; + +enum class MyEnum : int { + ONE = 1, + TWO = 2, + THREE = 3, +}; + +PYBIND11_MODULE(mod_per_interpreter_gil_with_singleton, + m, + py::mod_gil_not_used(), + py::multiple_interpreters::per_interpreter_gil()) { +#ifdef PYBIND11_HAS_SUBINTERPRETER_SUPPORT + m.attr("defined_PYBIND11_HAS_SUBINTERPRETER_SUPPORT") = true; +#else + m.attr("defined_PYBIND11_HAS_SUBINTERPRETER_SUPPORT") = false; +#endif + + MySingleton::init(); + + // Ensure py::multiple_interpreters::per_interpreter_gil() works with singletons using + // py::gil_safe_call_once_and_store + m.def( + "get_objects_in_singleton", + []() -> std::vector { return MySingleton::get_instance().get_objects(); }, + "Get the list of objects stored in the singleton"); + + // Ensure py::multiple_interpreters::per_interpreter_gil() works with class bindings + py::class_(m, "MyClass") + .def(py::init()) + .def("get_value", &MyClass::get_value); + + // Ensure py::multiple_interpreters::per_interpreter_gil() works with global exceptions + py::register_exception(m, "MyGlobalError"); + // Ensure py::multiple_interpreters::per_interpreter_gil() works with local exceptions + py::register_local_exception(m, "MyLocalError"); + +#ifdef PYBIND11_HAS_NATIVE_ENUM + // Ensure py::multiple_interpreters::per_interpreter_gil() works with native_enum + py::native_enum(m, "MyEnum", "enum.IntEnum") + .value("ONE", MyEnum::ONE) + .value("TWO", MyEnum::TWO) + .value("THREE", MyEnum::THREE) + .finalize(); +#else + py::enum_(m, "MyEnum") + .value("ONE", MyEnum::ONE) + .value("TWO", MyEnum::TWO) + .value("THREE", MyEnum::THREE); +#endif +} diff --git a/tests/test_multiple_interpreters.py b/tests/test_multiple_interpreters.py index 627ccc591d..f706e9282b 100644 --- a/tests/test_multiple_interpreters.py +++ b/tests/test_multiple_interpreters.py @@ -3,10 +3,14 @@ import contextlib import os import pickle +import subprocess import sys +import textwrap import pytest +import pybind11_tests + # 3.14.0b3+, though sys.implementation.supports_isolated_interpreters is being added in b4 # Can be simplified when we drop support for the first three betas CONCURRENT_INTERPRETERS_SUPPORT = ( @@ -82,7 +86,7 @@ def run_string( def test_independent_subinterpreters(): """Makes sure the internals object differs across independent subinterpreters""" - sys.path.append(".") + sys.path.insert(0, os.path.dirname(pybind11_tests.__file__)) run_string, create = get_interpreters(modern=True) @@ -91,12 +95,14 @@ def test_independent_subinterpreters(): if not m.defined_PYBIND11_HAS_SUBINTERPRETER_SUPPORT: pytest.skip("Does not have subinterpreter support compiled in") - code = """ -import mod_per_interpreter_gil as m -import pickle -with open(pipeo, 'wb') as f: - pickle.dump(m.internals_at(), f) -""" + code = textwrap.dedent( + """ + import mod_per_interpreter_gil as m + import pickle + with open(pipeo, 'wb') as f: + pickle.dump(m.internals_at(), f) + """ + ).strip() with create() as interp1, create() as interp2: try: @@ -131,7 +137,7 @@ def test_independent_subinterpreters(): def test_independent_subinterpreters_modern(): """Makes sure the internals object differs across independent subinterpreters. Modern (3.14+) syntax.""" - sys.path.append(".") + sys.path.insert(0, os.path.dirname(pybind11_tests.__file__)) m = pytest.importorskip("mod_per_interpreter_gil") @@ -140,11 +146,13 @@ def test_independent_subinterpreters_modern(): from concurrent import interpreters - code = """ -import mod_per_interpreter_gil as m + code = textwrap.dedent( + """ + import mod_per_interpreter_gil as m -values.put_nowait(m.internals_at()) -""" + values.put_nowait(m.internals_at()) + """ + ).strip() with contextlib.closing(interpreters.create()) as interp1, contextlib.closing( interpreters.create() @@ -175,7 +183,7 @@ def test_independent_subinterpreters_modern(): def test_dependent_subinterpreters(): """Makes sure the internals object differs across subinterpreters""" - sys.path.append(".") + sys.path.insert(0, os.path.dirname(pybind11_tests.__file__)) run_string, create = get_interpreters(modern=False) @@ -184,12 +192,14 @@ def test_dependent_subinterpreters(): if not m.defined_PYBIND11_HAS_SUBINTERPRETER_SUPPORT: pytest.skip("Does not have subinterpreter support compiled in") - code = """ -import mod_shared_interpreter_gil as m -import pickle -with open(pipeo, 'wb') as f: - pickle.dump(m.internals_at(), f) -""" + code = textwrap.dedent( + """ + import mod_shared_interpreter_gil as m + import pickle + with open(pipeo, 'wb') as f: + pickle.dump(m.internals_at(), f) + """ + ).strip() with create("legacy") as interp1: pipei, pipeo = os.pipe() @@ -198,3 +208,252 @@ def test_dependent_subinterpreters(): res1 = pickle.load(f) assert res1 != m.internals_at(), "internals should differ from main interpreter" + + +PREAMBLE_CODE = textwrap.dedent( + f""" + def test(): + import sys + + sys.path.insert(0, {os.path.dirname(pybind11_tests.__file__)!r}) + + import collections + import mod_per_interpreter_gil_with_singleton as m + + objects = m.get_objects_in_singleton() + expected = [ + type(None), + tuple, + list, + dict, + collections.OrderedDict, + collections.defaultdict, + collections.deque, + ] + assert objects == expected, f"Expected {{expected!r}}, got {{objects!r}}." + + assert hasattr(m, 'MyClass'), "Module missing MyClass" + assert hasattr(m, 'MyGlobalError'), "Module missing MyGlobalError" + assert hasattr(m, 'MyLocalError'), "Module missing MyLocalError" + assert hasattr(m, 'MyEnum'), "Module missing MyEnum" + """ +).lstrip() + + +@pytest.mark.xfail( + reason="Duplicate C++ type registration under multiple-interpreters, needs investigation.", + # raises=interpreters.ExecutionFailed, # need to import the module + strict=False, +) +@pytest.mark.skipif( + sys.platform.startswith("emscripten"), reason="Requires loadable modules" +) +@pytest.mark.skipif(not CONCURRENT_INTERPRETERS_SUPPORT, reason="Requires 3.14.0b3+") +def test_import_module_with_singleton_per_interpreter(): + """Tests that a singleton storing Python objects works correctly per-interpreter""" + from concurrent import interpreters + + code = f"{PREAMBLE_CODE.strip()}\n\ntest()\n" + with contextlib.closing(interpreters.create()) as interp: + interp.exec(code) + + +def check_script_success_in_subprocess(code: str, *, rerun: int = 8) -> None: + """Runs the given code in a subprocess.""" + code = textwrap.dedent(code).strip() + try: + for _ in range(rerun): # run flakily failing test multiple times + subprocess.check_output( + [sys.executable, "-c", code], + cwd=os.getcwd(), + stderr=subprocess.STDOUT, + text=True, + ) + except subprocess.CalledProcessError as ex: + raise RuntimeError( + f"Subprocess failed with exit code {ex.returncode}.\n\n" + f"Code:\n" + f"```python\n" + f"{code}\n" + f"```\n\n" + f"Output:\n" + f"{ex.output}" + ) from ex + + +@pytest.mark.xfail( + reason="Duplicate C++ type registration under multiple-interpreters, needs investigation.", + raises=RuntimeError, + strict=False, +) +@pytest.mark.skipif( + sys.platform.startswith("emscripten"), reason="Requires loadable modules" +) +@pytest.mark.skipif(not CONCURRENT_INTERPRETERS_SUPPORT, reason="Requires 3.14.0b3+") +def test_import_in_subinterpreter_after_main(): + """Tests that importing a module in a subinterpreter after the main interpreter works correctly""" + check_script_success_in_subprocess( + PREAMBLE_CODE + + textwrap.dedent( + """ + import contextlib + import gc + from concurrent import interpreters + + test() + + interp = None + with contextlib.closing(interpreters.create()) as interp: + interp.call(test) + + del interp + for _ in range(5): + gc.collect() + """ + ) + ) + + check_script_success_in_subprocess( + PREAMBLE_CODE + + textwrap.dedent( + """ + import contextlib + import gc + import random + from concurrent import interpreters + + test() + + interps = interp = None + with contextlib.ExitStack() as stack: + interps = [ + stack.enter_context(contextlib.closing(interpreters.create())) + for _ in range(8) + ] + random.shuffle(interps) + for interp in interps: + interp.call(test) + + del interps, interp, stack + for _ in range(5): + gc.collect() + """ + ) + ) + + +@pytest.mark.xfail( + reason="Duplicate C++ type registration under multiple-interpreters, needs investigation.", + raises=RuntimeError, + strict=False, +) +@pytest.mark.skipif( + sys.platform.startswith("emscripten"), reason="Requires loadable modules" +) +@pytest.mark.skipif(not CONCURRENT_INTERPRETERS_SUPPORT, reason="Requires 3.14.0b3+") +def test_import_in_subinterpreter_before_main(): + """Tests that importing a module in a subinterpreter before the main interpreter works correctly""" + check_script_success_in_subprocess( + PREAMBLE_CODE + + textwrap.dedent( + """ + import contextlib + import gc + from concurrent import interpreters + + interp = None + with contextlib.closing(interpreters.create()) as interp: + interp.call(test) + + test() + + del interp + for _ in range(5): + gc.collect() + """ + ) + ) + + check_script_success_in_subprocess( + PREAMBLE_CODE + + textwrap.dedent( + """ + import contextlib + import gc + from concurrent import interpreters + + interps = interp = None + with contextlib.ExitStack() as stack: + interps = [ + stack.enter_context(contextlib.closing(interpreters.create())) + for _ in range(8) + ] + for interp in interps: + interp.call(test) + + test() + + del interps, interp, stack + for _ in range(5): + gc.collect() + """ + ) + ) + + check_script_success_in_subprocess( + PREAMBLE_CODE + + textwrap.dedent( + """ + import contextlib + import gc + from concurrent import interpreters + + interps = interp = None + with contextlib.ExitStack() as stack: + interps = [ + stack.enter_context(contextlib.closing(interpreters.create())) + for _ in range(8) + ] + for interp in interps: + interp.call(test) + + test() + + del interps, interp, stack + for _ in range(5): + gc.collect() + """ + ) + ) + + +@pytest.mark.xfail( + reason="Duplicate C++ type registration under multiple-interpreters, needs investigation.", + raises=RuntimeError, + strict=False, +) +@pytest.mark.skipif( + sys.platform.startswith("emscripten"), reason="Requires loadable modules" +) +@pytest.mark.skipif(not CONCURRENT_INTERPRETERS_SUPPORT, reason="Requires 3.14.0b3+") +def test_import_in_subinterpreter_concurrently(): + """Tests that importing a module in multiple subinterpreters concurrently works correctly""" + check_script_success_in_subprocess( + PREAMBLE_CODE + + textwrap.dedent( + """ + import gc + from concurrent.futures import InterpreterPoolExecutor, as_completed + + futures = future = None + with InterpreterPoolExecutor(max_workers=16) as executor: + futures = [executor.submit(test) for _ in range(32)] + for future in as_completed(futures): + future.result() + del futures, future, executor + + for _ in range(5): + gc.collect() + """ + ) + ) diff --git a/tests/test_with_catch/test_subinterpreter.cpp b/tests/test_with_catch/test_subinterpreter.cpp index 2783133881..b17b6fbc36 100644 --- a/tests/test_with_catch/test_subinterpreter.cpp +++ b/tests/test_with_catch/test_subinterpreter.cpp @@ -1,5 +1,6 @@ #include #ifdef PYBIND11_HAS_SUBINTERPRETER_SUPPORT +# include # include // Silence MSVC C++17 deprecation warning from Catch regarding std::uncaught_exceptions (up to @@ -41,6 +42,30 @@ void unsafe_reset_internals_for_single_interpreter() { py::detail::get_local_internals(); } +py::object &get_dict_type_object() { + PYBIND11_CONSTINIT static py::gil_safe_call_once_and_store storage; + return storage + .call_once_and_store_result( + []() -> py::object { return py::module_::import("builtins").attr("dict"); }) + .get_stored(); +} + +py::object &get_ordered_dict_type_object() { + PYBIND11_CONSTINIT static py::gil_safe_call_once_and_store storage; + return storage + .call_once_and_store_result( + []() -> py::object { return py::module_::import("collections").attr("OrderedDict"); }) + .get_stored(); +} + +py::object &get_default_dict_type_object() { + PYBIND11_CONSTINIT static py::gil_safe_call_once_and_store storage; + return storage + .call_once_and_store_result( + []() -> py::object { return py::module_::import("collections").attr("defaultdict"); }) + .get_stored(); +} + TEST_CASE("Single Subinterpreter") { unsafe_reset_internals_for_single_interpreter(); @@ -308,6 +333,103 @@ TEST_CASE("Multiple Subinterpreters") { unsafe_reset_internals_for_single_interpreter(); } +// Test that gil_safe_call_once_and_store provides per-interpreter storage. +// Without the per-interpreter storage fix, the subinterpreter would see the value +// cached by the main interpreter, which is invalid (different interpreter's object). +TEST_CASE("gil_safe_call_once_and_store per-interpreter isolation") { + unsafe_reset_internals_for_single_interpreter(); + + // This static simulates a typical usage pattern where a module caches + // an imported object using gil_safe_call_once_and_store. + PYBIND11_CONSTINIT static py::gil_safe_call_once_and_store storage; + + // Get the interpreter ID in the main interpreter + auto main_interp_id = PyInterpreterState_GetID(PyInterpreterState_Get()); + + // Store a value in the main interpreter - we'll store the interpreter ID as a Python int + auto &main_value = storage + .call_once_and_store_result([]() { + return py::int_(PyInterpreterState_GetID(PyInterpreterState_Get())); + }) + .get_stored(); + REQUIRE(main_value.cast() == main_interp_id); + + py::object dict_type = get_dict_type_object(); + py::object ordered_dict_type = get_ordered_dict_type_object(); + py::object default_dict_type = get_default_dict_type_object(); + + int64_t sub_interp_id = -1; + int64_t sub_cached_value = -1; + + bool sub_default_dict_type_destroyed = false; + + // Create a subinterpreter and check that it gets its own storage + { + py::scoped_subinterpreter ssi; + + sub_interp_id = PyInterpreterState_GetID(PyInterpreterState_Get()); + REQUIRE(sub_interp_id != main_interp_id); + + // Access the same static storage from the subinterpreter. + // With per-interpreter storage, this should call the lambda again + // and cache a NEW value for this interpreter. + // Without per-interpreter storage, this would return main's cached value. + auto &sub_value + = storage + .call_once_and_store_result([]() { + return py::int_(PyInterpreterState_GetID(PyInterpreterState_Get())); + }) + .get_stored(); + + sub_cached_value = sub_value.cast(); + + // The cached value should be the SUBINTERPRETER's ID, not the main interpreter's. + // This would fail without per-interpreter storage. + REQUIRE(sub_cached_value == sub_interp_id); + REQUIRE(sub_cached_value != main_interp_id); + + py::object sub_dict_type = get_dict_type_object(); + py::object sub_ordered_dict_type = get_ordered_dict_type_object(); + py::object sub_default_dict_type = get_default_dict_type_object(); + + // Verify that the subinterpreter has its own cached type objects. + // For static types, they should be the same object across interpreters. + // See also: https://docs.python.org/3/c-api/typeobj.html#static-types + REQUIRE(sub_dict_type.is(dict_type)); // dict is a static type + REQUIRE(sub_ordered_dict_type.is(ordered_dict_type)); // OrderedDict is a static type + // For heap types, they are dynamically created per-interpreter. + // See also: https://docs.python.org/3/c-api/typeobj.html#heap-types + REQUIRE_FALSE(sub_default_dict_type.is(default_dict_type)); // defaultdict is a heap type + + // Set up a weakref callback to detect when the subinterpreter's cached default_dict_type + // is destroyed so the gil_safe_call_once_and_store storage is not leaked when the + // subinterpreter is shutdown. + (void) py::weakref(sub_default_dict_type, + py::cpp_function([&](py::handle weakref) -> void { + sub_default_dict_type_destroyed = true; + weakref.dec_ref(); + })) + .release(); + } + + // Back in main interpreter, verify main's value is unchanged + auto &main_value_after = storage.get_stored(); + REQUIRE(main_value_after.cast() == main_interp_id); + + // Verify that the types cached in main are unchanged + py::object dict_type_after = get_dict_type_object(); + py::object ordered_dict_type_after = get_ordered_dict_type_object(); + py::object default_dict_type_after = get_default_dict_type_object(); + REQUIRE(dict_type_after.is(dict_type)); + REQUIRE(ordered_dict_type_after.is(ordered_dict_type)); + REQUIRE(default_dict_type_after.is(default_dict_type)); + + // Verify that the subinterpreter's cached default_dict_type was destroyed + REQUIRE(sub_default_dict_type_destroyed); + + unsafe_reset_internals_for_single_interpreter(); +} + # ifdef Py_MOD_PER_INTERPRETER_GIL_SUPPORTED TEST_CASE("Per-Subinterpreter GIL") { auto main_int