diff --git a/include/pybind11/cast.h b/include/pybind11/cast.h index 6434d38a9..679ab7ab7 100644 --- a/include/pybind11/cast.h +++ b/include/pybind11/cast.h @@ -117,6 +117,18 @@ PYBIND11_NOINLINE inline handle get_object_handle(const void *ptr) { return handle((PyObject *) it->second); } +inline PyThreadState *get_thread_state_unchecked() { /* Returns the PyThreadState pointer obtained through whichever accessor the PY_VERSION_HEX ladder below selects at preprocessing time. NOTE(review): the name suggests that no validity check is performed, so the result is presumably null when no thread state is current -- confirm against the CPython documentation. */ +#if PY_VERSION_HEX < 0x03000000 /* versions before 3.0: _PyThreadState_Current is returned directly */ + return _PyThreadState_Current; +#elif PY_VERSION_HEX < 0x03050000 /* 3.0 up to, not including, 3.5.0: relaxed atomic load of _PyThreadState_Current, cast to PyThreadState* */ + return (PyThreadState*) _Py_atomic_load_relaxed(&_PyThreadState_Current); +#elif PY_VERSION_HEX < 0x03050200 /* 3.5.0 up to, not including, 3.5.2: read the value member directly; the common.h hunk of this patch declares the symbol for exactly this version range */ + return (PyThreadState*) _PyThreadState_Current.value; +#else /* 3.5.2 and later: delegate to _PyThreadState_UncheckedGet() */ + return _PyThreadState_UncheckedGet(); +#endif +} + class type_caster_generic { public: PYBIND11_NOINLINE type_caster_generic(const std::type_info &type_info) diff --git a/include/pybind11/common.h b/include/pybind11/common.h index 1cf0cd9b6..8aa0075cb 100644 --- a/include/pybind11/common.h +++ b/include/pybind11/common.h @@ -110,6 +110,13 @@ extern "C" PYBIND11_EXPORT PyObject *init##name() #endif +#if PY_VERSION_HEX >= 0x03050000 && PY_VERSION_HEX < 0x03050200 /* Only for 3.5.0 <= version < 3.5.2: declare _PyThreadState_Current locally. NOTE(review): presumably the CPython headers of these releases do not expose it to extension code -- confirm. */ +extern "C" { + struct _Py_atomic_address { void *value; }; /* local declaration supplying the value member that get_thread_state_unchecked() in cast.h reads */ + PyAPI_DATA(_Py_atomic_address) _PyThreadState_Current; /* declaration only, with C linkage. NOTE(review): the definition is expected to come from the Python runtime -- confirm. */ +}; +#endif + #define PYBIND11_TRY_NEXT_OVERLOAD ((PyObject *) 1) // special failure return code #define PYBIND11_STRINGIFY(x) #x #define PYBIND11_TOSTRING(x) PYBIND11_STRINGIFY(x) diff --git a/include/pybind11/pybind11.h b/include/pybind11/pybind11.h index cf7f4b7b2..7c3642115 100644 --- a/include/pybind11/pybind11.h +++ b/include/pybind11/pybind11.h @@ -1053,11 +1053,13 @@ template void implicitly_convertible() * * 3. The reference count of an acquired thread state can be controlled. This * can be handy to prevent cases where callbacks issued from an external - * thread constantly construct and destroy thread state data structures. */ + * thread would otherwise constantly construct and destroy thread state data + * structures. 
+ */ class gil_scoped_acquire { public: - gil_scoped_acquire() { + PYBIND11_NOINLINE gil_scoped_acquire() { auto const &internals = detail::get_internals(); tstate = (PyThreadState *) PyThread_get_key_value(internals.tstate); @@ -1068,17 +1070,24 @@ public: pybind11_fail("scoped_acquire: could not create thread state!"); #endif tstate->gilstate_counter = 0; + #if PY_MAJOR_VERSION < 3 + PyThread_delete_key_value(internals.tstate); + #endif PyThread_set_key_value(internals.tstate, tstate); } else { - release = PyThreadState_GET() != tstate; + release = detail::get_thread_state_unchecked() != tstate; } if (release) { - PyInterpreterState *interp = tstate->interp; /* Work around an annoying assertion in PyThreadState_Swap */ - tstate->interp = nullptr; + #if defined(Py_DEBUG) + PyInterpreterState *interp = tstate->interp; + tstate->interp = nullptr; + #endif PyEval_AcquireThread(tstate); - tstate->interp = interp; + #if defined(Py_DEBUG) + tstate->interp = interp; + #endif } inc_ref(); @@ -1088,10 +1097,10 @@ public: ++tstate->gilstate_counter; } - void dec_ref() { + PYBIND11_NOINLINE void dec_ref() { --tstate->gilstate_counter; #if !defined(NDEBUG) - if (PyThreadState_GET() != tstate) + if (detail::get_thread_state_unchecked() != tstate) pybind11_fail("scoped_acquire::dec_ref(): thread state must be current!"); if (tstate->gilstate_counter < 0) pybind11_fail("scoped_acquire::dec_ref(): reference count underflow!"); @@ -1103,12 +1112,12 @@ public: #endif PyThreadState_Clear(tstate); PyThreadState_DeleteCurrent(); - PyThread_set_key_value(detail::get_internals().tstate, nullptr); + PyThread_delete_key_value(detail::get_internals().tstate); release = false; } } - ~gil_scoped_acquire() { + PYBIND11_NOINLINE ~gil_scoped_acquire() { dec_ref(); if (release) PyEval_SaveThread();