From 14976c853b8422dc41ce7874742cab122684c97e Mon Sep 17 00:00:00 2001 From: Jeremy Maitin-Shepard Date: Mon, 20 Sep 2021 04:57:38 -0700 Subject: [PATCH] Eliminate duplicate TLS keys for loader_life_support stack (#3275) * Eliminate duplicate TLS keys for loader_life_support stack This revises the existing fix for https://github.com/pybind/pybind11/issues/2765 in https://github.com/pybind/pybind11/pull/3237 to reduce the amount of TLS storage used. The shared TLS key is stored in two different ways, depending on `PYBIND11_INTERNALS_VERSION`. If `PYBIND11_INTERNALS_VERSION == 4` (as is currently set), the TLS key is stored in the `internal::shared_data` map to avoid breaking ABI compatibility. If `PYBIND11_INTERNALS_VERSION > 4`, the TLS key is stored directly in the `internals` struct. * Fix test_pytypes.py::test_issue2361 failure on PyPy3.7 * Add github actions tests for unstable ABI --- .github/workflows/ci.yml | 18 ++ CMakeLists.txt | 7 + include/pybind11/detail/internals.h | 182 +++++++++++++++------ include/pybind11/detail/type_caster_base.h | 41 +++-- include/pybind11/gil.h | 4 +- tests/test_pytypes.py | 3 +- 6 files changed, 189 insertions(+), 66 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a3664ddf5..2348212f4 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -144,6 +144,24 @@ jobs: if: "!(runner.os == 'Windows' && (matrix.python == 3.8 || matrix.python == 3.9 || matrix.python == '3.10-dev'))" run: cmake --build build2 --target cpptest + # Third build - C++17 mode with unstable ABI + - name: Configure (unstable ABI) + run: > + cmake -S . -B build3 + -DPYBIND11_WERROR=ON + -DDOWNLOAD_CATCH=ON + -DDOWNLOAD_EIGEN=ON + -DCMAKE_CXX_STANDARD=17 + -DPYBIND11_INTERNALS_VERSION=10000000 + "-DPYBIND11_TEST_OVERRIDE=test_call_policies.cpp;test_gil_scoped.cpp;test_thread.cpp" + ${{ matrix.args }} + + - name: Build (unstable ABI) + run: cmake --build build3 -j 2 + + - name: Python tests (unstable ABI) + run: cmake --build build3 --target pytest + - name: Interface test run: cmake --build build2 --target test_cmake_build diff --git a/CMakeLists.txt b/CMakeLists.txt index bb01a861a..25cfcec2f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -89,6 +89,9 @@ endif() option(PYBIND11_INSTALL "Install pybind11 header files?" ${PYBIND11_MASTER_PROJECT}) option(PYBIND11_TEST "Build pybind11 test suite?" ${PYBIND11_MASTER_PROJECT}) option(PYBIND11_NOPYTHON "Disable search for Python" OFF) +set(PYBIND11_INTERNALS_VERSION + "" + CACHE STRING "Override the ABI version, may be used to enable the unstable ABI.") cmake_dependent_option( USE_PYTHON_INCLUDE_DIR @@ -183,6 +186,10 @@ if(NOT TARGET pybind11_headers) target_compile_features(pybind11_headers INTERFACE cxx_inheriting_constructors cxx_user_literals cxx_right_angle_brackets) + if(NOT "${PYBIND11_INTERNALS_VERSION}" STREQUAL "") + target_compile_definitions( + pybind11_headers INTERFACE "PYBIND11_INTERNALS_VERSION=${PYBIND11_INTERNALS_VERSION}") + endif() else() # It is invalid to install a target twice, too. set(PYBIND11_INSTALL OFF) diff --git a/include/pybind11/detail/internals.h b/include/pybind11/detail/internals.h index 39c28e773..7c2e49978 100644 --- a/include/pybind11/detail/internals.h +++ b/include/pybind11/detail/internals.h @@ -11,6 +11,24 @@ #include "../pytypes.h" +/// Tracks the `internals` and `type_info` ABI version independent of the main library version. +/// +/// Some portions of the code use an ABI that is conditional depending on this +/// version number. That allows ABI-breaking changes to be "pre-implemented". +/// Once the default version number is incremented, the conditional logic that +/// no longer applies can be removed. Additionally, users that need not +/// maintain ABI compatibility can increase the version number in order to take +/// advantage of any functionality/efficiency improvements that depend on the +/// newer ABI. +/// +/// WARNING: If you choose to manually increase the ABI version, note that +/// pybind11 may not be tested as thoroughly with a non-default ABI version, and +/// further ABI-incompatible changes may be made before the ABI is officially +/// changed to the new version. +#ifndef PYBIND11_INTERNALS_VERSION +# define PYBIND11_INTERNALS_VERSION 4 +#endif + PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE) using ExceptionTranslator = void (*)(std::exception_ptr); @@ -25,30 +43,58 @@ inline PyObject *make_object_base_type(PyTypeObject *metaclass); // The old Python Thread Local Storage (TLS) API is deprecated in Python 3.7 in favor of the new // Thread Specific Storage (TSS) API. #if PY_VERSION_HEX >= 0x03070000 -# define PYBIND11_TLS_KEY_INIT(var) Py_tss_t *var = nullptr -# define PYBIND11_TLS_GET_VALUE(key) PyThread_tss_get((key)) -# define PYBIND11_TLS_REPLACE_VALUE(key, value) PyThread_tss_set((key), (value)) -# define PYBIND11_TLS_DELETE_VALUE(key) PyThread_tss_set((key), nullptr) -# define PYBIND11_TLS_FREE(key) PyThread_tss_free(key) -#else - // Usually an int but a long on Cygwin64 with Python 3.x -# define PYBIND11_TLS_KEY_INIT(var) decltype(PyThread_create_key()) var = 0 -# define PYBIND11_TLS_GET_VALUE(key) PyThread_get_key_value((key)) -# if PY_MAJOR_VERSION < 3 -# define PYBIND11_TLS_DELETE_VALUE(key) \ - PyThread_delete_key_value(key) -# define PYBIND11_TLS_REPLACE_VALUE(key, value) \ - do { \ - PyThread_delete_key_value((key)); \ - PyThread_set_key_value((key), (value)); \ - } while (false) +// Avoid unnecessary allocation of `Py_tss_t`, since we cannot use +// `Py_LIMITED_API` anyway. +# if PYBIND11_INTERNALS_VERSION > 4 +# define PYBIND11_TLS_KEY_REF Py_tss_t & +# ifdef __GNUC__ +// Clang on macOS warns due to `Py_tss_NEEDS_INIT` not specifying an initializer +// for every field. +# define PYBIND11_TLS_KEY_INIT(var) \ + _Pragma("GCC diagnostic push") /**/ \ + _Pragma("GCC diagnostic ignored \"-Wmissing-field-initializers\"") /**/ \ + Py_tss_t var \ + = Py_tss_NEEDS_INIT; \ + _Pragma("GCC diagnostic pop") +# else +# define PYBIND11_TLS_KEY_INIT(var) Py_tss_t var = Py_tss_NEEDS_INIT; +# endif +# define PYBIND11_TLS_KEY_CREATE(var) (PyThread_tss_create(&(var)) == 0) +# define PYBIND11_TLS_GET_VALUE(key) PyThread_tss_get(&(key)) +# define PYBIND11_TLS_REPLACE_VALUE(key, value) PyThread_tss_set(&(key), (value)) +# define PYBIND11_TLS_DELETE_VALUE(key) PyThread_tss_set(&(key), nullptr) +# define PYBIND11_TLS_FREE(key) PyThread_tss_delete(&(key)) # else -# define PYBIND11_TLS_DELETE_VALUE(key) \ - PyThread_set_key_value((key), nullptr) -# define PYBIND11_TLS_REPLACE_VALUE(key, value) \ - PyThread_set_key_value((key), (value)) +# define PYBIND11_TLS_KEY_REF Py_tss_t * +# define PYBIND11_TLS_KEY_INIT(var) Py_tss_t *var = nullptr; +# define PYBIND11_TLS_KEY_CREATE(var) \ + (((var) = PyThread_tss_alloc()) != nullptr && (PyThread_tss_create((var)) == 0)) +# define PYBIND11_TLS_GET_VALUE(key) PyThread_tss_get((key)) +# define PYBIND11_TLS_REPLACE_VALUE(key, value) PyThread_tss_set((key), (value)) +# define PYBIND11_TLS_DELETE_VALUE(key) PyThread_tss_set((key), nullptr) +# define PYBIND11_TLS_FREE(key) PyThread_tss_free(key) # endif -# define PYBIND11_TLS_FREE(key) (void)key +#else +// Usually an int but a long on Cygwin64 with Python 3.x +# define PYBIND11_TLS_KEY_REF decltype(PyThread_create_key()) +# define PYBIND11_TLS_KEY_INIT(var) PYBIND11_TLS_KEY_REF var = 0; +# define PYBIND11_TLS_KEY_CREATE(var) (((var) = PyThread_create_key()) != -1) +# define PYBIND11_TLS_GET_VALUE(key) PyThread_get_key_value((key)) +# if PY_MAJOR_VERSION < 3 || defined(PYPY_VERSION) +// On CPython < 3.4 and on PyPy, `PyThread_set_key_value` strangely does not set +// the value if it has already been set. Instead, it must first be deleted and +// then set again. +# define PYBIND11_TLS_DELETE_VALUE(key) PyThread_delete_key_value(key) +# define PYBIND11_TLS_REPLACE_VALUE(key, value) \ + do { \ + PyThread_delete_key_value((key)); \ + PyThread_set_key_value((key), (value)); \ + } while (false) +# else +# define PYBIND11_TLS_DELETE_VALUE(key) PyThread_set_key_value((key), nullptr) +# define PYBIND11_TLS_REPLACE_VALUE(key, value) PyThread_set_key_value((key), (value)) +# endif +# define PYBIND11_TLS_FREE(key) (void) key #endif // Python loads modules by default with dlopen with the RTLD_LOCAL flag; under libc++ and possibly @@ -106,22 +152,31 @@ struct internals { std::unordered_map> patients; std::forward_list registered_exception_translators; std::unordered_map shared_data; // Custom data to be shared across extensions +#if PYBIND11_INTERNALS_VERSION == 4 std::vector unused_loader_patient_stack_remove_at_v5; +#endif std::forward_list static_strings; // Stores the std::strings backing detail::c_str() PyTypeObject *static_property_type; PyTypeObject *default_metaclass; PyObject *instance_base; #if defined(WITH_THREAD) - PYBIND11_TLS_KEY_INIT(tstate); + PYBIND11_TLS_KEY_INIT(tstate) +# if PYBIND11_INTERNALS_VERSION > 4 + PYBIND11_TLS_KEY_INIT(loader_life_support_tls_key) +# endif // PYBIND11_INTERNALS_VERSION > 4 PyInterpreterState *istate = nullptr; ~internals() { +# if PYBIND11_INTERNALS_VERSION > 4 + PYBIND11_TLS_FREE(loader_life_support_tls_key); +# endif // PYBIND11_INTERNALS_VERSION > 4 + // This destructor is called *after* Py_Finalize() in finalize_interpreter(). - // That *SHOULD BE* fine. The following details what happens when PyThread_tss_free is called. - // PYBIND11_TLS_FREE is PyThread_tss_free on python 3.7+. On older python, it does nothing. - // PyThread_tss_free calls PyThread_tss_delete and PyMem_RawFree. - // PyThread_tss_delete just calls TlsFree (on Windows) or pthread_key_delete (on *NIX). Neither - // of those have anything to do with CPython internals. - // PyMem_RawFree *requires* that the `tstate` be allocated with the CPython allocator. + // That *SHOULD BE* fine. The following details what happens when PyThread_tss_free is + // called. PYBIND11_TLS_FREE is PyThread_tss_free on python 3.7+. On older python, it does + // nothing. PyThread_tss_free calls PyThread_tss_delete and PyMem_RawFree. + // PyThread_tss_delete just calls TlsFree (on Windows) or pthread_key_delete (on *NIX). + // Neither of those have anything to do with CPython internals. PyMem_RawFree *requires* + // that the `tstate` be allocated with the CPython allocator. PYBIND11_TLS_FREE(tstate); } #endif @@ -153,9 +208,6 @@ struct type_info { bool module_local : 1; }; -/// Tracks the `internals` and `type_info` ABI version independent of the main library version -#define PYBIND11_INTERNALS_VERSION 4 - /// On MSVC, debug and release builds are not ABI-compatible! #if defined(_MSC_VER) && defined(_DEBUG) # define PYBIND11_BUILD_TYPE "_debug" @@ -291,21 +343,21 @@ PYBIND11_NOINLINE internals &get_internals() { internals_ptr = new internals(); #if defined(WITH_THREAD) - #if PY_VERSION_HEX < 0x03090000 - PyEval_InitThreads(); - #endif +# if PY_VERSION_HEX < 0x03090000 + PyEval_InitThreads(); +# endif PyThreadState *tstate = PyThreadState_Get(); - #if PY_VERSION_HEX >= 0x03070000 - internals_ptr->tstate = PyThread_tss_alloc(); - if (!internals_ptr->tstate || (PyThread_tss_create(internals_ptr->tstate) != 0)) - pybind11_fail("get_internals: could not successfully initialize the tstate TSS key!"); - PyThread_tss_set(internals_ptr->tstate, tstate); - #else - internals_ptr->tstate = PyThread_create_key(); - if (internals_ptr->tstate == -1) - pybind11_fail("get_internals: could not successfully initialize the tstate TLS key!"); - PyThread_set_key_value(internals_ptr->tstate, tstate); - #endif + if (!PYBIND11_TLS_KEY_CREATE(internals_ptr->tstate)) { + pybind11_fail("get_internals: could not successfully initialize the tstate TSS key!"); + } + PYBIND11_TLS_REPLACE_VALUE(internals_ptr->tstate, tstate); + +# if PYBIND11_INTERNALS_VERSION > 4 + if (!PYBIND11_TLS_KEY_CREATE(internals_ptr->loader_life_support_tls_key)) { + pybind11_fail("get_internals: could not successfully initialize the " + "loader_life_support TSS key!"); + } +# endif internals_ptr->istate = tstate->interp; #endif builtins[id] = capsule(internals_pp); @@ -317,7 +369,6 @@ PYBIND11_NOINLINE internals &get_internals() { return **internals_pp; } - // the internals struct (above) is shared between all the modules. local_internals are only // for a single module. Any changes made to internals may require an update to // PYBIND11_INTERNALS_VERSION, breaking backwards compatibility. local_internals is, by design, @@ -325,8 +376,41 @@ PYBIND11_NOINLINE internals &get_internals() { // impact any other modules, because the only things accessing the local internals is the // module that contains them. struct local_internals { - type_map registered_types_cpp; - std::forward_list registered_exception_translators; + type_map registered_types_cpp; + std::forward_list registered_exception_translators; +#if defined(WITH_THREAD) && PYBIND11_INTERNALS_VERSION == 4 + + // For ABI compatibility, we can't store the loader_life_support TLS key in + // the `internals` struct directly. Instead, we store it in `shared_data` and + // cache a copy in `local_internals`. If we allocated a separate TLS key for + // each instance of `local_internals`, we could end up allocating hundreds of + // TLS keys if hundreds of different pybind11 modules are loaded (which is a + // plausible number). + PYBIND11_TLS_KEY_INIT(loader_life_support_tls_key) + + // Holds the shared TLS key for the loader_life_support stack. + struct shared_loader_life_support_data { + PYBIND11_TLS_KEY_INIT(loader_life_support_tls_key) + shared_loader_life_support_data() { + if (!PYBIND11_TLS_KEY_CREATE(loader_life_support_tls_key)) { + pybind11_fail("local_internals: could not successfully initialize the " + "loader_life_support TLS key!"); + } + } + // We can't help but leak the TLS key, because Python never unloads extension modules. + }; + + local_internals() { + auto &internals = get_internals(); + // Get or create the `loader_life_support_stack_key`. + auto &ptr = internals.shared_data["_life_support"]; + if (!ptr) { + ptr = new shared_loader_life_support_data; + } + loader_life_support_tls_key + = static_cast(ptr)->loader_life_support_tls_key; + } +#endif // defined(WITH_THREAD) && PYBIND11_INTERNALS_VERSION == 4 }; /// Works like `get_internals`, but for things which are locally registered. diff --git a/include/pybind11/detail/type_caster_base.h b/include/pybind11/detail/type_caster_base.h index fa8433cfe..f804d9d10 100644 --- a/include/pybind11/detail/type_caster_base.h +++ b/include/pybind11/detail/type_caster_base.h @@ -35,30 +35,43 @@ private: loader_life_support* parent = nullptr; std::unordered_set keep_alive; - static loader_life_support** get_stack_pp() { #if defined(WITH_THREAD) - thread_local static loader_life_support* per_thread_stack = nullptr; - return &per_thread_stack; -#else - static loader_life_support* global_stack = nullptr; - return &global_stack; -#endif + // Store stack pointer in thread-local storage. + static PYBIND11_TLS_KEY_REF get_stack_tls_key() { +# if PYBIND11_INTERNALS_VERSION == 4 + return get_local_internals().loader_life_support_tls_key; +# else + return get_internals().loader_life_support_tls_key; +# endif } + static loader_life_support *get_stack_top() { + return static_cast(PYBIND11_TLS_GET_VALUE(get_stack_tls_key())); + } + static void set_stack_top(loader_life_support *value) { + PYBIND11_TLS_REPLACE_VALUE(get_stack_tls_key(), value); + } +#else + // Use single global variable for stack. + static loader_life_support **get_stack_pp() { + static loader_life_support *global_stack = nullptr; + return global_stack; + } + static loader_life_support *get_stack_top() { return *get_stack_pp(); } + static void set_stack_top(loader_life_support *value) { *get_stack_pp() = value; } +#endif public: /// A new patient frame is created when a function is entered loader_life_support() { - loader_life_support** stack = get_stack_pp(); - parent = *stack; - *stack = this; + parent = get_stack_top(); + set_stack_top(this); } /// ... and destroyed after it returns ~loader_life_support() { - loader_life_support** stack = get_stack_pp(); - if (*stack != this) + if (get_stack_top() != this) pybind11_fail("loader_life_support: internal error"); - *stack = parent; + set_stack_top(parent); for (auto* item : keep_alive) Py_DECREF(item); } @@ -66,7 +79,7 @@ public: /// This can only be used inside a pybind11-bound function, either by `argument_loader` /// at argument preparation time or by `py::cast()` at execution time. PYBIND11_NOINLINE static void add_patient(handle h) { - loader_life_support* frame = *get_stack_pp(); + loader_life_support *frame = get_stack_top(); if (!frame) { // NOTE: It would be nice to include the stack frames here, as this indicates // use of pybind11::cast<> outside the normal call framework, finding such diff --git a/include/pybind11/gil.h b/include/pybind11/gil.h index 32f1a8963..b73aaa3f5 100644 --- a/include/pybind11/gil.h +++ b/include/pybind11/gil.h @@ -50,7 +50,7 @@ PYBIND11_NAMESPACE_END(detail) class gil_scoped_acquire { public: PYBIND11_NOINLINE gil_scoped_acquire() { - auto const &internals = detail::get_internals(); + auto &internals = detail::get_internals(); tstate = (PyThreadState *) PYBIND11_TLS_GET_VALUE(internals.tstate); if (!tstate) { @@ -132,7 +132,7 @@ public: // `get_internals()` must be called here unconditionally in order to initialize // `internals.tstate` for subsequent `gil_scoped_acquire` calls. Otherwise, an // initialization race could occur as multiple threads try `gil_scoped_acquire`. - const auto &internals = detail::get_internals(); + auto &internals = detail::get_internals(); tstate = PyEval_SaveThread(); if (disassoc) { auto key = internals.tstate; diff --git a/tests/test_pytypes.py b/tests/test_pytypes.py index 070538cc9..cc27e60a1 100644 --- a/tests/test_pytypes.py +++ b/tests/test_pytypes.py @@ -467,7 +467,8 @@ def test_issue2361(): assert m.issue2361_str_implicit_copy_none() == "None" with pytest.raises(TypeError) as excinfo: assert m.issue2361_dict_implicit_copy_none() - assert "'NoneType' object is not iterable" in str(excinfo.value) + assert "NoneType" in str(excinfo.value) + assert "iterable" in str(excinfo.value) @pytest.mark.parametrize(