mirror of
https://github.com/pybind/pybind11.git
synced 2024-11-11 08:03:55 +00:00
Eliminate duplicate TLS keys for loader_life_support stack (#3275)
* Eliminate duplicate TLS keys for loader_life_support stack This revises the existing fix for https://github.com/pybind/pybind11/issues/2765 in https://github.com/pybind/pybind11/pull/3237 to reduce the amount of TLS storage used. The shared TLS key is stored in two different ways, depending on `PYBIND11_INTERNALS_VERSION`. If `PYBIND11_INTERNALS_VERSION == 4` (as is currently set), the TLS key is stored in the `internal::shared_data` map to avoid breaking ABI compatibility. If `PYBIND11_INTERNALS_VERSION > 4`, the TLS key is stored directly in the `internals` struct. * Fix test_pytypes.py::test_issue2361 failure on PyPy3.7 * Add github actions tests for unstable ABI
This commit is contained in:
parent
04dd3262f0
commit
14976c853b
18
.github/workflows/ci.yml
vendored
18
.github/workflows/ci.yml
vendored
@ -144,6 +144,24 @@ jobs:
|
||||
if: "!(runner.os == 'Windows' && (matrix.python == 3.8 || matrix.python == 3.9 || matrix.python == '3.10-dev'))"
|
||||
run: cmake --build build2 --target cpptest
|
||||
|
||||
# Third build - C++17 mode with unstable ABI
|
||||
- name: Configure (unstable ABI)
|
||||
run: >
|
||||
cmake -S . -B build3
|
||||
-DPYBIND11_WERROR=ON
|
||||
-DDOWNLOAD_CATCH=ON
|
||||
-DDOWNLOAD_EIGEN=ON
|
||||
-DCMAKE_CXX_STANDARD=17
|
||||
-DPYBIND11_INTERNALS_VERSION=10000000
|
||||
"-DPYBIND11_TEST_OVERRIDE=test_call_policies.cpp;test_gil_scoped.cpp;test_thread.cpp"
|
||||
${{ matrix.args }}
|
||||
|
||||
- name: Build (unstable ABI)
|
||||
run: cmake --build build3 -j 2
|
||||
|
||||
- name: Python tests (unstable ABI)
|
||||
run: cmake --build build3 --target pytest
|
||||
|
||||
- name: Interface test
|
||||
run: cmake --build build2 --target test_cmake_build
|
||||
|
||||
|
@ -89,6 +89,9 @@ endif()
|
||||
option(PYBIND11_INSTALL "Install pybind11 header files?" ${PYBIND11_MASTER_PROJECT})
|
||||
option(PYBIND11_TEST "Build pybind11 test suite?" ${PYBIND11_MASTER_PROJECT})
|
||||
option(PYBIND11_NOPYTHON "Disable search for Python" OFF)
|
||||
set(PYBIND11_INTERNALS_VERSION
|
||||
""
|
||||
CACHE STRING "Override the ABI version, may be used to enable the unstable ABI.")
|
||||
|
||||
cmake_dependent_option(
|
||||
USE_PYTHON_INCLUDE_DIR
|
||||
@ -183,6 +186,10 @@ if(NOT TARGET pybind11_headers)
|
||||
|
||||
target_compile_features(pybind11_headers INTERFACE cxx_inheriting_constructors cxx_user_literals
|
||||
cxx_right_angle_brackets)
|
||||
if(NOT "${PYBIND11_INTERNALS_VERSION}" STREQUAL "")
|
||||
target_compile_definitions(
|
||||
pybind11_headers INTERFACE "PYBIND11_INTERNALS_VERSION=${PYBIND11_INTERNALS_VERSION}")
|
||||
endif()
|
||||
else()
|
||||
# It is invalid to install a target twice, too.
|
||||
set(PYBIND11_INSTALL OFF)
|
||||
|
@ -11,6 +11,24 @@
|
||||
|
||||
#include "../pytypes.h"
|
||||
|
||||
/// Tracks the `internals` and `type_info` ABI version independent of the main library version.
|
||||
///
|
||||
/// Some portions of the code use an ABI that is conditional depending on this
|
||||
/// version number. That allows ABI-breaking changes to be "pre-implemented".
|
||||
/// Once the default version number is incremented, the conditional logic that
|
||||
/// no longer applies can be removed. Additionally, users that need not
|
||||
/// maintain ABI compatibility can increase the version number in order to take
|
||||
/// advantage of any functionality/efficiency improvements that depend on the
|
||||
/// newer ABI.
|
||||
///
|
||||
/// WARNING: If you choose to manually increase the ABI version, note that
|
||||
/// pybind11 may not be tested as thoroughly with a non-default ABI version, and
|
||||
/// further ABI-incompatible changes may be made before the ABI is officially
|
||||
/// changed to the new version.
|
||||
#ifndef PYBIND11_INTERNALS_VERSION
|
||||
# define PYBIND11_INTERNALS_VERSION 4
|
||||
#endif
|
||||
|
||||
PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
|
||||
|
||||
using ExceptionTranslator = void (*)(std::exception_ptr);
|
||||
@ -25,30 +43,58 @@ inline PyObject *make_object_base_type(PyTypeObject *metaclass);
|
||||
// The old Python Thread Local Storage (TLS) API is deprecated in Python 3.7 in favor of the new
|
||||
// Thread Specific Storage (TSS) API.
|
||||
#if PY_VERSION_HEX >= 0x03070000
|
||||
# define PYBIND11_TLS_KEY_INIT(var) Py_tss_t *var = nullptr
|
||||
# define PYBIND11_TLS_GET_VALUE(key) PyThread_tss_get((key))
|
||||
# define PYBIND11_TLS_REPLACE_VALUE(key, value) PyThread_tss_set((key), (value))
|
||||
# define PYBIND11_TLS_DELETE_VALUE(key) PyThread_tss_set((key), nullptr)
|
||||
# define PYBIND11_TLS_FREE(key) PyThread_tss_free(key)
|
||||
#else
|
||||
// Usually an int but a long on Cygwin64 with Python 3.x
|
||||
# define PYBIND11_TLS_KEY_INIT(var) decltype(PyThread_create_key()) var = 0
|
||||
# define PYBIND11_TLS_GET_VALUE(key) PyThread_get_key_value((key))
|
||||
# if PY_MAJOR_VERSION < 3
|
||||
# define PYBIND11_TLS_DELETE_VALUE(key) \
|
||||
PyThread_delete_key_value(key)
|
||||
# define PYBIND11_TLS_REPLACE_VALUE(key, value) \
|
||||
do { \
|
||||
PyThread_delete_key_value((key)); \
|
||||
PyThread_set_key_value((key), (value)); \
|
||||
} while (false)
|
||||
// Avoid unnecessary allocation of `Py_tss_t`, since we cannot use
|
||||
// `Py_LIMITED_API` anyway.
|
||||
# if PYBIND11_INTERNALS_VERSION > 4
|
||||
# define PYBIND11_TLS_KEY_REF Py_tss_t &
|
||||
# ifdef __GNUC__
|
||||
// Clang on macOS warns due to `Py_tss_NEEDS_INIT` not specifying an initializer
|
||||
// for every field.
|
||||
# define PYBIND11_TLS_KEY_INIT(var) \
|
||||
_Pragma("GCC diagnostic push") /**/ \
|
||||
_Pragma("GCC diagnostic ignored \"-Wmissing-field-initializers\"") /**/ \
|
||||
Py_tss_t var \
|
||||
= Py_tss_NEEDS_INIT; \
|
||||
_Pragma("GCC diagnostic pop")
|
||||
# else
|
||||
# define PYBIND11_TLS_KEY_INIT(var) Py_tss_t var = Py_tss_NEEDS_INIT;
|
||||
# endif
|
||||
# define PYBIND11_TLS_KEY_CREATE(var) (PyThread_tss_create(&(var)) == 0)
|
||||
# define PYBIND11_TLS_GET_VALUE(key) PyThread_tss_get(&(key))
|
||||
# define PYBIND11_TLS_REPLACE_VALUE(key, value) PyThread_tss_set(&(key), (value))
|
||||
# define PYBIND11_TLS_DELETE_VALUE(key) PyThread_tss_set(&(key), nullptr)
|
||||
# define PYBIND11_TLS_FREE(key) PyThread_tss_delete(&(key))
|
||||
# else
|
||||
# define PYBIND11_TLS_DELETE_VALUE(key) \
|
||||
PyThread_set_key_value((key), nullptr)
|
||||
# define PYBIND11_TLS_REPLACE_VALUE(key, value) \
|
||||
PyThread_set_key_value((key), (value))
|
||||
# define PYBIND11_TLS_KEY_REF Py_tss_t *
|
||||
# define PYBIND11_TLS_KEY_INIT(var) Py_tss_t *var = nullptr;
|
||||
# define PYBIND11_TLS_KEY_CREATE(var) \
|
||||
(((var) = PyThread_tss_alloc()) != nullptr && (PyThread_tss_create((var)) == 0))
|
||||
# define PYBIND11_TLS_GET_VALUE(key) PyThread_tss_get((key))
|
||||
# define PYBIND11_TLS_REPLACE_VALUE(key, value) PyThread_tss_set((key), (value))
|
||||
# define PYBIND11_TLS_DELETE_VALUE(key) PyThread_tss_set((key), nullptr)
|
||||
# define PYBIND11_TLS_FREE(key) PyThread_tss_free(key)
|
||||
# endif
|
||||
# define PYBIND11_TLS_FREE(key) (void)key
|
||||
#else
|
||||
// Usually an int but a long on Cygwin64 with Python 3.x
|
||||
# define PYBIND11_TLS_KEY_REF decltype(PyThread_create_key())
|
||||
# define PYBIND11_TLS_KEY_INIT(var) PYBIND11_TLS_KEY_REF var = 0;
|
||||
# define PYBIND11_TLS_KEY_CREATE(var) (((var) = PyThread_create_key()) != -1)
|
||||
# define PYBIND11_TLS_GET_VALUE(key) PyThread_get_key_value((key))
|
||||
# if PY_MAJOR_VERSION < 3 || defined(PYPY_VERSION)
|
||||
// On CPython < 3.4 and on PyPy, `PyThread_set_key_value` strangely does not set
|
||||
// the value if it has already been set. Instead, it must first be deleted and
|
||||
// then set again.
|
||||
# define PYBIND11_TLS_DELETE_VALUE(key) PyThread_delete_key_value(key)
|
||||
# define PYBIND11_TLS_REPLACE_VALUE(key, value) \
|
||||
do { \
|
||||
PyThread_delete_key_value((key)); \
|
||||
PyThread_set_key_value((key), (value)); \
|
||||
} while (false)
|
||||
# else
|
||||
# define PYBIND11_TLS_DELETE_VALUE(key) PyThread_set_key_value((key), nullptr)
|
||||
# define PYBIND11_TLS_REPLACE_VALUE(key, value) PyThread_set_key_value((key), (value))
|
||||
# endif
|
||||
# define PYBIND11_TLS_FREE(key) (void) key
|
||||
#endif
|
||||
|
||||
// Python loads modules by default with dlopen with the RTLD_LOCAL flag; under libc++ and possibly
|
||||
@ -106,22 +152,31 @@ struct internals {
|
||||
std::unordered_map<const PyObject *, std::vector<PyObject *>> patients;
|
||||
std::forward_list<ExceptionTranslator> registered_exception_translators;
|
||||
std::unordered_map<std::string, void *> shared_data; // Custom data to be shared across extensions
|
||||
#if PYBIND11_INTERNALS_VERSION == 4
|
||||
std::vector<PyObject *> unused_loader_patient_stack_remove_at_v5;
|
||||
#endif
|
||||
std::forward_list<std::string> static_strings; // Stores the std::strings backing detail::c_str()
|
||||
PyTypeObject *static_property_type;
|
||||
PyTypeObject *default_metaclass;
|
||||
PyObject *instance_base;
|
||||
#if defined(WITH_THREAD)
|
||||
PYBIND11_TLS_KEY_INIT(tstate);
|
||||
PYBIND11_TLS_KEY_INIT(tstate)
|
||||
# if PYBIND11_INTERNALS_VERSION > 4
|
||||
PYBIND11_TLS_KEY_INIT(loader_life_support_tls_key)
|
||||
# endif // PYBIND11_INTERNALS_VERSION > 4
|
||||
PyInterpreterState *istate = nullptr;
|
||||
~internals() {
|
||||
# if PYBIND11_INTERNALS_VERSION > 4
|
||||
PYBIND11_TLS_FREE(loader_life_support_tls_key);
|
||||
# endif // PYBIND11_INTERNALS_VERSION > 4
|
||||
|
||||
// This destructor is called *after* Py_Finalize() in finalize_interpreter().
|
||||
// That *SHOULD BE* fine. The following details what happens when PyThread_tss_free is called.
|
||||
// PYBIND11_TLS_FREE is PyThread_tss_free on python 3.7+. On older python, it does nothing.
|
||||
// PyThread_tss_free calls PyThread_tss_delete and PyMem_RawFree.
|
||||
// PyThread_tss_delete just calls TlsFree (on Windows) or pthread_key_delete (on *NIX). Neither
|
||||
// of those have anything to do with CPython internals.
|
||||
// PyMem_RawFree *requires* that the `tstate` be allocated with the CPython allocator.
|
||||
// That *SHOULD BE* fine. The following details what happens when PyThread_tss_free is
|
||||
// called. PYBIND11_TLS_FREE is PyThread_tss_free on python 3.7+. On older python, it does
|
||||
// nothing. PyThread_tss_free calls PyThread_tss_delete and PyMem_RawFree.
|
||||
// PyThread_tss_delete just calls TlsFree (on Windows) or pthread_key_delete (on *NIX).
|
||||
// Neither of those have anything to do with CPython internals. PyMem_RawFree *requires*
|
||||
// that the `tstate` be allocated with the CPython allocator.
|
||||
PYBIND11_TLS_FREE(tstate);
|
||||
}
|
||||
#endif
|
||||
@ -153,9 +208,6 @@ struct type_info {
|
||||
bool module_local : 1;
|
||||
};
|
||||
|
||||
/// Tracks the `internals` and `type_info` ABI version independent of the main library version
|
||||
#define PYBIND11_INTERNALS_VERSION 4
|
||||
|
||||
/// On MSVC, debug and release builds are not ABI-compatible!
|
||||
#if defined(_MSC_VER) && defined(_DEBUG)
|
||||
# define PYBIND11_BUILD_TYPE "_debug"
|
||||
@ -291,21 +343,21 @@ PYBIND11_NOINLINE internals &get_internals() {
|
||||
internals_ptr = new internals();
|
||||
#if defined(WITH_THREAD)
|
||||
|
||||
#if PY_VERSION_HEX < 0x03090000
|
||||
PyEval_InitThreads();
|
||||
#endif
|
||||
# if PY_VERSION_HEX < 0x03090000
|
||||
PyEval_InitThreads();
|
||||
# endif
|
||||
PyThreadState *tstate = PyThreadState_Get();
|
||||
#if PY_VERSION_HEX >= 0x03070000
|
||||
internals_ptr->tstate = PyThread_tss_alloc();
|
||||
if (!internals_ptr->tstate || (PyThread_tss_create(internals_ptr->tstate) != 0))
|
||||
pybind11_fail("get_internals: could not successfully initialize the tstate TSS key!");
|
||||
PyThread_tss_set(internals_ptr->tstate, tstate);
|
||||
#else
|
||||
internals_ptr->tstate = PyThread_create_key();
|
||||
if (internals_ptr->tstate == -1)
|
||||
pybind11_fail("get_internals: could not successfully initialize the tstate TLS key!");
|
||||
PyThread_set_key_value(internals_ptr->tstate, tstate);
|
||||
#endif
|
||||
if (!PYBIND11_TLS_KEY_CREATE(internals_ptr->tstate)) {
|
||||
pybind11_fail("get_internals: could not successfully initialize the tstate TSS key!");
|
||||
}
|
||||
PYBIND11_TLS_REPLACE_VALUE(internals_ptr->tstate, tstate);
|
||||
|
||||
# if PYBIND11_INTERNALS_VERSION > 4
|
||||
if (!PYBIND11_TLS_KEY_CREATE(internals_ptr->loader_life_support_tls_key)) {
|
||||
pybind11_fail("get_internals: could not successfully initialize the "
|
||||
"loader_life_support TSS key!");
|
||||
}
|
||||
# endif
|
||||
internals_ptr->istate = tstate->interp;
|
||||
#endif
|
||||
builtins[id] = capsule(internals_pp);
|
||||
@ -317,7 +369,6 @@ PYBIND11_NOINLINE internals &get_internals() {
|
||||
return **internals_pp;
|
||||
}
|
||||
|
||||
|
||||
// the internals struct (above) is shared between all the modules. local_internals are only
|
||||
// for a single module. Any changes made to internals may require an update to
|
||||
// PYBIND11_INTERNALS_VERSION, breaking backwards compatibility. local_internals is, by design,
|
||||
@ -325,8 +376,41 @@ PYBIND11_NOINLINE internals &get_internals() {
|
||||
// impact any other modules, because the only things accessing the local internals is the
|
||||
// module that contains them.
|
||||
struct local_internals {
|
||||
type_map<type_info *> registered_types_cpp;
|
||||
std::forward_list<ExceptionTranslator> registered_exception_translators;
|
||||
type_map<type_info *> registered_types_cpp;
|
||||
std::forward_list<ExceptionTranslator> registered_exception_translators;
|
||||
#if defined(WITH_THREAD) && PYBIND11_INTERNALS_VERSION == 4
|
||||
|
||||
// For ABI compatibility, we can't store the loader_life_support TLS key in
|
||||
// the `internals` struct directly. Instead, we store it in `shared_data` and
|
||||
// cache a copy in `local_internals`. If we allocated a separate TLS key for
|
||||
// each instance of `local_internals`, we could end up allocating hundreds of
|
||||
// TLS keys if hundreds of different pybind11 modules are loaded (which is a
|
||||
// plausible number).
|
||||
PYBIND11_TLS_KEY_INIT(loader_life_support_tls_key)
|
||||
|
||||
// Holds the shared TLS key for the loader_life_support stack.
|
||||
struct shared_loader_life_support_data {
|
||||
PYBIND11_TLS_KEY_INIT(loader_life_support_tls_key)
|
||||
shared_loader_life_support_data() {
|
||||
if (!PYBIND11_TLS_KEY_CREATE(loader_life_support_tls_key)) {
|
||||
pybind11_fail("local_internals: could not successfully initialize the "
|
||||
"loader_life_support TLS key!");
|
||||
}
|
||||
}
|
||||
// We can't help but leak the TLS key, because Python never unloads extension modules.
|
||||
};
|
||||
|
||||
local_internals() {
|
||||
auto &internals = get_internals();
|
||||
// Get or create the `loader_life_support_stack_key`.
|
||||
auto &ptr = internals.shared_data["_life_support"];
|
||||
if (!ptr) {
|
||||
ptr = new shared_loader_life_support_data;
|
||||
}
|
||||
loader_life_support_tls_key
|
||||
= static_cast<shared_loader_life_support_data *>(ptr)->loader_life_support_tls_key;
|
||||
}
|
||||
#endif // defined(WITH_THREAD) && PYBIND11_INTERNALS_VERSION == 4
|
||||
};
|
||||
|
||||
/// Works like `get_internals`, but for things which are locally registered.
|
||||
|
@ -35,30 +35,43 @@ private:
|
||||
loader_life_support* parent = nullptr;
|
||||
std::unordered_set<PyObject *> keep_alive;
|
||||
|
||||
static loader_life_support** get_stack_pp() {
|
||||
#if defined(WITH_THREAD)
|
||||
thread_local static loader_life_support* per_thread_stack = nullptr;
|
||||
return &per_thread_stack;
|
||||
#else
|
||||
static loader_life_support* global_stack = nullptr;
|
||||
return &global_stack;
|
||||
#endif
|
||||
// Store stack pointer in thread-local storage.
|
||||
static PYBIND11_TLS_KEY_REF get_stack_tls_key() {
|
||||
# if PYBIND11_INTERNALS_VERSION == 4
|
||||
return get_local_internals().loader_life_support_tls_key;
|
||||
# else
|
||||
return get_internals().loader_life_support_tls_key;
|
||||
# endif
|
||||
}
|
||||
static loader_life_support *get_stack_top() {
|
||||
return static_cast<loader_life_support *>(PYBIND11_TLS_GET_VALUE(get_stack_tls_key()));
|
||||
}
|
||||
static void set_stack_top(loader_life_support *value) {
|
||||
PYBIND11_TLS_REPLACE_VALUE(get_stack_tls_key(), value);
|
||||
}
|
||||
#else
|
||||
// Use single global variable for stack.
|
||||
static loader_life_support **get_stack_pp() {
|
||||
static loader_life_support *global_stack = nullptr;
|
||||
return global_stack;
|
||||
}
|
||||
static loader_life_support *get_stack_top() { return *get_stack_pp(); }
|
||||
static void set_stack_top(loader_life_support *value) { *get_stack_pp() = value; }
|
||||
#endif
|
||||
|
||||
public:
|
||||
/// A new patient frame is created when a function is entered
|
||||
loader_life_support() {
|
||||
loader_life_support** stack = get_stack_pp();
|
||||
parent = *stack;
|
||||
*stack = this;
|
||||
parent = get_stack_top();
|
||||
set_stack_top(this);
|
||||
}
|
||||
|
||||
/// ... and destroyed after it returns
|
||||
~loader_life_support() {
|
||||
loader_life_support** stack = get_stack_pp();
|
||||
if (*stack != this)
|
||||
if (get_stack_top() != this)
|
||||
pybind11_fail("loader_life_support: internal error");
|
||||
*stack = parent;
|
||||
set_stack_top(parent);
|
||||
for (auto* item : keep_alive)
|
||||
Py_DECREF(item);
|
||||
}
|
||||
@ -66,7 +79,7 @@ public:
|
||||
/// This can only be used inside a pybind11-bound function, either by `argument_loader`
|
||||
/// at argument preparation time or by `py::cast()` at execution time.
|
||||
PYBIND11_NOINLINE static void add_patient(handle h) {
|
||||
loader_life_support* frame = *get_stack_pp();
|
||||
loader_life_support *frame = get_stack_top();
|
||||
if (!frame) {
|
||||
// NOTE: It would be nice to include the stack frames here, as this indicates
|
||||
// use of pybind11::cast<> outside the normal call framework, finding such
|
||||
|
@ -50,7 +50,7 @@ PYBIND11_NAMESPACE_END(detail)
|
||||
class gil_scoped_acquire {
|
||||
public:
|
||||
PYBIND11_NOINLINE gil_scoped_acquire() {
|
||||
auto const &internals = detail::get_internals();
|
||||
auto &internals = detail::get_internals();
|
||||
tstate = (PyThreadState *) PYBIND11_TLS_GET_VALUE(internals.tstate);
|
||||
|
||||
if (!tstate) {
|
||||
@ -132,7 +132,7 @@ public:
|
||||
// `get_internals()` must be called here unconditionally in order to initialize
|
||||
// `internals.tstate` for subsequent `gil_scoped_acquire` calls. Otherwise, an
|
||||
// initialization race could occur as multiple threads try `gil_scoped_acquire`.
|
||||
const auto &internals = detail::get_internals();
|
||||
auto &internals = detail::get_internals();
|
||||
tstate = PyEval_SaveThread();
|
||||
if (disassoc) {
|
||||
auto key = internals.tstate;
|
||||
|
@ -467,7 +467,8 @@ def test_issue2361():
|
||||
assert m.issue2361_str_implicit_copy_none() == "None"
|
||||
with pytest.raises(TypeError) as excinfo:
|
||||
assert m.issue2361_dict_implicit_copy_none()
|
||||
assert "'NoneType' object is not iterable" in str(excinfo.value)
|
||||
assert "NoneType" in str(excinfo.value)
|
||||
assert "iterable" in str(excinfo.value)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
|
Loading…
Reference in New Issue
Block a user