Merge pull request #329 from jagerman/track-same-ptr-instances

Track registered instances that share a pointer address
This commit is contained in:
Wenzel Jakob 2016-08-10 18:24:10 +02:00 committed by GitHub
commit 5a4cd3b45c
7 changed files with 147 additions and 118 deletions

View File

@ -612,94 +612,26 @@ functions. The default policy is :enum:`return_value_policy::automatic`.
| | it is no longer used. Warning: undefined behavior will ensue when the C++ |
| | side deletes an object that is still referenced and used by Python. |
+--------------------------------------------------+----------------------------------------------------------------------------+
| :enum:`return_value_policy::reference_internal` | This policy only applies to methods and properties. It references the |
| | object without taking ownership similar to the above |
| | :enum:`return_value_policy::reference` policy. In contrast to that policy, |
| | the function or property's implicit ``this`` argument (called the *parent*)|
| | is considered to be the the owner of the return value (the *child*). |
| | pybind11 then couples the lifetime of the parent to the child via a |
| | reference relationship that ensures that the parent cannot be garbage |
| | collected while Python is still using the child. More advanced variations |
| | of this scheme are also possible using combinations of |
| | :enum:`return_value_policy::reference` and the :class:`keep_alive` call |
| | policy described next. |
| :enum:`return_value_policy::reference_internal` | Like :enum:`return_value_policy::reference` but additionally applies a |
| | :class:`keep_alive<0,1>()` call policy (described next) that keeps the |
| | ``this`` argument of the function or property from being garbage collected |
| | as long as the return value remains referenced. See the |
| | :class:`keep_alive` call policy (described next) for details. |
+--------------------------------------------------+----------------------------------------------------------------------------+
.. warning::
Code with invalid call policies might access unitialized memory or free
data structures multiple times, which can lead to hard-to-debug
Code with invalid return value policies might access unitialized memory or
free data structures multiple times, which can lead to hard-to-debug
non-determinism and segmentation faults, hence it is worth spending the
time to understand all the different options in the table above.
One important aspect regarding the above policies is that they only apply to
instances which pybind11 has *not* seen before, in which case the policy
clarifies essential questions about the return value's lifetime and ownership.
When pybind11 knows the instance already (as identified via its address in
One important aspect of the above policies is that they only apply to instances
which pybind11 has *not* seen before, in which case the policy clarifies
essential questions about the return value's lifetime and ownership. When
pybind11 knows the instance already (as identified by its type and address in
memory), it will return the existing Python object wrapper rather than creating
a copy. This means that functions which merely cast a reference (or pointer)
into a different type don't do what one would expect:
.. code-block:: cpp
A &func(B &value) { return (A&) value; }
The wrapped version of this function will return the original ``B`` instance.
To force a cast, the argument should be returned by value.
More common (and equally problematic) are cases where methods (e.g. getters)
return a pointer or reference to the first attribute of a class.
.. code-block:: cpp
:emphasize-lines: 3, 13
class Example {
public:
Internal &get_internal() { return internal; }
private:
Internal internal;
};
PYBIND11_PLUGIN(example) {
py::module m("example", "pybind11 example plugin");
py::class_<Example>(m, "Example")
.def(py::init<>())
.def("get_internal", &Example::get_internal); /* Note: don't do this! */
return m.ptr();
}
As in the above casting example, the instance and its attribute will be located
at the same address in memory, which pybind11 will recongnize and return the
parent instance instead of creating a new Python object that represents the
attribute. The special :enum:`return_value_policy::reference_internal` policy
should be used in this case: it disables the same-address optimization and
ensures that pybind11 returns a reference.
The following example snippet shows the correct usage:
.. code-block:: cpp
class Example {
public:
Internal &get_internal() { return internal; }
private:
Internal internal;
};
PYBIND11_PLUGIN(example) {
py::module m("example", "pybind11 example plugin");
py::class_<Example>(m, "Example")
.def(py::init<>())
.def("get_internal", &Example::get_internal, "Return the internal data",
py::return_value_policy::reference_internal);
return m.ptr();
}
a copy.
.. note::

View File

@ -9,6 +9,7 @@
#include "example.h"
#include <pybind11/stl.h>
#include <pybind11/operators.h>
PYBIND11_DECLARE_HOLDER_TYPE(T, std::shared_ptr<T>);
@ -157,4 +158,19 @@ void init_issues(py::module &m) {
})
;
// Issue #328: first member in a class can't be used in operators
#define TRACKERS(CLASS) CLASS() { std::cout << #CLASS "@" << this << " constructor\n"; } \
~CLASS() { std::cout << #CLASS "@" << this << " destructor\n"; }
struct NestABase { int value = -2; TRACKERS(NestABase) };
struct NestA : NestABase { int value = 3; NestA& operator+=(int i) { value += i; return *this; } TRACKERS(NestA) };
struct NestB { NestA a; int value = 4; NestB& operator-=(int i) { value -= i; return *this; } TRACKERS(NestB) };
struct NestC { NestB b; int value = 5; NestC& operator*=(int i) { value *= i; return *this; } TRACKERS(NestC) };
py::class_<NestABase>(m2, "NestABase").def(py::init<>()).def_readwrite("value", &NestABase::value);
py::class_<NestA>(m2, "NestA").def(py::init<>()).def(py::self += int())
.def("as_base", [](NestA &a) -> NestABase& { return (NestABase&) a; }, py::return_value_policy::reference_internal);
py::class_<NestB>(m2, "NestB").def(py::init<>()).def(py::self -= int()).def_readwrite("a", &NestB::a);
py::class_<NestC>(m2, "NestC").def(py::init<>()).def(py::self *= int()).def_readwrite("b", &NestC::b);
m2.def("print_NestA", [](const NestA &a) { std::cout << a.value << std::endl; });
m2.def("print_NestB", [](const NestB &b) { std::cout << b.value << std::endl; });
m2.def("print_NestC", [](const NestC &c) { std::cout << c.value << std::endl; });
}

View File

@ -11,6 +11,7 @@ from example.issues import ElementList, ElementA, print_element
from example.issues import expect_float, expect_int
from example.issues import A, call_f
from example.issues import StrIssue
from example.issues import NestA, NestB, NestC, print_NestA, print_NestB, print_NestC
import gc
print_cchar("const char *")
@ -78,3 +79,33 @@ try:
print(StrIssue("no", "such", "constructor"))
except TypeError as e:
print("Failed as expected: " + str(e))
a = NestA()
b = NestB()
c = NestC()
a += 10
b.a += 100
c.b.a += 1000
b -= 1
c.b -= 3
c *= 7
print_NestA(a)
print_NestA(b.a)
print_NestA(c.b.a)
print_NestB(b)
print_NestB(c.b)
print_NestC(c)
abase = a.as_base()
print(abase.value)
a.as_base().value += 44
print(abase.value)
print(c.b.a.as_base().value)
c.b.a.as_base().value += 44
print(c.b.a.as_base().value)
del c
gc.collect()
del a # Should't delete while abase is still alive
gc.collect()
print(abase.value)
del abase
gc.collect()

View File

@ -24,3 +24,32 @@ Failed as expected: Incompatible constructor arguments. The following argument t
1. example.issues.StrIssue(arg0: int)
2. example.issues.StrIssue()
Invoked with: no, such, constructor
NestABase@0x1152940 constructor
NestA@0x1152940 constructor
NestABase@0x11f9350 constructor
NestA@0x11f9350 constructor
NestB@0x11f9350 constructor
NestABase@0x112d0d0 constructor
NestA@0x112d0d0 constructor
NestB@0x112d0d0 constructor
NestC@0x112d0d0 constructor
13
103
1003
3
1
35
-2
42
-2
42
NestC@0x112d0d0 destructor
NestB@0x112d0d0 destructor
NestA@0x112d0d0 destructor
NestABase@0x112d0d0 destructor
42
NestA@0x1152940 destructor
NestABase@0x1152940 destructor
NestB@0x11f9350 destructor
NestA@0x11f9350 destructor
NestABase@0x11f9350 destructor

View File

@ -119,12 +119,15 @@ PYBIND11_NOINLINE inline std::string error_string() {
return errorString;
}
PYBIND11_NOINLINE inline handle get_object_handle(const void *ptr) {
auto instances = get_internals().registered_instances;
auto it = instances.find(ptr);
if (it == instances.end())
return handle();
return handle((PyObject *) it->second);
PYBIND11_NOINLINE inline handle get_object_handle(const void *ptr, const detail::type_info *type ) {
auto &instances = get_internals().registered_instances;
auto range = instances.equal_range(ptr);
for (auto it = range.first; it != range.second; ++it) {
auto instance_type = detail::get_type_info(Py_TYPE(it->second), false);
if (instance_type && instance_type == type)
return handle((PyObject *) it->second);
}
return handle();
}
inline PyThreadState *get_thread_state_unchecked() {
@ -139,6 +142,9 @@ inline PyThreadState *get_thread_state_unchecked() {
#endif
}
// Forward declaration
inline void keep_alive_impl(handle nurse, handle patient);
class type_caster_generic {
public:
PYBIND11_NOINLINE type_caster_generic(const std::type_info &type_info)
@ -174,14 +180,7 @@ public:
if (src == nullptr)
return handle(Py_None).inc_ref();
// avoid an issue with internal references matching their parent's address
bool dont_cache = policy == return_value_policy::reference_internal &&
parent && ((instance<void> *) parent.ptr())->value == (void *) src;
auto& internals = get_internals();
auto it_instance = internals.registered_instances.find(src);
if (it_instance != internals.registered_instances.end() && !dont_cache)
return handle((PyObject *) it_instance->second).inc_ref();
auto &internals = get_internals();
auto it = internals.registered_types_cpp.find(std::type_index(*type_info));
if (it == internals.registered_types_cpp.end()) {
@ -198,13 +197,20 @@ public:
}
auto tinfo = (const detail::type_info *) it->second;
auto it_instances = internals.registered_instances.equal_range(src);
for (auto it = it_instances.first; it != it_instances.second; ++it) {
auto instance_type = detail::get_type_info(Py_TYPE(it->second), false);
if (instance_type && instance_type == tinfo)
return handle((PyObject *) it->second).inc_ref();
}
object inst(PyType_GenericAlloc(tinfo->type, 0), false);
auto wrapper = (instance<void> *) inst.ptr();
wrapper->value = src;
wrapper->owned = true;
wrapper->parent = nullptr;
if (policy == return_value_policy::automatic)
policy = return_value_policy::take_ownership;
@ -225,12 +231,12 @@ public:
wrapper->owned = false;
} else if (policy == return_value_policy::reference_internal) {
wrapper->owned = false;
wrapper->parent = parent.inc_ref().ptr();
detail::keep_alive_impl(inst, parent);
}
tinfo->init_holder(inst.ptr(), existing_holder);
if (!dont_cache)
internals.registered_instances[wrapper->value] = inst.ptr();
internals.registered_instances.emplace(wrapper->value, inst.ptr());
return inst.release();
}

View File

@ -245,7 +245,6 @@ inline std::string error_string();
template <typename type> struct instance_essentials {
PyObject_HEAD
type *value;
PyObject *parent;
PyObject *weakrefs;
bool owned : 1;
bool constructed : 1;
@ -266,9 +265,9 @@ struct overload_hash {
/// Internal data struture used to track registered instances and types
struct internals {
std::unordered_map<std::type_index, void*> registered_types_cpp; // std::type_index -> type_info
std::unordered_map<const void *, void*> registered_types_py; // PyTypeObject* -> type_info
std::unordered_map<const void *, void*> registered_instances; // void * -> PyObject*
std::unordered_map<std::type_index, void*> registered_types_cpp; // std::type_index -> type_info
std::unordered_map<const void *, void*> registered_types_py; // PyTypeObject* -> type_info
std::unordered_multimap<const void *, void*> registered_instances; // void * -> PyObject*
std::unordered_set<std::pair<const PyObject *, const char *>, overload_hash> inactive_overload_cache;
std::forward_list<void (*) (std::exception_ptr)> registered_exception_translators;
#if defined(WITH_THREAD)

View File

@ -729,23 +729,27 @@ protected:
auto tinfo = detail::get_type_info(type);
self->value = ::operator new(tinfo->type_size);
self->owned = true;
self->parent = nullptr;
self->constructed = false;
detail::get_internals().registered_instances[self->value] = (PyObject *) self;
detail::get_internals().registered_instances.emplace(self->value, (PyObject *) self);
return (PyObject *) self;
}
static void dealloc(instance<void> *self) {
if (self->value) {
bool dont_cache = self->parent && ((instance<void> *) self->parent)->value == self->value;
if (!dont_cache) { // avoid an issue with internal references matching their parent's address
auto &registered_instances = detail::get_internals().registered_instances;
auto it = registered_instances.find(self->value);
if (it == registered_instances.end())
pybind11_fail("generic_type::dealloc(): Tried to deallocate unregistered instance!");
registered_instances.erase(it);
auto instance_type = Py_TYPE(self);
auto &registered_instances = detail::get_internals().registered_instances;
auto range = registered_instances.equal_range(self->value);
bool found = false;
for (auto it = range.first; it != range.second; ++it) {
if (instance_type == Py_TYPE(it->second)) {
registered_instances.erase(it);
found = true;
break;
}
}
Py_XDECREF(self->parent);
if (!found)
pybind11_fail("generic_type::dealloc(): Tried to deallocate unregistered instance!");
if (self->weakrefs)
PyObject_ClearWeakRefs((PyObject *) self);
}
@ -1089,11 +1093,8 @@ template <typename... Args> struct init {
}
};
PYBIND11_NOINLINE inline void keep_alive_impl(int Nurse, int Patient, handle args, handle ret) {
inline void keep_alive_impl(handle nurse, handle patient) {
/* Clever approach based on weak references taken from Boost.Python */
handle nurse (Nurse > 0 ? PyTuple_GetItem(args.ptr(), Nurse - 1) : ret.ptr());
handle patient(Patient > 0 ? PyTuple_GetItem(args.ptr(), Patient - 1) : ret.ptr());
if (!nurse || !patient)
pybind11_fail("Could not activate keep_alive!");
@ -1109,6 +1110,13 @@ PYBIND11_NOINLINE inline void keep_alive_impl(int Nurse, int Patient, handle arg
(void) wr.release();
}
PYBIND11_NOINLINE inline void keep_alive_impl(int Nurse, int Patient, handle args, handle ret) {
handle nurse (Nurse > 0 ? PyTuple_GetItem(args.ptr(), Nurse - 1) : ret.ptr());
handle patient(Patient > 0 ? PyTuple_GetItem(args.ptr(), Patient - 1) : ret.ptr());
keep_alive_impl(nurse, patient);
}
template <typename Iterator> struct iterator_state {
Iterator it, end;
bool first;
@ -1316,8 +1324,8 @@ class gil_scoped_acquire { };
class gil_scoped_release { };
#endif
inline function get_overload(const void *this_ptr, const char *name) {
handle py_object = detail::get_object_handle(this_ptr);
inline function get_type_overload(const void *this_ptr, const detail::type_info *this_type, const char *name) {
handle py_object = detail::get_object_handle(this_ptr, this_type);
if (!py_object)
return function();
handle type = py_object.get_type();
@ -1348,6 +1356,14 @@ inline function get_overload(const void *this_ptr, const char *name) {
return overload;
}
template <class T> function get_overload(const T *this_ptr, const char *name) {
auto &cpp_types = detail::get_internals().registered_types_cpp;
auto it = cpp_types.find(typeid(T));
if (it == cpp_types.end())
return function();
return get_type_overload(this_ptr, (const detail::type_info *) it->second, name);
}
#define PYBIND11_OVERLOAD_INT(ret_type, name, ...) { \
pybind11::gil_scoped_acquire gil; \
pybind11::function overload = pybind11::get_overload(this, name); \