From 39fd6a946336fb9bb46f55f55b0a2de7d0e2098c Mon Sep 17 00:00:00 2001 From: Dean Moldovan Date: Thu, 17 Aug 2017 02:01:32 +0200 Subject: [PATCH] Reduce binary size overhead of new-style constructors The lookup of the `self` type and value pointer are moved out of template code and into `dispatcher`. This brings down the binary size of constructors back to the level of the old placement-new approach. (It also avoids a second lookup for `init_instance`.) With this implementation, mixing old- and new-style constructors in the same overload set may result in some runtime overhead for temporary allocations/deallocations, but this should be fine as old style constructors are phased out. --- include/pybind11/attr.h | 14 ++- include/pybind11/detail/init.h | 140 ++++++++++++----------------- include/pybind11/pybind11.h | 42 +++++++-- tests/test_factory_constructors.py | 15 ++-- 4 files changed, 108 insertions(+), 103 deletions(-) diff --git a/include/pybind11/attr.h b/include/pybind11/attr.h index 86c54ee81..dce875a6b 100644 --- a/include/pybind11/attr.h +++ b/include/pybind11/attr.h @@ -133,8 +133,8 @@ struct argument_record { /// Internal data structure which holds metadata about a bound function (signature, overloads, etc.) struct function_record { function_record() - : is_constructor(false), is_stateless(false), is_operator(false), - has_args(false), has_kwargs(false), is_method(false) { } + : is_constructor(false), is_new_style_constructor(false), is_stateless(false), + is_operator(false), has_args(false), has_kwargs(false), is_method(false) { } /// Function name char *name = nullptr; /* why no C++ strings? They generate heavier code.. */ @@ -163,6 +163,9 @@ struct function_record { /// True if name == '__init__' bool is_constructor : 1; + /// True if this is a new-style `__init__` defined in `detail/init.h` + bool is_new_style_constructor : 1; + /// True if this is a stateless function pointer bool is_stateless : 1; @@ -281,6 +284,9 @@ inline function_call::function_call(function_record &f, handle p) : args_convert.reserve(f.nargs); } +/// Tag for a new-style `__init__` defined in `detail/init.h` +struct is_new_style_constructor { }; + /** * Partial template specializations to process custom attributes provided to * cpp_function_ and class_. These are either used to initialize the respective @@ -339,6 +345,10 @@ template <> struct process_attribute : process_attribute_defaultis_operator = true; } }; +template <> struct process_attribute : process_attribute_default { + static void init(const is_new_style_constructor &, function_record *r) { r->is_new_style_constructor = true; } +}; + /// Process a keyword argument attribute (*without* a default value) template <> struct process_attribute : process_attribute_default { static void init(const arg &a, function_record *r) { diff --git a/include/pybind11/detail/init.h b/include/pybind11/detail/init.h index 647279cf0..430f4bf52 100644 --- a/include/pybind11/detail/init.h +++ b/include/pybind11/detail/init.h @@ -13,33 +13,29 @@ NAMESPACE_BEGIN(PYBIND11_NAMESPACE) NAMESPACE_BEGIN(detail) + +template <> +class type_caster { +public: + bool load(handle h, bool) { + value = reinterpret_cast(h.ptr()); + return true; + } + + template using cast_op_type = value_and_holder &; + operator value_and_holder &() { return *value; } + static PYBIND11_DESCR name() { return type_descr(_()); } + +private: + value_and_holder *value = nullptr; +}; + NAMESPACE_BEGIN(initimpl) inline void no_nullptr(void *ptr) { if (!ptr) throw type_error("pybind11::init(): factory function returned nullptr"); } -// Makes sure the `value` for the given value_and_holder is not preallocated (e.g. by a previous -// old-style placement new `__init__` that requires a preallocated, uninitialized value). If -// preallocated, deallocate. Returns the (null) value pointer reference ready for allocation. -inline void *&deallocate(value_and_holder &v_h) { - if (v_h) v_h.type->dealloc(v_h); - return v_h.value_ptr(); -} - -PYBIND11_NOINLINE inline value_and_holder load_v_h(handle self_, type_info *tinfo) { - if (!self_ || !tinfo) - throw type_error("__init__(self, ...) called with invalid `self` argument"); - - auto *inst = reinterpret_cast(self_.ptr()); - auto result = inst->get_value_and_holder(tinfo, false); - if (!result.inst) - throw type_error("__init__(self, ...) called with invalid `self` argument"); - - return result; -} - - // Implementing functions for all forms of py::init<...> and py::init(...) template using Cpp = typename Class::type; template using Alias = typename Class::type_alias; @@ -64,7 +60,7 @@ constexpr bool is_alias(void *) { return false; } template void construct_alias_from_cpp(std::true_type /*is_alias_constructible*/, value_and_holder &v_h, Cpp &&base) { - deallocate(v_h) = new Alias(std::move(base)); + v_h.value_ptr() = new Alias(std::move(base)); } template [[noreturn]] void construct_alias_from_cpp(std::false_type /*!is_alias_constructible*/, @@ -98,7 +94,7 @@ void construct(value_and_holder &v_h, Cpp *ptr, bool need_alias) { // it was a normal instance, then steal the holder away into a local variable; thus // the holder and destruction happens when we leave the C++ scope, and the holder // class gets to handle the destruction however it likes. - deallocate(v_h) = ptr; + v_h.value_ptr() = ptr; v_h.set_instance_registered(true); // To prevent init_instance from registering it v_h.type->init_instance(v_h.inst, nullptr); // Set up the holder Holder temp_holder(std::move(v_h.holder>())); // Steal the holder @@ -106,11 +102,9 @@ void construct(value_and_holder &v_h, Cpp *ptr, bool need_alias) { v_h.set_instance_registered(false); construct_alias_from_cpp(is_alias_constructible{}, v_h, std::move(*ptr)); - } - else { - // Otherwise the type isn't inherited, so we don't need an Alias and can just store the Cpp - // pointer directory: - deallocate(v_h) = ptr; + } else { + // Otherwise the type isn't inherited, so we don't need an Alias + v_h.value_ptr() = ptr; } } @@ -119,7 +113,7 @@ void construct(value_and_holder &v_h, Cpp *ptr, bool need_alias) { template = 0> void construct(value_and_holder &v_h, Alias *alias_ptr, bool) { no_nullptr(alias_ptr); - deallocate(v_h) = static_cast *>(alias_ptr); + v_h.value_ptr() = static_cast *>(alias_ptr); } // Holder return: copy its pointer, and move or copy the returned holder into the new instance's @@ -133,7 +127,7 @@ void construct(value_and_holder &v_h, Holder holder, bool need_alias) { throw type_error("pybind11::init(): construction failed: returned holder-wrapped instance " "is not an alias instance"); - deallocate(v_h) = ptr; + v_h.value_ptr() = ptr; v_h.type->init_instance(v_h.inst, &holder); } @@ -148,7 +142,7 @@ void construct(value_and_holder &v_h, Cpp &&result, bool need_alias) { if (Class::has_alias && need_alias) construct_alias_from_cpp(is_alias_constructible{}, v_h, std::move(result)); else - deallocate(v_h) = new Cpp(std::move(result)); + v_h.value_ptr() = new Cpp(std::move(result)); } // return-by-value version 2: returning a value of the alias type itself. We move-construct an @@ -158,50 +152,38 @@ template void construct(value_and_holder &v_h, Alias &&result, bool) { static_assert(std::is_move_constructible>::value, "pybind11::init() return-by-alias-value factory function requires a movable alias class"); - deallocate(v_h) = new Alias(std::move(result)); + v_h.value_ptr() = new Alias(std::move(result)); } // Implementing class for py::init<...>() -template struct constructor { +template +struct constructor { template = 0> static void execute(Class &cl, const Extra&... extra) { - auto *cl_type = get_type_info(typeid(Cpp)); - cl.def("__init__", [cl_type](handle self_, Args... args) { - auto v_h = load_v_h(self_, cl_type); - // If this value is already registered it must mean __init__ is invoked multiple times; - // we really can't support that in C++, so just ignore the second __init__. - if (v_h.instance_registered()) return; - - construct(v_h, new Cpp{std::forward(args)...}, false); - }, extra...); + cl.def("__init__", [](value_and_holder &v_h, Args... args) { + v_h.value_ptr() = new Cpp{std::forward(args)...}; + }, is_new_style_constructor(), extra...); } template , Args...>::value, int> = 0> static void execute(Class &cl, const Extra&... extra) { - auto *cl_type = get_type_info(typeid(Cpp)); - cl.def("__init__", [cl_type](handle self_, Args... args) { - auto v_h = load_v_h(self_, cl_type); - if (v_h.instance_registered()) return; // Ignore duplicate __init__ calls (see above) - - if (Py_TYPE(v_h.inst) == cl_type->type) - construct(v_h, new Cpp{std::forward(args)...}, false); + cl.def("__init__", [](value_and_holder &v_h, Args... args) { + if (Py_TYPE(v_h.inst) == v_h.type->type) + v_h.value_ptr() = new Cpp{std::forward(args)...}; else - construct(v_h, new Alias{std::forward(args)...}, true); - }, extra...); + v_h.value_ptr() = new Alias{std::forward(args)...}; + }, is_new_style_constructor(), extra...); } template , Args...>::value, int> = 0> static void execute(Class &cl, const Extra&... extra) { - auto *cl_type = get_type_info(typeid(Cpp)); - cl.def("__init__", [cl_type](handle self_, Args... args) { - auto v_h = load_v_h(self_, cl_type); - if (v_h.instance_registered()) return; // Ignore duplicate __init__ calls (see above) - construct(v_h, new Alias{std::forward(args)...}, true); - }, extra...); + cl.def("__init__", [](value_and_holder &v_h, Args... args) { + v_h.value_ptr() = new Alias{std::forward(args)...}; + }, is_new_style_constructor(), extra...); } }; @@ -210,17 +192,15 @@ template struct alias_constructor { template , Args...>::value, int> = 0> static void execute(Class &cl, const Extra&... extra) { - auto *cl_type = get_type_info(typeid(Cpp)); - cl.def("__init__", [cl_type](handle self_, Args... args) { - auto v_h = load_v_h(self_, cl_type); - if (v_h.instance_registered()) return; // Ignore duplicate __init__ calls (see above) - construct(v_h, new Alias{std::forward(args)...}, true); - }, extra...); + cl.def("__init__", [](value_and_holder &v_h, Args... args) { + v_h.value_ptr() = new Alias{std::forward(args)...}; + }, is_new_style_constructor(), extra...); } }; // Implementation class for py::init(Func) and py::init(Func, AliasFunc) -template struct factory { +template +struct factory { private: using CFuncType = typename std::remove_reference::type; using AFunc = conditional_t::value, void_type, AFuncIn>; @@ -248,21 +228,16 @@ public: template ::value, int> = 0> void execute(Class &cl, const Extra&... extra) && { - auto *cl_type = get_type_info(typeid(Cpp)); #if defined(PYBIND11_CPP14) - cl.def("__init__", [cl_type, func = std::move(class_factory)] + cl.def("__init__", [func = std::move(class_factory)] #else CFuncType &func = class_factory; - cl.def("__init__", [cl_type, func] + cl.def("__init__", [func] #endif - (handle self_, Args... args) { - auto v_h = load_v_h(self_, cl_type); - // If this value is already registered it must mean __init__ is invoked multiple times; - // we really can't support that in C++, so just ignore the second __init__. - if (v_h.instance_registered()) return; - - construct(v_h, func(std::forward(args)...), Py_TYPE(v_h.inst) != cl_type->type); - }, extra...); + (value_and_holder &v_h, Args... args) { + construct(v_h, func(std::forward(args)...), + Py_TYPE(v_h.inst) != v_h.type->type); + }, is_new_style_constructor(), extra...); } // Add __init__ definition for a class with an alias *and* distinct alias factory; the former is @@ -271,26 +246,21 @@ public: template ::value, int> = 0> void execute(Class &cl, const Extra&... extra) && { - auto *cl_type = get_type_info(typeid(Cpp)); - #if defined(PYBIND11_CPP14) - cl.def("__init__", [cl_type, class_func = std::move(class_factory), alias_func = std::move(alias_factory)] + cl.def("__init__", [class_func = std::move(class_factory), alias_func = std::move(alias_factory)] #else CFuncType &class_func = class_factory; AFuncType &alias_func = alias_factory; - cl.def("__init__", [cl_type, class_func, alias_func] + cl.def("__init__", [class_func, alias_func] #endif - (handle self_, Args... args) { - auto v_h = load_v_h(self_, cl_type); - if (v_h.instance_registered()) return; // (see comment above) - - if (Py_TYPE(v_h.inst) == cl_type->type) + (value_and_holder &v_h, Args... args) { + if (Py_TYPE(v_h.inst) == v_h.type->type) // If the instance type equals the registered type we don't have inheritance, so // don't need the alias and can construct using the class function: construct(v_h, class_func(std::forward(args)...), false); else construct(v_h, alias_func(std::forward(args)...), true); - }, extra...); + }, is_new_style_constructor(), extra...); } }; diff --git a/include/pybind11/pybind11.h b/include/pybind11/pybind11.h index 366ad81fc..3d2d2f05b 100644 --- a/include/pybind11/pybind11.h +++ b/include/pybind11/pybind11.h @@ -242,8 +242,9 @@ protected: .cast() + "."; #endif signature += tinfo->type->tp_name; - } else if (rec->is_constructor && arg_index == 0 && detail::same_type(typeid(handle), *t) && rec->scope) { - // A py::init(...) constructor takes `self` as a `handle`; rewrite it to the type + } else if (rec->is_new_style_constructor && arg_index == 0) { + // A new-style `__init__` takes `self` as `value_and_holder`. + // Rewrite it to the proper class type. #if defined(PYPY_VERSION) signature += rec->scope.attr("__module__").cast() + "."; #endif @@ -425,6 +426,23 @@ protected: handle parent = n_args_in > 0 ? PyTuple_GET_ITEM(args_in, 0) : nullptr, result = PYBIND11_TRY_NEXT_OVERLOAD; + auto self_value_and_holder = value_and_holder(); + if (overloads->is_constructor) { + const auto tinfo = get_type_info((PyTypeObject *) overloads->scope.ptr()); + const auto pi = reinterpret_cast(parent.ptr()); + self_value_and_holder = pi->get_value_and_holder(tinfo, false); + + if (!self_value_and_holder.type || !self_value_and_holder.inst) { + PyErr_SetString(PyExc_TypeError, "__init__(self, ...) called with invalid `self` argument"); + return nullptr; + } + + // If this value is already registered it must mean __init__ is invoked multiple times; + // we really can't support that in C++, so just ignore the second __init__. + if (self_value_and_holder.instance_registered()) + return none().release().ptr(); + } + try { // We do this in two passes: in the first pass, we load arguments with `convert=false`; // in the second, we allow conversion (except for arguments with an explicit @@ -472,6 +490,18 @@ protected: size_t args_to_copy = std::min(pos_args, n_args_in); size_t args_copied = 0; + // 0. Inject new-style `self` argument + if (func.is_new_style_constructor) { + // The `value` may have been preallocated by an old-style `__init__` + // if it was a preceding candidate for overload resolution. + if (self_value_and_holder) + self_value_and_holder.type->dealloc(self_value_and_holder); + + call.args.push_back(reinterpret_cast(&self_value_and_holder)); + call.args_convert.push_back(false); + ++args_copied; + } + // 1. Copy any position arguments given. bool bad_arg = false; for (; args_copied < args_to_copy; ++args_copied) { @@ -715,13 +745,9 @@ protected: PyErr_SetString(PyExc_TypeError, msg.c_str()); return nullptr; } else { - if (overloads->is_constructor) { - auto tinfo = get_type_info((PyTypeObject *) overloads->scope.ptr()); + if (overloads->is_constructor && !self_value_and_holder.holder_constructed()) { auto *pi = reinterpret_cast(parent.ptr()); - auto v_h = pi->get_value_and_holder(tinfo); - if (!v_h.holder_constructed()) { - tinfo->init_instance(pi, nullptr); - } + self_value_and_holder.type->init_instance(pi, nullptr); } return result.ptr(); } diff --git a/tests/test_factory_constructors.py b/tests/test_factory_constructors.py index 360137bc4..78a3910ad 100644 --- a/tests/test_factory_constructors.py +++ b/tests/test_factory_constructors.py @@ -40,8 +40,7 @@ def test_init_factory_basic(): with pytest.raises(TypeError) as excinfo: m.TestFactory3(tag.null_ptr) - assert (str(excinfo.value) == - "pybind11::init(): factory function returned nullptr") + assert str(excinfo.value) == "pybind11::init(): factory function returned nullptr" assert [i.alive() for i in cstats] == [3, 3, 3] assert ConstructorStats.detail_reg_inst() == n_inst + 9 @@ -352,9 +351,9 @@ def test_reallocations(capture, msg): create_and_destroy(1.5) assert msg(capture) == strip_comments(""" noisy new # allocation required to attempt first overload + noisy delete # have to dealloc before considering factory init overload noisy new # pointer factory calling "new", part 1: allocation NoisyAlloc(double 1.5) # ... part two, invoking constructor - noisy delete # have to dealloc before stashing factory-generated pointer --- ~NoisyAlloc() # Destructor noisy delete # operator delete @@ -396,9 +395,9 @@ def test_reallocations(capture, msg): create_and_destroy(4, 0.5) assert msg(capture) == strip_comments(""" noisy new # preallocation needed before invoking placement-new overload + noisy delete # deallocation of preallocated storage noisy new # Factory pointer allocation NoisyAlloc(int 4) # factory pointer construction - noisy delete # deallocation of preallocated storage --- ~NoisyAlloc() # Destructor noisy delete # operator delete @@ -408,6 +407,8 @@ def test_reallocations(capture, msg): create_and_destroy(5, "hi") assert msg(capture) == strip_comments(""" noisy new # preallocation needed before invoking first placement new + noisy delete # delete before considering new-style constructor + noisy new # preallocation for second placement new noisy placement new # Placement new in the second placement new overload NoisyAlloc(int 5) # construction --- @@ -450,11 +451,9 @@ def test_invalid_self(): for arg in (1, 2): with pytest.raises(TypeError) as excinfo: BrokenTF1(arg) - assert (str(excinfo.value) == - "__init__(self, ...) called with invalid `self` argument") + assert str(excinfo.value) == "__init__(self, ...) called with invalid `self` argument" for arg in (1, 2, 3, 4): with pytest.raises(TypeError) as excinfo: BrokenTF6(arg) - assert (str(excinfo.value) == - "__init__(self, ...) called with invalid `self` argument") + assert str(excinfo.value) == "__init__(self, ...) called with invalid `self` argument"