vectorize: pass-through of non-vectorizable args

This extends py::vectorize to automatically pass through non-vectorizable arguments. This removes the need for the documented "explicitly exclude an argument" workaround. Vectorization now applies to arithmetic, std::complex, and POD types, passed as plain value or by const lvalue reference (previously only pass-by-value types were supported). Non-const lvalue references and any other types are passed through as-is. Functions with rvalue reference arguments (whether vectorizable or not) are explicitly prohibited: an rvalue reference is inherently not something that can be passed multiple times and is thus unsuitable to being in a vectorized function. The vectorize returned value is also now more sensitive to inputs: previously it would return by value when all inputs are of size 1; this is now amended to having all inputs of size 1 *and* 0 dimensions. Thus if you pass in, for example, [[1]], you get back a 1x1, 2D array, while previously you got back just the resulting single value. Vectorization of member function specializations is now also supported via `py::vectorize(&Class::method)`; this required passthrough support for the initial object pointer on the wrapping function pointer.
2024-11-22 05:05:11 +00:00 · 2017-03-26 00:51:40 -03:00 · 2017-03-26 00:51:40 -03:00 · f3ce00eaed
commit f3ce00eaed
parent 41f8da4a95
6 changed files with 212 additions and 80 deletions
--- a/docs/advanced/pycpp/numpy.rst
+++ b/docs/advanced/pycpp/numpy.rst
@ -239,27 +239,13 @@ by the compiler. The result is returned as a NumPy array of type
 The scalar argument ``z`` is transparently replicated 4 times.  The input
 arrays ``x`` and ``y`` are automatically converted into the right types (they
 are of type  ``numpy.dtype.int64`` but need to be ``numpy.dtype.int32`` and
-``numpy.dtype.float32``, respectively)
+``numpy.dtype.float32``, respectively).
-Sometimes we might want to explicitly exclude an argument from the vectorization
+.. note::
 because it makes little sense to wrap it in a NumPy array. For instance,
 suppose the function signature was
-.. code-block:: cpp
+    Only arithmetic, complex, and POD types passed by value or by ``const &``
-
+    reference are vectorized; all other arguments are passed through as-is.
-    double my_func(int x, float y, my_custom_type *z);
+    Functions taking rvalue reference arguments cannot be vectorized.
 This can be done with a stateful Lambda closure:
 .. code-block:: cpp
    // Vectorize a lambda function with a capture object (e.g. to exclude some arguments from the vectorization)
    m.def("vectorized_func",
        [](py::array_t<int> x, py::array_t<float> y, my_custom_type *z) {
            auto stateful_closure = [z](int x, float y) { return my_func(x, y, z); };
            return py::vectorize(stateful_closure)(x, y);
        }
    );
 In cases where the computation is too complicated to be reduced to
 ``vectorize``, it will be necessary to create and access the buffer contents
--- a/include/pybind11/cast.h
+++ b/include/pybind11/cast.h
@ -15,6 +15,7 @@
 #include "descr.h"
 #include <array>
 #include <limits>
 #include <tuple>
 NAMESPACE_BEGIN(pybind11)
 NAMESPACE_BEGIN(detail)
--- a/include/pybind11/common.h
+++ b/include/pybind11/common.h
@ -367,6 +367,12 @@ template<size_t ...S> struct make_index_sequence_impl <0, S...> { typedef index_
 template<size_t N> using make_index_sequence = typename make_index_sequence_impl<N>::type;
 #endif
 /// Make an index sequence of the indices of true arguments
 template <typename ISeq, size_t, bool...> struct select_indices_impl { using type = ISeq; };
 template <size_t... IPrev, size_t I, bool B, bool... Bs> struct select_indices_impl<index_sequence<IPrev...>, I, B, Bs...>
    : select_indices_impl<conditional_t<B, index_sequence<IPrev..., I>, index_sequence<IPrev...>>, I + 1, Bs...> {};
 template <bool... Bs> using select_indices = typename select_indices_impl<index_sequence<>, 0, Bs...>::type;
 /// Backports of std::bool_constant and std::negation to accomodate older compilers
 template <bool B> using bool_constant = std::integral_constant<bool, B>;
 template <typename T> struct negation : bool_constant<!T::value> { };
--- a/include/pybind11/numpy.h
+++ b/include/pybind11/numpy.h
@ -1289,8 +1289,8 @@ public:
        return *this;
    }
-    template <size_t K, class T> const T& data() const {
+    template <size_t K, class T = void> T* data() const {
-        return *reinterpret_cast<T*>(m_common_iterator[K].data());
+        return reinterpret_cast<T*>(m_common_iterator[K].data());
    }
 private:
@ -1340,7 +1340,7 @@ enum class broadcast_trivial { non_trivial, c_trivial, f_trivial };
 // buffer; returns `non_trivial` otherwise.
 template <size_t N>
 broadcast_trivial broadcast(const std::array<buffer_info, N> &buffers, ssize_t &ndim, std::vector<ssize_t> &shape) {
-    ndim = std::accumulate(buffers.begin(), buffers.end(), ssize_t(0), [](ssize_t res, const buffer_info& buf) {
+    ndim = std::accumulate(buffers.begin(), buffers.end(), ssize_t(0), [](ssize_t res, const buffer_info &buf) {
        return std::max(res, buf.ndim);
    });
@ -1411,21 +1411,63 @@ broadcast_trivial broadcast(const std::array<buffer_info, N> &buffers, ssize_t &
        broadcast_trivial::non_trivial;
 }
 template <typename T>
 struct vectorize_arg {
    static_assert(!std::is_rvalue_reference<T>::value, "Functions with rvalue reference arguments cannot be vectorized");
    // The wrapped function gets called with this type:
    using call_type = remove_reference_t<T>;
    // Is this a vectorized argument?
    static constexpr bool vectorize =
        satisfies_any_of<call_type, std::is_arithmetic, is_complex, std::is_pod>::value &&
        satisfies_none_of<call_type, std::is_pointer, std::is_array, is_std_array, std::is_enum>::value &&
        (!std::is_reference<T>::value ||
         (std::is_lvalue_reference<T>::value && std::is_const<call_type>::value));
    // Accept this type: an array for vectorized types, otherwise the type as-is:
    using type = conditional_t<vectorize, array_t<remove_cv_t<call_type>, array::forcecast>, T>;
 };
 template <typename Func, typename Return, typename... Args>
 struct vectorize_helper {
-    typename std::remove_reference<Func>::type f;
+private:
    static constexpr size_t N = sizeof...(Args);
    static constexpr size_t NVectorized = constexpr_sum(vectorize_arg<Args>::vectorize...);
    static_assert(NVectorized >= 1,
            "pybind11::vectorize(...) requires a function with at least one vectorizable argument");
 public:
    template <typename T>
-    explicit vectorize_helper(T&&f) : f(std::forward<T>(f)) { }
+    explicit vectorize_helper(T &&f) : f(std::forward<T>(f)) { }
-    object operator()(array_t<Args, array::forcecast>... args) {
+    object operator()(typename vectorize_arg<Args>::type... args) {
-        return run(args..., make_index_sequence<N>());
+        return run(args...,
                   make_index_sequence<N>(),
                   select_indices<vectorize_arg<Args>::vectorize...>(),
                   make_index_sequence<NVectorized>());
    }
-    template <size_t ... Index> object run(array_t<Args, array::forcecast>&... args, index_sequence<Index...> index) {
+private:
-        /* Request buffers from all parameters */
+    remove_reference_t<Func> f;
-        std::array<buffer_info, N> buffers {{ args.request()... }};
+
    template <size_t Index> using param_n_t = typename pack_element<Index, typename vectorize_arg<Args>::call_type...>::type;
    // Runs a vectorized function given arguments tuple and three index sequences:
    //     - Index is the full set of 0 ... (N-1) argument indices;
    //     - VIndex is the subset of argument indices with vectorized parameters, letting us access
    //       vectorized arguments (anything not in this sequence is passed through)
    //     - BIndex is a incremental sequence (beginning at 0) of the same size as VIndex, so that
    //       we can store vectorized buffer_infos in an array (argument VIndex has its buffer at
    //       index BIndex in the array).
    template <size_t... Index, size_t... VIndex, size_t... BIndex> object run(
            typename vectorize_arg<Args>::type &...args,
            index_sequence<Index...> i_seq, index_sequence<VIndex...> vi_seq, index_sequence<BIndex...> bi_seq) {
        // Pointers to values the function was called with; the vectorized ones set here will start
        // out as array_t<T> pointers, but they will be changed them to T pointers before we make
        // call the wrapped function.  Non-vectorized pointers are left as-is.
        std::array<void *, N> params{{ &args... }};
        // The array of `buffer_info`s of vectorized arguments:
        std::array<buffer_info, NVectorized> buffers{{ reinterpret_cast<array *>(params[VIndex])->request()... }};
        /* Determine dimensions parameters of output array */
        ssize_t nd = 0;
@ -1433,61 +1475,79 @@ struct vectorize_helper {
        auto trivial = broadcast(buffers, nd, shape);
        size_t ndim = (size_t) nd;
-        ssize_t size = 1;
+        size_t size = std::accumulate(shape.begin(), shape.end(), (size_t) 1, std::multiplies<size_t>());
-        std::vector<ssize_t> strides(ndim);
+
-        if (ndim > 0) {
+        // If all arguments are 0-dimension arrays (i.e. single values) return a plain value (i.e.
-            if (trivial == broadcast_trivial::f_trivial) {
+        // not wrapped in an array).
-                strides[0] = sizeof(Return);
+        if (size == 1 && ndim == 0) {
-                for (size_t i = 1; i < ndim; ++i) {
+            PYBIND11_EXPAND_SIDE_EFFECTS(params[VIndex] = buffers[BIndex].ptr);
-                    strides[i] = strides[i - 1] * shape[i - 1];
+            return cast(f(*reinterpret_cast<param_n_t<Index> *>(params[Index])...));
                    size *= shape[i - 1];
                }
                size *= shape[ndim - 1];
            }
            else {
                strides[ndim-1] = sizeof(Return);
                for (size_t i = ndim - 1; i > 0; --i) {
                    strides[i - 1] = strides[i] * shape[i];
                    size *= shape[i];
                }
                size *= shape[0];
            }
        }
-        if (size == 1)
+        array_t<Return> result;
-            return cast(f(*reinterpret_cast<Args *>(buffers[Index].ptr)...));
+        if (trivial == broadcast_trivial::f_trivial) result = array_t<Return, array::f_style>(shape);
        else result = array_t<Return>(shape);
-        array_t<Return> result(shape, strides);
+        if (size == 0) return result;
        auto buf = result.request();
        auto output = (Return *) buf.ptr;
        /* Call the function */
-        if (trivial == broadcast_trivial::non_trivial) {
+        if (trivial == broadcast_trivial::non_trivial)
-            apply_broadcast<Index...>(buffers, buf, index);
+            apply_broadcast(buffers, params, result, i_seq, vi_seq, bi_seq);
-        } else {
+        else
-            for (ssize_t i = 0; i < size; ++i)
+            apply_trivial(buffers, params, result.mutable_data(), size, i_seq, vi_seq, bi_seq);
                output[i] = f((reinterpret_cast<Args *>(buffers[Index].ptr)[buffers[Index].size == 1 ? 0 : i])...);
        }
        return result;
    }
-    template <size_t... Index>
+    template <size_t... Index, size_t... VIndex, size_t... BIndex>
-    void apply_broadcast(const std::array<buffer_info, N> &buffers,
+    void apply_trivial(std::array<buffer_info, NVectorized> &buffers,
-                         buffer_info &output, index_sequence<Index...>) {
+                       std::array<void *, N> &params,
-        using input_iterator = multi_array_iterator<N>;
+                       Return *out,
-        using output_iterator = array_iterator<Return>;
+                       size_t size,
                       index_sequence<Index...>, index_sequence<VIndex...>, index_sequence<BIndex...>) {
-        input_iterator input_iter(buffers, output.shape);
+        // Initialize an array of mutable byte references and sizes with references set to the
-        output_iterator output_end = array_end<Return>(output);
+        // appropriate pointer in `params`; as we iterate, we'll increment each pointer by its size
        // (except for singletons, which get an increment of 0).
        std::array<std::pair<unsigned char *&, const size_t>, NVectorized> vecparams{{
            std::pair<unsigned char *&, const size_t>(
                    reinterpret_cast<unsigned char *&>(params[VIndex] = buffers[BIndex].ptr),
                    buffers[BIndex].size == 1 ? 0 : sizeof(param_n_t<VIndex>)
            )...
        }};
-        for (output_iterator iter = array_begin<Return>(output);
+        for (size_t i = 0; i < size; ++i) {
-             iter != output_end; ++iter, ++input_iter) {
+            out[i] = f(*reinterpret_cast<param_n_t<Index> *>(params[Index])...);
-            *iter = f((input_iter.template data<Index, Args>())...);
+            for (auto &x : vecparams) x.first += x.second;
        }
    }
    template <size_t... Index, size_t... VIndex, size_t... BIndex>
    void apply_broadcast(std::array<buffer_info, NVectorized> &buffers,
                         std::array<void *, N> &params,
                         array_t<Return> &output_array,
                         index_sequence<Index...>, index_sequence<VIndex...>, index_sequence<BIndex...>) {
        buffer_info output = output_array.request();
        multi_array_iterator<NVectorized> input_iter(buffers, output.shape);
        for (array_iterator<Return> iter = array_begin<Return>(output), end = array_end<Return>(output);
             iter != end;
             ++iter, ++input_iter) {
            PYBIND11_EXPAND_SIDE_EFFECTS((
                params[VIndex] = input_iter.template data<BIndex>()
            ));
            *iter = f(*reinterpret_cast<param_n_t<Index> *>(std::get<Index>(params))...);
        }
    }
 };
 template <typename Func, typename Return, typename... Args>
 vectorize_helper<Func, Return, Args...>
 vectorize_extractor(const Func &f, Return (*) (Args ...)) {
    return detail::vectorize_helper<Func, Return, Args...>(f);
 }
 template <typename T, int Flags> struct handle_type_name<array_t<T, Flags>> {
    static PYBIND11_DESCR name() {
        return _("numpy.ndarray[") + npy_format_descriptor<T>::name() + _("]");
@ -1496,22 +1556,32 @@ template <typename T, int Flags> struct handle_type_name<array_t<T, Flags>> {
 NAMESPACE_END(detail)
-template <typename Func, typename Return, typename... Args>
+// Vanilla pointer vectorizer:
 detail::vectorize_helper<Func, Return, Args...>
 vectorize(const Func &f, Return (*) (Args ...)) {
    return detail::vectorize_helper<Func, Return, Args...>(f);
 }
 template <typename Return, typename... Args>
-detail::vectorize_helper<Return (*) (Args ...), Return, Args...>
+detail::vectorize_helper<Return (*)(Args...), Return, Args...>
 vectorize(Return (*f) (Args ...)) {
-    return vectorize<Return (*) (Args ...), Return, Args...>(f, f);
+    return detail::vectorize_helper<Return (*)(Args...), Return, Args...>(f);
 }
-template <typename Func, typename FuncType = typename detail::remove_class<decltype(&std::remove_reference<Func>::type::operator())>::type>
+// lambda vectorizer:
 template <typename Func, typename FuncType = typename detail::remove_class<decltype(&detail::remove_reference_t<Func>::operator())>::type>
 auto vectorize(Func &&f) -> decltype(
-        vectorize(std::forward<Func>(f), (FuncType *) nullptr)) {
+        detail::vectorize_extractor(std::forward<Func>(f), (FuncType *) nullptr)) {
-    return vectorize(std::forward<Func>(f), (FuncType *) nullptr);
+    return detail::vectorize_extractor(std::forward<Func>(f), (FuncType *) nullptr);
 }
 // Vectorize a class method (non-const):
 template <typename Return, typename Class, typename... Args,
          typename Helper = detail::vectorize_helper<decltype(std::mem_fn(std::declval<Return (Class::*)(Args...)>())), Return, Class *, Args...>>
 Helper vectorize(Return (Class::*f)(Args...)) {
    return Helper(std::mem_fn(f));
 }
 // Vectorize a class method (non-const):
 template <typename Return, typename Class, typename... Args,
          typename Helper = detail::vectorize_helper<decltype(std::mem_fn(std::declval<Return (Class::*)(Args...) const>())), Return, const Class *, Args...>>
 Helper vectorize(Return (Class::*f)(Args...) const) {
    return Helper(std::mem_fn(f));
 }
 NAMESPACE_END(pybind11)
--- a/tests/test_numpy_vectorize.cpp
+++ b/tests/test_numpy_vectorize.cpp
@ -20,6 +20,17 @@ std::complex<double> my_func3(std::complex<double> c) {
    return c * std::complex<double>(2.f);
 }
 struct VectorizeTestClass {
    VectorizeTestClass(int v) : value{v} {};
    float method(int x, float y) { return y + (float) (x + value); }
    int value = 0;
 };
 struct NonPODClass {
    NonPODClass(int v) : value{v} {}
    int value;
 };
 test_initializer numpy_vectorize([](py::module &m) {
    // Vectorize all arguments of a function (though non-vector arguments are also allowed)
    m.def("vectorized_func", py::vectorize(my_func));
@ -40,6 +51,22 @@ test_initializer numpy_vectorize([](py::module &m) {
    m.def("selective_func", [](py::array_t<std::complex<float>, py::array::c_style>) { return "Complex float branch taken."; });
    // Passthrough test: references and non-pod types should be automatically passed through (in the
    // function definition below, only `b`, `d`, and `g` are vectorized):
    py::class_<NonPODClass>(m, "NonPODClass").def(py::init<int>());
    m.def("vec_passthrough", py::vectorize(
        [](double *a, double b, py::array_t<double> c, const int &d, int &e, NonPODClass f, const double g) {
            return *a + b + c.at(0) + d + e + f.value + g;
        }
    ));
    py::class_<VectorizeTestClass> vtc(m, "VectorizeTestClass");
    vtc .def(py::init<int>())
        .def_readwrite("value", &VectorizeTestClass::value);
    // Automatic vectorizing of methods
    vtc.def("method", py::vectorize(&VectorizeTestClass::method));
    // Internal optimization test for whether the input is trivially broadcastable:
    py::enum_<py::detail::broadcast_trivial>(m, "trivial")
        .value("f_trivial", py::detail::broadcast_trivial::f_trivial)
--- a/tests/test_numpy_vectorize.py
+++ b/tests/test_numpy_vectorize.py
@ -159,3 +159,45 @@ def test_trivial_broadcasting():
    assert vectorized_func(1, y2, 1).flags.f_contiguous
    assert vectorized_func(z1[1::4, 1::4], y2, 1).flags.f_contiguous
    assert vectorized_func(y1[1::4, 1::4], z2, 1).flags.c_contiguous
 def test_passthrough_arguments(doc):
    from pybind11_tests import vec_passthrough, NonPODClass
    assert doc(vec_passthrough) == (
        "vec_passthrough("
        "arg0: float, arg1: numpy.ndarray[float64], arg2: numpy.ndarray[float64], "
        "arg3: numpy.ndarray[int32], arg4: int, arg5: m.NonPODClass, arg6: numpy.ndarray[float64]"
        ") -> object")
    b = np.array([[10, 20, 30]], dtype='float64')
    c = np.array([100, 200])  # NOT a vectorized argument
    d = np.array([[1000], [2000], [3000]], dtype='int')
    g = np.array([[1000000, 2000000, 3000000]], dtype='int')  # requires casting
    assert np.all(
        vec_passthrough(1, b, c, d, 10000, NonPODClass(100000), g) ==
        np.array([[1111111, 2111121, 3111131],
                  [1112111, 2112121, 3112131],
                  [1113111, 2113121, 3113131]]))
 def test_method_vectorization():
    from pybind11_tests import VectorizeTestClass
    o = VectorizeTestClass(3)
    x = np.array([1, 2], dtype='int')
    y = np.array([[10], [20]], dtype='float32')
    assert np.all(o.method(x, y) == [[14, 15], [24, 25]])
 def test_array_collapse():
    from pybind11_tests import vectorized_func
    assert not isinstance(vectorized_func(1, 2, 3), np.ndarray)
    assert not isinstance(vectorized_func(np.array(1), 2, 3), np.ndarray)
    z = vectorized_func([1], 2, 3)
    assert isinstance(z, np.ndarray)
    assert z.shape == (1, )
    z = vectorized_func(1, [[[2]]], 3)
    assert isinstance(z, np.ndarray)
    assert z.shape == (1, 1, 1)