vectorize: pass-through of non-vectorizable args

This extends py::vectorize to automatically pass through
non-vectorizable arguments.  This removes the need for the documented
"explicitly exclude an argument" workaround.

Vectorization now applies to arithmetic, std::complex, and POD types,
passed as plain value or by const lvalue reference (previously only
pass-by-value types were supported).  Non-const lvalue references and
any other types are passed through as-is.

Functions with rvalue reference arguments (whether vectorizable or not)
are explicitly prohibited: an rvalue reference is inherently not
something that can be passed multiple times and is thus unsuitable to
being in a vectorized function.

The vectorize returned value is also now more sensitive to inputs:
previously it would return by value when all inputs are of size 1; this
is now amended to having all inputs of size 1 *and* 0 dimensions.  Thus
if you pass in, for example, [[1]], you get back a 1x1, 2D array, while
previously you got back just the resulting single value.

Vectorization of member function specializations is now also supported
via `py::vectorize(&Class::method)`; this required passthrough support
for the initial object pointer on the wrapping function pointer.
This commit is contained in:
Jason Rhinelander 2017-03-26 00:51:40 -03:00
parent 41f8da4a95
commit f3ce00eaed
6 changed files with 212 additions and 80 deletions

View File

@ -239,27 +239,13 @@ by the compiler. The result is returned as a NumPy array of type
The scalar argument ``z`` is transparently replicated 4 times. The input The scalar argument ``z`` is transparently replicated 4 times. The input
arrays ``x`` and ``y`` are automatically converted into the right types (they arrays ``x`` and ``y`` are automatically converted into the right types (they
are of type ``numpy.dtype.int64`` but need to be ``numpy.dtype.int32`` and are of type ``numpy.dtype.int64`` but need to be ``numpy.dtype.int32`` and
``numpy.dtype.float32``, respectively) ``numpy.dtype.float32``, respectively).
Sometimes we might want to explicitly exclude an argument from the vectorization .. note::
because it makes little sense to wrap it in a NumPy array. For instance,
suppose the function signature was
.. code-block:: cpp Only arithmetic, complex, and POD types passed by value or by ``const &``
reference are vectorized; all other arguments are passed through as-is.
double my_func(int x, float y, my_custom_type *z); Functions taking rvalue reference arguments cannot be vectorized.
This can be done with a stateful Lambda closure:
.. code-block:: cpp
// Vectorize a lambda function with a capture object (e.g. to exclude some arguments from the vectorization)
m.def("vectorized_func",
[](py::array_t<int> x, py::array_t<float> y, my_custom_type *z) {
auto stateful_closure = [z](int x, float y) { return my_func(x, y, z); };
return py::vectorize(stateful_closure)(x, y);
}
);
In cases where the computation is too complicated to be reduced to In cases where the computation is too complicated to be reduced to
``vectorize``, it will be necessary to create and access the buffer contents ``vectorize``, it will be necessary to create and access the buffer contents

View File

@ -15,6 +15,7 @@
#include "descr.h" #include "descr.h"
#include <array> #include <array>
#include <limits> #include <limits>
#include <tuple>
NAMESPACE_BEGIN(pybind11) NAMESPACE_BEGIN(pybind11)
NAMESPACE_BEGIN(detail) NAMESPACE_BEGIN(detail)

View File

@ -367,6 +367,12 @@ template<size_t ...S> struct make_index_sequence_impl <0, S...> { typedef index_
template<size_t N> using make_index_sequence = typename make_index_sequence_impl<N>::type; template<size_t N> using make_index_sequence = typename make_index_sequence_impl<N>::type;
#endif #endif
/// Make an index sequence of the indices of true arguments
template <typename ISeq, size_t, bool...> struct select_indices_impl { using type = ISeq; };
template <size_t... IPrev, size_t I, bool B, bool... Bs> struct select_indices_impl<index_sequence<IPrev...>, I, B, Bs...>
: select_indices_impl<conditional_t<B, index_sequence<IPrev..., I>, index_sequence<IPrev...>>, I + 1, Bs...> {};
template <bool... Bs> using select_indices = typename select_indices_impl<index_sequence<>, 0, Bs...>::type;
/// Backports of std::bool_constant and std::negation to accomodate older compilers /// Backports of std::bool_constant and std::negation to accomodate older compilers
template <bool B> using bool_constant = std::integral_constant<bool, B>; template <bool B> using bool_constant = std::integral_constant<bool, B>;
template <typename T> struct negation : bool_constant<!T::value> { }; template <typename T> struct negation : bool_constant<!T::value> { };

View File

@ -1289,8 +1289,8 @@ public:
return *this; return *this;
} }
template <size_t K, class T> const T& data() const { template <size_t K, class T = void> T* data() const {
return *reinterpret_cast<T*>(m_common_iterator[K].data()); return reinterpret_cast<T*>(m_common_iterator[K].data());
} }
private: private:
@ -1340,7 +1340,7 @@ enum class broadcast_trivial { non_trivial, c_trivial, f_trivial };
// buffer; returns `non_trivial` otherwise. // buffer; returns `non_trivial` otherwise.
template <size_t N> template <size_t N>
broadcast_trivial broadcast(const std::array<buffer_info, N> &buffers, ssize_t &ndim, std::vector<ssize_t> &shape) { broadcast_trivial broadcast(const std::array<buffer_info, N> &buffers, ssize_t &ndim, std::vector<ssize_t> &shape) {
ndim = std::accumulate(buffers.begin(), buffers.end(), ssize_t(0), [](ssize_t res, const buffer_info& buf) { ndim = std::accumulate(buffers.begin(), buffers.end(), ssize_t(0), [](ssize_t res, const buffer_info &buf) {
return std::max(res, buf.ndim); return std::max(res, buf.ndim);
}); });
@ -1411,21 +1411,63 @@ broadcast_trivial broadcast(const std::array<buffer_info, N> &buffers, ssize_t &
broadcast_trivial::non_trivial; broadcast_trivial::non_trivial;
} }
template <typename T>
struct vectorize_arg {
static_assert(!std::is_rvalue_reference<T>::value, "Functions with rvalue reference arguments cannot be vectorized");
// The wrapped function gets called with this type:
using call_type = remove_reference_t<T>;
// Is this a vectorized argument?
static constexpr bool vectorize =
satisfies_any_of<call_type, std::is_arithmetic, is_complex, std::is_pod>::value &&
satisfies_none_of<call_type, std::is_pointer, std::is_array, is_std_array, std::is_enum>::value &&
(!std::is_reference<T>::value ||
(std::is_lvalue_reference<T>::value && std::is_const<call_type>::value));
// Accept this type: an array for vectorized types, otherwise the type as-is:
using type = conditional_t<vectorize, array_t<remove_cv_t<call_type>, array::forcecast>, T>;
};
template <typename Func, typename Return, typename... Args> template <typename Func, typename Return, typename... Args>
struct vectorize_helper { struct vectorize_helper {
typename std::remove_reference<Func>::type f; private:
static constexpr size_t N = sizeof...(Args); static constexpr size_t N = sizeof...(Args);
static constexpr size_t NVectorized = constexpr_sum(vectorize_arg<Args>::vectorize...);
static_assert(NVectorized >= 1,
"pybind11::vectorize(...) requires a function with at least one vectorizable argument");
public:
template <typename T> template <typename T>
explicit vectorize_helper(T&&f) : f(std::forward<T>(f)) { } explicit vectorize_helper(T &&f) : f(std::forward<T>(f)) { }
object operator()(array_t<Args, array::forcecast>... args) { object operator()(typename vectorize_arg<Args>::type... args) {
return run(args..., make_index_sequence<N>()); return run(args...,
make_index_sequence<N>(),
select_indices<vectorize_arg<Args>::vectorize...>(),
make_index_sequence<NVectorized>());
} }
template <size_t ... Index> object run(array_t<Args, array::forcecast>&... args, index_sequence<Index...> index) { private:
/* Request buffers from all parameters */ remove_reference_t<Func> f;
std::array<buffer_info, N> buffers {{ args.request()... }};
template <size_t Index> using param_n_t = typename pack_element<Index, typename vectorize_arg<Args>::call_type...>::type;
// Runs a vectorized function given arguments tuple and three index sequences:
// - Index is the full set of 0 ... (N-1) argument indices;
// - VIndex is the subset of argument indices with vectorized parameters, letting us access
// vectorized arguments (anything not in this sequence is passed through)
// - BIndex is a incremental sequence (beginning at 0) of the same size as VIndex, so that
// we can store vectorized buffer_infos in an array (argument VIndex has its buffer at
// index BIndex in the array).
template <size_t... Index, size_t... VIndex, size_t... BIndex> object run(
typename vectorize_arg<Args>::type &...args,
index_sequence<Index...> i_seq, index_sequence<VIndex...> vi_seq, index_sequence<BIndex...> bi_seq) {
// Pointers to values the function was called with; the vectorized ones set here will start
// out as array_t<T> pointers, but they will be changed them to T pointers before we make
// call the wrapped function. Non-vectorized pointers are left as-is.
std::array<void *, N> params{{ &args... }};
// The array of `buffer_info`s of vectorized arguments:
std::array<buffer_info, NVectorized> buffers{{ reinterpret_cast<array *>(params[VIndex])->request()... }};
/* Determine dimensions parameters of output array */ /* Determine dimensions parameters of output array */
ssize_t nd = 0; ssize_t nd = 0;
@ -1433,61 +1475,79 @@ struct vectorize_helper {
auto trivial = broadcast(buffers, nd, shape); auto trivial = broadcast(buffers, nd, shape);
size_t ndim = (size_t) nd; size_t ndim = (size_t) nd;
ssize_t size = 1; size_t size = std::accumulate(shape.begin(), shape.end(), (size_t) 1, std::multiplies<size_t>());
std::vector<ssize_t> strides(ndim);
if (ndim > 0) { // If all arguments are 0-dimension arrays (i.e. single values) return a plain value (i.e.
if (trivial == broadcast_trivial::f_trivial) { // not wrapped in an array).
strides[0] = sizeof(Return); if (size == 1 && ndim == 0) {
for (size_t i = 1; i < ndim; ++i) { PYBIND11_EXPAND_SIDE_EFFECTS(params[VIndex] = buffers[BIndex].ptr);
strides[i] = strides[i - 1] * shape[i - 1]; return cast(f(*reinterpret_cast<param_n_t<Index> *>(params[Index])...));
size *= shape[i - 1];
}
size *= shape[ndim - 1];
}
else {
strides[ndim-1] = sizeof(Return);
for (size_t i = ndim - 1; i > 0; --i) {
strides[i - 1] = strides[i] * shape[i];
size *= shape[i];
}
size *= shape[0];
}
} }
if (size == 1) array_t<Return> result;
return cast(f(*reinterpret_cast<Args *>(buffers[Index].ptr)...)); if (trivial == broadcast_trivial::f_trivial) result = array_t<Return, array::f_style>(shape);
else result = array_t<Return>(shape);
array_t<Return> result(shape, strides); if (size == 0) return result;
auto buf = result.request();
auto output = (Return *) buf.ptr;
/* Call the function */ /* Call the function */
if (trivial == broadcast_trivial::non_trivial) { if (trivial == broadcast_trivial::non_trivial)
apply_broadcast<Index...>(buffers, buf, index); apply_broadcast(buffers, params, result, i_seq, vi_seq, bi_seq);
} else { else
for (ssize_t i = 0; i < size; ++i) apply_trivial(buffers, params, result.mutable_data(), size, i_seq, vi_seq, bi_seq);
output[i] = f((reinterpret_cast<Args *>(buffers[Index].ptr)[buffers[Index].size == 1 ? 0 : i])...);
}
return result; return result;
} }
template <size_t... Index> template <size_t... Index, size_t... VIndex, size_t... BIndex>
void apply_broadcast(const std::array<buffer_info, N> &buffers, void apply_trivial(std::array<buffer_info, NVectorized> &buffers,
buffer_info &output, index_sequence<Index...>) { std::array<void *, N> &params,
using input_iterator = multi_array_iterator<N>; Return *out,
using output_iterator = array_iterator<Return>; size_t size,
index_sequence<Index...>, index_sequence<VIndex...>, index_sequence<BIndex...>) {
input_iterator input_iter(buffers, output.shape); // Initialize an array of mutable byte references and sizes with references set to the
output_iterator output_end = array_end<Return>(output); // appropriate pointer in `params`; as we iterate, we'll increment each pointer by its size
// (except for singletons, which get an increment of 0).
std::array<std::pair<unsigned char *&, const size_t>, NVectorized> vecparams{{
std::pair<unsigned char *&, const size_t>(
reinterpret_cast<unsigned char *&>(params[VIndex] = buffers[BIndex].ptr),
buffers[BIndex].size == 1 ? 0 : sizeof(param_n_t<VIndex>)
)...
}};
for (output_iterator iter = array_begin<Return>(output); for (size_t i = 0; i < size; ++i) {
iter != output_end; ++iter, ++input_iter) { out[i] = f(*reinterpret_cast<param_n_t<Index> *>(params[Index])...);
*iter = f((input_iter.template data<Index, Args>())...); for (auto &x : vecparams) x.first += x.second;
}
}
template <size_t... Index, size_t... VIndex, size_t... BIndex>
void apply_broadcast(std::array<buffer_info, NVectorized> &buffers,
std::array<void *, N> &params,
array_t<Return> &output_array,
index_sequence<Index...>, index_sequence<VIndex...>, index_sequence<BIndex...>) {
buffer_info output = output_array.request();
multi_array_iterator<NVectorized> input_iter(buffers, output.shape);
for (array_iterator<Return> iter = array_begin<Return>(output), end = array_end<Return>(output);
iter != end;
++iter, ++input_iter) {
PYBIND11_EXPAND_SIDE_EFFECTS((
params[VIndex] = input_iter.template data<BIndex>()
));
*iter = f(*reinterpret_cast<param_n_t<Index> *>(std::get<Index>(params))...);
} }
} }
}; };
template <typename Func, typename Return, typename... Args>
vectorize_helper<Func, Return, Args...>
vectorize_extractor(const Func &f, Return (*) (Args ...)) {
return detail::vectorize_helper<Func, Return, Args...>(f);
}
template <typename T, int Flags> struct handle_type_name<array_t<T, Flags>> { template <typename T, int Flags> struct handle_type_name<array_t<T, Flags>> {
static PYBIND11_DESCR name() { static PYBIND11_DESCR name() {
return _("numpy.ndarray[") + npy_format_descriptor<T>::name() + _("]"); return _("numpy.ndarray[") + npy_format_descriptor<T>::name() + _("]");
@ -1496,22 +1556,32 @@ template <typename T, int Flags> struct handle_type_name<array_t<T, Flags>> {
NAMESPACE_END(detail) NAMESPACE_END(detail)
template <typename Func, typename Return, typename... Args> // Vanilla pointer vectorizer:
detail::vectorize_helper<Func, Return, Args...>
vectorize(const Func &f, Return (*) (Args ...)) {
return detail::vectorize_helper<Func, Return, Args...>(f);
}
template <typename Return, typename... Args> template <typename Return, typename... Args>
detail::vectorize_helper<Return (*) (Args ...), Return, Args...> detail::vectorize_helper<Return (*)(Args...), Return, Args...>
vectorize(Return (*f) (Args ...)) { vectorize(Return (*f) (Args ...)) {
return vectorize<Return (*) (Args ...), Return, Args...>(f, f); return detail::vectorize_helper<Return (*)(Args...), Return, Args...>(f);
} }
template <typename Func, typename FuncType = typename detail::remove_class<decltype(&std::remove_reference<Func>::type::operator())>::type> // lambda vectorizer:
template <typename Func, typename FuncType = typename detail::remove_class<decltype(&detail::remove_reference_t<Func>::operator())>::type>
auto vectorize(Func &&f) -> decltype( auto vectorize(Func &&f) -> decltype(
vectorize(std::forward<Func>(f), (FuncType *) nullptr)) { detail::vectorize_extractor(std::forward<Func>(f), (FuncType *) nullptr)) {
return vectorize(std::forward<Func>(f), (FuncType *) nullptr); return detail::vectorize_extractor(std::forward<Func>(f), (FuncType *) nullptr);
}
// Vectorize a class method (non-const):
template <typename Return, typename Class, typename... Args,
typename Helper = detail::vectorize_helper<decltype(std::mem_fn(std::declval<Return (Class::*)(Args...)>())), Return, Class *, Args...>>
Helper vectorize(Return (Class::*f)(Args...)) {
return Helper(std::mem_fn(f));
}
// Vectorize a class method (non-const):
template <typename Return, typename Class, typename... Args,
typename Helper = detail::vectorize_helper<decltype(std::mem_fn(std::declval<Return (Class::*)(Args...) const>())), Return, const Class *, Args...>>
Helper vectorize(Return (Class::*f)(Args...) const) {
return Helper(std::mem_fn(f));
} }
NAMESPACE_END(pybind11) NAMESPACE_END(pybind11)

View File

@ -20,6 +20,17 @@ std::complex<double> my_func3(std::complex<double> c) {
return c * std::complex<double>(2.f); return c * std::complex<double>(2.f);
} }
struct VectorizeTestClass {
VectorizeTestClass(int v) : value{v} {};
float method(int x, float y) { return y + (float) (x + value); }
int value = 0;
};
struct NonPODClass {
NonPODClass(int v) : value{v} {}
int value;
};
test_initializer numpy_vectorize([](py::module &m) { test_initializer numpy_vectorize([](py::module &m) {
// Vectorize all arguments of a function (though non-vector arguments are also allowed) // Vectorize all arguments of a function (though non-vector arguments are also allowed)
m.def("vectorized_func", py::vectorize(my_func)); m.def("vectorized_func", py::vectorize(my_func));
@ -40,6 +51,22 @@ test_initializer numpy_vectorize([](py::module &m) {
m.def("selective_func", [](py::array_t<std::complex<float>, py::array::c_style>) { return "Complex float branch taken."; }); m.def("selective_func", [](py::array_t<std::complex<float>, py::array::c_style>) { return "Complex float branch taken."; });
// Passthrough test: references and non-pod types should be automatically passed through (in the
// function definition below, only `b`, `d`, and `g` are vectorized):
py::class_<NonPODClass>(m, "NonPODClass").def(py::init<int>());
m.def("vec_passthrough", py::vectorize(
[](double *a, double b, py::array_t<double> c, const int &d, int &e, NonPODClass f, const double g) {
return *a + b + c.at(0) + d + e + f.value + g;
}
));
py::class_<VectorizeTestClass> vtc(m, "VectorizeTestClass");
vtc .def(py::init<int>())
.def_readwrite("value", &VectorizeTestClass::value);
// Automatic vectorizing of methods
vtc.def("method", py::vectorize(&VectorizeTestClass::method));
// Internal optimization test for whether the input is trivially broadcastable: // Internal optimization test for whether the input is trivially broadcastable:
py::enum_<py::detail::broadcast_trivial>(m, "trivial") py::enum_<py::detail::broadcast_trivial>(m, "trivial")
.value("f_trivial", py::detail::broadcast_trivial::f_trivial) .value("f_trivial", py::detail::broadcast_trivial::f_trivial)

View File

@ -159,3 +159,45 @@ def test_trivial_broadcasting():
assert vectorized_func(1, y2, 1).flags.f_contiguous assert vectorized_func(1, y2, 1).flags.f_contiguous
assert vectorized_func(z1[1::4, 1::4], y2, 1).flags.f_contiguous assert vectorized_func(z1[1::4, 1::4], y2, 1).flags.f_contiguous
assert vectorized_func(y1[1::4, 1::4], z2, 1).flags.c_contiguous assert vectorized_func(y1[1::4, 1::4], z2, 1).flags.c_contiguous
def test_passthrough_arguments(doc):
from pybind11_tests import vec_passthrough, NonPODClass
assert doc(vec_passthrough) == (
"vec_passthrough("
"arg0: float, arg1: numpy.ndarray[float64], arg2: numpy.ndarray[float64], "
"arg3: numpy.ndarray[int32], arg4: int, arg5: m.NonPODClass, arg6: numpy.ndarray[float64]"
") -> object")
b = np.array([[10, 20, 30]], dtype='float64')
c = np.array([100, 200]) # NOT a vectorized argument
d = np.array([[1000], [2000], [3000]], dtype='int')
g = np.array([[1000000, 2000000, 3000000]], dtype='int') # requires casting
assert np.all(
vec_passthrough(1, b, c, d, 10000, NonPODClass(100000), g) ==
np.array([[1111111, 2111121, 3111131],
[1112111, 2112121, 3112131],
[1113111, 2113121, 3113131]]))
def test_method_vectorization():
from pybind11_tests import VectorizeTestClass
o = VectorizeTestClass(3)
x = np.array([1, 2], dtype='int')
y = np.array([[10], [20]], dtype='float32')
assert np.all(o.method(x, y) == [[14, 15], [24, 25]])
def test_array_collapse():
from pybind11_tests import vectorized_func
assert not isinstance(vectorized_func(1, 2, 3), np.ndarray)
assert not isinstance(vectorized_func(np.array(1), 2, 3), np.ndarray)
z = vectorized_func([1], 2, 3)
assert isinstance(z, np.ndarray)
assert z.shape == (1, )
z = vectorized_func(1, [[[2]]], 3)
assert isinstance(z, np.ndarray)
assert z.shape == (1, 1, 1)