mirror of
https://github.com/pybind/pybind11.git
synced 2024-11-22 05:05:11 +00:00
Stop forcing c-contiguous in py::vectorize
The only part of the vectorize code that actually needs c-contiguous is the "trivial" broadcast; for non-trivial arguments, the code already uses strides properly (and so handles C-style, F-style, neither, slices, etc.) This commit rewrites `broadcast` to additionally check for C-contiguous storage, then takes off the `c_style` flag for the arguments, which will keep the functionality more or less the same, except for no longer requiring an array copy for non-c-contiguous input arrays. Additionally, if we're given a singleton slice (e.g. a[0::4, 0::4] for a 4x4 or smaller array), we no longer fail triviality because the trivial code path never actually uses the strides on a singleton.
This commit is contained in:
parent
cd3d1fc7df
commit
ae5a8f7eb3
@ -1052,28 +1052,47 @@ private:
|
||||
std::array<common_iter, N> m_common_iterator;
|
||||
};
|
||||
|
||||
// Populates the shape and number of dimensions for the set of buffers. Returns true if the
|
||||
// broadcast is "trivial"--that is, has each buffer being either a singleton or a full-size,
|
||||
// C-contiguous storage buffer.
|
||||
template <size_t N>
|
||||
bool broadcast(const std::array<buffer_info, N>& buffers, size_t& ndim, std::vector<size_t>& shape) {
|
||||
bool broadcast(const std::array<buffer_info, N> &buffers, size_t &ndim, std::vector<size_t> &shape) {
|
||||
ndim = std::accumulate(buffers.begin(), buffers.end(), size_t(0), [](size_t res, const buffer_info& buf) {
|
||||
return std::max(res, buf.ndim);
|
||||
});
|
||||
|
||||
shape = std::vector<size_t>(ndim, 1);
|
||||
shape.clear();
|
||||
shape.resize(ndim, 1);
|
||||
|
||||
bool trivial_broadcast = true;
|
||||
for (size_t i = 0; i < N; ++i) {
|
||||
trivial_broadcast = trivial_broadcast && (buffers[i].size == 1 || buffers[i].ndim == ndim);
|
||||
size_t expect_stride = buffers[i].itemsize;
|
||||
auto res_iter = shape.rbegin();
|
||||
bool i_trivial_broadcast = (buffers[i].size == 1) || (buffers[i].ndim == ndim);
|
||||
for (auto shape_iter = buffers[i].shape.rbegin();
|
||||
shape_iter != buffers[i].shape.rend(); ++shape_iter, ++res_iter) {
|
||||
auto stride_iter = buffers[i].strides.rbegin();
|
||||
auto shape_iter = buffers[i].shape.rbegin();
|
||||
while (shape_iter != buffers[i].shape.rend()) {
|
||||
const auto &dim_size_in = *shape_iter;
|
||||
auto &dim_size_out = *res_iter;
|
||||
|
||||
if (*res_iter == 1)
|
||||
*res_iter = *shape_iter;
|
||||
else if ((*shape_iter != 1) && (*res_iter != *shape_iter))
|
||||
// Each input dimension can either be 1 or `n`, but `n` values must match across buffers
|
||||
if (dim_size_out == 1)
|
||||
dim_size_out = dim_size_in;
|
||||
else if (dim_size_in != 1 && dim_size_in != dim_size_out)
|
||||
pybind11_fail("pybind11::vectorize: incompatible size/dimension of inputs!");
|
||||
|
||||
i_trivial_broadcast = i_trivial_broadcast && (*res_iter == *shape_iter);
|
||||
if (trivial_broadcast && buffers[i].size > 1) {
|
||||
if (dim_size_in == dim_size_out && expect_stride == *stride_iter) {
|
||||
expect_stride *= dim_size_in;
|
||||
++stride_iter;
|
||||
} else {
|
||||
trivial_broadcast = false;
|
||||
}
|
||||
}
|
||||
|
||||
++shape_iter;
|
||||
++res_iter;
|
||||
}
|
||||
trivial_broadcast = trivial_broadcast && i_trivial_broadcast;
|
||||
}
|
||||
return trivial_broadcast;
|
||||
}
|
||||
@ -1081,18 +1100,17 @@ bool broadcast(const std::array<buffer_info, N>& buffers, size_t& ndim, std::vec
|
||||
template <typename Func, typename Return, typename... Args>
|
||||
struct vectorize_helper {
|
||||
typename std::remove_reference<Func>::type f;
|
||||
static constexpr size_t N = sizeof...(Args);
|
||||
|
||||
template <typename T>
|
||||
explicit vectorize_helper(T&&f) : f(std::forward<T>(f)) { }
|
||||
|
||||
object operator()(array_t<Args, array::c_style | array::forcecast>... args) {
|
||||
return run(args..., make_index_sequence<sizeof...(Args)>());
|
||||
object operator()(array_t<Args, array::forcecast>... args) {
|
||||
return run(args..., make_index_sequence<N>());
|
||||
}
|
||||
|
||||
template <size_t ... Index> object run(array_t<Args, array::c_style | array::forcecast>&... args, index_sequence<Index...> index) {
|
||||
template <size_t ... Index> object run(array_t<Args, array::forcecast>&... args, index_sequence<Index...> index) {
|
||||
/* Request buffers from all parameters */
|
||||
const size_t N = sizeof...(Args);
|
||||
|
||||
std::array<buffer_info, N> buffers {{ args.request()... }};
|
||||
|
||||
/* Determine dimensions parameters of output array */
|
||||
@ -1112,27 +1130,24 @@ struct vectorize_helper {
|
||||
}
|
||||
|
||||
if (size == 1)
|
||||
return cast(f(*((Args *) buffers[Index].ptr)...));
|
||||
return cast(f(*reinterpret_cast<Args *>(buffers[Index].ptr)...));
|
||||
|
||||
array_t<Return> result(shape, strides);
|
||||
array_t<Return, array::c_style> result(shape, strides);
|
||||
auto buf = result.request();
|
||||
auto output = (Return *) buf.ptr;
|
||||
|
||||
if (trivial_broadcast) {
|
||||
/* Call the function */
|
||||
for (size_t i = 0; i < size; ++i) {
|
||||
output[i] = f((buffers[Index].size == 1
|
||||
? *((Args *) buffers[Index].ptr)
|
||||
: ((Args *) buffers[Index].ptr)[i])...);
|
||||
}
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
output[i] = f((reinterpret_cast<Args *>(buffers[Index].ptr)[buffers[Index].size == 1 ? 0 : i])...);
|
||||
} else {
|
||||
apply_broadcast<N, Index...>(buffers, buf, index);
|
||||
apply_broadcast<Index...>(buffers, buf, index);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
template <size_t N, size_t... Index>
|
||||
template <size_t... Index>
|
||||
void apply_broadcast(const std::array<buffer_info, N> &buffers,
|
||||
buffer_info &output, index_sequence<Index...>) {
|
||||
using input_iterator = multi_array_iterator<N>;
|
||||
|
@ -38,4 +38,17 @@ test_initializer numpy_vectorize([](py::module &m) {
|
||||
m.def("selective_func", [](py::array_t<int, py::array::c_style>) { return "Int branch taken."; });
|
||||
m.def("selective_func", [](py::array_t<float, py::array::c_style>) { return "Float branch taken."; });
|
||||
m.def("selective_func", [](py::array_t<std::complex<float>, py::array::c_style>) { return "Complex float branch taken."; });
|
||||
|
||||
|
||||
// Internal optimization test for whether the input is trivially broadcastable:
|
||||
m.def("vectorized_is_trivial", [](
|
||||
py::array_t<int, py::array::forcecast> arg1,
|
||||
py::array_t<float, py::array::forcecast> arg2,
|
||||
py::array_t<double, py::array::forcecast> arg3
|
||||
) {
|
||||
size_t ndim;
|
||||
std::vector<size_t> shape;
|
||||
std::array<py::buffer_info, 3> buffers {{ arg1.request(), arg2.request(), arg3.request() }};
|
||||
return py::detail::broadcast(buffers, ndim, shape);
|
||||
});
|
||||
});
|
||||
|
@ -57,6 +57,35 @@ def test_vectorize(capture):
|
||||
my_func(x:int=5, y:float=3, z:float=2)
|
||||
my_func(x:int=6, y:float=3, z:float=2)
|
||||
"""
|
||||
with capture:
|
||||
a, b, c = np.array([[1, 2, 3], [4, 5, 6]], order='F'), np.array([[2], [3]]), 2
|
||||
assert np.allclose(f(a, b, c), a * b * c)
|
||||
assert capture == """
|
||||
my_func(x:int=1, y:float=2, z:float=2)
|
||||
my_func(x:int=2, y:float=2, z:float=2)
|
||||
my_func(x:int=3, y:float=2, z:float=2)
|
||||
my_func(x:int=4, y:float=3, z:float=2)
|
||||
my_func(x:int=5, y:float=3, z:float=2)
|
||||
my_func(x:int=6, y:float=3, z:float=2)
|
||||
"""
|
||||
with capture:
|
||||
a, b, c = np.array([[1, 2, 3], [4, 5, 6]])[::, ::2], np.array([[2], [3]]), 2
|
||||
assert np.allclose(f(a, b, c), a * b * c)
|
||||
assert capture == """
|
||||
my_func(x:int=1, y:float=2, z:float=2)
|
||||
my_func(x:int=3, y:float=2, z:float=2)
|
||||
my_func(x:int=4, y:float=3, z:float=2)
|
||||
my_func(x:int=6, y:float=3, z:float=2)
|
||||
"""
|
||||
with capture:
|
||||
a, b, c = np.array([[1, 2, 3], [4, 5, 6]], order='F')[::, ::2], np.array([[2], [3]]), 2
|
||||
assert np.allclose(f(a, b, c), a * b * c)
|
||||
assert capture == """
|
||||
my_func(x:int=1, y:float=2, z:float=2)
|
||||
my_func(x:int=3, y:float=2, z:float=2)
|
||||
my_func(x:int=4, y:float=3, z:float=2)
|
||||
my_func(x:int=6, y:float=3, z:float=2)
|
||||
"""
|
||||
|
||||
|
||||
def test_type_selection():
|
||||
@ -73,3 +102,32 @@ def test_docs(doc):
|
||||
assert doc(vectorized_func) == """
|
||||
vectorized_func(arg0: numpy.ndarray[int32], arg1: numpy.ndarray[float32], arg2: numpy.ndarray[float64]) -> object
|
||||
""" # noqa: E501 line too long
|
||||
|
||||
|
||||
def test_trivial_broadcasting():
|
||||
from pybind11_tests import vectorized_is_trivial
|
||||
|
||||
assert vectorized_is_trivial(1, 2, 3)
|
||||
assert vectorized_is_trivial(np.array(1), np.array(2), 3)
|
||||
assert vectorized_is_trivial(np.array([1, 3]), np.array([2, 4]), 3)
|
||||
assert vectorized_is_trivial(
|
||||
np.array([[1, 3, 5], [7, 9, 11]]), np.array([[2, 4, 6], [8, 10, 12]]), 3)
|
||||
assert not vectorized_is_trivial(
|
||||
np.array([[1, 2, 3], [4, 5, 6]]), np.array([2, 3, 4]), 2)
|
||||
assert not vectorized_is_trivial(
|
||||
np.array([[1, 2, 3], [4, 5, 6]]), np.array([[2], [3]]), 2)
|
||||
z1 = np.array([[1, 2, 3, 4], [5, 6, 7, 8]], dtype='int32')
|
||||
z2 = np.array(z1, dtype='float32')
|
||||
z3 = np.array(z1, dtype='float64')
|
||||
assert vectorized_is_trivial(z1, z2, z3)
|
||||
assert not vectorized_is_trivial(z1[::2, ::2], 1, 1)
|
||||
assert vectorized_is_trivial(1, 1, z1[::2, ::2])
|
||||
assert not vectorized_is_trivial(1, 1, z3[::2, ::2])
|
||||
assert vectorized_is_trivial(z1, 1, z3[1::4, 1::4])
|
||||
|
||||
y1 = np.array(z1, order='F')
|
||||
y2 = np.array(y1)
|
||||
y3 = np.array(y1)
|
||||
assert not vectorized_is_trivial(y1, y2, y3)
|
||||
assert not vectorized_is_trivial(y1, z2, z3)
|
||||
assert not vectorized_is_trivial(y1, 1, 1)
|
||||
|
Loading…
Reference in New Issue
Block a user