From ae5a8f7eb3407bd420734e1780e83ddceef04403 Mon Sep 17 00:00:00 2001 From: Jason Rhinelander Date: Wed, 15 Mar 2017 00:57:56 -0300 Subject: [PATCH] Stop forcing c-contiguous in py::vectorize The only part of the vectorize code that actually needs c-contiguous is the "trivial" broadcast; for non-trivial arguments, the code already uses strides properly (and so handles C-style, F-style, neither, slices, etc.) This commit rewrites `broadcast` to additionally check for C-contiguous storage, then takes off the `c_style` flag for the arguments, which will keep the functionality more or less the same, except for no longer requiring an array copy for non-c-contiguous input arrays. Additionally, if we're given a singleton slice (e.g. a[0::4, 0::4] for a 4x4 or smaller array), we no longer fail triviality because the trivial code path never actually uses the strides on a singleton. --- include/pybind11/numpy.h | 63 +++++++++++++++++++++------------- tests/test_numpy_vectorize.cpp | 13 +++++++ tests/test_numpy_vectorize.py | 58 +++++++++++++++++++++++++++++++ 3 files changed, 110 insertions(+), 24 deletions(-) diff --git a/include/pybind11/numpy.h b/include/pybind11/numpy.h index 5a766d490..166cbd06a 100644 --- a/include/pybind11/numpy.h +++ b/include/pybind11/numpy.h @@ -1052,28 +1052,47 @@ private: std::array m_common_iterator; }; +// Populates the shape and number of dimensions for the set of buffers. Returns true if the +// broadcast is "trivial"--that is, has each buffer being either a singleton or a full-size, +// C-contiguous storage buffer. template -bool broadcast(const std::array& buffers, size_t& ndim, std::vector& shape) { +bool broadcast(const std::array &buffers, size_t &ndim, std::vector &shape) { ndim = std::accumulate(buffers.begin(), buffers.end(), size_t(0), [](size_t res, const buffer_info& buf) { return std::max(res, buf.ndim); }); - shape = std::vector(ndim, 1); + shape.clear(); + shape.resize(ndim, 1); + bool trivial_broadcast = true; for (size_t i = 0; i < N; ++i) { + trivial_broadcast = trivial_broadcast && (buffers[i].size == 1 || buffers[i].ndim == ndim); + size_t expect_stride = buffers[i].itemsize; auto res_iter = shape.rbegin(); - bool i_trivial_broadcast = (buffers[i].size == 1) || (buffers[i].ndim == ndim); - for (auto shape_iter = buffers[i].shape.rbegin(); - shape_iter != buffers[i].shape.rend(); ++shape_iter, ++res_iter) { + auto stride_iter = buffers[i].strides.rbegin(); + auto shape_iter = buffers[i].shape.rbegin(); + while (shape_iter != buffers[i].shape.rend()) { + const auto &dim_size_in = *shape_iter; + auto &dim_size_out = *res_iter; - if (*res_iter == 1) - *res_iter = *shape_iter; - else if ((*shape_iter != 1) && (*res_iter != *shape_iter)) + // Each input dimension can either be 1 or `n`, but `n` values must match across buffers + if (dim_size_out == 1) + dim_size_out = dim_size_in; + else if (dim_size_in != 1 && dim_size_in != dim_size_out) pybind11_fail("pybind11::vectorize: incompatible size/dimension of inputs!"); - i_trivial_broadcast = i_trivial_broadcast && (*res_iter == *shape_iter); + if (trivial_broadcast && buffers[i].size > 1) { + if (dim_size_in == dim_size_out && expect_stride == *stride_iter) { + expect_stride *= dim_size_in; + ++stride_iter; + } else { + trivial_broadcast = false; + } + } + + ++shape_iter; + ++res_iter; } - trivial_broadcast = trivial_broadcast && i_trivial_broadcast; } return trivial_broadcast; } @@ -1081,18 +1100,17 @@ bool broadcast(const std::array& buffers, size_t& ndim, std::vec template struct vectorize_helper { typename std::remove_reference::type f; + static constexpr size_t N = sizeof...(Args); template explicit vectorize_helper(T&&f) : f(std::forward(f)) { } - object operator()(array_t... args) { - return run(args..., make_index_sequence()); + object operator()(array_t... args) { + return run(args..., make_index_sequence()); } - template object run(array_t&... args, index_sequence index) { + template object run(array_t&... args, index_sequence index) { /* Request buffers from all parameters */ - const size_t N = sizeof...(Args); - std::array buffers {{ args.request()... }}; /* Determine dimensions parameters of output array */ @@ -1112,27 +1130,24 @@ struct vectorize_helper { } if (size == 1) - return cast(f(*((Args *) buffers[Index].ptr)...)); + return cast(f(*reinterpret_cast(buffers[Index].ptr)...)); - array_t result(shape, strides); + array_t result(shape, strides); auto buf = result.request(); auto output = (Return *) buf.ptr; if (trivial_broadcast) { /* Call the function */ - for (size_t i = 0; i < size; ++i) { - output[i] = f((buffers[Index].size == 1 - ? *((Args *) buffers[Index].ptr) - : ((Args *) buffers[Index].ptr)[i])...); - } + for (size_t i = 0; i < size; ++i) + output[i] = f((reinterpret_cast(buffers[Index].ptr)[buffers[Index].size == 1 ? 0 : i])...); } else { - apply_broadcast(buffers, buf, index); + apply_broadcast(buffers, buf, index); } return result; } - template + template void apply_broadcast(const std::array &buffers, buffer_info &output, index_sequence) { using input_iterator = multi_array_iterator; diff --git a/tests/test_numpy_vectorize.cpp b/tests/test_numpy_vectorize.cpp index 6d94db2a1..e5adff800 100644 --- a/tests/test_numpy_vectorize.cpp +++ b/tests/test_numpy_vectorize.cpp @@ -38,4 +38,17 @@ test_initializer numpy_vectorize([](py::module &m) { m.def("selective_func", [](py::array_t) { return "Int branch taken."; }); m.def("selective_func", [](py::array_t) { return "Float branch taken."; }); m.def("selective_func", [](py::array_t, py::array::c_style>) { return "Complex float branch taken."; }); + + + // Internal optimization test for whether the input is trivially broadcastable: + m.def("vectorized_is_trivial", []( + py::array_t arg1, + py::array_t arg2, + py::array_t arg3 + ) { + size_t ndim; + std::vector shape; + std::array buffers {{ arg1.request(), arg2.request(), arg3.request() }}; + return py::detail::broadcast(buffers, ndim, shape); + }); }); diff --git a/tests/test_numpy_vectorize.py b/tests/test_numpy_vectorize.py index 271241c3f..9a8c6ab94 100644 --- a/tests/test_numpy_vectorize.py +++ b/tests/test_numpy_vectorize.py @@ -57,6 +57,35 @@ def test_vectorize(capture): my_func(x:int=5, y:float=3, z:float=2) my_func(x:int=6, y:float=3, z:float=2) """ + with capture: + a, b, c = np.array([[1, 2, 3], [4, 5, 6]], order='F'), np.array([[2], [3]]), 2 + assert np.allclose(f(a, b, c), a * b * c) + assert capture == """ + my_func(x:int=1, y:float=2, z:float=2) + my_func(x:int=2, y:float=2, z:float=2) + my_func(x:int=3, y:float=2, z:float=2) + my_func(x:int=4, y:float=3, z:float=2) + my_func(x:int=5, y:float=3, z:float=2) + my_func(x:int=6, y:float=3, z:float=2) + """ + with capture: + a, b, c = np.array([[1, 2, 3], [4, 5, 6]])[::, ::2], np.array([[2], [3]]), 2 + assert np.allclose(f(a, b, c), a * b * c) + assert capture == """ + my_func(x:int=1, y:float=2, z:float=2) + my_func(x:int=3, y:float=2, z:float=2) + my_func(x:int=4, y:float=3, z:float=2) + my_func(x:int=6, y:float=3, z:float=2) + """ + with capture: + a, b, c = np.array([[1, 2, 3], [4, 5, 6]], order='F')[::, ::2], np.array([[2], [3]]), 2 + assert np.allclose(f(a, b, c), a * b * c) + assert capture == """ + my_func(x:int=1, y:float=2, z:float=2) + my_func(x:int=3, y:float=2, z:float=2) + my_func(x:int=4, y:float=3, z:float=2) + my_func(x:int=6, y:float=3, z:float=2) + """ def test_type_selection(): @@ -73,3 +102,32 @@ def test_docs(doc): assert doc(vectorized_func) == """ vectorized_func(arg0: numpy.ndarray[int32], arg1: numpy.ndarray[float32], arg2: numpy.ndarray[float64]) -> object """ # noqa: E501 line too long + + +def test_trivial_broadcasting(): + from pybind11_tests import vectorized_is_trivial + + assert vectorized_is_trivial(1, 2, 3) + assert vectorized_is_trivial(np.array(1), np.array(2), 3) + assert vectorized_is_trivial(np.array([1, 3]), np.array([2, 4]), 3) + assert vectorized_is_trivial( + np.array([[1, 3, 5], [7, 9, 11]]), np.array([[2, 4, 6], [8, 10, 12]]), 3) + assert not vectorized_is_trivial( + np.array([[1, 2, 3], [4, 5, 6]]), np.array([2, 3, 4]), 2) + assert not vectorized_is_trivial( + np.array([[1, 2, 3], [4, 5, 6]]), np.array([[2], [3]]), 2) + z1 = np.array([[1, 2, 3, 4], [5, 6, 7, 8]], dtype='int32') + z2 = np.array(z1, dtype='float32') + z3 = np.array(z1, dtype='float64') + assert vectorized_is_trivial(z1, z2, z3) + assert not vectorized_is_trivial(z1[::2, ::2], 1, 1) + assert vectorized_is_trivial(1, 1, z1[::2, ::2]) + assert not vectorized_is_trivial(1, 1, z3[::2, ::2]) + assert vectorized_is_trivial(z1, 1, z3[1::4, 1::4]) + + y1 = np.array(z1, order='F') + y2 = np.array(y1) + y3 = np.array(y1) + assert not vectorized_is_trivial(y1, y2, y3) + assert not vectorized_is_trivial(y1, z2, z3) + assert not vectorized_is_trivial(y1, 1, 1)