mirror of
https://github.com/pybind/pybind11.git
synced 2025-01-18 08:55:57 +00:00
Fix buffer protocol implementation (#5407)
* Fix buffer protocol implementation According to the buffer protocol, `ndim` is a _required_ field [1], and should always be set correctly. Additionally, `shape` should be set if flags includes `PyBUF_ND` or higher [2]. The current implementation only set those fields if flags was `PyBUF_STRIDES`. [1] https://docs.python.org/3/c-api/buffer.html#request-independent-fields [2] https://docs.python.org/3/c-api/buffer.html#shape-strides-suboffsets * Apply suggestions from review * Obey contiguity requests for buffer protocol If a contiguous buffer is requested, and the underlying buffer isn't, then that should raise. This matches NumPy behaviour if you do something like: ``` struct.unpack_from('5d', np.arange(20.0)[::4]) # Raises for contiguity ``` Also, if a buffer is contiguous, then it can masquerade as a less-complex buffer, either by dropping strides, or even pretending to be 1D. This matches NumPy behaviour if you do something like: ``` a = np.full((3, 5), 30.0) struct.unpack_from('15d', a) # --> Produces 1D tuple from 2D buffer. ``` * Handle review comments * Test buffer protocol against NumPy * Also check PyBUF_FORMAT results
This commit is contained in:
parent
75e48c5f95
commit
bc041de0db
@ -601,8 +601,10 @@ extern "C" inline int pybind11_getbuffer(PyObject *obj, Py_buffer *view, int fla
|
||||
set_error(PyExc_BufferError, "Writable buffer requested for readonly storage");
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Fill in all the information, and then downgrade as requested by the caller, or raise an
|
||||
// error if that's not possible.
|
||||
view->obj = obj;
|
||||
view->ndim = 1;
|
||||
view->internal = info;
|
||||
view->buf = info->ptr;
|
||||
view->itemsize = info->itemsize;
|
||||
@ -610,15 +612,59 @@ extern "C" inline int pybind11_getbuffer(PyObject *obj, Py_buffer *view, int fla
|
||||
for (auto s : info->shape) {
|
||||
view->len *= s;
|
||||
}
|
||||
view->ndim = static_cast<int>(info->ndim);
|
||||
view->shape = info->shape.data();
|
||||
view->strides = info->strides.data();
|
||||
view->readonly = static_cast<int>(info->readonly);
|
||||
if ((flags & PyBUF_FORMAT) == PyBUF_FORMAT) {
|
||||
view->format = const_cast<char *>(info->format.c_str());
|
||||
}
|
||||
if ((flags & PyBUF_STRIDES) == PyBUF_STRIDES) {
|
||||
view->ndim = (int) info->ndim;
|
||||
view->strides = info->strides.data();
|
||||
view->shape = info->shape.data();
|
||||
|
||||
// Note, all contiguity flags imply PyBUF_STRIDES and lower.
|
||||
if ((flags & PyBUF_C_CONTIGUOUS) == PyBUF_C_CONTIGUOUS) {
|
||||
if (PyBuffer_IsContiguous(view, 'C') == 0) {
|
||||
std::memset(view, 0, sizeof(Py_buffer));
|
||||
delete info;
|
||||
set_error(PyExc_BufferError,
|
||||
"C-contiguous buffer requested for discontiguous storage");
|
||||
return -1;
|
||||
}
|
||||
} else if ((flags & PyBUF_F_CONTIGUOUS) == PyBUF_F_CONTIGUOUS) {
|
||||
if (PyBuffer_IsContiguous(view, 'F') == 0) {
|
||||
std::memset(view, 0, sizeof(Py_buffer));
|
||||
delete info;
|
||||
set_error(PyExc_BufferError,
|
||||
"Fortran-contiguous buffer requested for discontiguous storage");
|
||||
return -1;
|
||||
}
|
||||
} else if ((flags & PyBUF_ANY_CONTIGUOUS) == PyBUF_ANY_CONTIGUOUS) {
|
||||
if (PyBuffer_IsContiguous(view, 'A') == 0) {
|
||||
std::memset(view, 0, sizeof(Py_buffer));
|
||||
delete info;
|
||||
set_error(PyExc_BufferError, "Contiguous buffer requested for discontiguous storage");
|
||||
return -1;
|
||||
}
|
||||
|
||||
} else if ((flags & PyBUF_STRIDES) != PyBUF_STRIDES) {
|
||||
// If no strides are requested, the buffer must be C-contiguous.
|
||||
// https://docs.python.org/3/c-api/buffer.html#contiguity-requests
|
||||
if (PyBuffer_IsContiguous(view, 'C') == 0) {
|
||||
std::memset(view, 0, sizeof(Py_buffer));
|
||||
delete info;
|
||||
set_error(PyExc_BufferError,
|
||||
"C-contiguous buffer requested for discontiguous storage");
|
||||
return -1;
|
||||
}
|
||||
|
||||
view->strides = nullptr;
|
||||
|
||||
// Since this is a contiguous buffer, it can also pretend to be 1D.
|
||||
if ((flags & PyBUF_ND) != PyBUF_ND) {
|
||||
view->shape = nullptr;
|
||||
view->ndim = 0;
|
||||
}
|
||||
}
|
||||
|
||||
Py_INCREF(view->obj);
|
||||
return 0;
|
||||
}
|
||||
|
@ -167,6 +167,125 @@ TEST_SUBMODULE(buffers, m) {
|
||||
sizeof(float)});
|
||||
});
|
||||
|
||||
// A matrix that uses Fortran storage order.
|
||||
class FortranMatrix : public Matrix {
|
||||
public:
|
||||
FortranMatrix(py::ssize_t rows, py::ssize_t cols) : Matrix(cols, rows) {
|
||||
print_created(this,
|
||||
std::to_string(rows) + "x" + std::to_string(cols) + " Fortran matrix");
|
||||
}
|
||||
|
||||
float operator()(py::ssize_t i, py::ssize_t j) const { return Matrix::operator()(j, i); }
|
||||
|
||||
float &operator()(py::ssize_t i, py::ssize_t j) { return Matrix::operator()(j, i); }
|
||||
|
||||
using Matrix::data;
|
||||
|
||||
py::ssize_t rows() const { return Matrix::cols(); }
|
||||
py::ssize_t cols() const { return Matrix::rows(); }
|
||||
};
|
||||
// Python binding for FortranMatrix: constructor, dimension accessors, element access by
// (row, col) tuple, and a buffer export describing the column-major layout.
py::class_<FortranMatrix, Matrix>(m, "FortranMatrix", py::buffer_protocol())
    .def(py::init<py::ssize_t, py::ssize_t>())

    .def("rows", &FortranMatrix::rows)
    .def("cols", &FortranMatrix::cols)

    /// Bare bones interface
    .def("__getitem__",
         [](const FortranMatrix &m, std::pair<py::ssize_t, py::ssize_t> i) {
             // Indices are signed, so both ends of the range must be checked; a negative
             // index would otherwise address memory in front of the storage.
             if (i.first < 0 || i.second < 0 || i.first >= m.rows() || i.second >= m.cols()) {
                 throw py::index_error();
             }
             return m(i.first, i.second);
         })
    .def("__setitem__",
         [](FortranMatrix &m, std::pair<py::ssize_t, py::ssize_t> i, float v) {
             // Same signed range check as __getitem__, here guarding a write.
             if (i.first < 0 || i.second < 0 || i.first >= m.rows() || i.second >= m.cols()) {
                 throw py::index_error();
             }
             m(i.first, i.second) = v;
         })
    /// Provide buffer access
    .def_buffer([](FortranMatrix &m) -> py::buffer_info {
        return py::buffer_info(m.data(),             /* Pointer to buffer */
                               {m.rows(), m.cols()}, /* Buffer dimensions */
                               /* Strides (in bytes) for each index: the row index is the */
                               /* fastest-varying one, a column step skips rows() floats. */
                               {sizeof(float), sizeof(float) * size_t(m.rows())});
    });
|
||||
|
||||
// A matrix that uses a discontiguous underlying memory block.
|
||||
class DiscontiguousMatrix : public Matrix {
|
||||
public:
|
||||
DiscontiguousMatrix(py::ssize_t rows,
|
||||
py::ssize_t cols,
|
||||
py::ssize_t row_factor,
|
||||
py::ssize_t col_factor)
|
||||
: Matrix(rows * row_factor, cols * col_factor), m_row_factor(row_factor),
|
||||
m_col_factor(col_factor) {
|
||||
print_created(this,
|
||||
std::to_string(rows) + "(*" + std::to_string(row_factor) + ")x"
|
||||
+ std::to_string(cols) + "(*" + std::to_string(col_factor)
|
||||
+ ") matrix");
|
||||
}
|
||||
|
||||
~DiscontiguousMatrix() {
|
||||
print_destroyed(this,
|
||||
std::to_string(rows() / m_row_factor) + "(*"
|
||||
+ std::to_string(m_row_factor) + ")x"
|
||||
+ std::to_string(cols() / m_col_factor) + "(*"
|
||||
+ std::to_string(m_col_factor) + ") matrix");
|
||||
}
|
||||
|
||||
float operator()(py::ssize_t i, py::ssize_t j) const {
|
||||
return Matrix::operator()(i * m_row_factor, j * m_col_factor);
|
||||
}
|
||||
|
||||
float &operator()(py::ssize_t i, py::ssize_t j) {
|
||||
return Matrix::operator()(i * m_row_factor, j * m_col_factor);
|
||||
}
|
||||
|
||||
using Matrix::data;
|
||||
|
||||
py::ssize_t rows() const { return Matrix::rows() / m_row_factor; }
|
||||
py::ssize_t cols() const { return Matrix::cols() / m_col_factor; }
|
||||
py::ssize_t row_factor() const { return m_row_factor; }
|
||||
py::ssize_t col_factor() const { return m_col_factor; }
|
||||
|
||||
private:
|
||||
py::ssize_t m_row_factor;
|
||||
py::ssize_t m_col_factor;
|
||||
};
|
||||
// Python binding for DiscontiguousMatrix: constructor, logical dimension accessors, element
// access by (row, col) tuple, and a buffer export whose strides skip the unused elements.
py::class_<DiscontiguousMatrix, Matrix>(m, "DiscontiguousMatrix", py::buffer_protocol())
    .def(py::init<py::ssize_t, py::ssize_t, py::ssize_t, py::ssize_t>())

    .def("rows", &DiscontiguousMatrix::rows)
    .def("cols", &DiscontiguousMatrix::cols)

    /// Bare bones interface
    .def("__getitem__",
         [](const DiscontiguousMatrix &m, std::pair<py::ssize_t, py::ssize_t> i) {
             // Indices are signed, so both ends of the range must be checked; a negative
             // index would otherwise address memory in front of the storage.
             if (i.first < 0 || i.second < 0 || i.first >= m.rows() || i.second >= m.cols()) {
                 throw py::index_error();
             }
             return m(i.first, i.second);
         })
    .def("__setitem__",
         [](DiscontiguousMatrix &m, std::pair<py::ssize_t, py::ssize_t> i, float v) {
             // Same signed range check as __getitem__, here guarding a write.
             if (i.first < 0 || i.second < 0 || i.first >= m.rows() || i.second >= m.cols()) {
                 throw py::index_error();
             }
             m(i.first, i.second) = v;
         })
    /// Provide buffer access
    .def_buffer([](DiscontiguousMatrix &m) -> py::buffer_info {
        return py::buffer_info(m.data(),             /* Pointer to buffer */
                               {m.rows(), m.cols()}, /* Buffer dimensions */
                               /* Strides (in bytes) for each index: one logical row spans */
                               /* row_factor base rows of cols * col_factor floats each;  */
                               /* one logical column spans col_factor floats.             */
                               {size_t(m.col_factor()) * sizeof(float) * size_t(m.cols())
                                    * size_t(m.row_factor()),
                                size_t(m.col_factor()) * sizeof(float)});
    });
|
||||
|
||||
class BrokenMatrix : public Matrix {
|
||||
public:
|
||||
BrokenMatrix(py::ssize_t rows, py::ssize_t cols) : Matrix(rows, cols) {}
|
||||
@ -268,4 +387,56 @@ TEST_SUBMODULE(buffers, m) {
|
||||
});
|
||||
|
||||
// Test hook: accepts any object convertible to py::buffer and returns the result of
// py::buffer::request() on it, so Python code can inspect what pybind11 reads from the exporter.
m.def("get_buffer_info", [](const py::buffer &buffer) { return buffer.request(); });
|
||||
|
||||
// Expose Py_buffer for testing.
|
||||
m.attr("PyBUF_FORMAT") = PyBUF_FORMAT;
|
||||
m.attr("PyBUF_SIMPLE") = PyBUF_SIMPLE;
|
||||
m.attr("PyBUF_ND") = PyBUF_ND;
|
||||
m.attr("PyBUF_STRIDES") = PyBUF_STRIDES;
|
||||
m.attr("PyBUF_INDIRECT") = PyBUF_INDIRECT;
|
||||
m.attr("PyBUF_C_CONTIGUOUS") = PyBUF_C_CONTIGUOUS;
|
||||
m.attr("PyBUF_F_CONTIGUOUS") = PyBUF_F_CONTIGUOUS;
|
||||
m.attr("PyBUF_ANY_CONTIGUOUS") = PyBUF_ANY_CONTIGUOUS;
|
||||
|
||||
// Test hook: requests a raw Py_buffer from `object` with the given PyBUF_* `flags` and returns
// a types.SimpleNamespace snapshot of the view (len, readonly, itemsize, format, ndim, shape,
// strides, suboffsets). Array fields that the exporter left null are reported as None.
// Raises whatever Python error PyObject_GetBuffer set if the request is refused.
m.def("get_py_buffer", [](const py::object &object, int flags) {
    Py_buffer buffer;
    memset(&buffer, 0, sizeof(Py_buffer));
    if (PyObject_GetBuffer(object.ptr(), &buffer, flags) == -1) {
        throw py::error_already_set();
    }

    // The view is now held by this scope. Release it on every exit path, including exceptions
    // thrown while the result is being built (import, list append, keyword call); otherwise
    // the exporter's view and its reference to `object` would leak.
    struct BufferReleaser {
        Py_buffer *view;
        ~BufferReleaser() { PyBuffer_Release(view); }
    } releaser{&buffer};

    // Copies one of the per-dimension arrays (ndim entries) into a list, or yields None when
    // the exporter did not provide that array.
    const auto as_list_or_none = [&buffer](const Py_ssize_t *values) -> py::object {
        py::object converted = py::none();
        if (values != nullptr) {
            py::list items;
            for (int i = 0; i < buffer.ndim; i++) {
                items.append(values[i]);
            }
            converted = std::move(items);
        }
        return converted;
    };

    // All fields are copied into Python objects here, before the guard releases the view.
    auto SimpleNamespace = py::module_::import("types").attr("SimpleNamespace");
    py::object result = SimpleNamespace("len"_a = buffer.len,
                                        "readonly"_a = buffer.readonly,
                                        "itemsize"_a = buffer.itemsize,
                                        "format"_a = buffer.format,
                                        "ndim"_a = buffer.ndim,
                                        "shape"_a = as_list_or_none(buffer.shape),
                                        "strides"_a = as_list_or_none(buffer.strides),
                                        "suboffsets"_a = as_list_or_none(buffer.suboffsets));
    return result;
});
|
||||
}
|
||||
|
@ -239,3 +239,163 @@ def test_buffer_exception():
|
||||
memoryview(m.BrokenMatrix(1, 1))
|
||||
assert isinstance(excinfo.value.__cause__, RuntimeError)
|
||||
assert "for context" in str(excinfo.value.__cause__)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("type", ["pybind11", "numpy"])
def test_c_contiguous_to_pybuffer(type):
    """Request a 5x4 float32 matrix at each buffer-request level and check the view fields."""
    if type == "pybind11":
        mat = m.Matrix(5, 4)
    elif type == "numpy":
        mat = np.empty((5, 4), dtype=np.float32)
    else:
        raise ValueError(f"Unknown parametrization {type}")

    def request(flags):
        # Fetch a view and check the fields that are the same for every request level.
        view = m.get_py_buffer(mat, flags)
        assert view.itemsize == ctypes.sizeof(ctypes.c_float)
        assert view.len == 5 * 4 * view.itemsize
        # For PyBUF_INDIRECT this should be filled in, but we don't use it.
        assert view.suboffsets is None
        assert not view.readonly
        return view

    # Without PyBUF_ND the view carries neither shape nor strides.
    for flags, wants_format in (
        (m.PyBUF_SIMPLE, False),
        (m.PyBUF_SIMPLE | m.PyBUF_FORMAT, True),
    ):
        view = request(flags)
        if wants_format:
            assert view.format == "f"
        else:
            assert view.format is None
        assert view.ndim == 0  # See discussion on PR #5407.
        assert view.shape is None
        assert view.strides is None

    # PyBUF_ND adds the shape, but still no strides.
    view = request(m.PyBUF_ND)
    assert view.ndim == 2
    assert view.shape == [5, 4]
    assert view.strides is None

    # PyBUF_STRIDES and PyBUF_INDIRECT additionally report the row-major strides.
    for flags in (m.PyBUF_STRIDES, m.PyBUF_INDIRECT):
        view = request(flags)
        assert view.ndim == 2
        assert view.shape == [5, 4]
        assert view.strides == [4 * view.itemsize, view.itemsize]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("type", ["pybind11", "numpy"])
def test_fortran_contiguous_to_pybuffer(type):
    """Request a Fortran-ordered 5x4 float32 matrix with strides and check the view fields."""
    if type == "pybind11":
        mat = m.FortranMatrix(5, 4)
    elif type == "numpy":
        mat = np.empty((5, 4), dtype=np.float32, order="F")
    else:
        raise ValueError(f"Unknown parametrization {type}")

    float_size = ctypes.sizeof(ctypes.c_float)

    # A Fortran-shaped buffer can only be accessed at PyBUF_STRIDES level or higher.
    for flags in (m.PyBUF_STRIDES, m.PyBUF_INDIRECT):
        view = m.get_py_buffer(mat, flags)
        assert view.itemsize == float_size
        assert view.len == 5 * 4 * view.itemsize
        assert view.ndim == 2
        assert view.shape == [5, 4]
        # Column-major: the row index is the fastest-varying one.
        assert view.strides == [view.itemsize, 5 * view.itemsize]
        # For PyBUF_INDIRECT this should be filled in, but we don't use it.
        assert view.suboffsets is None
        assert not view.readonly
|
||||
|
||||
|
||||
@pytest.mark.parametrize("type", ["pybind11", "numpy"])
def test_discontiguous_to_pybuffer(type):
    """Request a strided 5x4 float32 view (row step 2, column step 3) with PyBUF_STRIDES."""
    row_step, col_step = 2, 3
    if type == "pybind11":
        mat = m.DiscontiguousMatrix(5, 4, row_step, col_step)
    elif type == "numpy":
        backing = np.empty((5 * row_step, 4 * col_step), dtype=np.float32)
        mat = backing[::row_step, ::col_step]
    else:
        raise ValueError(f"Unknown parametrization {type}")

    view = m.get_py_buffer(mat, m.PyBUF_STRIDES)
    assert view.itemsize == ctypes.sizeof(ctypes.c_float)
    assert view.len == 5 * 4 * view.itemsize
    assert view.ndim == 2
    assert view.shape == [5, 4]
    # One logical row skips row_step rows of the 4 * col_step wide backing block.
    expected_strides = [row_step * 4 * col_step * view.itemsize, col_step * view.itemsize]
    assert view.strides == expected_strides
    assert view.suboffsets is None
    assert not view.readonly
|
||||
|
||||
|
||||
@pytest.mark.parametrize("type", ["pybind11", "numpy"])
def test_to_pybuffer_contiguity(type):
    """Check which contiguity requests succeed for C, Fortran and discontiguous matrices."""

    def check_strides(mat):
        # The full block is memset to 0, so fill it with non-zero in real spots.
        expected = np.arange(1, 5 * 4 + 1).reshape((5, 4))
        for (i, j), value in np.ndenumerate(expected):
            mat[i, j] = value
        # If all strides are correct, the exposed buffer should match the input.
        np.testing.assert_array_equal(np.array(mat), expected)

    def accepts(mat, *requests):
        for flags in requests:
            m.get_py_buffer(mat, flags)

    def rejects(mat, *requests):
        for flags in requests:
            with pytest.raises(expected_exception):
                m.get_py_buffer(mat, flags)

    if type == "pybind11":
        cmat = m.Matrix(5, 4)  # C contiguous.
        fmat = m.FortranMatrix(5, 4)  # Fortran contiguous.
        dmat = m.DiscontiguousMatrix(5, 4, 2, 3)  # Not contiguous.
        expected_exception = BufferError
    elif type == "numpy":
        cmat = np.empty((5, 4), dtype=np.float32)  # C contiguous.
        fmat = np.empty((5, 4), dtype=np.float32, order="F")  # Fortran contiguous.
        dmat = np.empty((5 * 2, 4 * 3), dtype=np.float32)[::2, ::3]  # Not contiguous.
        # NumPy incorrectly raises ValueError; when the minimum NumPy requirement is
        # above the version that fixes https://github.com/numpy/numpy/issues/3634 then
        # BufferError can be used everywhere.
        expected_exception = (BufferError, ValueError)
    else:
        raise ValueError(f"Unknown parametrization {type}")

    check_strides(cmat)
    # Should work in C-contiguous mode, but not Fortran order.
    accepts(cmat, m.PyBUF_C_CONTIGUOUS, m.PyBUF_ANY_CONTIGUOUS)
    rejects(cmat, m.PyBUF_F_CONTIGUOUS)

    check_strides(fmat)
    # PyBUF_SIMPLE and PyBUF_ND imply C-contiguity, so they fail just like an explicit
    # C-contiguous request; Fortran and "any" contiguity requests work.
    rejects(fmat, m.PyBUF_SIMPLE, m.PyBUF_ND, m.PyBUF_C_CONTIGUOUS)
    accepts(fmat, m.PyBUF_ANY_CONTIGUOUS, m.PyBUF_F_CONTIGUOUS)

    check_strides(dmat)
    # Should never work.
    rejects(
        dmat,
        m.PyBUF_SIMPLE,
        m.PyBUF_ND,
        m.PyBUF_C_CONTIGUOUS,
        m.PyBUF_ANY_CONTIGUOUS,
        m.PyBUF_F_CONTIGUOUS,
    )
|
||||
|
Loading…
Reference in New Issue
Block a user