From ce9bbc0a213bfb21b77ada8de5346be5e350e3c4 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Tue, 23 May 2023 10:03:33 -0700 Subject: [PATCH 1/3] Python 3.11+: Add `__notes__` to `error_already_set::what()` output. (#4678) * First version adding `__notes__` to `error_already_set::what()` output. * Fix trivial oversight (missing adjustment in existing test). * Minor enhancements of new code. * Re-enable `cmake --target cpptest -j 2` * Revert "Re-enable `cmake --target cpptest -j 2`" This reverts commit 60816285e9e7b95b7d33a60805888b80b2bec641. * Add general comment explaining why the `error_fetch_and_normalize` code is so unusual. --- include/pybind11/pytypes.h | 70 +++++++++++++++++++++++++++++++++----- tests/test_exceptions.py | 36 +++++++++++++++----- 2 files changed, 90 insertions(+), 16 deletions(-) diff --git a/include/pybind11/pytypes.h b/include/pybind11/pytypes.h index f11ed5da7..f5d3f34f3 100644 --- a/include/pybind11/pytypes.h +++ b/include/pybind11/pytypes.h @@ -471,13 +471,24 @@ inline const char *obj_class_name(PyObject *obj) { std::string error_string(); +// The code in this struct is very unusual, to minimize the chances of +// masking bugs (elsewhere) by errors during the error handling (here). +// This is meant to be a lifeline for troubleshooting long-running processes +// that crash under conditions that are virtually impossible to reproduce. +// Low-level implementation alternatives are preferred to higher-level ones +// that might raise cascading exceptions. Last-ditch-kind-of attempts are made +// to report as much of the original error as possible, even if there are +// secondary issues obtaining some of the details. struct error_fetch_and_normalize { - // Immediate normalization is long-established behavior (starting with - // https://github.com/pybind/pybind11/commit/135ba8deafb8bf64a15b24d1513899eb600e2011 - // from Sep 2016) and safest. Normalization could be deferred, but this could mask - // errors elsewhere, the performance gain is very minor in typical situations - // (usually the dominant bottleneck is EH unwinding), and the implementation here - // would be more complex. + // This comment only applies to Python <= 3.11: + // Immediate normalization is long-established behavior (starting with + // https://github.com/pybind/pybind11/commit/135ba8deafb8bf64a15b24d1513899eb600e2011 + // from Sep 2016) and safest. Normalization could be deferred, but this could mask + // errors elsewhere, the performance gain is very minor in typical situations + // (usually the dominant bottleneck is EH unwinding), and the implementation here + // would be more complex. + // Starting with Python 3.12, PyErr_Fetch() normalizes exceptions immediately. + // Any errors during normalization are tracked under __notes__. explicit error_fetch_and_normalize(const char *called) { PyErr_Fetch(&m_type.ptr(), &m_value.ptr(), &m_trace.ptr()); if (!m_type) { @@ -492,6 +503,14 @@ struct error_fetch_and_normalize { "of the original active exception type."); } m_lazy_error_string = exc_type_name_orig; +#if PY_VERSION_HEX >= 0x030C0000 + // The presence of __notes__ is likely due to exception normalization + // errors, although that is not necessarily true, therefore insert a + // hint only: + if (PyObject_HasAttrString(m_value.ptr(), "__notes__")) { + m_lazy_error_string += "[WITH __notes__]"; + } +#else // PyErr_NormalizeException() may change the exception type if there are cascading // failures. This can potentially be extremely confusing. PyErr_NormalizeException(&m_type.ptr(), &m_value.ptr(), &m_trace.ptr()); @@ -506,12 +525,12 @@ struct error_fetch_and_normalize { + " failed to obtain the name " "of the normalized active exception type."); } -#if defined(PYPY_VERSION_NUM) && PYPY_VERSION_NUM < 0x07030a00 +# if defined(PYPY_VERSION_NUM) && PYPY_VERSION_NUM < 0x07030a00 // This behavior runs the risk of masking errors in the error handling, but avoids a // conflict with PyPy, which relies on the normalization here to change OSError to // FileNotFoundError (https://github.com/pybind/pybind11/issues/4075). m_lazy_error_string = exc_type_name_norm; -#else +# else if (exc_type_name_norm != m_lazy_error_string) { std::string msg = std::string(called) + ": MISMATCH of original and normalized " @@ -523,6 +542,7 @@ struct error_fetch_and_normalize { msg += ": " + format_value_and_trace(); pybind11_fail(msg); } +# endif #endif } @@ -558,6 +578,40 @@ struct error_fetch_and_normalize { } } } +#if PY_VERSION_HEX >= 0x030B0000 + auto notes + = reinterpret_steal(PyObject_GetAttrString(m_value.ptr(), "__notes__")); + if (!notes) { + PyErr_Clear(); // No notes is good news. + } else { + auto len_notes = PyList_Size(notes.ptr()); + if (len_notes < 0) { + result += "\nFAILURE obtaining len(__notes__): " + detail::error_string(); + } else { + result += "\n__notes__ (len=" + std::to_string(len_notes) + "):"; + for (ssize_t i = 0; i < len_notes; i++) { + PyObject *note = PyList_GET_ITEM(notes.ptr(), i); + auto note_bytes = reinterpret_steal( + PyUnicode_AsEncodedString(note, "utf-8", "backslashreplace")); + if (!note_bytes) { + result += "\nFAILURE obtaining __notes__[" + std::to_string(i) + + "]: " + detail::error_string(); + } else { + char *buffer = nullptr; + Py_ssize_t length = 0; + if (PyBytes_AsStringAndSize(note_bytes.ptr(), &buffer, &length) + == -1) { + result += "\nFAILURE formatting __notes__[" + std::to_string(i) + + "]: " + detail::error_string(); + } else { + result += '\n'; + result += std::string(buffer, static_cast(length)); + } + } + } + } + } +#endif } else { result = ""; } diff --git a/tests/test_exceptions.py b/tests/test_exceptions.py index a92ab59a3..ccac4536d 100644 --- a/tests/test_exceptions.py +++ b/tests/test_exceptions.py @@ -317,13 +317,7 @@ def test_error_already_set_what_with_happy_exceptions( assert what == expected_what -@pytest.mark.skipif( - # Intentionally very specific: - "sys.version_info == (3, 12, 0, 'alpha', 7)", - reason="WIP: https://github.com/python/cpython/issues/102594", -) -@pytest.mark.skipif("env.PYPY", reason="PyErr_NormalizeException Segmentation fault") -def test_flaky_exception_failure_point_init(): +def _test_flaky_exception_failure_point_init_before_py_3_12(): with pytest.raises(RuntimeError) as excinfo: m.error_already_set_what(FlakyException, ("failure_point_init",)) lines = str(excinfo.value).splitlines() @@ -337,7 +331,33 @@ def test_flaky_exception_failure_point_init(): # Checking the first two lines of the traceback as formatted in error_string(): assert "test_exceptions.py(" in lines[3] assert lines[3].endswith("): __init__") - assert lines[4].endswith("): test_flaky_exception_failure_point_init") + assert lines[4].endswith( + "): _test_flaky_exception_failure_point_init_before_py_3_12" + ) + + +def _test_flaky_exception_failure_point_init_py_3_12(): + # Behavior change in Python 3.12: https://github.com/python/cpython/issues/102594 + what, py_err_set_after_what = m.error_already_set_what( + FlakyException, ("failure_point_init",) + ) + assert not py_err_set_after_what + lines = what.splitlines() + assert lines[0].endswith("ValueError[WITH __notes__]: triggered_failure_point_init") + assert lines[1] == "__notes__ (len=1):" + assert "Normalization failed:" in lines[2] + assert "FlakyException" in lines[2] + + +@pytest.mark.skipif( + "env.PYPY and sys.version_info[:2] < (3, 12)", + reason="PyErr_NormalizeException Segmentation fault", +) +def test_flaky_exception_failure_point_init(): + if sys.version_info[:2] < (3, 12): + _test_flaky_exception_failure_point_init_before_py_3_12() + else: + _test_flaky_exception_failure_point_init_py_3_12() def test_flaky_exception_failure_point_str(): From 6e6bcca5b28461cd16b2af70bba837a4b24482c0 Mon Sep 17 00:00:00 2001 From: Joyce Date: Tue, 23 May 2023 14:05:25 -0300 Subject: [PATCH 2/3] Create s Security Policy (#4671) * Create SECURITY.md * Update test_files.py to include SECURITY.md file * Update MANIFEST.in to include SECURITY.md file --- MANIFEST.in | 2 +- SECURITY.md | 13 +++++++++++++ tests/extra_python_package/test_files.py | 1 + 3 files changed, 15 insertions(+), 1 deletion(-) create mode 100644 SECURITY.md diff --git a/MANIFEST.in b/MANIFEST.in index 31632acc3..7ce83c552 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -3,4 +3,4 @@ recursive-include pybind11/include/pybind11 *.h recursive-include pybind11 *.py recursive-include pybind11 py.typed include pybind11/share/cmake/pybind11/*.cmake -include LICENSE README.rst pyproject.toml setup.py setup.cfg +include LICENSE README.rst SECURITY.md pyproject.toml setup.py setup.cfg diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 000000000..3d74611f2 --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,13 @@ +# Security Policy + +## Supported Versions + +Security updates are applied only to the latest release. + +## Reporting a Vulnerability + +If you have discovered a security vulnerability in this project, please report it privately. **Do not disclose it as a public issue.** This gives us time to work with you to fix the issue before public exposure, reducing the chance that the exploit will be used before a patch is released. + +Please disclose it at [security advisory](https://github.com/pybind/pybind11/security/advisories/new). + +This project is maintained by a team of volunteers on a reasonable-effort basis. As such, please give us at least 90 days to work on a fix before public exposure. diff --git a/tests/extra_python_package/test_files.py b/tests/extra_python_package/test_files.py index e5982f962..5b9e14569 100644 --- a/tests/extra_python_package/test_files.py +++ b/tests/extra_python_package/test_files.py @@ -111,6 +111,7 @@ sdist_files = { "MANIFEST.in", "README.rst", "PKG-INFO", + "SECURITY.md", } local_sdist_files = { From 8e1f9d5c40f74632799c8a44287d32d18a6c8630 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Tue, 23 May 2023 10:49:32 -0700 Subject: [PATCH 3/3] Add `format_descriptor<>` & `npy_format_descriptor<>` `PyObject *` specializations. (#4674) * Add `npy_format_descriptor` to enable `py::array_t` to/from-python conversions. * resolve clang-tidy warning * Use existing constructor instead of adding a static method. Thanks @Skylion007 for pointing out. * Add `format_descriptor` Trivial addition, but still in search for a meaningful test. * Add test_format_descriptor_format * Ensure the Eigen `type_caster`s do not segfault when loading arrays with dtype=object * Use `static_assert()` `!std::is_pointer<>` to replace runtime guards. * Add comments to explain how to check for ref-count bugs. (NO code changes.) * Make the "Pointer types ... are not supported" message Eigen-specific, as suggested by @Lalaland. Move to new pybind11/eigen/common.h header. * Change "format_descriptor_format" implementation as suggested by @Lalaland. Additional tests meant to ensure consistency between py::format_descriptor<>, np.array, np.format_parser turn out to be useful only to highlight long-standing inconsistencies. * resolve clang-tidy warning * Account for np.float128, np.complex256 not being available on Windows, in a future-proof way. * Fully address i|q|l ambiguity (hopefully). * Remove the new `np.format_parser()`-based test, it's much more distracting than useful. * Use bi.itemsize to disambiguate "l" or "L" * Use `py::detail::compare_buffer_info::compare()` to validate the `format_descriptor::format()` strings. * Add `buffer_info::compare` to make `detail::compare_buffer_info::compare` more visible & accessible. * silence clang-tidy warning * pytest-compatible access to np.float128, np.complex256 * Revert "pytest-compatible access to np.float128, np.complex256" This reverts commit e9a289c50fc07199806d14ded644215ab6f03afa. * Use `sizeof(long double) == sizeof(double)` instead of `std::is_same<>` * Report skipped `long double` tests. * Change the name of the new `buffer_info` member function to `item_type_is_equivalent_to`. Add comment defining "equivalent" by example. * Change `item_type_is_equivalent_to<>()` from `static` function to member function, as suggested by @Lalaland --- CMakeLists.txt | 1 + include/pybind11/buffer_info.h | 17 +++++- include/pybind11/detail/common.h | 9 +++ include/pybind11/eigen/common.h | 9 +++ include/pybind11/eigen/matrix.h | 13 +++++ include/pybind11/eigen/tensor.h | 5 ++ include/pybind11/numpy.h | 18 ++++-- tests/extra_python_package/test_files.py | 1 + tests/test_buffers.cpp | 35 ++++++++++++ tests/test_buffers.py | 57 +++++++++++++++++++ tests/test_numpy_array.cpp | 26 +++++++++ tests/test_numpy_array.py | 71 ++++++++++++++++++++++++ 12 files changed, 255 insertions(+), 7 deletions(-) create mode 100644 include/pybind11/eigen/common.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 25a7d1498..0ad74db2b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -126,6 +126,7 @@ set(PYBIND11_HEADERS include/pybind11/complex.h include/pybind11/options.h include/pybind11/eigen.h + include/pybind11/eigen/common.h include/pybind11/eigen/matrix.h include/pybind11/eigen/tensor.h include/pybind11/embed.h diff --git a/include/pybind11/buffer_info.h b/include/pybind11/buffer_info.h index 06120d556..b99ee8bef 100644 --- a/include/pybind11/buffer_info.h +++ b/include/pybind11/buffer_info.h @@ -37,6 +37,9 @@ inline std::vector f_strides(const std::vector &shape, ssize_t return strides; } +template +struct compare_buffer_info; + PYBIND11_NAMESPACE_END(detail) /// Information record describing a Python buffer object @@ -150,6 +153,17 @@ struct buffer_info { Py_buffer *view() const { return m_view; } Py_buffer *&view() { return m_view; } + /* True if the buffer item type is equivalent to `T`. */ + // To define "equivalent" by example: + // `buffer_info::item_type_is_equivalent_to(b)` and + // `buffer_info::item_type_is_equivalent_to(b)` may both be true + // on some platforms, but `int` and `unsigned` will never be equivalent. + // For the ground truth, please inspect `detail::compare_buffer_info<>`. + template + bool item_type_is_equivalent_to() const { + return detail::compare_buffer_info::compare(*this); + } + private: struct private_ctr_tag {}; @@ -170,9 +184,10 @@ private: PYBIND11_NAMESPACE_BEGIN(detail) -template +template struct compare_buffer_info { static bool compare(const buffer_info &b) { + // NOLINTNEXTLINE(bugprone-sizeof-expression) Needed for `PyObject *` return b.format == format_descriptor::format() && b.itemsize == (ssize_t) sizeof(T); } }; diff --git a/include/pybind11/detail/common.h b/include/pybind11/detail/common.h index 1ff0df09f..c2e6f9ec8 100644 --- a/include/pybind11/detail/common.h +++ b/include/pybind11/detail/common.h @@ -1025,6 +1025,15 @@ PYBIND11_RUNTIME_EXCEPTION(reference_cast_error, PyExc_RuntimeError) /// Used in template struct format_descriptor {}; +template +struct format_descriptor< + T, + detail::enable_if_t::value>> { + static constexpr const char c = 'O'; + static constexpr const char value[2] = {c, '\0'}; + static std::string format() { return std::string(1, c); } +}; + PYBIND11_NAMESPACE_BEGIN(detail) // Returns the index of the given type in the type char array below, and in the list in numpy.h // The order here is: bool; 8 ints ((signed,unsigned)x(8,16,32,64)bits); float,double,long double; diff --git a/include/pybind11/eigen/common.h b/include/pybind11/eigen/common.h new file mode 100644 index 000000000..24f56d158 --- /dev/null +++ b/include/pybind11/eigen/common.h @@ -0,0 +1,9 @@ +// Copyright (c) 2023 The pybind Community. + +#pragma once + +// Common message for `static_assert()`s, which are useful to easily +// preempt much less obvious errors. +#define PYBIND11_EIGEN_MESSAGE_POINTER_TYPES_ARE_NOT_SUPPORTED \ + "Pointer types (in particular `PyObject *`) are not supported as scalar types for Eigen " \ + "types." diff --git a/include/pybind11/eigen/matrix.h b/include/pybind11/eigen/matrix.h index bd533bed3..8d4342f81 100644 --- a/include/pybind11/eigen/matrix.h +++ b/include/pybind11/eigen/matrix.h @@ -10,6 +10,7 @@ #pragma once #include "../numpy.h" +#include "common.h" /* HINT: To suppress warnings originating from the Eigen headers, use -isystem. See also: @@ -287,6 +288,8 @@ handle eigen_encapsulate(Type *src) { template struct type_caster::value>> { using Scalar = typename Type::Scalar; + static_assert(!std::is_pointer::value, + PYBIND11_EIGEN_MESSAGE_POINTER_TYPES_ARE_NOT_SUPPORTED); using props = EigenProps; bool load(handle src, bool convert) { @@ -405,6 +408,9 @@ private: // Base class for casting reference/map/block/etc. objects back to python. template struct eigen_map_caster { + static_assert(!std::is_pointer::value, + PYBIND11_EIGEN_MESSAGE_POINTER_TYPES_ARE_NOT_SUPPORTED); + private: using props = EigenProps; @@ -457,6 +463,8 @@ private: using Type = Eigen::Ref; using props = EigenProps; using Scalar = typename props::Scalar; + static_assert(!std::is_pointer::value, + PYBIND11_EIGEN_MESSAGE_POINTER_TYPES_ARE_NOT_SUPPORTED); using MapType = Eigen::Map; using Array = array_t struct type_caster::value>> { + static_assert(!std::is_pointer::value, + PYBIND11_EIGEN_MESSAGE_POINTER_TYPES_ARE_NOT_SUPPORTED); + protected: using Matrix = Eigen::Matrix; @@ -632,6 +643,8 @@ public: template struct type_caster::value>> { using Scalar = typename Type::Scalar; + static_assert(!std::is_pointer::value, + PYBIND11_EIGEN_MESSAGE_POINTER_TYPES_ARE_NOT_SUPPORTED); using StorageIndex = remove_reference_t().outerIndexPtr())>; using Index = typename Type::Index; static constexpr bool rowMajor = Type::IsRowMajor; diff --git a/include/pybind11/eigen/tensor.h b/include/pybind11/eigen/tensor.h index de7dcba89..25d12baca 100644 --- a/include/pybind11/eigen/tensor.h +++ b/include/pybind11/eigen/tensor.h @@ -8,6 +8,7 @@ #pragma once #include "../numpy.h" +#include "common.h" #if defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER) static_assert(__GNUC__ > 5, "Eigen Tensor support in pybind11 requires GCC > 5.0"); @@ -164,6 +165,8 @@ PYBIND11_WARNING_POP template struct type_caster::ValidType> { + static_assert(!std::is_pointer::value, + PYBIND11_EIGEN_MESSAGE_POINTER_TYPES_ARE_NOT_SUPPORTED); using Helper = eigen_tensor_helper; static constexpr auto temp_name = get_tensor_descriptor::value; PYBIND11_TYPE_CASTER(Type, temp_name); @@ -359,6 +362,8 @@ struct get_storage_pointer_type struct type_caster, typename eigen_tensor_helper>::ValidType> { + static_assert(!std::is_pointer::value, + PYBIND11_EIGEN_MESSAGE_POINTER_TYPES_ARE_NOT_SUPPORTED); using MapType = Eigen::TensorMap; using Helper = eigen_tensor_helper>; diff --git a/include/pybind11/numpy.h b/include/pybind11/numpy.h index 854d6e87f..36077ec04 100644 --- a/include/pybind11/numpy.h +++ b/include/pybind11/numpy.h @@ -564,6 +564,8 @@ public: m_ptr = from_args(args).release().ptr(); } + /// Return dtype for the given typenum (one of the NPY_TYPES). + /// https://numpy.org/devdocs/reference/c-api/array.html#c.PyArray_DescrFromType explicit dtype(int typenum) : object(detail::npy_api::get().PyArray_DescrFromType_(typenum), stolen_t{}) { if (m_ptr == nullptr) { @@ -1283,12 +1285,16 @@ private: public: static constexpr int value = values[detail::is_fmt_numeric::index]; - static pybind11::dtype dtype() { - if (auto *ptr = npy_api::get().PyArray_DescrFromType_(value)) { - return reinterpret_steal(ptr); - } - pybind11_fail("Unsupported buffer format!"); - } + static pybind11::dtype dtype() { return pybind11::dtype(/*typenum*/ value); } +}; + +template +struct npy_format_descriptor::value>> { + static constexpr auto name = const_name("object"); + + static constexpr int value = npy_api::NPY_OBJECT_; + + static pybind11::dtype dtype() { return pybind11::dtype(/*typenum*/ value); } }; #define PYBIND11_DECL_CHAR_FMT \ diff --git a/tests/extra_python_package/test_files.py b/tests/extra_python_package/test_files.py index 5b9e14569..57387dd8b 100644 --- a/tests/extra_python_package/test_files.py +++ b/tests/extra_python_package/test_files.py @@ -57,6 +57,7 @@ detail_headers = { } eigen_headers = { + "include/pybind11/eigen/common.h", "include/pybind11/eigen/matrix.h", "include/pybind11/eigen/tensor.h", } diff --git a/tests/test_buffers.cpp b/tests/test_buffers.cpp index 6b6e8cba7..b5b8c355b 100644 --- a/tests/test_buffers.cpp +++ b/tests/test_buffers.cpp @@ -7,12 +7,47 @@ BSD-style license that can be found in the LICENSE file. */ +#include #include #include "constructor_stats.h" #include "pybind11_tests.h" TEST_SUBMODULE(buffers, m) { + m.attr("long_double_and_double_have_same_size") = (sizeof(long double) == sizeof(double)); + + m.def("format_descriptor_format_buffer_info_equiv", + [](const std::string &cpp_name, const py::buffer &buffer) { + // https://google.github.io/styleguide/cppguide.html#Static_and_Global_Variables + static auto *format_table = new std::map; + static auto *equiv_table + = new std::map; + if (format_table->empty()) { +#define PYBIND11_ASSIGN_HELPER(...) \ + (*format_table)[#__VA_ARGS__] = py::format_descriptor<__VA_ARGS__>::format(); \ + (*equiv_table)[#__VA_ARGS__] = &py::buffer_info::item_type_is_equivalent_to<__VA_ARGS__>; + PYBIND11_ASSIGN_HELPER(PyObject *) + PYBIND11_ASSIGN_HELPER(bool) + PYBIND11_ASSIGN_HELPER(std::int8_t) + PYBIND11_ASSIGN_HELPER(std::uint8_t) + PYBIND11_ASSIGN_HELPER(std::int16_t) + PYBIND11_ASSIGN_HELPER(std::uint16_t) + PYBIND11_ASSIGN_HELPER(std::int32_t) + PYBIND11_ASSIGN_HELPER(std::uint32_t) + PYBIND11_ASSIGN_HELPER(std::int64_t) + PYBIND11_ASSIGN_HELPER(std::uint64_t) + PYBIND11_ASSIGN_HELPER(float) + PYBIND11_ASSIGN_HELPER(double) + PYBIND11_ASSIGN_HELPER(long double) + PYBIND11_ASSIGN_HELPER(std::complex) + PYBIND11_ASSIGN_HELPER(std::complex) + PYBIND11_ASSIGN_HELPER(std::complex) +#undef PYBIND11_ASSIGN_HELPER + } + return std::pair( + (*format_table)[cpp_name], (buffer.request().*((*equiv_table)[cpp_name]))()); + }); + // test_from_python / test_to_python: class Matrix { public: diff --git a/tests/test_buffers.py b/tests/test_buffers.py index eb58c4675..63d9d869f 100644 --- a/tests/test_buffers.py +++ b/tests/test_buffers.py @@ -10,6 +10,63 @@ from pybind11_tests import buffers as m np = pytest.importorskip("numpy") +if m.long_double_and_double_have_same_size: + # Determined by the compiler used to build the pybind11 tests + # (e.g. MSVC gets here, but MinGW might not). + np_float128 = None + np_complex256 = None +else: + # Determined by the compiler used to build numpy (e.g. MinGW). + np_float128 = getattr(np, *["float128"] * 2) + np_complex256 = getattr(np, *["complex256"] * 2) + +CPP_NAME_FORMAT_NP_DTYPE_TABLE = [ + ("PyObject *", "O", object), + ("bool", "?", np.bool_), + ("std::int8_t", "b", np.int8), + ("std::uint8_t", "B", np.uint8), + ("std::int16_t", "h", np.int16), + ("std::uint16_t", "H", np.uint16), + ("std::int32_t", "i", np.int32), + ("std::uint32_t", "I", np.uint32), + ("std::int64_t", "q", np.int64), + ("std::uint64_t", "Q", np.uint64), + ("float", "f", np.float32), + ("double", "d", np.float64), + ("long double", "g", np_float128), + ("std::complex", "Zf", np.complex64), + ("std::complex", "Zd", np.complex128), + ("std::complex", "Zg", np_complex256), +] +CPP_NAME_FORMAT_TABLE = [ + (cpp_name, format) + for cpp_name, format, np_dtype in CPP_NAME_FORMAT_NP_DTYPE_TABLE + if np_dtype is not None +] +CPP_NAME_NP_DTYPE_TABLE = [ + (cpp_name, np_dtype) for cpp_name, _, np_dtype in CPP_NAME_FORMAT_NP_DTYPE_TABLE +] + + +@pytest.mark.parametrize(("cpp_name", "np_dtype"), CPP_NAME_NP_DTYPE_TABLE) +def test_format_descriptor_format_buffer_info_equiv(cpp_name, np_dtype): + if np_dtype is None: + pytest.skip( + f"cpp_name=`{cpp_name}`: `long double` and `double` have same size." + ) + if isinstance(np_dtype, str): + pytest.skip(f"np.{np_dtype} does not exist.") + np_array = np.array([], dtype=np_dtype) + for other_cpp_name, expected_format in CPP_NAME_FORMAT_TABLE: + format, np_array_is_matching = m.format_descriptor_format_buffer_info_equiv( + other_cpp_name, np_array + ) + assert format == expected_format + if other_cpp_name == cpp_name: + assert np_array_is_matching + else: + assert not np_array_is_matching + def test_from_python(): with pytest.raises(RuntimeError) as excinfo: diff --git a/tests/test_numpy_array.cpp b/tests/test_numpy_array.cpp index b118e2c6c..8c122a865 100644 --- a/tests/test_numpy_array.cpp +++ b/tests/test_numpy_array.cpp @@ -523,4 +523,30 @@ TEST_SUBMODULE(numpy_array, sm) { sm.def("test_fmt_desc_const_double", [](const py::array_t &) {}); sm.def("round_trip_float", [](double d) { return d; }); + + sm.def("pass_array_pyobject_ptr_return_sum_str_values", + [](const py::array_t &objs) { + std::string sum_str_values; + for (const auto &obj : objs) { + sum_str_values += py::str(obj.attr("value")); + } + return sum_str_values; + }); + + sm.def("pass_array_pyobject_ptr_return_as_list", + [](const py::array_t &objs) -> py::list { return objs; }); + + sm.def("return_array_pyobject_ptr_cpp_loop", [](const py::list &objs) { + py::size_t arr_size = py::len(objs); + py::array_t arr_from_list(static_cast(arr_size)); + PyObject **data = arr_from_list.mutable_data(); + for (py::size_t i = 0; i < arr_size; i++) { + assert(data[i] == nullptr); + data[i] = py::cast(objs[i].attr("value")); + } + return arr_from_list; + }); + + sm.def("return_array_pyobject_ptr_from_list", + [](const py::list &objs) -> py::array_t { return objs; }); } diff --git a/tests/test_numpy_array.py b/tests/test_numpy_array.py index 070813d3a..12e7d17d1 100644 --- a/tests/test_numpy_array.py +++ b/tests/test_numpy_array.py @@ -595,3 +595,74 @@ def test_round_trip_float(): arr = np.zeros((), np.float64) arr[()] = 37.2 assert m.round_trip_float(arr) == 37.2 + + +# HINT: An easy and robust way (although only manual unfortunately) to check for +# ref-count leaks in the test_.*pyobject_ptr.* functions below is to +# * temporarily insert `while True:` (one-by-one), +# * run this test, and +# * run the Linux `top` command in another shell to visually monitor +# `RES` for a minute or two. +# If there is a leak, it is usually evident in seconds because the `RES` +# value increases without bounds. (Don't forget to Ctrl-C the test!) + + +# For use as a temporary user-defined object, to maximize sensitivity of the tests below: +# * Ref-count leaks will be immediately evident. +# * Sanitizers are much more likely to detect heap-use-after-free due to +# other ref-count bugs. +class PyValueHolder: + def __init__(self, value): + self.value = value + + +def WrapWithPyValueHolder(*values): + return [PyValueHolder(v) for v in values] + + +def UnwrapPyValueHolder(vhs): + return [vh.value for vh in vhs] + + +def test_pass_array_pyobject_ptr_return_sum_str_values_ndarray(): + # Intentionally all temporaries, do not change. + assert ( + m.pass_array_pyobject_ptr_return_sum_str_values( + np.array(WrapWithPyValueHolder(-3, "four", 5.0), dtype=object) + ) + == "-3four5.0" + ) + + +def test_pass_array_pyobject_ptr_return_sum_str_values_list(): + # Intentionally all temporaries, do not change. + assert ( + m.pass_array_pyobject_ptr_return_sum_str_values( + WrapWithPyValueHolder(2, "three", -4.0) + ) + == "2three-4.0" + ) + + +def test_pass_array_pyobject_ptr_return_as_list(): + # Intentionally all temporaries, do not change. + assert UnwrapPyValueHolder( + m.pass_array_pyobject_ptr_return_as_list( + np.array(WrapWithPyValueHolder(-1, "two", 3.0), dtype=object) + ) + ) == [-1, "two", 3.0] + + +@pytest.mark.parametrize( + ("return_array_pyobject_ptr", "unwrap"), + [ + (m.return_array_pyobject_ptr_cpp_loop, list), + (m.return_array_pyobject_ptr_from_list, UnwrapPyValueHolder), + ], +) +def test_return_array_pyobject_ptr_cpp_loop(return_array_pyobject_ptr, unwrap): + # Intentionally all temporaries, do not change. + arr_from_list = return_array_pyobject_ptr(WrapWithPyValueHolder(6, "seven", -8.0)) + assert isinstance(arr_from_list, np.ndarray) + assert arr_from_list.dtype == np.dtype("O") + assert unwrap(arr_from_list) == [6, "seven", -8.0]