feat: make numpy.h compatible with both NumPy 1.x and 2.x (#5050)

* API: Make `numpy.h` compatible with both NumPy 1.x and 2.x * TST: Update numpy dtype flags test to not convert flags to char * API: Add `numpy2.h` instead and make `numpy.h` safe This means that users of `numpy.h` cannot be broken, but need to update to `numpy2.h` if they want to compile for NumPy 2. Using Macros simply and didn't bother to try to remove unnecessary code paths. * API: Rather than `numpy2.h` use a define for the user. * Thread `PYBIND11_NUMPY2_SUPPORT` through things and try to adapt test matrix * Small fixups (shouldn't matter)? * Fixup. Does upgrading scipy help? (it shouldn't?) (Some other small fixup) * Use NumPy 2 nightlies for ubuntu-latest job also * BUG: Fix numpy.bool check * TST: Fix complexwarning * BUG: Fix the fact that only the 50 slot is filled with the copy alias (There were 3 functions all doing the same, only this slot survived 2.x) * TST: One more test tweak * TST: Use "long" name for long, since it changed on windows * TST: Apparently we didn't always have ulong, so just use `L` * TST: Enforce dtype='l' for test as default isn't long anymore on windows * Rename macro and invert logic to PYBIND11_NUMPY_1_ONLY * PYBIND11_INTERNAL_NUMPY_1_ONLY_DETECTED * Test and code comment expansion * CI: Use pre-releases of numpy/scipy from pip via explicit version * CI: NumPy 2 only available on almalinux (as it is Python >=3.9) * MAINT: Match name more exactly and adopt error phrasing * MAINT: Pushed early, move helper to be private member * fix error message compilation when using NumPy 1.x-only backcompat * silence name shadowing warning * chore: minor optimization Signed-off-by: Henry Schreiner <henryschreineriii@gmail.com> --------- Signed-off-by: Henry Schreiner <henryschreineriii@gmail.com> Co-authored-by: Ralf W. Grosse-Kunstleve <rwgk@google.com> Co-authored-by: Henry Schreiner <henryschreineriii@gmail.com>
2025-03-03 04:57:21 +00:00 · 2024-03-26 23:20:11 +01:00 · 2024-03-26 23:20:11 +01:00 · 705efccecd
commit 705efccecd
parent e0f2c71596
11 changed files with 206 additions and 21 deletions
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@ -108,12 +108,14 @@ jobs:
      run: python -m pip install pytest-github-actions-annotate-failures

    # First build - C++11 mode and inplace
-    # More-or-less randomly adding -DPYBIND11_SIMPLE_GIL_MANAGEMENT=ON here.
+    # More-or-less randomly adding -DPYBIND11_SIMPLE_GIL_MANAGEMENT=ON here
+    # (same for PYBIND11_NUMPY_1_ONLY, but requires a NumPy 1.x at runtime).
    - name: Configure C++11 ${{ matrix.args }}
      run: >
        cmake -S . -B .
        -DPYBIND11_WERROR=ON
        -DPYBIND11_SIMPLE_GIL_MANAGEMENT=ON
+        -DPYBIND11_NUMPY_1_ONLY=ON
        -DDOWNLOAD_CATCH=ON
        -DDOWNLOAD_EIGEN=ON
        -DCMAKE_CXX_STANDARD=11
@ -138,11 +140,13 @@ jobs:

    # Second build - C++17 mode and in a build directory
    # More-or-less randomly adding -DPYBIND11_SIMPLE_GIL_MANAGEMENT=OFF here.
+    # (same for PYBIND11_NUMPY_1_ONLY, but requires a NumPy 1.x at runtime).
    - name: Configure C++17
      run: >
        cmake -S . -B build2
        -DPYBIND11_WERROR=ON
        -DPYBIND11_SIMPLE_GIL_MANAGEMENT=OFF
+        -DPYBIND11_NUMPY_1_ONLY=ON
        -DDOWNLOAD_CATCH=ON
        -DDOWNLOAD_EIGEN=ON
        -DCMAKE_CXX_STANDARD=17
@ -660,6 +664,11 @@ jobs:
      run: |
        python3 -m pip install cmake -r tests/requirements.txt

+    - name: Ensure NumPy 2 is used (required Python >= 3.9)
+      if: matrix.container == 'almalinux:9'
+      run: |
+        python3 -m pip install 'numpy>=2.0.0b1' 'scipy>=1.13.0rc1'
+
    - name: Configure
      shell: bash
      run: >
@ -895,8 +904,10 @@ jobs:
        python-version: ${{ matrix.python }}

    - name: Prepare env
+      # Ensure use of NumPy 2 (via pip pre-releases for now, but can be changed soon)
      run: |
        python3 -m pip install -r tests/requirements.txt
+        python3 -m pip install 'numpy>=2.0.0b1' 'scipy>=1.13.0rc1'

    - name: Update CMake
      uses: jwlawson/actions-setup-cmake@v2.0
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -109,6 +109,8 @@ option(PYBIND11_TEST "Build pybind11 test suite?" ${PYBIND11_MASTER_PROJECT})
 option(PYBIND11_NOPYTHON "Disable search for Python" OFF)
 option(PYBIND11_SIMPLE_GIL_MANAGEMENT
       "Use simpler GIL management logic that does not support disassociation" OFF)
+option(PYBIND11_NUMPY_1_ONLY
+       "Disable NumPy 2 support to avoid changes to previous pybind11 versions." OFF)
 set(PYBIND11_INTERNALS_VERSION
    ""
    CACHE STRING "Override the ABI version, may be used to enable the unstable ABI.")
@ -116,6 +118,9 @@ set(PYBIND11_INTERNALS_VERSION
 if(PYBIND11_SIMPLE_GIL_MANAGEMENT)
  add_compile_definitions(PYBIND11_SIMPLE_GIL_MANAGEMENT)
 endif()
+if(PYBIND11_NUMPY_1_ONLY)
+  add_compile_definitions(PYBIND11_NUMPY_1_ONLY)
+endif()

 cmake_dependent_option(
  USE_PYTHON_INCLUDE_DIR
--- a/include/pybind11/cast.h
+++ b/include/pybind11/cast.h
@ -327,8 +327,9 @@ public:
            value = false;
            return true;
        }
-        if (convert || (std::strcmp("numpy.bool_", Py_TYPE(src.ptr())->tp_name) == 0)) {
-            // (allow non-implicit conversion for numpy booleans)
+        if (convert || is_numpy_bool(src)) {
+            // (allow non-implicit conversion for numpy booleans); is_numpy_bool
+            // accepts both `numpy.bool` (NumPy 2) and `numpy.bool_` (NumPy 1.x).

            Py_ssize_t res = -1;
            if (src.is_none()) {
@ -360,6 +361,15 @@ public:
        return handle(src ? Py_True : Py_False).inc_ref();
    }
    PYBIND11_TYPE_CASTER(bool, const_name("bool"));
+
+private:
+    // Test if an object is a NumPy boolean (without fetching the type).
+    static inline bool is_numpy_bool(handle object) {
+        const char *type_name = Py_TYPE(object.ptr())->tp_name;
+        // Name changed to `numpy.bool` in NumPy 2, `numpy.bool_` is needed for 1.x support
+        return std::strcmp("numpy.bool", type_name) == 0
+               || std::strcmp("numpy.bool_", type_name) == 0;
+    }
 };

 // Helper class for UTF-{8,16,32} C++ stl strings:
--- a/include/pybind11/detail/common.h
+++ b/include/pybind11/detail/common.h
@ -296,6 +296,10 @@ PYBIND11_WARNING_DISABLE_MSVC(4505)
 #    undef copysign
 #endif

+#if defined(PYBIND11_NUMPY_1_ONLY)
+#    define PYBIND11_INTERNAL_NUMPY_1_ONLY_DETECTED
+#endif
+
 #if defined(PYPY_VERSION) && !defined(PYBIND11_SIMPLE_GIL_MANAGEMENT)
 #    define PYBIND11_SIMPLE_GIL_MANAGEMENT
 #endif
--- a/include/pybind11/numpy.h
+++ b/include/pybind11/numpy.h
@ -29,10 +29,15 @@
 #include <utility>
 #include <vector>

+#if defined(PYBIND11_NUMPY_1_ONLY) && !defined(PYBIND11_INTERNAL_NUMPY_1_ONLY_DETECTED)
+#    error PYBIND11_NUMPY_1_ONLY must be defined before any pybind11 header is included.
+#endif
+
 /* This will be true on all flat address space platforms and allows us to reduce the
   whole npy_intp / ssize_t / Py_intptr_t business down to just ssize_t for all size
   and dimension types (e.g. shape, strides, indexing), instead of inflicting this
-   upon the library user. */
+   upon the library user.
+   Note that NumPy 2 now uses ssize_t for `npy_intp` to simplify this. */
 static_assert(sizeof(::pybind11::ssize_t) == sizeof(Py_intptr_t), "ssize_t != Py_intptr_t");
 static_assert(std::is_signed<Py_intptr_t>::value, "Py_intptr_t must be signed");
 // We now can reinterpret_cast between py::ssize_t and Py_intptr_t (MSVC + PyPy cares)
@ -53,7 +58,8 @@ struct handle_type_name<array> {
 template <typename type, typename SFINAE = void>
 struct npy_format_descriptor;

-struct PyArrayDescr_Proxy {
+/* NumPy 1 proxy (always includes legacy fields) */
+struct PyArrayDescr1_Proxy {
    PyObject_HEAD
    PyObject *typeobj;
    char kind;
@ -68,6 +74,43 @@ struct PyArrayDescr_Proxy {
    PyObject *names;
 };

+#ifndef PYBIND11_NUMPY_1_ONLY
+struct PyArrayDescr_Proxy {
+    PyObject_HEAD
+    PyObject *typeobj;
+    char kind;
+    char type;
+    char byteorder;
+    char _former_flags;
+    int type_num;
+    /* Additional fields are NumPy version specific. */
+};
+#else
+/* NumPy 1.x only, we can expose all fields */
+using PyArrayDescr_Proxy = PyArrayDescr1_Proxy;
+#endif
+
+/* NumPy 2 proxy, including legacy fields */
+struct PyArrayDescr2_Proxy {
+    PyObject_HEAD
+    PyObject *typeobj;
+    char kind;
+    char type;
+    char byteorder;
+    char _former_flags;
+    int type_num;
+    std::uint64_t flags;
+    ssize_t elsize;
+    ssize_t alignment;
+    PyObject *metadata;
+    Py_hash_t hash;
+    void *reserved_null[2];
+    /* The following fields only exist if 0 <= type_num < 2056 */
+    char *subarray;
+    PyObject *fields;
+    PyObject *names;
+};
+
 struct PyArray_Proxy {
    PyObject_HEAD
    char *data;
@ -131,6 +174,14 @@ PYBIND11_NOINLINE module_ import_numpy_core_submodule(const char *submodule_name
    object numpy_version = numpy_lib.attr("NumpyVersion")(version_string);
    int major_version = numpy_version.attr("major").cast<int>();

+#ifdef PYBIND11_NUMPY_1_ONLY
+    if (major_version >= 2) {
+        throw std::runtime_error(
+            "This extension was built with PYBIND11_NUMPY_1_ONLY defined, "
+            "but NumPy 2 is used in this process. For NumPy2 compatibility, "
+            "this extension needs to be rebuilt without the PYBIND11_NUMPY_1_ONLY define.");
+    }
+#endif
    /* `numpy.core` was renamed to `numpy._core` in NumPy 2.0 as it officially
        became a private module. */
    std::string numpy_core_path = major_version >= 2 ? "numpy._core" : "numpy.core";
@ -203,6 +254,8 @@ struct npy_api {
            NPY_ULONG_, NPY_ULONGLONG_, NPY_UINT_),
    };

+    unsigned int PyArray_RUNTIME_VERSION_;
+
    struct PyArray_Dims {
        Py_intptr_t *ptr;
        int len;
@ -241,6 +294,7 @@ struct npy_api {
    PyObject *(*PyArray_FromAny_)(PyObject *, PyObject *, int, int, int, PyObject *);
    int (*PyArray_DescrConverter_)(PyObject *, PyObject **);
    bool (*PyArray_EquivTypes_)(PyObject *, PyObject *);
+#ifdef PYBIND11_NUMPY_1_ONLY
    int (*PyArray_GetArrayParamsFromObject_)(PyObject *,
                                             PyObject *,
                                             unsigned char,
@ -249,6 +303,7 @@ struct npy_api {
                                             Py_intptr_t *,
                                             PyObject **,
                                             PyObject *);
+#endif
    PyObject *(*PyArray_Squeeze_)(PyObject *);
    // Unused. Not removed because that affects ABI of the class.
    int (*PyArray_SetBaseObject_)(PyObject *, PyObject *);
@ -266,7 +321,8 @@ private:
        API_PyArray_DescrFromScalar = 57,
        API_PyArray_FromAny = 69,
        API_PyArray_Resize = 80,
-        API_PyArray_CopyInto = 82,
+        // CopyInto was slot 82 and 50 was effectively an alias. NumPy 2 removed 82.
+        API_PyArray_CopyInto = 50,
        API_PyArray_NewCopy = 85,
        API_PyArray_NewFromDescr = 94,
        API_PyArray_DescrNewFromType = 96,
@ -275,7 +331,9 @@ private:
        API_PyArray_View = 137,
        API_PyArray_DescrConverter = 174,
        API_PyArray_EquivTypes = 182,
+#ifdef PYBIND11_NUMPY_1_ONLY
        API_PyArray_GetArrayParamsFromObject = 278,
+#endif
        API_PyArray_SetBaseObject = 282
    };

@ -290,7 +348,8 @@ private:
        npy_api api;
 #define DECL_NPY_API(Func) api.Func##_ = (decltype(api.Func##_)) api_ptr[API_##Func];
        DECL_NPY_API(PyArray_GetNDArrayCFeatureVersion);
-        if (api.PyArray_GetNDArrayCFeatureVersion_() < 0x7) {
+        api.PyArray_RUNTIME_VERSION_ = api.PyArray_GetNDArrayCFeatureVersion_();
+        if (api.PyArray_RUNTIME_VERSION_ < 0x7) {
            pybind11_fail("pybind11 numpy support requires numpy >= 1.7.0");
        }
        DECL_NPY_API(PyArray_Type);
@ -309,7 +368,9 @@ private:
        DECL_NPY_API(PyArray_View);
        DECL_NPY_API(PyArray_DescrConverter);
        DECL_NPY_API(PyArray_EquivTypes);
+#ifdef PYBIND11_NUMPY_1_ONLY
        DECL_NPY_API(PyArray_GetArrayParamsFromObject);
+#endif
        DECL_NPY_API(PyArray_SetBaseObject);

 #undef DECL_NPY_API
@ -331,6 +392,14 @@ inline const PyArrayDescr_Proxy *array_descriptor_proxy(const PyObject *ptr) {
    return reinterpret_cast<const PyArrayDescr_Proxy *>(ptr);
 }

+inline const PyArrayDescr1_Proxy *array_descriptor1_proxy(const PyObject *ptr) {
+    return reinterpret_cast<const PyArrayDescr1_Proxy *>(ptr);
+}
+
+inline const PyArrayDescr2_Proxy *array_descriptor2_proxy(const PyObject *ptr) {
+    return reinterpret_cast<const PyArrayDescr2_Proxy *>(ptr);
+}
+
 inline bool check_flags(const void *ptr, int flag) {
    return (flag == (array_proxy(ptr)->flags & flag));
 }
@ -610,10 +679,32 @@ public:
    }

    /// Size of the data type in bytes.
+#ifdef PYBIND11_NUMPY_1_ONLY
    ssize_t itemsize() const { return detail::array_descriptor_proxy(m_ptr)->elsize; }
+#else
+    ssize_t itemsize() const {
+        if (detail::npy_api::get().PyArray_RUNTIME_VERSION_ < 0x12) {
+            return detail::array_descriptor1_proxy(m_ptr)->elsize;
+        }
+        return detail::array_descriptor2_proxy(m_ptr)->elsize;
+    }
+#endif

    /// Returns true for structured data types.
+#ifdef PYBIND11_NUMPY_1_ONLY
    bool has_fields() const { return detail::array_descriptor_proxy(m_ptr)->names != nullptr; }
+#else
+    bool has_fields() const {
+        if (detail::npy_api::get().PyArray_RUNTIME_VERSION_ < 0x12) {
+            return detail::array_descriptor1_proxy(m_ptr)->names != nullptr;
+        }
+        const auto *proxy = detail::array_descriptor2_proxy(m_ptr);
+        if (proxy->type_num < 0 || proxy->type_num >= 2056) {
+            return false;
+        }
+        return proxy->names != nullptr;
+    }
+#endif

    /// Single-character code for dtype's kind.
    /// For example, floating point types are 'f' and integral types are 'i'.
@ -639,11 +730,29 @@ public:
    /// Single character for byteorder
    char byteorder() const { return detail::array_descriptor_proxy(m_ptr)->byteorder; }

-    /// Alignment of the data type
+/// Alignment of the data type
+#ifdef PYBIND11_NUMPY_1_ONLY
    int alignment() const { return detail::array_descriptor_proxy(m_ptr)->alignment; }
+#else
+    ssize_t alignment() const {
+        if (detail::npy_api::get().PyArray_RUNTIME_VERSION_ < 0x12) {
+            return detail::array_descriptor1_proxy(m_ptr)->alignment;
+        }
+        return detail::array_descriptor2_proxy(m_ptr)->alignment;
+    }
+#endif

-    /// Flags for the array descriptor
+/// Flags for the array descriptor
+#ifdef PYBIND11_NUMPY_1_ONLY
    char flags() const { return detail::array_descriptor_proxy(m_ptr)->flags; }
+#else
+    std::uint64_t flags() const {
+        if (detail::npy_api::get().PyArray_RUNTIME_VERSION_ < 0x12) {
+            return (unsigned char) detail::array_descriptor1_proxy(m_ptr)->flags;
+        }
+        return detail::array_descriptor2_proxy(m_ptr)->flags;
+    }
+#endif

 private:
    static object &_dtype_from_pep3118() {
@ -810,9 +919,7 @@ public:
    }

    /// Byte size of a single element
-    ssize_t itemsize() const {
-        return detail::array_descriptor_proxy(detail::array_proxy(m_ptr)->descr)->elsize;
-    }
+    ssize_t itemsize() const { return dtype().itemsize(); }

    /// Total number of bytes
    ssize_t nbytes() const { return size() * itemsize(); }
--- a/tests/conftest.py
+++ b/tests/conftest.py
@ -218,4 +218,5 @@ def pytest_report_header(config):
        f" {pybind11_tests.cpp_std}"
        f" {pybind11_tests.PYBIND11_INTERNALS_ID}"
        f" PYBIND11_SIMPLE_GIL_MANAGEMENT={pybind11_tests.PYBIND11_SIMPLE_GIL_MANAGEMENT}"
+        f" PYBIND11_NUMPY_1_ONLY={pybind11_tests.PYBIND11_NUMPY_1_ONLY}"
    )
--- a/tests/pybind11_tests.cpp
+++ b/tests/pybind11_tests.cpp
@ -95,6 +95,12 @@ PYBIND11_MODULE(pybind11_tests, m) {
 #else
        false;
 #endif
+    m.attr("PYBIND11_NUMPY_1_ONLY") =
+#if defined(PYBIND11_NUMPY_1_ONLY)
+        true;
+#else
+        false;
+#endif

    bind_ConstructorStats(m);

--- a/tests/test_eigen_matrix.py
+++ b/tests/test_eigen_matrix.py
@ -608,7 +608,9 @@ def test_both_ref_mutators():
 def test_nocopy_wrapper():
    # get_elem requires a column-contiguous matrix reference, but should be
    # callable with other types of matrix (via copying):
-    int_matrix_colmajor = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], order="F")
+    int_matrix_colmajor = np.array(
+        [[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype="l", order="F"
+    )
    dbl_matrix_colmajor = np.array(
        int_matrix_colmajor, dtype="double", order="F", copy=True
    )
--- a/tests/test_numpy_array.py
+++ b/tests/test_numpy_array.py
@ -536,7 +536,12 @@ def test_format_descriptors_for_floating_point_types(test_func):
@pytest.mark.parametrize("contiguity", [None, "C", "F"])
@pytest.mark.parametrize("noconvert", [False, True])
@pytest.mark.filterwarnings(
-    "ignore:Casting complex values to real discards the imaginary part:numpy.ComplexWarning"
+    "ignore:Casting complex values to real discards the imaginary part:"
+    + (
+        "numpy.exceptions.ComplexWarning"
+        if hasattr(np, "exceptions")
+        else "numpy.ComplexWarning"
+    )
 )
 def test_argument_conversions(forcecast, contiguity, noconvert):
    function_name = "accept_double"
@ -583,7 +588,8 @@ def test_argument_conversions(forcecast, contiguity, noconvert):
 def test_dtype_refcount_leak():
    from sys import getrefcount

-    dtype = np.dtype(np.float_)
+    # Was np.float_ but that alias for float64 was removed in NumPy 2.
+    dtype = np.dtype(np.float64)
    a = np.array([1], dtype=dtype)
    before = getrefcount(dtype)
    m.ndim(a)
--- a/tests/test_numpy_dtypes.cpp
+++ b/tests/test_numpy_dtypes.cpp
@ -405,10 +405,35 @@ TEST_SUBMODULE(numpy_dtypes, m) {
    });

    // test_dtype
+    // Below we use `L` for unsigned long, as it is unfortunately the only name
+    // that works reliably on both NumPy 2.x and old NumPy 1.x.
    std::vector<const char *> dtype_names{
-        "byte",    "short",   "intc",        "int_",  "longlong",   "ubyte",       "ushort",
-        "uintc",   "uint",    "ulonglong",   "half",  "single",     "double",      "longdouble",
-        "csingle", "cdouble", "clongdouble", "bool_", "datetime64", "timedelta64", "object_"};
+        "byte",
+        "short",
+        "intc",
+        "long",
+        "longlong",
+        "ubyte",
+        "ushort",
+        "uintc",
+        "L",
+        "ulonglong",
+        "half",
+        "single",
+        "double",
+        "longdouble",
+        "csingle",
+        "cdouble",
+        "clongdouble",
+        "bool_",
+        "datetime64",
+        "timedelta64",
+        "object_",
+        // platform dependent aliases (int_ and uint are also NumPy version dependent on windows)
+        "int_",
+        "uint",
+        "intp",
+        "uintp"};

    m.def("print_dtypes", []() {
        py::list l;
--- a/tests/test_numpy_dtypes.py
+++ b/tests/test_numpy_dtypes.py
@ -3,6 +3,7 @@ import re
 import pytest

 import env  # noqa: F401
+from pybind11_tests import PYBIND11_NUMPY_1_ONLY
 from pybind11_tests import numpy_dtypes as m

 np = pytest.importorskip("numpy")
@ -172,13 +173,20 @@ def test_dtype(simple_dtype):
        np.zeros(1, m.trailing_padding_dtype())
    )

-    expected_chars = "bhilqBHILQefdgFDG?MmO"
-    assert m.test_dtype_kind() == list("iiiiiuuuuuffffcccbMmO")
+    expected_chars = list("bhilqBHILQefdgFDG?MmO")
+    # Note that int_ and uint size and mapping is NumPy version dependent:
+    expected_chars += [np.dtype(_).char for _ in ("int_", "uint", "intp", "uintp")]
+    assert m.test_dtype_kind() == list("iiiiiuuuuuffffcccbMmOiuiu")
    assert m.test_dtype_char_() == list(expected_chars)
    assert m.test_dtype_num() == [np.dtype(ch).num for ch in expected_chars]
    assert m.test_dtype_byteorder() == [np.dtype(ch).byteorder for ch in expected_chars]
    assert m.test_dtype_alignment() == [np.dtype(ch).alignment for ch in expected_chars]
-    assert m.test_dtype_flags() == [chr(np.dtype(ch).flags) for ch in expected_chars]
+    if not PYBIND11_NUMPY_1_ONLY:
+        assert m.test_dtype_flags() == [np.dtype(ch).flags for ch in expected_chars]
+    else:
+        assert m.test_dtype_flags() == [
+            chr(np.dtype(ch).flags) for ch in expected_chars
+        ]


 def test_recarray(simple_dtype, packed_dtype):