diff --git a/include/pybind11/cast.h b/include/pybind11/cast.h index 1eac3cba5..d3e0e4c5b 100644 --- a/include/pybind11/cast.h +++ b/include/pybind11/cast.h @@ -447,11 +447,15 @@ struct string_caster { return true; } - static handle - cast(const StringType &src, return_value_policy /* policy */, handle /* parent */) { + static handle cast(const StringType &src, return_value_policy policy, handle /* parent */) { const char *buffer = reinterpret_cast(src.data()); auto nbytes = ssize_t(src.size() * sizeof(CharT)); - handle s = decode_utfN(buffer, nbytes); + handle s; + if (policy == return_value_policy::_return_as_bytes) { + s = PyBytes_FromStringAndSize(buffer, nbytes); + } else { + s = decode_utfN(buffer, nbytes); + } if (!s) { throw error_already_set(); } diff --git a/include/pybind11/detail/common.h b/include/pybind11/detail/common.h index ea09bb3fd..67ba322f6 100644 --- a/include/pybind11/detail/common.h +++ b/include/pybind11/detail/common.h @@ -459,7 +459,19 @@ enum class return_value_policy : uint8_t { collected while Python is still using the child. More advanced variations of this scheme are also possible using combinations of return_value_policy::reference and the keep_alive call policy */ - reference_internal + reference_internal, + + /** With this policy, C++ string types are converted to Python bytes, + instead of str. This is most useful when a C++ function returns a + container-like type with nested C++ string types, and `py::bytes` cannot + be applied easily. Dictionary like types might not work, for example, + `Dict[str, bytes]`, because this policy forces all string return values + to be converted to bytes. Note that this return_value_policy is not + concerned with lifetime/ownership semantics, like the other policies, + but the purpose of _return_as_bytes is certain to be orthogonal, because + C++ strings are always copied to Python `bytes` or `str`. + NOTE: This policy is NOT available on master. */ + _return_as_bytes }; PYBIND11_NAMESPACE_BEGIN(detail) diff --git a/include/pybind11/detail/smart_holder_type_casters.h b/include/pybind11/detail/smart_holder_type_casters.h index 4773abf78..67bd237ad 100644 --- a/include/pybind11/detail/smart_holder_type_casters.h +++ b/include/pybind11/detail/smart_holder_type_casters.h @@ -783,12 +783,15 @@ struct smart_holder_type_caster> : smart_holder_type_caster_l break; case return_value_policy::take_ownership: throw cast_error("Invalid return_value_policy for shared_ptr (take_ownership)."); + break; case return_value_policy::copy: case return_value_policy::move: break; case return_value_policy::reference: throw cast_error("Invalid return_value_policy for shared_ptr (reference)."); + break; case return_value_policy::reference_internal: + case return_value_policy::_return_as_bytes: break; } if (!src) { diff --git a/include/pybind11/detail/type_caster_base.h b/include/pybind11/detail/type_caster_base.h index 5b2ec8e1e..9d8944218 100644 --- a/include/pybind11/detail/type_caster_base.h +++ b/include/pybind11/detail/type_caster_base.h @@ -651,6 +651,10 @@ public: keep_alive_impl(inst, parent); break; + case return_value_policy::_return_as_bytes: + pybind11_fail("return_value_policy::_return_as_bytes does not apply."); + break; + default: throw cast_error("unhandled return_value_policy: should not happen!"); } diff --git a/include/pybind11/eigen.h b/include/pybind11/eigen.h index f658168de..beb50266e 100644 --- a/include/pybind11/eigen.h +++ b/include/pybind11/eigen.h @@ -349,6 +349,9 @@ private: return eigen_ref_array(*src); case return_value_policy::reference_internal: return eigen_ref_array(*src, parent); + case return_value_policy::_return_as_bytes: + pybind11_fail("return_value_policy::_return_as_bytes does not apply."); + break; default: throw cast_error("unhandled return_value_policy: should not happen!"); }; diff --git a/tests/test_builtin_casters.cpp b/tests/test_builtin_casters.cpp index 7b67515e5..7e629ff8d 100644 --- a/tests/test_builtin_casters.cpp +++ b/tests/test_builtin_casters.cpp @@ -11,10 +11,17 @@ #include "pybind11_tests.h" +#include + struct ConstRefCasted { int tag; }; +struct StringAttr { + explicit StringAttr(std::string v) : value(std::move(v)) {} + std::string value; +}; + PYBIND11_NAMESPACE_BEGIN(pybind11) PYBIND11_NAMESPACE_BEGIN(detail) template <> @@ -378,4 +385,28 @@ TEST_SUBMODULE(builtin_casters, m) { m.def("takes_const_ref", [](const ConstRefCasted &x) { return x.tag; }); m.def("takes_const_ref_wrap", [](std::reference_wrapper x) { return x.get().tag; }); + + // test return_value_policy::_return_as_bytes + m.def( + "invalid_utf8_string_as_bytes", + []() { return std::string("\xba\xd0\xba\xd0"); }, + py::return_value_policy::_return_as_bytes); + m.def("invalid_utf8_string_as_str", []() { return std::string("\xba\xd0\xba\xd0"); }); + m.def( + "invalid_utf8_char_array_as_bytes", + []() { return "\xba\xd0\xba\xd0"; }, + py::return_value_policy::_return_as_bytes); + py::class_(m, "StringAttr") + .def(py::init()) + .def_property( + "value", + py::cpp_function([](StringAttr &self) { return self.value; }, + py::return_value_policy::_return_as_bytes), + py::cpp_function([](StringAttr &self, std::string v) { self.value = std::move(v); })); +#ifdef PYBIND11_HAS_STRING_VIEW + m.def( + "invalid_utf8_string_view_as_bytes", + []() { return std::string_view("\xba\xd0\xba\xd0"); }, + py::return_value_policy::_return_as_bytes); +#endif } diff --git a/tests/test_builtin_casters.py b/tests/test_builtin_casters.py index d38ae6802..b2a6ccb6b 100644 --- a/tests/test_builtin_casters.py +++ b/tests/test_builtin_casters.py @@ -524,3 +524,17 @@ def test_const_ref_caster(): assert m.takes_const_ptr(x) == 5 assert m.takes_const_ref(x) == 4 assert m.takes_const_ref_wrap(x) == 4 + + +def test_return_as_bytes_policy(): + expected_return_value = b"\xba\xd0\xba\xd0" + assert m.invalid_utf8_string_as_bytes() == expected_return_value + with pytest.raises(UnicodeDecodeError): + m.invalid_utf8_string_as_str() + assert m.invalid_utf8_char_array_as_bytes() == expected_return_value + obj = m.StringAttr(expected_return_value) + assert obj.value == expected_return_value + obj.value = "123" + assert obj.value == b"123" + if hasattr(m, "has_string_view"): + assert m.invalid_utf8_string_view_as_bytes() == expected_return_value diff --git a/tests/test_stl.cpp b/tests/test_stl.cpp index b56a91953..b8bfaaa0a 100644 --- a/tests/test_stl.cpp +++ b/tests/test_stl.cpp @@ -541,4 +541,25 @@ TEST_SUBMODULE(stl, m) { []() { return new std::vector(4513); }, // Without explicitly specifying `take_ownership`, this function leaks. py::return_value_policy::take_ownership); + + // test return_value_policy::_return_as_bytes + m.def( + "invalid_utf8_string_array_as_bytes", + []() { return std::array{{"\xba\xd0\xba\xd0"}}; }, + py::return_value_policy::_return_as_bytes); + m.def("invalid_utf8_string_array_as_str", + []() { return std::array{{"\xba\xd0\xba\xd0"}}; }); +#ifdef PYBIND11_HAS_OPTIONAL + m.def( + "invalid_utf8_optional_string_as_bytes", + []() { return std::optional{"\xba\xd0\xba\xd0"}; }, + py::return_value_policy::_return_as_bytes); +#endif + +#ifdef PYBIND11_TEST_VARIANT + m.def( + "invalid_utf8_variant_string_as_bytes", + []() { return variant{"\xba\xd0\xba\xd0"}; }, + py::return_value_policy::_return_as_bytes); +#endif } diff --git a/tests/test_stl.py b/tests/test_stl.py index 3dc55230a..382ab9b40 100644 --- a/tests/test_stl.py +++ b/tests/test_stl.py @@ -371,3 +371,14 @@ def test_return_vector_bool_raw_ptr(): v = m.return_vector_bool_raw_ptr() assert isinstance(v, list) assert len(v) == 4513 + + +def test_return_as_bytes_policy(): + expected_return_value = b"\xba\xd0\xba\xd0" + assert m.invalid_utf8_string_array_as_bytes() == [expected_return_value] + with pytest.raises(UnicodeDecodeError): + m.invalid_utf8_string_array_as_str() + if hasattr(m, "has_optional"): + assert m.invalid_utf8_optional_string_as_bytes() == expected_return_value + if hasattr(m, "load_variant"): + assert m.invalid_utf8_variant_string_as_bytes() == expected_return_value