[smart_holder] Add a new return value policy return_as_bytes (#3838)

* Add return_as_bytes policy

* Fix format

* Fix test failures

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Fix std::variant

* Resolve comments

* Note this policy experimental

* Add tests for `return_as_bytes` with `def_property`.

* Change comment for the new return_as_bytes enum to note that the policy is not available on master.

* Applying pr3838_sh.patch (exactly as used Google-internally since 2022-03-31).

* Add `case return_as_bytes` to `switch`es in detail/type_caster_base.h and eigen.h

Based on systematic review under https://github.com/pybind/pybind11/pull/3838#issuecomment-1094390333

* Add missing break (clang-tidy).

* More clang-tidy fixes (this time around clang-tidy was run interactively to pre-empt repeat trips through the CI).

* Underscore prefix: _return_as_bytes

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Ralf W. Grosse-Kunstleve <rwgk@google.com>
This commit is contained in:
Xiaofei Wang 2022-04-15 10:17:34 -07:00 committed by GitHub
parent 8532780526
commit 7064d43bc9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 107 additions and 4 deletions

View File

@ -447,11 +447,15 @@ struct string_caster {
return true; return true;
} }
static handle static handle cast(const StringType &src, return_value_policy policy, handle /* parent */) {
cast(const StringType &src, return_value_policy /* policy */, handle /* parent */) {
const char *buffer = reinterpret_cast<const char *>(src.data()); const char *buffer = reinterpret_cast<const char *>(src.data());
auto nbytes = ssize_t(src.size() * sizeof(CharT)); auto nbytes = ssize_t(src.size() * sizeof(CharT));
handle s = decode_utfN(buffer, nbytes); handle s;
if (policy == return_value_policy::_return_as_bytes) {
s = PyBytes_FromStringAndSize(buffer, nbytes);
} else {
s = decode_utfN(buffer, nbytes);
}
if (!s) { if (!s) {
throw error_already_set(); throw error_already_set();
} }

View File

@ -459,7 +459,19 @@ enum class return_value_policy : uint8_t {
collected while Python is still using the child. More advanced collected while Python is still using the child. More advanced
variations of this scheme are also possible using combinations of variations of this scheme are also possible using combinations of
return_value_policy::reference and the keep_alive call policy */ return_value_policy::reference and the keep_alive call policy */
reference_internal reference_internal,
/** With this policy, C++ string types are converted to Python bytes,
instead of str. This is most useful when a C++ function returns a
container-like type with nested C++ string types, and `py::bytes` cannot
be applied easily. Dictionary like types might not work, for example,
`Dict[str, bytes]`, because this policy forces all string return values
to be converted to bytes. Note that this return_value_policy is not
concerned with lifetime/ownership semantics, like the other policies,
but the purpose of _return_as_bytes is certain to be orthogonal, because
C++ strings are always copied to Python `bytes` or `str`.
NOTE: This policy is NOT available on master. */
_return_as_bytes
}; };
PYBIND11_NAMESPACE_BEGIN(detail) PYBIND11_NAMESPACE_BEGIN(detail)

View File

@ -783,12 +783,15 @@ struct smart_holder_type_caster<std::shared_ptr<T>> : smart_holder_type_caster_l
break; break;
case return_value_policy::take_ownership: case return_value_policy::take_ownership:
throw cast_error("Invalid return_value_policy for shared_ptr (take_ownership)."); throw cast_error("Invalid return_value_policy for shared_ptr (take_ownership).");
break;
case return_value_policy::copy: case return_value_policy::copy:
case return_value_policy::move: case return_value_policy::move:
break; break;
case return_value_policy::reference: case return_value_policy::reference:
throw cast_error("Invalid return_value_policy for shared_ptr (reference)."); throw cast_error("Invalid return_value_policy for shared_ptr (reference).");
break;
case return_value_policy::reference_internal: case return_value_policy::reference_internal:
case return_value_policy::_return_as_bytes:
break; break;
} }
if (!src) { if (!src) {

View File

@ -651,6 +651,10 @@ public:
keep_alive_impl(inst, parent); keep_alive_impl(inst, parent);
break; break;
case return_value_policy::_return_as_bytes:
pybind11_fail("return_value_policy::_return_as_bytes does not apply.");
break;
default: default:
throw cast_error("unhandled return_value_policy: should not happen!"); throw cast_error("unhandled return_value_policy: should not happen!");
} }

View File

@ -349,6 +349,9 @@ private:
return eigen_ref_array<props>(*src); return eigen_ref_array<props>(*src);
case return_value_policy::reference_internal: case return_value_policy::reference_internal:
return eigen_ref_array<props>(*src, parent); return eigen_ref_array<props>(*src, parent);
case return_value_policy::_return_as_bytes:
pybind11_fail("return_value_policy::_return_as_bytes does not apply.");
break;
default: default:
throw cast_error("unhandled return_value_policy: should not happen!"); throw cast_error("unhandled return_value_policy: should not happen!");
}; };

View File

@ -11,10 +11,17 @@
#include "pybind11_tests.h" #include "pybind11_tests.h"
#include <utility>
struct ConstRefCasted { struct ConstRefCasted {
int tag; int tag;
}; };
struct StringAttr {
explicit StringAttr(std::string v) : value(std::move(v)) {}
std::string value;
};
PYBIND11_NAMESPACE_BEGIN(pybind11) PYBIND11_NAMESPACE_BEGIN(pybind11)
PYBIND11_NAMESPACE_BEGIN(detail) PYBIND11_NAMESPACE_BEGIN(detail)
template <> template <>
@ -378,4 +385,28 @@ TEST_SUBMODULE(builtin_casters, m) {
m.def("takes_const_ref", [](const ConstRefCasted &x) { return x.tag; }); m.def("takes_const_ref", [](const ConstRefCasted &x) { return x.tag; });
m.def("takes_const_ref_wrap", m.def("takes_const_ref_wrap",
[](std::reference_wrapper<const ConstRefCasted> x) { return x.get().tag; }); [](std::reference_wrapper<const ConstRefCasted> x) { return x.get().tag; });
// test return_value_policy::_return_as_bytes
m.def(
"invalid_utf8_string_as_bytes",
[]() { return std::string("\xba\xd0\xba\xd0"); },
py::return_value_policy::_return_as_bytes);
m.def("invalid_utf8_string_as_str", []() { return std::string("\xba\xd0\xba\xd0"); });
m.def(
"invalid_utf8_char_array_as_bytes",
[]() { return "\xba\xd0\xba\xd0"; },
py::return_value_policy::_return_as_bytes);
py::class_<StringAttr>(m, "StringAttr")
.def(py::init<std::string>())
.def_property(
"value",
py::cpp_function([](StringAttr &self) { return self.value; },
py::return_value_policy::_return_as_bytes),
py::cpp_function([](StringAttr &self, std::string v) { self.value = std::move(v); }));
#ifdef PYBIND11_HAS_STRING_VIEW
m.def(
"invalid_utf8_string_view_as_bytes",
[]() { return std::string_view("\xba\xd0\xba\xd0"); },
py::return_value_policy::_return_as_bytes);
#endif
} }

View File

@ -524,3 +524,17 @@ def test_const_ref_caster():
assert m.takes_const_ptr(x) == 5 assert m.takes_const_ptr(x) == 5
assert m.takes_const_ref(x) == 4 assert m.takes_const_ref(x) == 4
assert m.takes_const_ref_wrap(x) == 4 assert m.takes_const_ref_wrap(x) == 4
def test_return_as_bytes_policy():
expected_return_value = b"\xba\xd0\xba\xd0"
assert m.invalid_utf8_string_as_bytes() == expected_return_value
with pytest.raises(UnicodeDecodeError):
m.invalid_utf8_string_as_str()
assert m.invalid_utf8_char_array_as_bytes() == expected_return_value
obj = m.StringAttr(expected_return_value)
assert obj.value == expected_return_value
obj.value = "123"
assert obj.value == b"123"
if hasattr(m, "has_string_view"):
assert m.invalid_utf8_string_view_as_bytes() == expected_return_value

View File

@ -541,4 +541,25 @@ TEST_SUBMODULE(stl, m) {
[]() { return new std::vector<bool>(4513); }, []() { return new std::vector<bool>(4513); },
// Without explicitly specifying `take_ownership`, this function leaks. // Without explicitly specifying `take_ownership`, this function leaks.
py::return_value_policy::take_ownership); py::return_value_policy::take_ownership);
// test return_value_policy::_return_as_bytes
m.def(
"invalid_utf8_string_array_as_bytes",
[]() { return std::array<std::string, 1>{{"\xba\xd0\xba\xd0"}}; },
py::return_value_policy::_return_as_bytes);
m.def("invalid_utf8_string_array_as_str",
[]() { return std::array<std::string, 1>{{"\xba\xd0\xba\xd0"}}; });
#ifdef PYBIND11_HAS_OPTIONAL
m.def(
"invalid_utf8_optional_string_as_bytes",
[]() { return std::optional<std::string>{"\xba\xd0\xba\xd0"}; },
py::return_value_policy::_return_as_bytes);
#endif
#ifdef PYBIND11_TEST_VARIANT
m.def(
"invalid_utf8_variant_string_as_bytes",
[]() { return variant<std::string, int>{"\xba\xd0\xba\xd0"}; },
py::return_value_policy::_return_as_bytes);
#endif
} }

View File

@ -371,3 +371,14 @@ def test_return_vector_bool_raw_ptr():
v = m.return_vector_bool_raw_ptr() v = m.return_vector_bool_raw_ptr()
assert isinstance(v, list) assert isinstance(v, list)
assert len(v) == 4513 assert len(v) == 4513
def test_return_as_bytes_policy():
expected_return_value = b"\xba\xd0\xba\xd0"
assert m.invalid_utf8_string_array_as_bytes() == [expected_return_value]
with pytest.raises(UnicodeDecodeError):
m.invalid_utf8_string_array_as_str()
if hasattr(m, "has_optional"):
assert m.invalid_utf8_optional_string_as_bytes() == expected_return_value
if hasattr(m, "load_variant"):
assert m.invalid_utf8_variant_string_as_bytes() == expected_return_value