[smart_holder] Add a new return value policy return_as_bytes (#3838)

* Add return_as_bytes policy

* Fix format

* Fix test failures

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Fix std::variant

* Resolve comments

* Note this policy experimental

* Add tests for `return_as_bytes` with `def_property`.

* Change comment for the new return_as_bytes enum to note that the policy is not available on master.

* Applying pr3838_sh.patch (exactly as used Google-internally since 2022-03-31).

* Add `case return_as_bytes` to `switch`es in detail/type_caster_base.h and eigen.h

Based on systematic review under https://github.com/pybind/pybind11/pull/3838#issuecomment-1094390333

* Add missing break (clang-tidy).

* More clang-tidy fixes (this time around clang-tidy was run interactively to pre-empt repeat trips through the CI).

* Underscore prefix: _return_as_bytes

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Ralf W. Grosse-Kunstleve <rwgk@google.com>
This commit is contained in:
Xiaofei Wang 2022-04-15 10:17:34 -07:00 committed by GitHub
parent 8532780526
commit 7064d43bc9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 107 additions and 4 deletions

View File

@ -447,11 +447,15 @@ struct string_caster {
return true;
}
static handle
cast(const StringType &src, return_value_policy /* policy */, handle /* parent */) {
static handle cast(const StringType &src, return_value_policy policy, handle /* parent */) {
const char *buffer = reinterpret_cast<const char *>(src.data());
auto nbytes = ssize_t(src.size() * sizeof(CharT));
handle s = decode_utfN(buffer, nbytes);
handle s;
if (policy == return_value_policy::_return_as_bytes) {
s = PyBytes_FromStringAndSize(buffer, nbytes);
} else {
s = decode_utfN(buffer, nbytes);
}
if (!s) {
throw error_already_set();
}

View File

@ -459,7 +459,19 @@ enum class return_value_policy : uint8_t {
collected while Python is still using the child. More advanced
variations of this scheme are also possible using combinations of
return_value_policy::reference and the keep_alive call policy */
reference_internal
reference_internal,
/** With this policy, C++ string types are converted to Python bytes,
instead of str. This is most useful when a C++ function returns a
container-like type with nested C++ string types, and `py::bytes` cannot
be applied easily. Dictionary like types might not work, for example,
`Dict[str, bytes]`, because this policy forces all string return values
to be converted to bytes. Note that this return_value_policy is not
concerned with lifetime/ownership semantics, like the other policies,
but the purpose of _return_as_bytes is certain to be orthogonal, because
C++ strings are always copied to Python `bytes` or `str`.
NOTE: This policy is NOT available on master. */
_return_as_bytes
};
PYBIND11_NAMESPACE_BEGIN(detail)

View File

@ -783,12 +783,15 @@ struct smart_holder_type_caster<std::shared_ptr<T>> : smart_holder_type_caster_l
break;
case return_value_policy::take_ownership:
throw cast_error("Invalid return_value_policy for shared_ptr (take_ownership).");
break;
case return_value_policy::copy:
case return_value_policy::move:
break;
case return_value_policy::reference:
throw cast_error("Invalid return_value_policy for shared_ptr (reference).");
break;
case return_value_policy::reference_internal:
case return_value_policy::_return_as_bytes:
break;
}
if (!src) {

View File

@ -651,6 +651,10 @@ public:
keep_alive_impl(inst, parent);
break;
case return_value_policy::_return_as_bytes:
pybind11_fail("return_value_policy::_return_as_bytes does not apply.");
break;
default:
throw cast_error("unhandled return_value_policy: should not happen!");
}

View File

@ -349,6 +349,9 @@ private:
return eigen_ref_array<props>(*src);
case return_value_policy::reference_internal:
return eigen_ref_array<props>(*src, parent);
case return_value_policy::_return_as_bytes:
pybind11_fail("return_value_policy::_return_as_bytes does not apply.");
break;
default:
throw cast_error("unhandled return_value_policy: should not happen!");
};

View File

@ -11,10 +11,17 @@
#include "pybind11_tests.h"
#include <utility>
struct ConstRefCasted {
int tag;
};
struct StringAttr {
explicit StringAttr(std::string v) : value(std::move(v)) {}
std::string value;
};
PYBIND11_NAMESPACE_BEGIN(pybind11)
PYBIND11_NAMESPACE_BEGIN(detail)
template <>
@ -378,4 +385,28 @@ TEST_SUBMODULE(builtin_casters, m) {
m.def("takes_const_ref", [](const ConstRefCasted &x) { return x.tag; });
m.def("takes_const_ref_wrap",
[](std::reference_wrapper<const ConstRefCasted> x) { return x.get().tag; });
// test return_value_policy::_return_as_bytes
m.def(
"invalid_utf8_string_as_bytes",
[]() { return std::string("\xba\xd0\xba\xd0"); },
py::return_value_policy::_return_as_bytes);
m.def("invalid_utf8_string_as_str", []() { return std::string("\xba\xd0\xba\xd0"); });
m.def(
"invalid_utf8_char_array_as_bytes",
[]() { return "\xba\xd0\xba\xd0"; },
py::return_value_policy::_return_as_bytes);
py::class_<StringAttr>(m, "StringAttr")
.def(py::init<std::string>())
.def_property(
"value",
py::cpp_function([](StringAttr &self) { return self.value; },
py::return_value_policy::_return_as_bytes),
py::cpp_function([](StringAttr &self, std::string v) { self.value = std::move(v); }));
#ifdef PYBIND11_HAS_STRING_VIEW
m.def(
"invalid_utf8_string_view_as_bytes",
[]() { return std::string_view("\xba\xd0\xba\xd0"); },
py::return_value_policy::_return_as_bytes);
#endif
}

View File

@ -524,3 +524,17 @@ def test_const_ref_caster():
assert m.takes_const_ptr(x) == 5
assert m.takes_const_ref(x) == 4
assert m.takes_const_ref_wrap(x) == 4
def test_return_as_bytes_policy():
expected_return_value = b"\xba\xd0\xba\xd0"
assert m.invalid_utf8_string_as_bytes() == expected_return_value
with pytest.raises(UnicodeDecodeError):
m.invalid_utf8_string_as_str()
assert m.invalid_utf8_char_array_as_bytes() == expected_return_value
obj = m.StringAttr(expected_return_value)
assert obj.value == expected_return_value
obj.value = "123"
assert obj.value == b"123"
if hasattr(m, "has_string_view"):
assert m.invalid_utf8_string_view_as_bytes() == expected_return_value

View File

@ -541,4 +541,25 @@ TEST_SUBMODULE(stl, m) {
[]() { return new std::vector<bool>(4513); },
// Without explicitly specifying `take_ownership`, this function leaks.
py::return_value_policy::take_ownership);
// test return_value_policy::_return_as_bytes
m.def(
"invalid_utf8_string_array_as_bytes",
[]() { return std::array<std::string, 1>{{"\xba\xd0\xba\xd0"}}; },
py::return_value_policy::_return_as_bytes);
m.def("invalid_utf8_string_array_as_str",
[]() { return std::array<std::string, 1>{{"\xba\xd0\xba\xd0"}}; });
#ifdef PYBIND11_HAS_OPTIONAL
m.def(
"invalid_utf8_optional_string_as_bytes",
[]() { return std::optional<std::string>{"\xba\xd0\xba\xd0"}; },
py::return_value_policy::_return_as_bytes);
#endif
#ifdef PYBIND11_TEST_VARIANT
m.def(
"invalid_utf8_variant_string_as_bytes",
[]() { return variant<std::string, int>{"\xba\xd0\xba\xd0"}; },
py::return_value_policy::_return_as_bytes);
#endif
}

View File

@ -371,3 +371,14 @@ def test_return_vector_bool_raw_ptr():
v = m.return_vector_bool_raw_ptr()
assert isinstance(v, list)
assert len(v) == 4513
def test_return_as_bytes_policy():
expected_return_value = b"\xba\xd0\xba\xd0"
assert m.invalid_utf8_string_array_as_bytes() == [expected_return_value]
with pytest.raises(UnicodeDecodeError):
m.invalid_utf8_string_array_as_str()
if hasattr(m, "has_optional"):
assert m.invalid_utf8_optional_string_as_bytes() == expected_return_value
if hasattr(m, "load_variant"):
assert m.invalid_utf8_variant_string_as_bytes() == expected_return_value