Raise codec errors when casting to std::string (#2903)

* Raise codec errors when casting to std::string

Allow the codec's own exception (e.g. UnicodeEncodeError) to propagate
instead of a generic RuntimeError when casting from py::str to std::string.

Python 2 allows UCS surrogates in UTF-8 conversion, so no error is raised there; the codec error is only raised on Python 3.

Signed-off-by: Shane Loretz <sloretz@openrobotics.org>
Signed-off-by: Shane Loretz <sloretz@osrfoundation.org>

* Attempt to fix py2 error

* Revert all unicode literals

* Fixed

Co-authored-by: Aaron Gokaslan <skylion.aaron@gmail.com>
This commit is contained in:
Shane Loretz 2021-07-13 21:21:55 -07:00 committed by GitHub
parent aca6c3ba37
commit 7331d381af
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 12 additions and 1 deletions

View File

@ -1015,7 +1015,7 @@ public:
if (PyUnicode_Check(m_ptr)) { if (PyUnicode_Check(m_ptr)) {
temp = reinterpret_steal<object>(PyUnicode_AsUTF8String(m_ptr)); temp = reinterpret_steal<object>(PyUnicode_AsUTF8String(m_ptr));
if (!temp) if (!temp)
pybind11_fail("Unable to extract string contents! (encoding issue)"); throw error_already_set();
} }
char *buffer = nullptr; char *buffer = nullptr;
ssize_t length = 0; ssize_t length = 0;

View File

@ -76,6 +76,9 @@ TEST_SUBMODULE(pytypes, m) {
m.def("str_from_object", [](const py::object& obj) { return py::str(obj); }); m.def("str_from_object", [](const py::object& obj) { return py::str(obj); });
m.def("repr_from_object", [](const py::object& obj) { return py::repr(obj); }); m.def("repr_from_object", [](const py::object& obj) { return py::repr(obj); });
m.def("str_from_handle", [](py::handle h) { return py::str(h); }); m.def("str_from_handle", [](py::handle h) { return py::str(h); });
m.def("str_from_string_from_str", [](const py::str& obj) {
return py::str(static_cast<std::string>(obj));
});
m.def("str_format", []() { m.def("str_format", []() {
auto s1 = "{} + {} = {}"_s.format(1, 2, 3); auto s1 = "{} + {} = {}"_s.format(1, 2, 3);

View File

@ -133,6 +133,14 @@ def test_str(doc):
else: else:
assert m.str_from_handle(malformed_utf8) == "b'\\x80'" assert m.str_from_handle(malformed_utf8) == "b'\\x80'"
assert m.str_from_string_from_str("this is a str") == "this is a str"
ucs_surrogates_str = u"\udcc3"
if env.PY2:
assert u"\udcc3" == m.str_from_string_from_str(ucs_surrogates_str)
else:
with pytest.raises(UnicodeEncodeError):
m.str_from_string_from_str(ucs_surrogates_str)
def test_bytes(doc): def test_bytes(doc):
assert m.bytes_from_string().decode() == "foo" assert m.bytes_from_string().decode() == "foo"