fix: improve bytes to str decoding error handling (#4294)

* (bugfix): Improve bytes to str decoding error handling

* regroup test

* Further broaden tests

* Add another decode error test

* Fix bug in tests

* Reviewer suggestions
This commit is contained in:
Aaron Gokaslan 2022-10-29 11:12:24 -04:00 committed by GitHub
parent fcb5554d9f
commit b07223fa69
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 28 additions and 0 deletions

View File

@ -1432,6 +1432,9 @@ public:
str(const char *c, const SzType &n)
: object(PyUnicode_FromStringAndSize(c, ssize_t_cast(n)), stolen_t{}) {
if (!m_ptr) {
if (PyErr_Occurred()) {
throw error_already_set();
}
pybind11_fail("Could not allocate string object!");
}
}
@ -1441,6 +1444,9 @@ public:
// NOLINTNEXTLINE(google-explicit-constructor)
str(const char *c = "") : object(PyUnicode_FromString(c), stolen_t{}) {
if (!m_ptr) {
if (PyErr_Occurred()) {
throw error_already_set();
}
pybind11_fail("Could not allocate string object!");
}
}
@ -1598,6 +1604,9 @@ inline str::str(const bytes &b) {
}
auto obj = reinterpret_steal<object>(PyUnicode_FromStringAndSize(buffer, length));
if (!obj) {
if (PyErr_Occurred()) {
throw error_already_set();
}
pybind11_fail("Could not allocate string object!");
}
m_ptr = obj.release().ptr();

View File

@ -206,7 +206,12 @@ TEST_SUBMODULE(pytypes, m) {
m.def("str_from_char_ssize_t", []() { return py::str{"red", (py::ssize_t) 3}; });
m.def("str_from_char_size_t", []() { return py::str{"blue", (py::size_t) 4}; });
m.def("str_from_string", []() { return py::str(std::string("baz")); });
m.def("str_from_std_string_input", [](const std::string &stri) { return py::str(stri); });
m.def("str_from_cstr_input", [](const char *c_str) { return py::str(c_str); });
m.def("str_from_bytes", []() { return py::str(py::bytes("boo", 3)); });
m.def("str_from_bytes_input",
[](const py::bytes &encoded_str) { return py::str(encoded_str); });
m.def("str_from_object", [](const py::object &obj) { return py::str(obj); });
m.def("repr_from_object", [](const py::object &obj) { return py::repr(obj); });
m.def("str_from_handle", [](py::handle h) { return py::str(h); });

View File

@ -244,6 +244,20 @@ def test_str(doc):
m.str_from_string_from_str(ucs_surrogates_str)
@pytest.mark.parametrize(
"func",
[
m.str_from_bytes_input,
m.str_from_cstr_input,
m.str_from_std_string_input,
],
)
def test_surrogate_pairs_unicode_error(func):
input_str = "\ud83d\ude4f".encode("utf-8", "surrogatepass")
with pytest.raises(UnicodeDecodeError):
func(input_str)
def test_bytes(doc):
assert m.bytes_from_char_ssize_t().decode() == "green"
assert m.bytes_from_char_size_t().decode() == "purple"