mirror of
https://github.com/pybind/pybind11.git
synced 2024-11-25 14:45:12 +00:00
fix: improve bytes to str decoding error handling (#4294)
* (bugfix): Improve bytes to str decoding error handling * regroup test * Further broaden tests * Add another decode error test * Fix bug in tests * Reviewer suggestions
This commit is contained in:
parent
fcb5554d9f
commit
b07223fa69
@ -1432,6 +1432,9 @@ public:
|
|||||||
str(const char *c, const SzType &n)
|
str(const char *c, const SzType &n)
|
||||||
: object(PyUnicode_FromStringAndSize(c, ssize_t_cast(n)), stolen_t{}) {
|
: object(PyUnicode_FromStringAndSize(c, ssize_t_cast(n)), stolen_t{}) {
|
||||||
if (!m_ptr) {
|
if (!m_ptr) {
|
||||||
|
if (PyErr_Occurred()) {
|
||||||
|
throw error_already_set();
|
||||||
|
}
|
||||||
pybind11_fail("Could not allocate string object!");
|
pybind11_fail("Could not allocate string object!");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1441,6 +1444,9 @@ public:
|
|||||||
// NOLINTNEXTLINE(google-explicit-constructor)
|
// NOLINTNEXTLINE(google-explicit-constructor)
|
||||||
str(const char *c = "") : object(PyUnicode_FromString(c), stolen_t{}) {
|
str(const char *c = "") : object(PyUnicode_FromString(c), stolen_t{}) {
|
||||||
if (!m_ptr) {
|
if (!m_ptr) {
|
||||||
|
if (PyErr_Occurred()) {
|
||||||
|
throw error_already_set();
|
||||||
|
}
|
||||||
pybind11_fail("Could not allocate string object!");
|
pybind11_fail("Could not allocate string object!");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1598,6 +1604,9 @@ inline str::str(const bytes &b) {
|
|||||||
}
|
}
|
||||||
auto obj = reinterpret_steal<object>(PyUnicode_FromStringAndSize(buffer, length));
|
auto obj = reinterpret_steal<object>(PyUnicode_FromStringAndSize(buffer, length));
|
||||||
if (!obj) {
|
if (!obj) {
|
||||||
|
if (PyErr_Occurred()) {
|
||||||
|
throw error_already_set();
|
||||||
|
}
|
||||||
pybind11_fail("Could not allocate string object!");
|
pybind11_fail("Could not allocate string object!");
|
||||||
}
|
}
|
||||||
m_ptr = obj.release().ptr();
|
m_ptr = obj.release().ptr();
|
||||||
|
@ -206,7 +206,12 @@ TEST_SUBMODULE(pytypes, m) {
|
|||||||
m.def("str_from_char_ssize_t", []() { return py::str{"red", (py::ssize_t) 3}; });
|
m.def("str_from_char_ssize_t", []() { return py::str{"red", (py::ssize_t) 3}; });
|
||||||
m.def("str_from_char_size_t", []() { return py::str{"blue", (py::size_t) 4}; });
|
m.def("str_from_char_size_t", []() { return py::str{"blue", (py::size_t) 4}; });
|
||||||
m.def("str_from_string", []() { return py::str(std::string("baz")); });
|
m.def("str_from_string", []() { return py::str(std::string("baz")); });
|
||||||
|
m.def("str_from_std_string_input", [](const std::string &stri) { return py::str(stri); });
|
||||||
|
m.def("str_from_cstr_input", [](const char *c_str) { return py::str(c_str); });
|
||||||
m.def("str_from_bytes", []() { return py::str(py::bytes("boo", 3)); });
|
m.def("str_from_bytes", []() { return py::str(py::bytes("boo", 3)); });
|
||||||
|
m.def("str_from_bytes_input",
|
||||||
|
[](const py::bytes &encoded_str) { return py::str(encoded_str); });
|
||||||
|
|
||||||
m.def("str_from_object", [](const py::object &obj) { return py::str(obj); });
|
m.def("str_from_object", [](const py::object &obj) { return py::str(obj); });
|
||||||
m.def("repr_from_object", [](const py::object &obj) { return py::repr(obj); });
|
m.def("repr_from_object", [](const py::object &obj) { return py::repr(obj); });
|
||||||
m.def("str_from_handle", [](py::handle h) { return py::str(h); });
|
m.def("str_from_handle", [](py::handle h) { return py::str(h); });
|
||||||
|
@ -244,6 +244,20 @@ def test_str(doc):
|
|||||||
m.str_from_string_from_str(ucs_surrogates_str)
|
m.str_from_string_from_str(ucs_surrogates_str)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
"func",
|
||||||
|
[
|
||||||
|
m.str_from_bytes_input,
|
||||||
|
m.str_from_cstr_input,
|
||||||
|
m.str_from_std_string_input,
|
||||||
|
],
|
||||||
|
)
|
||||||
|
def test_surrogate_pairs_unicode_error(func):
|
||||||
|
input_str = "\ud83d\ude4f".encode("utf-8", "surrogatepass")
|
||||||
|
with pytest.raises(UnicodeDecodeError):
|
||||||
|
func(input_str)
|
||||||
|
|
||||||
|
|
||||||
def test_bytes(doc):
|
def test_bytes(doc):
|
||||||
assert m.bytes_from_char_ssize_t().decode() == "green"
|
assert m.bytes_from_char_ssize_t().decode() == "green"
|
||||||
assert m.bytes_from_char_size_t().decode() == "purple"
|
assert m.bytes_from_char_size_t().decode() == "purple"
|
||||||
|
Loading…
Reference in New Issue
Block a user