diff --git a/include/pybind11/pytypes.h b/include/pybind11/pytypes.h index 0fca32a4e..b483fb323 100644 --- a/include/pybind11/pytypes.h +++ b/include/pybind11/pytypes.h @@ -1015,7 +1015,7 @@ public: if (PyUnicode_Check(m_ptr)) { temp = reinterpret_steal<object>(PyUnicode_AsUTF8String(m_ptr)); if (!temp) - pybind11_fail("Unable to extract string contents! (encoding issue)"); + throw error_already_set(); } char *buffer = nullptr; ssize_t length = 0; diff --git a/tests/test_pytypes.cpp b/tests/test_pytypes.cpp index 6ed59aad2..d70536d3f 100644 --- a/tests/test_pytypes.cpp +++ b/tests/test_pytypes.cpp @@ -76,6 +76,9 @@ TEST_SUBMODULE(pytypes, m) { m.def("str_from_object", [](const py::object& obj) { return py::str(obj); }); m.def("repr_from_object", [](const py::object& obj) { return py::repr(obj); }); m.def("str_from_handle", [](py::handle h) { return py::str(h); }); + m.def("str_from_string_from_str", [](const py::str& obj) { + return py::str(static_cast<std::string>(obj)); + }); m.def("str_format", []() { auto s1 = "{} + {} = {}"_s.format(1, 2, 3); diff --git a/tests/test_pytypes.py b/tests/test_pytypes.py index 25e9f6dff..66d6d30a0 100644 --- a/tests/test_pytypes.py +++ b/tests/test_pytypes.py @@ -133,6 +133,14 @@ def test_str(doc): else: assert m.str_from_handle(malformed_utf8) == "b'\\x80'" + assert m.str_from_string_from_str("this is a str") == "this is a str" + ucs_surrogates_str = u"\udcc3" + if env.PY2: + assert u"\udcc3" == m.str_from_string_from_str(ucs_surrogates_str) + else: + with pytest.raises(UnicodeEncodeError): + m.str_from_string_from_str(ucs_surrogates_str) + def test_bytes(doc): assert m.bytes_from_string().decode() == "foo"