From c3987b1aad8b3b8338655a2b9307c04a4737f1da Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Mon, 31 Oct 2022 09:18:05 -0700 Subject: [PATCH] fix: unicode surrogate character in Python exception message. (#4297) * Fix & test for issue #4288 (unicode surrogate character in Python exception message). * DRY `message_unavailable_exc` * fix: add a constexpr Co-authored-by: Aaron Gokaslan * style: pre-commit fixes Co-authored-by: Henry Schreiner Co-authored-by: Aaron Gokaslan Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- include/pybind11/pytypes.h | 22 ++++++++++++++++++++-- tests/test_exceptions.cpp | 5 ----- tests/test_exceptions.py | 14 ++++++++++++++ 3 files changed, 34 insertions(+), 7 deletions(-) diff --git a/include/pybind11/pytypes.h b/include/pybind11/pytypes.h index 80a2e2228..91cbaaba2 100644 --- a/include/pybind11/pytypes.h +++ b/include/pybind11/pytypes.h @@ -501,11 +501,29 @@ struct error_fetch_and_normalize { std::string message_error_string; if (m_value) { auto value_str = reinterpret_steal(PyObject_Str(m_value.ptr())); + constexpr const char *message_unavailable_exc + = ""; if (!value_str) { message_error_string = detail::error_string(); - result = ""; + result = message_unavailable_exc; } else { - result = value_str.cast(); + // Not using `value_str.cast()`, to not potentially throw a secondary + // error_already_set that will then result in process termination (#4288). + auto value_bytes = reinterpret_steal( + PyUnicode_AsEncodedString(value_str.ptr(), "utf-8", "backslashreplace")); + if (!value_bytes) { + message_error_string = detail::error_string(); + result = message_unavailable_exc; + } else { + char *buffer = nullptr; + Py_ssize_t length = 0; + if (PyBytes_AsStringAndSize(value_bytes.ptr(), &buffer, &length) == -1) { + message_error_string = detail::error_string(); + result = message_unavailable_exc; + } else { + result = std::string(buffer, static_cast(length)); + } + } } } else { result = ""; diff --git a/tests/test_exceptions.cpp b/tests/test_exceptions.cpp index 3583f22a5..f57e09506 100644 --- a/tests/test_exceptions.cpp +++ b/tests/test_exceptions.cpp @@ -105,11 +105,6 @@ struct PythonAlreadySetInDestructor { py::str s; }; -std::string error_already_set_what(const py::object &exc_type, const py::object &exc_value) { - PyErr_SetObject(exc_type.ptr(), exc_value.ptr()); - return py::error_already_set().what(); -} - TEST_SUBMODULE(exceptions, m) { m.def("throw_std_exception", []() { throw std::runtime_error("This exception was intentionally thrown."); }); diff --git a/tests/test_exceptions.py b/tests/test_exceptions.py index 70b6ffea9..7eb1a9d62 100644 --- a/tests/test_exceptions.py +++ b/tests/test_exceptions.py @@ -275,6 +275,20 @@ def test_local_translator(msg): assert msg(excinfo.value) == "this mod" +def test_error_already_set_message_with_unicode_surrogate(): # Issue #4288 + assert m.error_already_set_what(RuntimeError, "\ud927") == ( + "RuntimeError: \\ud927", + False, + ) + + +def test_error_already_set_message_with_malformed_utf8(): + assert m.error_already_set_what(RuntimeError, b"\x80") == ( + "RuntimeError: b'\\x80'", + False, + ) + + class FlakyException(Exception): def __init__(self, failure_point): if failure_point == "failure_point_init":