diff --git a/include/pybind11/cast.h b/include/pybind11/cast.h index 309660066..b8638b9c2 100644 --- a/include/pybind11/cast.h +++ b/include/pybind11/cast.h @@ -641,7 +641,6 @@ struct type_caster, enable_if_t::value || sizeof(CharT) == 2 || sizeof(CharT) == 4, "Unsupported wchar_t size != 2/4"); static constexpr size_t UTF_N = 8 * sizeof(CharT); - static constexpr const char *encoding = UTF_N == 8 ? "utf8" : UTF_N == 16 ? "utf16" : "utf32"; using StringType = std::basic_string; @@ -666,7 +665,7 @@ struct type_caster, enable_if_t(PyUnicode_AsEncodedString( - load_src.ptr(), encoding, nullptr)); + load_src.ptr(), UTF_N == 8 ? "utf-8" : UTF_N == 16 ? "utf-16" : "utf-32", nullptr)); if (!utfNbytes) { PyErr_Clear(); return false; } const CharT *buffer = reinterpret_cast(PYBIND11_BYTES_AS_STRING(utfNbytes.ptr())); @@ -679,12 +678,28 @@ struct type_caster, enable_if_t(src.c_str()); ssize_t nbytes = ssize_t(src.size() * sizeof(CharT)); - handle s = PyUnicode_Decode(buffer, nbytes, encoding, nullptr); + handle s = decode_utfN(buffer, nbytes); if (!s) throw error_already_set(); return s; } PYBIND11_TYPE_CASTER(StringType, _(PYBIND11_STRING_NAME)); + +private: + /* Decodes nbytes bytes starting at buffer as UTF-8, UTF-16 or UTF-32 (selected by UTF_N); the caller treats a null result as a pending Python error. */ static handle decode_utfN(const char *buffer, ssize_t nbytes) { +#if !defined(PYPY_VERSION) + return + UTF_N == 8 ? PyUnicode_DecodeUTF8(buffer, nbytes, nullptr) : + UTF_N == 16 ? PyUnicode_DecodeUTF16(buffer, nbytes, nullptr, nullptr) : + PyUnicode_DecodeUTF32(buffer, nbytes, nullptr, nullptr); +#else + // PyPy seems to have multiple problems related to PyUnicode_UTF*: the UTF8 version + // sometimes segfaults for unknown reasons, while the UTF16 and 32 versions require + // non-const char * arguments, which is also a nuisance, so bypass the whole thing by just + // passing the encoding as a string value, which works properly: + return PyUnicode_Decode(buffer, nbytes, UTF_N == 8 ? "utf-8" : UTF_N == 16 ? "utf-16" : "utf-32", nullptr); +#endif + } }; // Type caster for C-style strings. 
We basically use a std::string type caster, but also add the