mirror of
https://github.com/pybind/pybind11.git
synced 2025-01-18 17:05:53 +00:00
Call PyUnicode_DecodeUTF* directly
Some versions of Python 2.7 reportedly (#713) have issues with PyUnicode_Decode being passed the encoding string, so skip it entirely by calling the matching PyUnicode_DecodeUTF* function directly. This is also slightly more efficient: it avoids having to check the encoding string and, for Python 2, avoids going through the unicode class's decode. (Python 3 fast-tracks this for all utf-{8,16,32} encodings; Python 2 only fast-tracked the exact string "utf-8", which we weren't passing anyway, since we had "utf8".) This doesn't work for PyPy, however, where the `PyUnicode_DecodeUTF{8,16,32}` functions appear rather broken: the UTF8 one segfaults, while the 16/32 ones require recasting into a non-const `char *` (and might segfault; I didn't get far enough to find out). Just avoid the whole thing by keeping the encoding-passed-as-string version for PyPy, which seems to work reliably.
This commit is contained in:
parent
e5456c2226
commit
ee9296395d
@ -641,7 +641,6 @@ struct type_caster<std::basic_string<CharT, Traits, Allocator>, enable_if_t<is_s
|
||||
static_assert(!std::is_same<CharT, wchar_t>::value || sizeof(CharT) == 2 || sizeof(CharT) == 4,
|
||||
"Unsupported wchar_t size != 2/4");
|
||||
static constexpr size_t UTF_N = 8 * sizeof(CharT);
|
||||
static constexpr const char *encoding = UTF_N == 8 ? "utf8" : UTF_N == 16 ? "utf16" : "utf32";
|
||||
|
||||
using StringType = std::basic_string<CharT, Traits, Allocator>;
|
||||
|
||||
@ -666,7 +665,7 @@ struct type_caster<std::basic_string<CharT, Traits, Allocator>, enable_if_t<is_s
|
||||
}
|
||||
|
||||
object utfNbytes = reinterpret_steal<object>(PyUnicode_AsEncodedString(
|
||||
load_src.ptr(), encoding, nullptr));
|
||||
load_src.ptr(), UTF_N == 8 ? "utf-8" : UTF_N == 16 ? "utf-16" : "utf-32", nullptr));
|
||||
if (!utfNbytes) { PyErr_Clear(); return false; }
|
||||
|
||||
const CharT *buffer = reinterpret_cast<const CharT *>(PYBIND11_BYTES_AS_STRING(utfNbytes.ptr()));
|
||||
@ -679,12 +678,28 @@ struct type_caster<std::basic_string<CharT, Traits, Allocator>, enable_if_t<is_s
|
||||
// Converts a C++ string (`src`) into a Python unicode object and returns it as a handle.
// The return-value policy and parent arguments are accepted but unused.
// Throws error_already_set when the decode call yields a null handle.
static handle cast(const StringType &src, return_value_policy /* policy */, handle /* parent */) {
|
||||
// Reinterpret the string's character storage as a plain byte pointer.
const char *buffer = reinterpret_cast<const char *>(src.c_str());
|
||||
// Length in bytes, not characters: element count times the width of one CharT.
ssize_t nbytes = ssize_t(src.size() * sizeof(CharT));
|
||||
// NOTE(review): this line and the `decode_utfN` line below both declare `s`. In this diff
// rendering they appear to be the removed/added pair of the change (old call first, new call
// second) -- confirm against the actual file.
handle s = PyUnicode_Decode(buffer, nbytes, encoding, nullptr);
|
||||
handle s = decode_utfN(buffer, nbytes);
|
||||
// A null handle means decoding failed; raise it as a C++ exception.
if (!s) throw error_already_set();
|
||||
return s;
|
||||
}
|
||||
|
||||
PYBIND11_TYPE_CASTER(StringType, _(PYBIND11_STRING_NAME));
|
||||
|
||||
private:
|
||||
// Decodes `nbytes` bytes starting at `buffer` into a Python unicode object, choosing the UTF-8,
// UTF-16 or UTF-32 decoder according to UTF_N. The result of the decode call is returned
// unchecked; `cast` tests the returned handle and throws if it is null.
static handle decode_utfN(const char *buffer, ssize_t nbytes) {
|
||||
#if !defined(PYPY_VERSION)
|
||||
// Non-PyPy builds: call the width-specific decoder directly instead of passing an encoding
// name. The trailing nullptr arguments are `errors` (and, for UTF-16/32, `byteorder`); see the
// CPython Unicode C-API documentation for what a null value selects.
return
|
||||
UTF_N == 8 ? PyUnicode_DecodeUTF8(buffer, nbytes, nullptr) :
|
||||
UTF_N == 16 ? PyUnicode_DecodeUTF16(buffer, nbytes, nullptr, nullptr) :
|
||||
PyUnicode_DecodeUTF32(buffer, nbytes, nullptr, nullptr);
|
||||
#else
|
||||
// PyPy seems to have multiple problems related to PyUnicode_DecodeUTF*: the UTF8 version
|
||||
// sometimes segfaults for unknown reasons, while the UTF16 and 32 versions require a
|
||||
// non-const char * argument, which is also a nuisance, so bypass the whole thing by just
|
||||
// passing the encoding as a string value, which works properly:
|
||||
return PyUnicode_Decode(buffer, nbytes, UTF_N == 8 ? "utf-8" : UTF_N == 16 ? "utf-16" : "utf-32", nullptr);
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
// Type caster for C-style strings. We basically use a std::string type caster, but also add the
|
||||
|
Loading…
Reference in New Issue
Block a user