Avoid use of temporary bytes object in string_caster for UTF-8 (#3257)

Fixes #3252
This commit is contained in:
Jeremy Maitin-Shepard 2021-09-09 12:56:10 -07:00 committed by GitHub
parent 4c6bee3514
commit 4d5ad03e1f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -377,6 +377,22 @@ template <typename StringType, bool IsView = false> struct string_caster {
#endif
}
#if PY_VERSION_HEX >= 0x03030000
// On Python >= 3.3, for UTF-8 we avoid the need for a temporary `bytes`
// object by using `PyUnicode_AsUTF8AndSize`.
if (PYBIND11_SILENCE_MSVC_C4127(UTF_N == 8)) {
Py_ssize_t size = -1;
const auto *buffer
= reinterpret_cast<const CharT *>(PyUnicode_AsUTF8AndSize(load_src.ptr(), &size));
if (!buffer) {
PyErr_Clear();
return false;
}
value = StringType(buffer, static_cast<size_t>(size));
return true;
}
#endif
auto utfNbytes = reinterpret_steal<object>(PyUnicode_AsEncodedString(
load_src.ptr(), UTF_N == 8 ? "utf-8" : UTF_N == 16 ? "utf-16" : "utf-32", nullptr));
if (!utfNbytes) { PyErr_Clear(); return false; }