Fix passing in utf8 encoded strings with python 2

Passing utf8 encoded strings from python to a C++ function taking a
std::string was broken.  The previous version was trying to call
'PyUnicode_FromObject' on this data, which failed to convert the string
to unicode with the default ascii codec. Also this incurs an unnecessary
conversion to unicode for data that is immediately converted back to
utf8.

Fix by treating python 2 strings the same as python 3 bytes objects, and just
copying over the data if possible.
This commit is contained in:
Ben Frederickson 2017-06-06 12:31:41 -07:00 committed by Jason Rhinelander
parent 0365d491b5
commit 74b501cd85
2 changed files with 11 additions and 4 deletions

View File

@ -734,9 +734,14 @@ struct type_caster<std::basic_string<CharT, Traits, Allocator>, enable_if_t<is_s
#if PY_MAJOR_VERSION >= 3
return load_bytes(load_src);
#else
if (sizeof(CharT) == 1) {
return load_bytes(load_src);
}
// The below is a guaranteed failure in Python 3 when PyUnicode_Check returns false
if (!PYBIND11_BYTES_CHECK(load_src.ptr()))
return false;
temp = reinterpret_steal<object>(PyUnicode_FromObject(load_src.ptr()));
if (!temp) { PyErr_Clear(); return false; }
load_src = temp;
@ -780,9 +785,8 @@ private:
#endif
}
#if PY_MAJOR_VERSION >= 3
// In Python 3, when loading into a std::string or char*, accept a bytes object as-is (i.e.
// without any encoding/decoding attempt). For other C++ char sizes this is a no-op. Python 2,
// When loading into a std::string or char*, accept a bytes object as-is (i.e.
// without any encoding/decoding attempt). For other C++ char sizes this is a no-op.
// which supports loading a unicode from a str, doesn't take this path.
template <typename C = CharT>
bool load_bytes(enable_if_t<sizeof(C) == 1, handle> src) {
@ -798,9 +802,9 @@ private:
return false;
}
// Overload chosen when the character type is not exactly one byte wide: the
// argument is ignored and the load is always reported as unsuccessful.
template <typename C = CharT>
bool load_bytes(enable_if_t<sizeof(C) != 1, handle>) {
    return false;
}
#endif
};
// Type caster for C-style strings. We basically use a std::string type caster, but also add the

View File

@ -554,6 +554,9 @@ def test_bytes_to_string():
assert string_length(byte("a\x00b")) == 3
assert strlen(byte("a\x00b")) == 1 # C-string limitation
# passing in a utf8 encoded string should work
assert string_length(u'💩'.encode("utf8")) == 4
def test_builtins_cast_return_none():
"""Casters produced with PYBIND11_TYPE_CASTER() should convert nullptr to None"""