From 74b501cd859ae1f44b1f60de13022f3ccbf8aa90 Mon Sep 17 00:00:00 2001 From: Ben Frederickson Date: Tue, 6 Jun 2017 12:31:41 -0700 Subject: [PATCH] Fix passing in utf8 encoded strings with python 2 Passing utf8 encoded strings from python to a C++ function taking a std::string was broken. The previous version was trying to call 'PyUnicode_FromObject' on this data, which failed to convert the string to unicode with the default ascii codec. Also this incurs an unnecessary conversion to unicode for data that is immediately converted back to utf8. Fix by treating python 2 strings the same as python 3 bytes objects, and just copying over the data if possible. --- include/pybind11/cast.h | 12 ++++++++---- tests/test_python_types.py | 3 +++ 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/include/pybind11/cast.h b/include/pybind11/cast.h index 3934971a6..9e7b4dda9 100644 --- a/include/pybind11/cast.h +++ b/include/pybind11/cast.h @@ -734,9 +734,14 @@ struct type_caster, enable_if_t= 3 return load_bytes(load_src); #else + if (sizeof(CharT) == 1) { + return load_bytes(load_src); + } + // The below is a guaranteed failure in Python 3 when PyUnicode_Check returns false if (!PYBIND11_BYTES_CHECK(load_src.ptr())) return false; + temp = reinterpret_steal(PyUnicode_FromObject(load_src.ptr())); if (!temp) { PyErr_Clear(); return false; } load_src = temp; @@ -780,9 +785,8 @@ private: #endif } -#if PY_MAJOR_VERSION >= 3 - // In Python 3, when loading into a std::string or char*, accept a bytes object as-is (i.e. - // without any encoding/decoding attempt). For other C++ char sizes this is a no-op. Python 2, + // When loading into a std::string or char*, accept a bytes object as-is (i.e. + // without any encoding/decoding attempt). For other C++ char sizes this is a no-op. // which supports loading a unicode from a str, doesn't take this path. 
template bool load_bytes(enable_if_t src) { @@ -798,9 +802,9 @@ private: return false; } + template bool load_bytes(enable_if_t) { return false; } -#endif }; // Type caster for C-style strings. We basically use a std::string type caster, but also add the diff --git a/tests/test_python_types.py b/tests/test_python_types.py index 5e2761cb9..08bb3abe9 100644 --- a/tests/test_python_types.py +++ b/tests/test_python_types.py @@ -554,6 +554,9 @@ def test_bytes_to_string(): assert string_length(byte("a\x00b")) == 3 assert strlen(byte("a\x00b")) == 1 # C-string limitation + # passing in a utf8 encoded string should work + assert string_length(u'💩'.encode("utf8")) == 4 + def test_builtins_cast_return_none(): """Casters produced with PYBIND11_TYPE_CASTER() should convert nullptr to None"""