mirror of
https://github.com/pybind/pybind11.git
synced 2024-11-11 08:03:55 +00:00
Unicode fixes and docs (#624)
* Propagate unicode conversion failure. If returning a std::string with invalid utf-8 data, we currently fail with an uninformative TypeError instead of propagating the UnicodeDecodeError that Python sets on failure. * Add support for u16/u32strings and literals. This adds support for char{16,32}_t character literals and the associated std::u{16,32}string types. It also folds the character/string conversion into a single type_caster template, since the type casters for string and wstring were mostly the same anyway. * Added too-long and too-big character conversion errors. With this commit, when casting to a single character, as opposed to a C-style string, we make sure the input wasn't a multi-character string or a single character with codepoint too large for the character type. This also changes the character cast op to CharT instead of CharT& (we need to be able to return a temporary decoded char value, but also because there's little gained by bothering with an lvalue return here). Finally it changes the char caster to 'has-a-string-caster' instead of 'is-a-string-caster' because, with the cast_op change above, there's nothing at all gained from inheritance. This also lets us remove the `success` from the string caster (which was only there for the char caster) into the char caster itself. (I also renamed it to 'none' and inverted its value to better reflect its purpose). The None -> nullptr loading also now takes place only under a `convert = true` load pass. Although it's unlikely that a function taking a char also has overloads that can take a None, it seems marginally more correct to treat it as a conversion. This commit simplifies the size assumptions about character sizes with static_asserts to back them up.
This commit is contained in:
parent
ada763b9ee
commit
11a337f16f
@ -94,14 +94,26 @@ as arguments and return values, refer to the section on binding :ref:`classes`.
|
||||
+------------------------------------+---------------------------+-------------------------------+
|
||||
| ``char`` | Character literal | :file:`pybind11/pybind11.h` |
|
||||
+------------------------------------+---------------------------+-------------------------------+
|
||||
| ``char16_t`` | UTF-16 character literal | :file:`pybind11/pybind11.h` |
|
||||
+------------------------------------+---------------------------+-------------------------------+
|
||||
| ``char32_t`` | UTF-32 character literal | :file:`pybind11/pybind11.h` |
|
||||
+------------------------------------+---------------------------+-------------------------------+
|
||||
| ``wchar_t`` | Wide character literal | :file:`pybind11/pybind11.h` |
|
||||
+------------------------------------+---------------------------+-------------------------------+
|
||||
| ``const char *`` | UTF-8 string literal | :file:`pybind11/pybind11.h` |
|
||||
+------------------------------------+---------------------------+-------------------------------+
|
||||
| ``const char16_t *`` | UTF-16 string literal | :file:`pybind11/pybind11.h` |
|
||||
+------------------------------------+---------------------------+-------------------------------+
|
||||
| ``const char32_t *`` | UTF-32 string literal | :file:`pybind11/pybind11.h` |
|
||||
+------------------------------------+---------------------------+-------------------------------+
|
||||
| ``const wchar_t *`` | Wide string literal | :file:`pybind11/pybind11.h` |
|
||||
+------------------------------------+---------------------------+-------------------------------+
|
||||
| ``std::string`` | STL dynamic UTF-8 string | :file:`pybind11/pybind11.h` |
|
||||
+------------------------------------+---------------------------+-------------------------------+
|
||||
| ``std::u16string`` | STL dynamic UTF-16 string | :file:`pybind11/pybind11.h` |
|
||||
+------------------------------------+---------------------------+-------------------------------+
|
||||
| ``std::u32string`` | STL dynamic UTF-32 string | :file:`pybind11/pybind11.h` |
|
||||
+------------------------------------+---------------------------+-------------------------------+
|
||||
| ``std::wstring`` | STL dynamic wide string | :file:`pybind11/pybind11.h` |
|
||||
+------------------------------------+---------------------------+-------------------------------+
|
||||
| ``std::pair<T1, T2>`` | Pair of two custom types | :file:`pybind11/pybind11.h` |
|
||||
|
@ -471,8 +471,15 @@ public:
|
||||
template <typename _T> using cast_op_type = pybind11::detail::cast_op_type<_T>
|
||||
|
||||
|
||||
// Trait that holds for exactly the four character types backing the standard string classes.
// It is used to keep these types out of the arithmetic caster and route them to the
// string/character casters instead.
template <typename CharT>
using is_std_char_type = any_of<
    std::is_same<CharT, char>,      // std::string
    std::is_same<CharT, char16_t>,  // std::u16string
    std::is_same<CharT, char32_t>,  // std::u32string
    std::is_same<CharT, wchar_t>    // std::wstring
>;
|
||||
|
||||
template <typename T>
|
||||
struct type_caster<T, enable_if_t<std::is_arithmetic<T>::value>> {
|
||||
struct type_caster<T, enable_if_t<std::is_arithmetic<T>::value && !is_std_char_type<T>::value>> {
|
||||
using _py_type_0 = conditional_t<sizeof(T) <= sizeof(long), long, long long>;
|
||||
using _py_type_1 = conditional_t<std::is_signed<T>::value, _py_type_0, typename std::make_unsigned<_py_type_0>::type>;
|
||||
using py_type = conditional_t<std::is_floating_point<T>::value, double, _py_type_1>;
|
||||
@ -617,122 +624,144 @@ public:
|
||||
PYBIND11_TYPE_CASTER(bool, _("bool"));
|
||||
};
|
||||
|
||||
template <> class type_caster<std::string> {
|
||||
public:
|
||||
bool load(handle src, bool) {
|
||||
object temp;
|
||||
handle load_src = src;
|
||||
if (!src) {
|
||||
return false;
|
||||
} else if (PyUnicode_Check(load_src.ptr())) {
|
||||
temp = reinterpret_steal<object>(PyUnicode_AsUTF8String(load_src.ptr()));
|
||||
if (!temp) { PyErr_Clear(); return false; } // UnicodeEncodeError
|
||||
load_src = temp;
|
||||
}
|
||||
char *buffer;
|
||||
ssize_t length;
|
||||
int err = PYBIND11_BYTES_AS_STRING_AND_SIZE(load_src.ptr(), &buffer, &length);
|
||||
if (err == -1) { PyErr_Clear(); return false; } // TypeError
|
||||
value = std::string(buffer, (size_t) length);
|
||||
success = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
static handle cast(const std::string &src, return_value_policy /* policy */, handle /* parent */) {
|
||||
return PyUnicode_FromStringAndSize(src.c_str(), (ssize_t) src.length());
|
||||
}
|
||||
|
||||
PYBIND11_TYPE_CASTER(std::string, _(PYBIND11_STRING_NAME));
|
||||
protected:
|
||||
bool success = false;
|
||||
};
|
||||
|
||||
template <> class type_caster<std::wstring> {
|
||||
public:
|
||||
// Helper class for UTF-{8,16,32} C++ stl strings:
|
||||
template <typename CharT, class Traits, class Allocator>
|
||||
struct type_caster<std::basic_string<CharT, Traits, Allocator>, enable_if_t<is_std_char_type<CharT>::value>> {
|
||||
// Simplify life by being able to assume standard char sizes (the standard only guarantees
|
||||
// minimums), but Python requires exact sizes
|
||||
static_assert(!std::is_same<CharT, char>::value || sizeof(CharT) == 1, "Unsupported char size != 1");
|
||||
static_assert(!std::is_same<CharT, char16_t>::value || sizeof(CharT) == 2, "Unsupported char16_t size != 2");
|
||||
static_assert(!std::is_same<CharT, char32_t>::value || sizeof(CharT) == 4, "Unsupported char32_t size != 4");
|
||||
// wchar_t can be either 16 bits (Windows) or 32 (everywhere else)
|
||||
static_assert(!std::is_same<CharT, wchar_t>::value || sizeof(CharT) == 2 || sizeof(CharT) == 4,
|
||||
"Unsupported wchar_t size != 2/4");
|
||||
static constexpr size_t UTF_N = 8 * sizeof(CharT);
|
||||
static constexpr const char *encoding = UTF_N == 8 ? "utf8" : UTF_N == 16 ? "utf16" : "utf32";
|
||||
|
||||
using StringType = std::basic_string<CharT, Traits, Allocator>;
|
||||
|
||||
bool load(handle src, bool) {
|
||||
#if PY_VERSION_MAJOR < 3
|
||||
object temp;
|
||||
#endif
|
||||
handle load_src = src;
|
||||
if (!src) {
|
||||
return false;
|
||||
} else if (!PyUnicode_Check(load_src.ptr())) {
|
||||
#if PY_VERSION_MAJOR >= 3
|
||||
return false;
|
||||
// The below is a guaranteed failure in Python 3 when PyUnicode_Check returns false
|
||||
#else
|
||||
temp = reinterpret_steal<object>(PyUnicode_FromObject(load_src.ptr()));
|
||||
if (!temp) { PyErr_Clear(); return false; }
|
||||
load_src = temp;
|
||||
}
|
||||
wchar_t *buffer = nullptr;
|
||||
ssize_t length = -1;
|
||||
#if PY_MAJOR_VERSION >= 3
|
||||
buffer = PyUnicode_AsWideCharString(load_src.ptr(), &length);
|
||||
#else
|
||||
temp = reinterpret_steal<object>(PyUnicode_AsEncodedString(
|
||||
load_src.ptr(), sizeof(wchar_t) == sizeof(short)
|
||||
? "utf16" : "utf32", nullptr));
|
||||
|
||||
if (temp) {
|
||||
int err = PYBIND11_BYTES_AS_STRING_AND_SIZE(temp.ptr(), (char **) &buffer, &length);
|
||||
if (err == -1) { buffer = nullptr; } // TypeError
|
||||
length = length / (ssize_t) sizeof(wchar_t) - 1; ++buffer; // Skip BOM
|
||||
}
|
||||
#endif
|
||||
if (!buffer) { PyErr_Clear(); return false; }
|
||||
value = std::wstring(buffer, (size_t) length);
|
||||
success = true;
|
||||
}
|
||||
|
||||
object utfNbytes = reinterpret_steal<object>(PyUnicode_AsEncodedString(
|
||||
load_src.ptr(), encoding, nullptr));
|
||||
if (!utfNbytes) { PyErr_Clear(); return false; }
|
||||
|
||||
const CharT *buffer = reinterpret_cast<const CharT *>(PYBIND11_BYTES_AS_STRING(utfNbytes.ptr()));
|
||||
size_t length = (size_t) PYBIND11_BYTES_SIZE(utfNbytes.ptr()) / sizeof(CharT);
|
||||
if (UTF_N > 8) { buffer++; length--; } // Skip BOM for UTF-16/32
|
||||
value = StringType(buffer, length);
|
||||
return true;
|
||||
}
|
||||
|
||||
static handle cast(const std::wstring &src, return_value_policy /* policy */, handle /* parent */) {
|
||||
return PyUnicode_FromWideChar(src.c_str(), (ssize_t) src.length());
|
||||
static handle cast(const StringType &src, return_value_policy /* policy */, handle /* parent */) {
|
||||
const char *buffer = reinterpret_cast<const char *>(src.c_str());
|
||||
ssize_t nbytes = ssize_t(src.size() * sizeof(CharT));
|
||||
handle s = PyUnicode_Decode(buffer, nbytes, encoding, nullptr);
|
||||
if (!s) throw error_already_set();
|
||||
return s;
|
||||
}
|
||||
|
||||
PYBIND11_TYPE_CASTER(std::wstring, _(PYBIND11_STRING_NAME));
|
||||
protected:
|
||||
bool success = false;
|
||||
PYBIND11_TYPE_CASTER(StringType, _(PYBIND11_STRING_NAME));
|
||||
};
|
||||
|
||||
template <> class type_caster<char> : public type_caster<std::string> {
|
||||
// Type caster for C-style strings. We basically use a std::string type caster, but also add the
|
||||
// ability to use None as a nullptr char* (which the string caster doesn't allow).
|
||||
template <typename CharT> struct type_caster<CharT, enable_if_t<is_std_char_type<CharT>::value>> {
|
||||
using StringType = std::basic_string<CharT>;
|
||||
using StringCaster = type_caster<StringType>;
|
||||
StringCaster str_caster;
|
||||
bool none = false;
|
||||
public:
|
||||
bool load(handle src, bool convert) {
|
||||
if (src.is_none()) return true;
|
||||
return type_caster<std::string>::load(src, convert);
|
||||
if (!src) return false;
|
||||
if (src.is_none()) {
|
||||
// Defer accepting None to other overloads (if we aren't in convert mode):
|
||||
if (!convert) return false;
|
||||
none = true;
|
||||
return true;
|
||||
}
|
||||
return str_caster.load(src, convert);
|
||||
}
|
||||
|
||||
static handle cast(const char *src, return_value_policy /* policy */, handle /* parent */) {
|
||||
if (src == nullptr) return none().inc_ref();
|
||||
return PyUnicode_FromString(src);
|
||||
static handle cast(const CharT *src, return_value_policy policy, handle parent) {
|
||||
if (src == nullptr) return pybind11::none().inc_ref();
|
||||
return StringCaster::cast(StringType(src), policy, parent);
|
||||
}
|
||||
|
||||
static handle cast(char src, return_value_policy /* policy */, handle /* parent */) {
|
||||
char str[2] = { src, '\0' };
|
||||
return PyUnicode_DecodeLatin1(str, 1, nullptr);
|
||||
static handle cast(CharT src, return_value_policy policy, handle parent) {
|
||||
if (std::is_same<char, CharT>::value) {
|
||||
handle s = PyUnicode_DecodeLatin1((const char *) &src, 1, nullptr);
|
||||
if (!s) throw error_already_set();
|
||||
return s;
|
||||
}
|
||||
return StringCaster::cast(StringType(1, src), policy, parent);
|
||||
}
|
||||
|
||||
operator char*() { return success ? const_cast<char *>(value.c_str()) : nullptr; }
|
||||
operator char&() { return value[0]; }
|
||||
|
||||
static PYBIND11_DESCR name() { return type_descr(_(PYBIND11_STRING_NAME)); }
|
||||
};
|
||||
|
||||
template <> class type_caster<wchar_t> : public type_caster<std::wstring> {
|
||||
public:
|
||||
bool load(handle src, bool convert) {
|
||||
if (src.is_none()) return true;
|
||||
return type_caster<std::wstring>::load(src, convert);
|
||||
}
|
||||
|
||||
static handle cast(const wchar_t *src, return_value_policy /* policy */, handle /* parent */) {
|
||||
if (src == nullptr) return none().inc_ref();
|
||||
return PyUnicode_FromWideChar(src, (ssize_t) wcslen(src));
|
||||
}
|
||||
|
||||
static handle cast(wchar_t src, return_value_policy /* policy */, handle /* parent */) {
|
||||
wchar_t wstr[2] = { src, L'\0' };
|
||||
return PyUnicode_FromWideChar(wstr, 1);
|
||||
}
|
||||
|
||||
operator wchar_t*() { return success ? const_cast<wchar_t *>(value.c_str()) : nullptr; }
|
||||
operator wchar_t&() { return value[0]; }
|
||||
operator CharT*() { return none ? nullptr : const_cast<CharT *>(static_cast<StringType &>(str_caster).c_str()); }
|
||||
operator CharT() {
|
||||
if (none)
|
||||
throw value_error("Cannot convert None to a character");
|
||||
|
||||
auto &value = static_cast<StringType &>(str_caster);
|
||||
size_t str_len = value.size();
|
||||
if (str_len == 0)
|
||||
throw value_error("Cannot convert empty string to a character");
|
||||
|
||||
// If we're in UTF-8 mode, we have two possible failures: one for a unicode character that
|
||||
// is too high, and one for multiple unicode characters (caught later), so we need to figure
|
||||
// out how long the first encoded character is in bytes to distinguish between these two
|
||||
// errors. We also allow want to allow unicode characters U+0080 through U+00FF, as those
|
||||
// can fit into a single char value.
|
||||
if (StringCaster::UTF_N == 8 && str_len > 1 && str_len <= 4) {
|
||||
unsigned char v0 = static_cast<unsigned char>(value[0]);
|
||||
size_t char0_bytes = !(v0 & 0x80) ? 1 : // low bits only: 0-127
|
||||
(v0 & 0xE0) == 0xC0 ? 2 : // 0b110xxxxx - start of 2-byte sequence
|
||||
(v0 & 0xF0) == 0xE0 ? 3 : // 0b1110xxxx - start of 3-byte sequence
|
||||
4; // 0b11110xxx - start of 4-byte sequence
|
||||
|
||||
if (char0_bytes == str_len) {
|
||||
// If we have a 128-255 value, we can decode it into a single char:
|
||||
if (char0_bytes == 2 && (v0 & 0xFC) == 0xC0) { // 0x110000xx 0x10xxxxxx
|
||||
return static_cast<CharT>(((v0 & 3) << 6) + (static_cast<unsigned char>(value[1]) & 0x3F));
|
||||
}
|
||||
// Otherwise we have a single character, but it's > U+00FF
|
||||
throw value_error("Character code point not in range(0x100)");
|
||||
}
|
||||
}
|
||||
|
||||
// UTF-16 is much easier: we can only have a surrogate pair for values above U+FFFF, thus a
|
||||
// surrogate pair with total length 2 instantly indicates a range error (but not a "your
|
||||
// string was too long" error).
|
||||
else if (StringCaster::UTF_N == 16 && str_len == 2) {
|
||||
char16_t v0 = static_cast<char16_t>(value[0]);
|
||||
if (v0 >= 0xD800 && v0 < 0xE000)
|
||||
throw value_error("Character code point not in range(0x10000)");
|
||||
}
|
||||
|
||||
if (str_len != 1)
|
||||
throw value_error("Expected a character, but multi-character string found");
|
||||
|
||||
return value[0];
|
||||
}
|
||||
|
||||
static PYBIND11_DESCR name() { return type_descr(_(PYBIND11_STRING_NAME)); }
|
||||
template <typename _T> using cast_op_type = typename std::remove_reference<pybind11::detail::cast_op_type<_T>>::type;
|
||||
};
|
||||
|
||||
template <typename T1, typename T2> class type_caster<std::pair<T1, T2>> {
|
||||
|
@ -111,6 +111,7 @@
|
||||
#define PYBIND11_BYTES_FROM_STRING_AND_SIZE PyBytes_FromStringAndSize
|
||||
#define PYBIND11_BYTES_AS_STRING_AND_SIZE PyBytes_AsStringAndSize
|
||||
#define PYBIND11_BYTES_AS_STRING PyBytes_AsString
|
||||
#define PYBIND11_BYTES_SIZE PyBytes_Size
|
||||
#define PYBIND11_LONG_CHECK(o) PyLong_Check(o)
|
||||
#define PYBIND11_LONG_AS_LONGLONG(o) PyLong_AsLongLong(o)
|
||||
#define PYBIND11_LONG_AS_UNSIGNED_LONGLONG(o) PyLong_AsUnsignedLongLong(o)
|
||||
@ -129,6 +130,7 @@
|
||||
#define PYBIND11_BYTES_FROM_STRING_AND_SIZE PyString_FromStringAndSize
|
||||
#define PYBIND11_BYTES_AS_STRING_AND_SIZE PyString_AsStringAndSize
|
||||
#define PYBIND11_BYTES_AS_STRING PyString_AsString
|
||||
#define PYBIND11_BYTES_SIZE PyString_Size
|
||||
#define PYBIND11_LONG_CHECK(o) (PyInt_Check(o) || PyLong_Check(o))
|
||||
#define PYBIND11_LONG_AS_LONGLONG(o) (PyInt_Check(o) ? (long long) PyLong_AsLong(o) : PyLong_AsLongLong(o))
|
||||
#define PYBIND11_LONG_AS_UNSIGNED_LONGLONG(o) (PyInt_Check(o) ? (unsigned long long) PyLong_AsUnsignedLong(o) : PyLong_AsUnsignedLongLong(o))
|
||||
|
@ -17,6 +17,11 @@
|
||||
# include <fcntl.h>
|
||||
#endif
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
# pragma warning(push)
|
||||
# pragma warning(disable: 4127) // warning C4127: Conditional expression is constant
|
||||
#endif
|
||||
|
||||
class ExamplePythonTypes {
|
||||
public:
|
||||
static ExamplePythonTypes *new_instance() {
|
||||
@ -426,4 +431,41 @@ test_initializer python_types([](py::module &m) {
|
||||
"l"_a=l
|
||||
);
|
||||
});
|
||||
|
||||
// Some test characters in utf16 and utf32 encodings. The last one (the 𝐀) contains a null byte
|
||||
char32_t a32 = 0x61 /*a*/, z32 = 0x7a /*z*/, ib32 = 0x203d /*‽*/, cake32 = 0x1f382 /*🎂*/, mathbfA32 = 0x1d400 /*𝐀*/;
|
||||
char16_t b16 = 0x62 /*b*/, z16 = 0x7a, ib16 = 0x203d, cake16_1 = 0xd83c, cake16_2 = 0xdf82, mathbfA16_1 = 0xd835, mathbfA16_2 = 0xdc00;
|
||||
std::wstring wstr;
|
||||
wstr.push_back(0x61); // a
|
||||
wstr.push_back(0x2e18); // ⸘
|
||||
if (sizeof(wchar_t) == 2) { wstr.push_back(mathbfA16_1); wstr.push_back(mathbfA16_2); } // 𝐀, utf16
|
||||
else { wstr.push_back((wchar_t) mathbfA32); } // 𝐀, utf32
|
||||
wstr.push_back(0x7a); // z
|
||||
|
||||
m.def("good_utf8_string", []() { return std::string(u8"Say utf8\u203d \U0001f382 \U0001d400"); }); // Say utf8‽ 🎂 𝐀
|
||||
m.def("good_utf16_string", [=]() { return std::u16string({ b16, ib16, cake16_1, cake16_2, mathbfA16_1, mathbfA16_2, z16 }); }); // b‽🎂𝐀z
|
||||
m.def("good_utf32_string", [=]() { return std::u32string({ a32, mathbfA32, cake32, ib32, z32 }); }); // a𝐀🎂‽z
|
||||
m.def("good_wchar_string", [=]() { return wstr; }); // a⸘𝐀z
|
||||
m.def("bad_utf8_string", []() { return std::string("abc\xd0" "def"); });
|
||||
m.def("bad_utf16_string", [=]() { return std::u16string({ b16, char16_t(0xd800), z16 }); });
|
||||
// Under Python 2.7, invalid unicode UTF-32 characters don't appear to trigger UnicodeDecodeError
|
||||
if (PY_MAJOR_VERSION >= 3)
|
||||
m.def("bad_utf32_string", [=]() { return std::u32string({ a32, char32_t(0xd800), z32 }); });
|
||||
if (PY_MAJOR_VERSION >= 3 || sizeof(wchar_t) == 2)
|
||||
m.def("bad_wchar_string", [=]() { return std::wstring({ wchar_t(0x61), wchar_t(0xd800) }); });
|
||||
m.def("u8_Z", []() -> char { return 'Z'; });
|
||||
m.def("u8_eacute", []() -> char { return '\xe9'; });
|
||||
m.def("u16_ibang", [=]() -> char16_t { return ib16; });
|
||||
m.def("u32_mathbfA", [=]() -> char32_t { return mathbfA32; });
|
||||
m.def("wchar_heart", []() -> wchar_t { return 0x2665; });
|
||||
|
||||
m.attr("wchar_size") = py::cast(sizeof(wchar_t));
|
||||
m.def("ord_char", [](char c) -> int { return static_cast<unsigned char>(c); });
|
||||
m.def("ord_char16", [](char16_t c) -> uint16_t { return c; });
|
||||
m.def("ord_char32", [](char32_t c) -> uint32_t { return c; });
|
||||
m.def("ord_wchar", [](wchar_t c) -> int { return c; });
|
||||
});
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
# pragma warning(pop)
|
||||
#endif
|
||||
|
@ -1,3 +1,4 @@
|
||||
# Python < 3 needs this: coding=utf-8
|
||||
import pytest
|
||||
|
||||
from pybind11_tests import ExamplePythonTypes, ConstructorStats, has_optional, has_exp_optional
|
||||
@ -410,3 +411,93 @@ def test_implicit_casting():
|
||||
'int_i1': 42, 'int_i2': 42, 'int_e': 43, 'int_p': 44
|
||||
}
|
||||
assert z['l'] == [3, 6, 9, 12, 15]
|
||||
|
||||
|
||||
def test_unicode_conversion():
    """Checks that returned strings/characters decode correctly and bad data raises."""
    import pybind11_tests
    from pybind11_tests import (good_utf8_string, bad_utf8_string,
                                good_utf16_string, bad_utf16_string,
                                good_utf32_string,  # bad_utf32_string,
                                good_wchar_string,  # bad_wchar_string,
                                u8_Z, u8_eacute, u16_ibang, u32_mathbfA, wchar_heart)

    # Well-formed strings in each encoding must round-trip to the expected text
    well_formed = [
        (good_utf8_string, u"Say utf8‽ 🎂 𝐀"),
        (good_utf16_string, u"b‽🎂𝐀z"),
        (good_utf32_string, u"a𝐀🎂‽z"),
        (good_wchar_string, u"a⸘𝐀z"),
    ]
    for producer, expected in well_formed:
        assert producer() == expected

    # Malformed data must surface Python's own decoding error
    for producer in (bad_utf8_string, bad_utf16_string):
        with pytest.raises(UnicodeDecodeError):
            producer()

    # The module only provides these where the bad input is actually rejected (invalid
    # 32-bit code units are not rejected under Python 2.7), so they are looked up by name.
    for optional_name in ("bad_utf32_string", "bad_wchar_string"):
        if hasattr(pybind11_tests, optional_name):
            with pytest.raises(UnicodeDecodeError):
                getattr(pybind11_tests, optional_name)()

    # Single characters of every supported character type
    single_chars = [
        (u8_Z, 'Z'),
        (u8_eacute, u'é'),
        (u16_ibang, u'‽'),
        (u32_mathbfA, u'𝐀'),
        (wchar_heart, u'♥'),
    ]
    for producer, expected in single_chars:
        assert producer() == expected
|
||||
|
||||
|
||||
def test_single_char_arguments():
    """Checks the errors raised when char-accepting functions get unsuitable strings."""
    from pybind11_tests import ord_char, ord_char16, ord_char32, ord_wchar, wchar_size

    def toobig_message(r):
        return "Character code point not in range({0:#x})".format(r)
    toolong_message = "Expected a character, but multi-character string found"

    def expect_rejected(func, text, message):
        # The call must raise ValueError carrying exactly `message`
        with pytest.raises(ValueError) as excinfo:
            func(text)
        assert str(excinfo.value) == message

    def expect_ords(func, cases):
        for text, code in cases:
            assert func(text) == code

    # Characters up to U+FFFF, which fit every character type wider than one byte
    two_byte_cases = [(u'a', 0x61), (u'é', 0xE9), (u'Ā', 0x100), (u'‽', 0x203d), (u'♥', 0x2665)]
    cake = u'🎂'  # U+1F382: needs a surrogate pair in UTF-16

    # char: ASCII, plus code points up to U+00FF even though those take 2 bytes in utf-8
    expect_ords(ord_char, [(u'a', 0x61), (u'é', 0xE9)])
    expect_rejected(ord_char, u'Ā', toobig_message(0x100))  # 2 bytes, doesn't fit in a char
    expect_rejected(ord_char, u'ab', toolong_message)

    # char16_t
    expect_ords(ord_char16, two_byte_cases)
    expect_rejected(ord_char16, cake, toobig_message(0x10000))
    expect_rejected(ord_char16, u'aa', toolong_message)

    # char32_t
    expect_ords(ord_char32, two_byte_cases + [(cake, 0x1F382)])
    expect_rejected(ord_char32, u'aa', toolong_message)

    # wchar_t behaves like char16_t or char32_t depending on its size
    expect_ords(ord_wchar, two_byte_cases)
    if wchar_size == 2:
        expect_rejected(ord_wchar, cake, toobig_message(0x10000))
    else:
        assert ord_wchar(cake) == 0x1F382
    expect_rejected(ord_wchar, u'aa', toolong_message)
|
||||
|
Loading…
Reference in New Issue
Block a user