mirror of
https://github.com/pybind/pybind11.git
synced 2024-11-25 06:35:12 +00:00
Add C++20 char8_t/u8string support (#2026)
* Fix test build in C++20
* Add C++20 char8_t/u8string support
This commit is contained in:
parent
37d04abdee
commit
6e39b765b2
@ -32,6 +32,10 @@
|
|||||||
#include <string_view>
|
#include <string_view>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if defined(__cpp_lib_char8_t) && __cpp_lib_char8_t >= 201811L
|
||||||
|
# define PYBIND11_HAS_U8STRING
|
||||||
|
#endif
|
||||||
|
|
||||||
NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
|
NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
|
||||||
NAMESPACE_BEGIN(detail)
|
NAMESPACE_BEGIN(detail)
|
||||||
|
|
||||||
@ -988,6 +992,9 @@ public:
|
|||||||
|
|
||||||
template <typename CharT> using is_std_char_type = any_of<
|
template <typename CharT> using is_std_char_type = any_of<
|
||||||
std::is_same<CharT, char>, /* std::string */
|
std::is_same<CharT, char>, /* std::string */
|
||||||
|
#if defined(PYBIND11_HAS_U8STRING)
|
||||||
|
std::is_same<CharT, char8_t>, /* std::u8string */
|
||||||
|
#endif
|
||||||
std::is_same<CharT, char16_t>, /* std::u16string */
|
std::is_same<CharT, char16_t>, /* std::u16string */
|
||||||
std::is_same<CharT, char32_t>, /* std::u32string */
|
std::is_same<CharT, char32_t>, /* std::u32string */
|
||||||
std::is_same<CharT, wchar_t> /* std::wstring */
|
std::is_same<CharT, wchar_t> /* std::wstring */
|
||||||
@ -1191,6 +1198,9 @@ template <typename StringType, bool IsView = false> struct string_caster {
|
|||||||
// Simplify life by being able to assume standard char sizes (the standard only guarantees
|
// Simplify life by being able to assume standard char sizes (the standard only guarantees
|
||||||
// minimums, but Python requires exact sizes)
|
// minimums, but Python requires exact sizes)
|
||||||
static_assert(!std::is_same<CharT, char>::value || sizeof(CharT) == 1, "Unsupported char size != 1");
|
static_assert(!std::is_same<CharT, char>::value || sizeof(CharT) == 1, "Unsupported char size != 1");
|
||||||
|
#if defined(PYBIND11_HAS_U8STRING)
|
||||||
|
static_assert(!std::is_same<CharT, char8_t>::value || sizeof(CharT) == 1, "Unsupported char8_t size != 1");
|
||||||
|
#endif
|
||||||
static_assert(!std::is_same<CharT, char16_t>::value || sizeof(CharT) == 2, "Unsupported char16_t size != 2");
|
static_assert(!std::is_same<CharT, char16_t>::value || sizeof(CharT) == 2, "Unsupported char16_t size != 2");
|
||||||
static_assert(!std::is_same<CharT, char32_t>::value || sizeof(CharT) == 4, "Unsupported char32_t size != 4");
|
static_assert(!std::is_same<CharT, char32_t>::value || sizeof(CharT) == 4, "Unsupported char32_t size != 4");
|
||||||
// wchar_t can be either 16 bits (Windows) or 32 (everywhere else)
|
// wchar_t can be either 16 bits (Windows) or 32 (everywhere else)
|
||||||
@ -1209,7 +1219,7 @@ template <typename StringType, bool IsView = false> struct string_caster {
|
|||||||
#if PY_MAJOR_VERSION >= 3
|
#if PY_MAJOR_VERSION >= 3
|
||||||
return load_bytes(load_src);
|
return load_bytes(load_src);
|
||||||
#else
|
#else
|
||||||
if (sizeof(CharT) == 1) {
|
if (std::is_same<CharT, char>::value) {
|
||||||
return load_bytes(load_src);
|
return load_bytes(load_src);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1269,7 +1279,7 @@ private:
|
|||||||
// without any encoding/decoding attempt). For other C++ char sizes this is a no-op.
|
// without any encoding/decoding attempt). For other C++ char sizes this is a no-op.
|
||||||
// which supports loading a unicode from a str, doesn't take this path.
|
// which supports loading a unicode from a str, doesn't take this path.
|
||||||
template <typename C = CharT>
|
template <typename C = CharT>
|
||||||
bool load_bytes(enable_if_t<sizeof(C) == 1, handle> src) {
|
bool load_bytes(enable_if_t<std::is_same<C, char>::value, handle> src) {
|
||||||
if (PYBIND11_BYTES_CHECK(src.ptr())) {
|
if (PYBIND11_BYTES_CHECK(src.ptr())) {
|
||||||
// We were passed a Python 3 raw bytes; accept it into a std::string or char*
|
// We were passed a Python 3 raw bytes; accept it into a std::string or char*
|
||||||
// without any encoding attempt.
|
// without any encoding attempt.
|
||||||
@ -1284,7 +1294,7 @@ private:
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename C = CharT>
|
template <typename C = CharT>
|
||||||
bool load_bytes(enable_if_t<sizeof(C) != 1, handle>) { return false; }
|
bool load_bytes(enable_if_t<!std::is_same<C, char>::value, handle>) { return false; }
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename CharT, class Traits, class Allocator>
|
template <typename CharT, class Traits, class Allocator>
|
||||||
|
@ -30,7 +30,7 @@ TEST_SUBMODULE(builtin_casters, m) {
|
|||||||
else { wstr.push_back((wchar_t) mathbfA32); } // 𝐀, utf32
|
else { wstr.push_back((wchar_t) mathbfA32); } // 𝐀, utf32
|
||||||
wstr.push_back(0x7a); // z
|
wstr.push_back(0x7a); // z
|
||||||
|
|
||||||
m.def("good_utf8_string", []() { return std::string(u8"Say utf8\u203d \U0001f382 \U0001d400"); }); // Say utf8‽ 🎂 𝐀
|
m.def("good_utf8_string", []() { return std::string((const char*)u8"Say utf8\u203d \U0001f382 \U0001d400"); }); // Say utf8‽ 🎂 𝐀
|
||||||
m.def("good_utf16_string", [=]() { return std::u16string({ b16, ib16, cake16_1, cake16_2, mathbfA16_1, mathbfA16_2, z16 }); }); // b‽🎂𝐀z
|
m.def("good_utf16_string", [=]() { return std::u16string({ b16, ib16, cake16_1, cake16_2, mathbfA16_1, mathbfA16_2, z16 }); }); // b‽🎂𝐀z
|
||||||
m.def("good_utf32_string", [=]() { return std::u32string({ a32, mathbfA32, cake32, ib32, z32 }); }); // a𝐀🎂‽z
|
m.def("good_utf32_string", [=]() { return std::u32string({ a32, mathbfA32, cake32, ib32, z32 }); }); // a𝐀🎂‽z
|
||||||
m.def("good_wchar_string", [=]() { return wstr; }); // a⸘𝐀z
|
m.def("good_wchar_string", [=]() { return wstr; }); // a⸘𝐀z
|
||||||
@ -60,6 +60,18 @@ TEST_SUBMODULE(builtin_casters, m) {
|
|||||||
m.def("strlen", [](char *s) { return strlen(s); });
|
m.def("strlen", [](char *s) { return strlen(s); });
|
||||||
m.def("string_length", [](std::string s) { return s.length(); });
|
m.def("string_length", [](std::string s) { return s.length(); });
|
||||||
|
|
||||||
|
#ifdef PYBIND11_HAS_U8STRING
|
||||||
|
m.attr("has_u8string") = true;
|
||||||
|
m.def("good_utf8_u8string", []() { return std::u8string(u8"Say utf8\u203d \U0001f382 \U0001d400"); }); // Say utf8‽ 🎂 𝐀
|
||||||
|
m.def("bad_utf8_u8string", []() { return std::u8string((const char8_t*)"abc\xd0" "def"); });
|
||||||
|
|
||||||
|
m.def("u8_char8_Z", []() -> char8_t { return u8'Z'; });
|
||||||
|
|
||||||
|
// test_single_char_arguments
|
||||||
|
m.def("ord_char8", [](char8_t c) -> int { return static_cast<unsigned char>(c); });
|
||||||
|
m.def("ord_char8_lv", [](char8_t &c) -> int { return static_cast<unsigned char>(c); });
|
||||||
|
#endif
|
||||||
|
|
||||||
// test_string_view
|
// test_string_view
|
||||||
#ifdef PYBIND11_HAS_STRING_VIEW
|
#ifdef PYBIND11_HAS_STRING_VIEW
|
||||||
m.attr("has_string_view") = true;
|
m.attr("has_string_view") = true;
|
||||||
@ -69,9 +81,15 @@ TEST_SUBMODULE(builtin_casters, m) {
|
|||||||
m.def("string_view_chars", [](std::string_view s) { py::list l; for (auto c : s) l.append((std::uint8_t) c); return l; });
|
m.def("string_view_chars", [](std::string_view s) { py::list l; for (auto c : s) l.append((std::uint8_t) c); return l; });
|
||||||
m.def("string_view16_chars", [](std::u16string_view s) { py::list l; for (auto c : s) l.append((int) c); return l; });
|
m.def("string_view16_chars", [](std::u16string_view s) { py::list l; for (auto c : s) l.append((int) c); return l; });
|
||||||
m.def("string_view32_chars", [](std::u32string_view s) { py::list l; for (auto c : s) l.append((int) c); return l; });
|
m.def("string_view32_chars", [](std::u32string_view s) { py::list l; for (auto c : s) l.append((int) c); return l; });
|
||||||
m.def("string_view_return", []() { return std::string_view(u8"utf8 secret \U0001f382"); });
|
m.def("string_view_return", []() { return std::string_view((const char*)u8"utf8 secret \U0001f382"); });
|
||||||
m.def("string_view16_return", []() { return std::u16string_view(u"utf16 secret \U0001f382"); });
|
m.def("string_view16_return", []() { return std::u16string_view(u"utf16 secret \U0001f382"); });
|
||||||
m.def("string_view32_return", []() { return std::u32string_view(U"utf32 secret \U0001f382"); });
|
m.def("string_view32_return", []() { return std::u32string_view(U"utf32 secret \U0001f382"); });
|
||||||
|
|
||||||
|
# ifdef PYBIND11_HAS_U8STRING
|
||||||
|
m.def("string_view8_print", [](std::u8string_view s) { py::print(s, s.size()); });
|
||||||
|
m.def("string_view8_chars", [](std::u8string_view s) { py::list l; for (auto c : s) l.append((std::uint8_t) c); return l; });
|
||||||
|
m.def("string_view8_return", []() { return std::u8string_view(u8"utf8 secret \U0001f382"); });
|
||||||
|
# endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// test_integer_casting
|
// test_integer_casting
|
||||||
|
@ -15,6 +15,8 @@ def test_unicode_conversion():
|
|||||||
assert m.good_utf16_string() == u"b‽🎂𝐀z"
|
assert m.good_utf16_string() == u"b‽🎂𝐀z"
|
||||||
assert m.good_utf32_string() == u"a𝐀🎂‽z"
|
assert m.good_utf32_string() == u"a𝐀🎂‽z"
|
||||||
assert m.good_wchar_string() == u"a⸘𝐀z"
|
assert m.good_wchar_string() == u"a⸘𝐀z"
|
||||||
|
if hasattr(m, "has_u8string"):
|
||||||
|
assert m.good_utf8_u8string() == u"Say utf8‽ 🎂 𝐀"
|
||||||
|
|
||||||
with pytest.raises(UnicodeDecodeError):
|
with pytest.raises(UnicodeDecodeError):
|
||||||
m.bad_utf8_string()
|
m.bad_utf8_string()
|
||||||
@ -29,12 +31,17 @@ def test_unicode_conversion():
|
|||||||
if hasattr(m, "bad_wchar_string"):
|
if hasattr(m, "bad_wchar_string"):
|
||||||
with pytest.raises(UnicodeDecodeError):
|
with pytest.raises(UnicodeDecodeError):
|
||||||
m.bad_wchar_string()
|
m.bad_wchar_string()
|
||||||
|
if hasattr(m, "has_u8string"):
|
||||||
|
with pytest.raises(UnicodeDecodeError):
|
||||||
|
m.bad_utf8_u8string()
|
||||||
|
|
||||||
assert m.u8_Z() == 'Z'
|
assert m.u8_Z() == 'Z'
|
||||||
assert m.u8_eacute() == u'é'
|
assert m.u8_eacute() == u'é'
|
||||||
assert m.u16_ibang() == u'‽'
|
assert m.u16_ibang() == u'‽'
|
||||||
assert m.u32_mathbfA() == u'𝐀'
|
assert m.u32_mathbfA() == u'𝐀'
|
||||||
assert m.wchar_heart() == u'♥'
|
assert m.wchar_heart() == u'♥'
|
||||||
|
if hasattr(m, "has_u8string"):
|
||||||
|
assert m.u8_char8_Z() == 'Z'
|
||||||
|
|
||||||
|
|
||||||
def test_single_char_arguments():
|
def test_single_char_arguments():
|
||||||
@ -92,6 +99,17 @@ def test_single_char_arguments():
|
|||||||
assert m.ord_wchar(u'aa')
|
assert m.ord_wchar(u'aa')
|
||||||
assert str(excinfo.value) == toolong_message
|
assert str(excinfo.value) == toolong_message
|
||||||
|
|
||||||
|
if hasattr(m, "has_u8string"):
|
||||||
|
assert m.ord_char8(u'a') == 0x61 # simple ASCII
|
||||||
|
assert m.ord_char8_lv(u'b') == 0x62
|
||||||
|
assert m.ord_char8(u'é') == 0xE9 # requires 2 bytes in utf-8, but can be stuffed in a char
|
||||||
|
with pytest.raises(ValueError) as excinfo:
|
||||||
|
assert m.ord_char8(u'Ā') == 0x100 # requires 2 bytes, doesn't fit in a char
|
||||||
|
assert str(excinfo.value) == toobig_message(0x100)
|
||||||
|
with pytest.raises(ValueError) as excinfo:
|
||||||
|
assert m.ord_char8(u'ab')
|
||||||
|
assert str(excinfo.value) == toolong_message
|
||||||
|
|
||||||
|
|
||||||
def test_bytes_to_string():
|
def test_bytes_to_string():
|
||||||
"""Tests the ability to pass bytes to C++ string-accepting functions. Note that this is
|
"""Tests the ability to pass bytes to C++ string-accepting functions. Note that this is
|
||||||
@ -116,10 +134,15 @@ def test_string_view(capture):
|
|||||||
assert m.string_view_chars("Hi 🎂") == [72, 105, 32, 0xf0, 0x9f, 0x8e, 0x82]
|
assert m.string_view_chars("Hi 🎂") == [72, 105, 32, 0xf0, 0x9f, 0x8e, 0x82]
|
||||||
assert m.string_view16_chars("Hi 🎂") == [72, 105, 32, 0xd83c, 0xdf82]
|
assert m.string_view16_chars("Hi 🎂") == [72, 105, 32, 0xd83c, 0xdf82]
|
||||||
assert m.string_view32_chars("Hi 🎂") == [72, 105, 32, 127874]
|
assert m.string_view32_chars("Hi 🎂") == [72, 105, 32, 127874]
|
||||||
|
if hasattr(m, "has_u8string"):
|
||||||
|
assert m.string_view8_chars("Hi") == [72, 105]
|
||||||
|
assert m.string_view8_chars("Hi 🎂") == [72, 105, 32, 0xf0, 0x9f, 0x8e, 0x82]
|
||||||
|
|
||||||
assert m.string_view_return() == "utf8 secret 🎂"
|
assert m.string_view_return() == "utf8 secret 🎂"
|
||||||
assert m.string_view16_return() == "utf16 secret 🎂"
|
assert m.string_view16_return() == "utf16 secret 🎂"
|
||||||
assert m.string_view32_return() == "utf32 secret 🎂"
|
assert m.string_view32_return() == "utf32 secret 🎂"
|
||||||
|
if hasattr(m, "has_u8string"):
|
||||||
|
assert m.string_view8_return() == "utf8 secret 🎂"
|
||||||
|
|
||||||
with capture:
|
with capture:
|
||||||
m.string_view_print("Hi")
|
m.string_view_print("Hi")
|
||||||
@ -132,6 +155,14 @@ def test_string_view(capture):
|
|||||||
utf16 🎂 8
|
utf16 🎂 8
|
||||||
utf32 🎂 7
|
utf32 🎂 7
|
||||||
"""
|
"""
|
||||||
|
if hasattr(m, "has_u8string"):
|
||||||
|
with capture:
|
||||||
|
m.string_view8_print("Hi")
|
||||||
|
m.string_view8_print("utf8 🎂")
|
||||||
|
assert capture == """
|
||||||
|
Hi 2
|
||||||
|
utf8 🎂 9
|
||||||
|
"""
|
||||||
|
|
||||||
with capture:
|
with capture:
|
||||||
m.string_view_print("Hi, ascii")
|
m.string_view_print("Hi, ascii")
|
||||||
@ -144,6 +175,14 @@ def test_string_view(capture):
|
|||||||
Hi, utf16 🎂 12
|
Hi, utf16 🎂 12
|
||||||
Hi, utf32 🎂 11
|
Hi, utf32 🎂 11
|
||||||
"""
|
"""
|
||||||
|
if hasattr(m, "has_u8string"):
|
||||||
|
with capture:
|
||||||
|
m.string_view8_print("Hi, ascii")
|
||||||
|
m.string_view8_print("Hi, utf8 🎂")
|
||||||
|
assert capture == """
|
||||||
|
Hi, ascii 9
|
||||||
|
Hi, utf8 🎂 13
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
def test_integer_casting():
|
def test_integer_casting():
|
||||||
|
Loading…
Reference in New Issue
Block a user