mirror of
https://github.com/pybind/pybind11.git
synced 2024-11-22 05:05:11 +00:00
Expand std::string_view support to str, bytes, memoryview (#3521)
* Expand string_view support to str, bytes, memoryview
  1. Allows constructing a str or bytes implicitly from a string_view; this is essentially a small shortcut allowing a caller to write `py::bytes{sv}` rather than `py::bytes{sv.data(), sv.size()}`.
  2. Allows implicit conversion *to* string_view from py::bytes -- this saves a fair bit more, as currently there is no simple way to get such a view of the bytes without copying it (or resorting to Python API calls). (This is not done for `str` because when the str contains unicode we have to allocate a temporary, so there may be no string data that we can properly view without owning it.)
  3. Allows `memoryview::from_memory` to accept a string_view. As with the other from_memory calls, it's entirely your responsibility to keep the underlying memory alive.
  This also required moving the string_view availability detection into detail/common.h because this PR needs it in pytypes.h, which is higher up the include chain than cast.h, where it was previously being detected.
* Move string_view include to pytypes.h
* CI-testing a fix for the "ambiguous conversion" issue. This change is known to fix the `tensorflow::tstring` issue reported under https://github.com/pybind/pybind11/pull/3521#issuecomment-985100965 TODO: Minimal reproducer for the `tensorflow::tstring` issue.
* Make clang-tidy happy (hopefully).
* Adding minimal reproducer for the `tensorflow::tstring` issue.
Error without the enable_if trick:
```
/usr/local/google/home/rwgk/forked/pybind11/tests/test_builtin_casters.cpp:169:16: error: ambiguous conversion for functional-style cast from 'TypeWithBothOperatorStringAndStringView' to 'py::bytes'
        return py::bytes(TypeWithBothOperatorStringAndStringView());
               ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
/usr/local/google/home/rwgk/forked/pybind11/include/pybind11/detail/../pytypes.h:1174:5: note: candidate constructor
    bytes(const std::string &s) : bytes(s.data(), s.size()) { }
    ^
/usr/local/google/home/rwgk/forked/pybind11/include/pybind11/detail/../pytypes.h:1191:5: note: candidate constructor
    bytes(std::string_view s) : bytes(s.data(), s.size()) { }
    ^
```
* Adding missing NOLINTNEXTLINE
* Also apply ambiguous conversion workaround to str()

Co-authored-by: Ralf W. Grosse-Kunstleve <rwgk@google.com>
This commit is contained in:
parent
cd176ceeff
commit
b4939fcbfb
@ -27,23 +27,6 @@
|
|||||||
#include <utility>
|
#include <utility>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#if defined(PYBIND11_CPP17)
|
|
||||||
# if defined(__has_include)
|
|
||||||
# if __has_include(<string_view>)
|
|
||||||
# define PYBIND11_HAS_STRING_VIEW
|
|
||||||
# endif
|
|
||||||
# elif defined(_MSC_VER)
|
|
||||||
# define PYBIND11_HAS_STRING_VIEW
|
|
||||||
# endif
|
|
||||||
#endif
|
|
||||||
#ifdef PYBIND11_HAS_STRING_VIEW
|
|
||||||
#include <string_view>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if defined(__cpp_lib_char8_t) && __cpp_lib_char8_t >= 201811L
|
|
||||||
# define PYBIND11_HAS_U8STRING
|
|
||||||
#endif
|
|
||||||
|
|
||||||
PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
|
PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
|
||||||
PYBIND11_NAMESPACE_BEGIN(detail)
|
PYBIND11_NAMESPACE_BEGIN(detail)
|
||||||
|
|
||||||
|
@ -183,6 +183,21 @@
|
|||||||
# define PYBIND11_HAS_VARIANT 1
|
# define PYBIND11_HAS_VARIANT 1
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if defined(PYBIND11_CPP17)
|
||||||
|
# if defined(__has_include)
|
||||||
|
# if __has_include(<string_view>)
|
||||||
|
# define PYBIND11_HAS_STRING_VIEW
|
||||||
|
# endif
|
||||||
|
# elif defined(_MSC_VER)
|
||||||
|
# define PYBIND11_HAS_STRING_VIEW
|
||||||
|
# endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(__cpp_lib_char8_t) && __cpp_lib_char8_t >= 201811L
|
||||||
|
# define PYBIND11_HAS_U8STRING
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#include <Python.h>
|
#include <Python.h>
|
||||||
#include <frameobject.h>
|
#include <frameobject.h>
|
||||||
#include <pythread.h>
|
#include <pythread.h>
|
||||||
|
@ -18,6 +18,10 @@
|
|||||||
# include <optional>
|
# include <optional>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef PYBIND11_HAS_STRING_VIEW
|
||||||
|
# include <string_view>
|
||||||
|
#endif
|
||||||
|
|
||||||
PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
|
PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
|
||||||
|
|
||||||
/* A few forward declarations */
|
/* A few forward declarations */
|
||||||
@ -1085,6 +1089,20 @@ public:
|
|||||||
// NOLINTNEXTLINE(google-explicit-constructor)
|
// NOLINTNEXTLINE(google-explicit-constructor)
|
||||||
str(const std::string &s) : str(s.data(), s.size()) { }
|
str(const std::string &s) : str(s.data(), s.size()) { }
|
||||||
|
|
||||||
|
#ifdef PYBIND11_HAS_STRING_VIEW
|
||||||
|
// enable_if is needed to avoid "ambiguous conversion" errors (see PR #3521).
|
||||||
|
template <typename T, detail::enable_if_t<std::is_same<T, std::string_view>::value, int> = 0>
|
||||||
|
// NOLINTNEXTLINE(google-explicit-constructor)
|
||||||
|
str(T s) : str(s.data(), s.size()) { }
|
||||||
|
|
||||||
|
# ifdef PYBIND11_HAS_U8STRING
|
||||||
|
// reinterpret_cast here is safe (C++20 guarantees char8_t has the same size/alignment as char)
|
||||||
|
// NOLINTNEXTLINE(google-explicit-constructor)
|
||||||
|
str(std::u8string_view s) : str(reinterpret_cast<const char*>(s.data()), s.size()) { }
|
||||||
|
# endif
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
explicit str(const bytes &b);
|
explicit str(const bytes &b);
|
||||||
|
|
||||||
/** \rst
|
/** \rst
|
||||||
@ -1167,6 +1185,26 @@ public:
|
|||||||
pybind11_fail("Unable to extract bytes contents!");
|
pybind11_fail("Unable to extract bytes contents!");
|
||||||
return std::string(buffer, (size_t) length);
|
return std::string(buffer, (size_t) length);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef PYBIND11_HAS_STRING_VIEW
|
||||||
|
// enable_if is needed to avoid "ambiguous conversion" errors (see PR #3521).
|
||||||
|
template <typename T, detail::enable_if_t<std::is_same<T, std::string_view>::value, int> = 0>
|
||||||
|
// NOLINTNEXTLINE(google-explicit-constructor)
|
||||||
|
bytes(T s) : bytes(s.data(), s.size()) { }
|
||||||
|
|
||||||
|
// Obtain a string view that views the current `bytes` buffer value. Note that this is only
|
||||||
|
// valid so long as the `bytes` instance remains alive and so generally should not outlive the
|
||||||
|
// lifetime of the `bytes` instance.
|
||||||
|
// NOLINTNEXTLINE(google-explicit-constructor)
|
||||||
|
operator std::string_view() const {
|
||||||
|
char *buffer = nullptr;
|
||||||
|
ssize_t length = 0;
|
||||||
|
if (PYBIND11_BYTES_AS_STRING_AND_SIZE(m_ptr, &buffer, &length))
|
||||||
|
pybind11_fail("Unable to extract bytes contents!");
|
||||||
|
return {buffer, static_cast<size_t>(length)};
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
};
|
};
|
||||||
// Note: breathe >= 4.17.0 will fail to build docs if the below two constructors
|
// Note: breathe >= 4.17.0 will fail to build docs if the below two constructors
|
||||||
// are included in the doxygen group; close here and reopen after as a workaround
|
// are included in the doxygen group; close here and reopen after as a workaround
|
||||||
@ -1714,6 +1752,13 @@ public:
|
|||||||
static memoryview from_memory(const void *mem, ssize_t size) {
|
static memoryview from_memory(const void *mem, ssize_t size) {
|
||||||
return memoryview::from_memory(const_cast<void*>(mem), size, true);
|
return memoryview::from_memory(const_cast<void*>(mem), size, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef PYBIND11_HAS_STRING_VIEW
|
||||||
|
static memoryview from_memory(std::string_view mem) {
|
||||||
|
return from_memory(const_cast<char*>(mem.data()), static_cast<ssize_t>(mem.size()), true);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -140,11 +140,35 @@ TEST_SUBMODULE(builtin_casters, m) {
|
|||||||
m.def("string_view16_return", []() { return std::u16string_view(u"utf16 secret \U0001f382"); });
|
m.def("string_view16_return", []() { return std::u16string_view(u"utf16 secret \U0001f382"); });
|
||||||
m.def("string_view32_return", []() { return std::u32string_view(U"utf32 secret \U0001f382"); });
|
m.def("string_view32_return", []() { return std::u32string_view(U"utf32 secret \U0001f382"); });
|
||||||
|
|
||||||
|
// The inner lambdas here are to also test implicit conversion
|
||||||
|
using namespace std::literals;
|
||||||
|
m.def("string_view_bytes", []() { return [](py::bytes b) { return b; }("abc \x80\x80 def"sv); });
|
||||||
|
m.def("string_view_str", []() { return [](py::str s) { return s; }("abc \342\200\275 def"sv); });
|
||||||
|
m.def("string_view_from_bytes", [](const py::bytes &b) { return [](std::string_view s) { return s; }(b); });
|
||||||
|
#if PY_MAJOR_VERSION >= 3
|
||||||
|
m.def("string_view_memoryview", []() {
|
||||||
|
static constexpr auto val = "Have some \360\237\216\202"sv;
|
||||||
|
return py::memoryview::from_memory(val);
|
||||||
|
});
|
||||||
|
#endif
|
||||||
|
|
||||||
# ifdef PYBIND11_HAS_U8STRING
|
# ifdef PYBIND11_HAS_U8STRING
|
||||||
m.def("string_view8_print", [](std::u8string_view s) { py::print(s, s.size()); });
|
m.def("string_view8_print", [](std::u8string_view s) { py::print(s, s.size()); });
|
||||||
m.def("string_view8_chars", [](std::u8string_view s) { py::list l; for (auto c : s) l.append((std::uint8_t) c); return l; });
|
m.def("string_view8_chars", [](std::u8string_view s) { py::list l; for (auto c : s) l.append((std::uint8_t) c); return l; });
|
||||||
m.def("string_view8_return", []() { return std::u8string_view(u8"utf8 secret \U0001f382"); });
|
m.def("string_view8_return", []() { return std::u8string_view(u8"utf8 secret \U0001f382"); });
|
||||||
|
m.def("string_view8_str", []() { return py::str{std::u8string_view{u8"abc ‽ def"}}; });
|
||||||
# endif
|
# endif
|
||||||
|
|
||||||
|
struct TypeWithBothOperatorStringAndStringView {
|
||||||
|
// NOLINTNEXTLINE(google-explicit-constructor)
|
||||||
|
operator std::string() const { return "success"; }
|
||||||
|
// NOLINTNEXTLINE(google-explicit-constructor)
|
||||||
|
operator std::string_view() const { return "failure"; }
|
||||||
|
};
|
||||||
|
m.def("bytes_from_type_with_both_operator_string_and_string_view",
|
||||||
|
[]() { return py::bytes(TypeWithBothOperatorStringAndStringView()); });
|
||||||
|
m.def("str_from_type_with_both_operator_string_and_string_view",
|
||||||
|
[]() { return py::str(TypeWithBothOperatorStringAndStringView()); });
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// test_integer_casting
|
// test_integer_casting
|
||||||
|
@ -206,6 +206,17 @@ def test_string_view(capture):
|
|||||||
"""
|
"""
|
||||||
)
|
)
|
||||||
|
|
||||||
|
assert m.string_view_bytes() == b"abc \x80\x80 def"
|
||||||
|
assert m.string_view_str() == u"abc ‽ def"
|
||||||
|
assert m.string_view_from_bytes(u"abc ‽ def".encode("utf-8")) == u"abc ‽ def"
|
||||||
|
if hasattr(m, "has_u8string"):
|
||||||
|
assert m.string_view8_str() == u"abc ‽ def"
|
||||||
|
if not env.PY2:
|
||||||
|
assert m.string_view_memoryview() == "Have some 🎂".encode()
|
||||||
|
|
||||||
|
assert m.bytes_from_type_with_both_operator_string_and_string_view() == b"success"
|
||||||
|
assert m.str_from_type_with_both_operator_string_and_string_view() == "success"
|
||||||
|
|
||||||
|
|
||||||
def test_integer_casting():
|
def test_integer_casting():
|
||||||
"""Issue #929 - out-of-range integer values shouldn't be accepted"""
|
"""Issue #929 - out-of-range integer values shouldn't be accepted"""
|
||||||
|
Loading…
Reference in New Issue
Block a user