Expand std::string_view support to str, bytes, memoryview (#3521)

* Expand string_view support to str, bytes, memoryview

1. Allows constructing a str or bytes implicitly from a string_view;
   this is essentially a small shortcut allowing a caller to write
   `py::bytes{sv}` rather than `py::bytes{sv.data(), sv.size()}`.

2. Allows implicit conversion *to* string_view from py::bytes -- this
   saves a fair bit more as currently there is no simple way to get such
   a view of the bytes without copying it (or resorting to Python API
   calls).

   (This is not done for `str` because when the str contains unicode we
   have to allocate to a temporary and so there might not be some string
   data we can properly view without owning.)

3. Allows `memoryview::from_memory` to accept a string_view.  As with
   the other from_memory calls, it's entirely your responsibility to
   keep it alive.

This also required moving the string_view availability detection into
detail/common.h because this PR needs it in pytypes.h, which is higher
up the include chain than cast.h where it was being detected currently.

* Move string_view include to pytypes.h

* CI-testing a fix for the "ambiguous conversion" issue.

This change is known to fix the `tensorflow::tstring` issue reported under https://github.com/pybind/pybind11/pull/3521#issuecomment-985100965

TODO: Minimal reproducer for the `tensorflow::tstring` issue.

* Make clang-tidy happy (hopefully).

* Adding minimal reproducer for the `tensorflow::tstring` issue.

Error without the enable_if trick:

```
/usr/local/google/home/rwgk/forked/pybind11/tests/test_builtin_casters.cpp:169:16: error: ambiguous conversion for functional-style cast from 'TypeWithBothOperatorStringAndStringView' to 'py::bytes'
        return py::bytes(TypeWithBothOperatorStringAndStringView());
               ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
/usr/local/google/home/rwgk/forked/pybind11/include/pybind11/detail/../pytypes.h:1174:5: note: candidate constructor
    bytes(const std::string &s) : bytes(s.data(), s.size()) { }
    ^
/usr/local/google/home/rwgk/forked/pybind11/include/pybind11/detail/../pytypes.h:1191:5: note: candidate constructor
    bytes(std::string_view s) : bytes(s.data(), s.size()) { }
    ^
```

* Adding missing NOLINTNEXTLINE

* Also apply ambiguous conversion workaround to str()

Co-authored-by: Ralf W. Grosse-Kunstleve <rwgk@google.com>
This commit is contained in:
Jason Rhinelander 2021-12-03 13:20:32 -04:00 committed by GitHub
parent cd176ceeff
commit b4939fcbfb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 95 additions and 17 deletions

View File

@ -27,23 +27,6 @@
#include <utility>
#include <vector>
#if defined(PYBIND11_CPP17)
# if defined(__has_include)
# if __has_include(<string_view>)
# define PYBIND11_HAS_STRING_VIEW
# endif
# elif defined(_MSC_VER)
# define PYBIND11_HAS_STRING_VIEW
# endif
#endif
#ifdef PYBIND11_HAS_STRING_VIEW
#include <string_view>
#endif
#if defined(__cpp_lib_char8_t) && __cpp_lib_char8_t >= 201811L
# define PYBIND11_HAS_U8STRING
#endif
PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
PYBIND11_NAMESPACE_BEGIN(detail)

View File

@ -183,6 +183,21 @@
# define PYBIND11_HAS_VARIANT 1
#endif
#if defined(PYBIND11_CPP17)
# if defined(__has_include)
# if __has_include(<string_view>)
# define PYBIND11_HAS_STRING_VIEW
# endif
# elif defined(_MSC_VER)
# define PYBIND11_HAS_STRING_VIEW
# endif
#endif
#if defined(__cpp_lib_char8_t) && __cpp_lib_char8_t >= 201811L
# define PYBIND11_HAS_U8STRING
#endif
#include <Python.h>
#include <frameobject.h>
#include <pythread.h>

View File

@ -18,6 +18,10 @@
# include <optional>
#endif
#ifdef PYBIND11_HAS_STRING_VIEW
# include <string_view>
#endif
PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
/* A few forward declarations */
@ -1085,6 +1089,20 @@ public:
// NOLINTNEXTLINE(google-explicit-constructor)
str(const std::string &s) : str(s.data(), s.size()) { }
#ifdef PYBIND11_HAS_STRING_VIEW
// enable_if is needed to avoid "ambiguous conversion" errors (see PR #3521).
template <typename T, detail::enable_if_t<std::is_same<T, std::string_view>::value, int> = 0>
// NOLINTNEXTLINE(google-explicit-constructor)
str(T s) : str(s.data(), s.size()) { }
# ifdef PYBIND11_HAS_U8STRING
// reinterpret_cast here is safe (C++20 guarantees char8_t has the same size/alignment as char)
// NOLINTNEXTLINE(google-explicit-constructor)
str(std::u8string_view s) : str(reinterpret_cast<const char*>(s.data()), s.size()) { }
# endif
#endif
explicit str(const bytes &b);
/** \rst
@ -1167,6 +1185,26 @@ public:
pybind11_fail("Unable to extract bytes contents!");
return std::string(buffer, (size_t) length);
}
#ifdef PYBIND11_HAS_STRING_VIEW
// enable_if is needed to avoid "ambiguous conversion" errors (see PR #3521).
template <typename T, detail::enable_if_t<std::is_same<T, std::string_view>::value, int> = 0>
// NOLINTNEXTLINE(google-explicit-constructor)
bytes(T s) : bytes(s.data(), s.size()) { }
// Obtain a string view that views the current `bytes` buffer value. Note that this is only
// valid so long as the `bytes` instance remains alive and so generally should not outlive the
// lifetime of the `bytes` instance.
// NOLINTNEXTLINE(google-explicit-constructor)
operator std::string_view() const {
char *buffer = nullptr;
ssize_t length = 0;
if (PYBIND11_BYTES_AS_STRING_AND_SIZE(m_ptr, &buffer, &length))
pybind11_fail("Unable to extract bytes contents!");
return {buffer, static_cast<size_t>(length)};
}
#endif
};
// Note: breathe >= 4.17.0 will fail to build docs if the below two constructors
// are included in the doxygen group; close here and reopen after as a workaround
@ -1714,6 +1752,13 @@ public:
static memoryview from_memory(const void *mem, ssize_t size) {
return memoryview::from_memory(const_cast<void*>(mem), size, true);
}
#ifdef PYBIND11_HAS_STRING_VIEW
static memoryview from_memory(std::string_view mem) {
return from_memory(const_cast<char*>(mem.data()), static_cast<ssize_t>(mem.size()), true);
}
#endif
#endif
};

View File

@ -140,11 +140,35 @@ TEST_SUBMODULE(builtin_casters, m) {
m.def("string_view16_return", []() { return std::u16string_view(u"utf16 secret \U0001f382"); });
m.def("string_view32_return", []() { return std::u32string_view(U"utf32 secret \U0001f382"); });
// The inner lambdas here are to also test implicit conversion
using namespace std::literals;
m.def("string_view_bytes", []() { return [](py::bytes b) { return b; }("abc \x80\x80 def"sv); });
m.def("string_view_str", []() { return [](py::str s) { return s; }("abc \342\200\275 def"sv); });
m.def("string_view_from_bytes", [](const py::bytes &b) { return [](std::string_view s) { return s; }(b); });
#if PY_MAJOR_VERSION >= 3
m.def("string_view_memoryview", []() {
static constexpr auto val = "Have some \360\237\216\202"sv;
return py::memoryview::from_memory(val);
});
#endif
# ifdef PYBIND11_HAS_U8STRING
m.def("string_view8_print", [](std::u8string_view s) { py::print(s, s.size()); });
m.def("string_view8_chars", [](std::u8string_view s) { py::list l; for (auto c : s) l.append((std::uint8_t) c); return l; });
m.def("string_view8_return", []() { return std::u8string_view(u8"utf8 secret \U0001f382"); });
m.def("string_view8_str", []() { return py::str{std::u8string_view{u8"abc ‽ def"}}; });
# endif
struct TypeWithBothOperatorStringAndStringView {
// NOLINTNEXTLINE(google-explicit-constructor)
operator std::string() const { return "success"; }
// NOLINTNEXTLINE(google-explicit-constructor)
operator std::string_view() const { return "failure"; }
};
m.def("bytes_from_type_with_both_operator_string_and_string_view",
[]() { return py::bytes(TypeWithBothOperatorStringAndStringView()); });
m.def("str_from_type_with_both_operator_string_and_string_view",
[]() { return py::str(TypeWithBothOperatorStringAndStringView()); });
#endif
// test_integer_casting

View File

@ -206,6 +206,17 @@ def test_string_view(capture):
"""
)
assert m.string_view_bytes() == b"abc \x80\x80 def"
assert m.string_view_str() == u"abc ‽ def"
assert m.string_view_from_bytes(u"abc ‽ def".encode("utf-8")) == u"abc ‽ def"
if hasattr(m, "has_u8string"):
assert m.string_view8_str() == u"abc ‽ def"
if not env.PY2:
assert m.string_view_memoryview() == "Have some 🎂".encode()
assert m.bytes_from_type_with_both_operator_string_and_string_view() == b"success"
assert m.str_from_type_with_both_operator_string_and_string_view() == "success"
def test_integer_casting():
"""Issue #929 - out-of-range integer values shouldn't be accepted"""