mirror of
https://github.com/pybind/pybind11.git
synced 2024-11-25 14:45:12 +00:00
Changing pybind11::str to exclusively hold PyUnicodeObject (#2409)
* Changing pybind11::str to exclusively hold PyUnicodeObject
This commit is contained in:
parent
587d5f840a
commit
0432ae7c52
@ -6,10 +6,15 @@ Changelog
|
|||||||
Starting with version 1.8.0, pybind11 releases use a `semantic versioning
|
Starting with version 1.8.0, pybind11 releases use a `semantic versioning
|
||||||
<http://semver.org>`_ policy.
|
<http://semver.org>`_ policy.
|
||||||
|
|
||||||
v2.6.3 (TBA, not yet released)
|
v2.7.0 (TBA, not yet released)
|
||||||
------------------------------
|
------------------------------
|
||||||
|
|
||||||
* Details to follow here
|
* ``py::str`` changed to exclusively hold `PyUnicodeObject`. Previously
|
||||||
|
``py::str`` could also hold `bytes`, which is probably surprising, was
|
||||||
|
never documented, and can mask bugs (e.g. accidental use of ``py::str``
|
||||||
|
instead of ``py::bytes``).
|
||||||
|
`#2409 <https://github.com/pybind/pybind11/pull/2409>`_
|
||||||
|
|
||||||
|
|
||||||
v2.6.2 (Jan 26, 2021)
|
v2.6.2 (Jan 26, 2021)
|
||||||
---------------------
|
---------------------
|
||||||
|
@ -10,6 +10,31 @@ modernization and other useful information.
|
|||||||
|
|
||||||
.. _upgrade-guide-2.6:
|
.. _upgrade-guide-2.6:
|
||||||
|
|
||||||
|
v2.7
|
||||||
|
====
|
||||||
|
|
||||||
|
*Before* v2.7, ``py::str`` can hold ``PyUnicodeObject`` or ``PyBytesObject``,
|
||||||
|
and ``py::isinstance<str>()`` is ``true`` for both ``py::str`` and
|
||||||
|
``py::bytes``. Starting with v2.7, ``py::str`` exclusively holds
|
||||||
|
``PyUnicodeObject`` (`#2409 <https://github.com/pybind/pybind11/pull/2409>`_),
|
||||||
|
and ``py::isinstance<str>()`` is ``true`` only for ``py::str``. To help in
|
||||||
|
the transition of user code, the ``PYBIND11_STR_LEGACY_PERMISSIVE`` macro
|
||||||
|
is provided as an escape hatch to go back to the legacy behavior. This macro
|
||||||
|
will be removed in future releases. Two types of required fixes are expected
|
||||||
|
to be common:
|
||||||
|
|
||||||
|
* Accidental use of ``py::str`` instead of ``py::bytes``, masked by the legacy
|
||||||
|
behavior. These are probably very easy to fix, by changing from
|
||||||
|
``py::str`` to ``py::bytes``.
|
||||||
|
|
||||||
|
* Reliance on py::isinstance<str>(obj) being ``true`` for
|
||||||
|
``py::bytes``. This is likely to be easy to fix in most cases by adding
|
||||||
|
``|| py::isinstance<bytes>(obj)``, but a fix may be more involved, e.g. if
|
||||||
|
``py::isinstance<T>`` appears in a template. Such situations will require
|
||||||
|
careful review and custom fixes.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
v2.6
|
v2.6
|
||||||
====
|
====
|
||||||
|
|
||||||
|
@ -1656,6 +1656,17 @@ struct pyobject_caster {
|
|||||||
|
|
||||||
template <typename T = type, enable_if_t<std::is_base_of<object, T>::value, int> = 0>
|
template <typename T = type, enable_if_t<std::is_base_of<object, T>::value, int> = 0>
|
||||||
bool load(handle src, bool /* convert */) {
|
bool load(handle src, bool /* convert */) {
|
||||||
|
#if PY_MAJOR_VERSION < 3 && !defined(PYBIND11_STR_LEGACY_PERMISSIVE)
|
||||||
|
// For Python 2, without this implicit conversion, Python code would
|
||||||
|
// need to be cluttered with six.ensure_text() or similar, only to be
|
||||||
|
// un-cluttered later after Python 2 support is dropped.
|
||||||
|
if (std::is_same<T, str>::value && isinstance<bytes>(src)) {
|
||||||
|
PyObject *str_from_bytes = PyUnicode_FromEncodedObject(src.ptr(), "utf-8", nullptr);
|
||||||
|
if (!str_from_bytes) throw error_already_set();
|
||||||
|
value = reinterpret_steal<type>(str_from_bytes);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
if (!isinstance<type>(src))
|
if (!isinstance<type>(src))
|
||||||
return false;
|
return false;
|
||||||
value = reinterpret_borrow<type>(src);
|
value = reinterpret_borrow<type>(src);
|
||||||
|
@ -163,6 +163,19 @@
|
|||||||
#include <typeindex>
|
#include <typeindex>
|
||||||
#include <type_traits>
|
#include <type_traits>
|
||||||
|
|
||||||
|
// #define PYBIND11_STR_LEGACY_PERMISSIVE
|
||||||
|
// If DEFINED, pybind11::str can hold PyUnicodeObject or PyBytesObject
|
||||||
|
// (probably surprising and never documented, but this was the
|
||||||
|
// legacy behavior until and including v2.6.x). As a side-effect,
|
||||||
|
// pybind11::isinstance<str>() is true for both pybind11::str and
|
||||||
|
// pybind11::bytes.
|
||||||
|
// If UNDEFINED, pybind11::str can only hold PyUnicodeObject, and
|
||||||
|
// pybind11::isinstance<str>() is true only for pybind11::str.
|
||||||
|
// However, for Python 2 only (!), the pybind11::str caster
|
||||||
|
// implicitly decodes bytes to PyUnicodeObject. This is to ease
|
||||||
|
// the transition from the legacy behavior to the non-permissive
|
||||||
|
// behavior.
|
||||||
|
|
||||||
#if PY_MAJOR_VERSION >= 3 /// Compatibility macros for various Python versions
|
#if PY_MAJOR_VERSION >= 3 /// Compatibility macros for various Python versions
|
||||||
#define PYBIND11_INSTANCE_METHOD_NEW(ptr, class_) PyInstanceMethod_New(ptr)
|
#define PYBIND11_INSTANCE_METHOD_NEW(ptr, class_) PyInstanceMethod_New(ptr)
|
||||||
#define PYBIND11_INSTANCE_METHOD_CHECK PyInstanceMethod_Check
|
#define PYBIND11_INSTANCE_METHOD_CHECK PyInstanceMethod_Check
|
||||||
|
@ -756,7 +756,12 @@ inline bool PyIterable_Check(PyObject *obj) {
|
|||||||
inline bool PyNone_Check(PyObject *o) { return o == Py_None; }
|
inline bool PyNone_Check(PyObject *o) { return o == Py_None; }
|
||||||
inline bool PyEllipsis_Check(PyObject *o) { return o == Py_Ellipsis; }
|
inline bool PyEllipsis_Check(PyObject *o) { return o == Py_Ellipsis; }
|
||||||
|
|
||||||
|
#ifdef PYBIND11_STR_LEGACY_PERMISSIVE
|
||||||
inline bool PyUnicode_Check_Permissive(PyObject *o) { return PyUnicode_Check(o) || PYBIND11_BYTES_CHECK(o); }
|
inline bool PyUnicode_Check_Permissive(PyObject *o) { return PyUnicode_Check(o) || PYBIND11_BYTES_CHECK(o); }
|
||||||
|
#define PYBIND11_STR_CHECK_FUN detail::PyUnicode_Check_Permissive
|
||||||
|
#else
|
||||||
|
#define PYBIND11_STR_CHECK_FUN PyUnicode_Check
|
||||||
|
#endif
|
||||||
|
|
||||||
inline bool PyStaticMethod_Check(PyObject *o) { return o->ob_type == &PyStaticMethod_Type; }
|
inline bool PyStaticMethod_Check(PyObject *o) { return o->ob_type == &PyStaticMethod_Type; }
|
||||||
|
|
||||||
@ -936,7 +941,7 @@ class bytes;
|
|||||||
|
|
||||||
class str : public object {
|
class str : public object {
|
||||||
public:
|
public:
|
||||||
PYBIND11_OBJECT_CVT(str, object, detail::PyUnicode_Check_Permissive, raw_str)
|
PYBIND11_OBJECT_CVT(str, object, PYBIND11_STR_CHECK_FUN, raw_str)
|
||||||
|
|
||||||
str(const char *c, size_t n)
|
str(const char *c, size_t n)
|
||||||
: object(PyUnicode_FromStringAndSize(c, (ssize_t) n), stolen_t{}) {
|
: object(PyUnicode_FromStringAndSize(c, (ssize_t) n), stolen_t{}) {
|
||||||
|
@ -144,7 +144,7 @@ template <typename Type, typename Value> struct list_caster {
|
|||||||
using value_conv = make_caster<Value>;
|
using value_conv = make_caster<Value>;
|
||||||
|
|
||||||
bool load(handle src, bool convert) {
|
bool load(handle src, bool convert) {
|
||||||
if (!isinstance<sequence>(src) || isinstance<str>(src))
|
if (!isinstance<sequence>(src) || isinstance<bytes>(src) || isinstance<str>(src))
|
||||||
return false;
|
return false;
|
||||||
auto s = reinterpret_borrow<sequence>(src);
|
auto s = reinterpret_borrow<sequence>(src);
|
||||||
value.clear();
|
value.clear();
|
||||||
|
@ -413,4 +413,15 @@ TEST_SUBMODULE(pytypes, m) {
|
|||||||
|
|
||||||
// test_builtin_functions
|
// test_builtin_functions
|
||||||
m.def("get_len", [](py::handle h) { return py::len(h); });
|
m.def("get_len", [](py::handle h) { return py::len(h); });
|
||||||
|
|
||||||
|
#ifdef PYBIND11_STR_LEGACY_PERMISSIVE
|
||||||
|
m.attr("PYBIND11_STR_LEGACY_PERMISSIVE") = true;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
m.def("isinstance_pybind11_bytes", [](py::object o) { return py::isinstance<py::bytes>(o); });
|
||||||
|
m.def("isinstance_pybind11_str", [](py::object o) { return py::isinstance<py::str>(o); });
|
||||||
|
|
||||||
|
m.def("pass_to_pybind11_bytes", [](py::bytes b) { return py::len(b); });
|
||||||
|
m.def("pass_to_pybind11_str", [](py::str s) { return py::len(s); });
|
||||||
|
m.def("pass_to_std_string", [](std::string s) { return s.size(); });
|
||||||
}
|
}
|
||||||
|
@ -120,14 +120,17 @@ def test_str(doc):
|
|||||||
assert s1 == s2
|
assert s1 == s2
|
||||||
|
|
||||||
malformed_utf8 = b"\x80"
|
malformed_utf8 = b"\x80"
|
||||||
assert m.str_from_object(malformed_utf8) is malformed_utf8 # To be fixed; see #2380
|
if hasattr(m, "PYBIND11_STR_LEGACY_PERMISSIVE"):
|
||||||
|
assert m.str_from_object(malformed_utf8) is malformed_utf8
|
||||||
|
elif env.PY2:
|
||||||
|
with pytest.raises(UnicodeDecodeError):
|
||||||
|
m.str_from_object(malformed_utf8)
|
||||||
|
else:
|
||||||
|
assert m.str_from_object(malformed_utf8) == "b'\\x80'"
|
||||||
if env.PY2:
|
if env.PY2:
|
||||||
# with pytest.raises(UnicodeDecodeError):
|
|
||||||
# m.str_from_object(malformed_utf8)
|
|
||||||
with pytest.raises(UnicodeDecodeError):
|
with pytest.raises(UnicodeDecodeError):
|
||||||
m.str_from_handle(malformed_utf8)
|
m.str_from_handle(malformed_utf8)
|
||||||
else:
|
else:
|
||||||
# assert m.str_from_object(malformed_utf8) == "b'\\x80'"
|
|
||||||
assert m.str_from_handle(malformed_utf8) == "b'\\x80'"
|
assert m.str_from_handle(malformed_utf8) == "b'\\x80'"
|
||||||
|
|
||||||
|
|
||||||
@ -303,13 +306,26 @@ def test_pybind11_str_raw_str():
|
|||||||
valid_orig = u"DZ"
|
valid_orig = u"DZ"
|
||||||
valid_utf8 = valid_orig.encode("utf-8")
|
valid_utf8 = valid_orig.encode("utf-8")
|
||||||
valid_cvt = cvt(valid_utf8)
|
valid_cvt = cvt(valid_utf8)
|
||||||
assert type(valid_cvt) == bytes # Probably surprising.
|
if hasattr(m, "PYBIND11_STR_LEGACY_PERMISSIVE"):
|
||||||
assert valid_cvt == b"\xc7\xb1"
|
assert valid_cvt is valid_utf8
|
||||||
|
else:
|
||||||
|
assert type(valid_cvt) is unicode if env.PY2 else str # noqa: F821
|
||||||
|
if env.PY2:
|
||||||
|
assert valid_cvt == valid_orig
|
||||||
|
else:
|
||||||
|
assert valid_cvt == "b'\\xc7\\xb1'"
|
||||||
|
|
||||||
malformed_utf8 = b"\x80"
|
malformed_utf8 = b"\x80"
|
||||||
|
if hasattr(m, "PYBIND11_STR_LEGACY_PERMISSIVE"):
|
||||||
|
assert cvt(malformed_utf8) is malformed_utf8
|
||||||
|
else:
|
||||||
|
if env.PY2:
|
||||||
|
with pytest.raises(UnicodeDecodeError):
|
||||||
|
cvt(malformed_utf8)
|
||||||
|
else:
|
||||||
malformed_cvt = cvt(malformed_utf8)
|
malformed_cvt = cvt(malformed_utf8)
|
||||||
assert type(malformed_cvt) == bytes # Probably surprising.
|
assert type(malformed_cvt) is str
|
||||||
assert malformed_cvt == b"\x80"
|
assert malformed_cvt == "b'\\x80'"
|
||||||
|
|
||||||
|
|
||||||
def test_implicit_casting():
|
def test_implicit_casting():
|
||||||
@ -488,3 +504,40 @@ def test_builtin_functions():
|
|||||||
"object of type 'generator' has no len()",
|
"object of type 'generator' has no len()",
|
||||||
"'generator' has no length",
|
"'generator' has no length",
|
||||||
] # PyPy
|
] # PyPy
|
||||||
|
|
||||||
|
|
||||||
|
def test_isinstance_string_types():
|
||||||
|
assert m.isinstance_pybind11_bytes(b"")
|
||||||
|
assert not m.isinstance_pybind11_bytes(u"")
|
||||||
|
|
||||||
|
assert m.isinstance_pybind11_str(u"")
|
||||||
|
if hasattr(m, "PYBIND11_STR_LEGACY_PERMISSIVE"):
|
||||||
|
assert m.isinstance_pybind11_str(b"")
|
||||||
|
else:
|
||||||
|
assert not m.isinstance_pybind11_str(b"")
|
||||||
|
|
||||||
|
|
||||||
|
def test_pass_bytes_or_unicode_to_string_types():
|
||||||
|
assert m.pass_to_pybind11_bytes(b"Bytes") == 5
|
||||||
|
with pytest.raises(TypeError):
|
||||||
|
m.pass_to_pybind11_bytes(u"Str")
|
||||||
|
|
||||||
|
if hasattr(m, "PYBIND11_STR_LEGACY_PERMISSIVE") or env.PY2:
|
||||||
|
assert m.pass_to_pybind11_str(b"Bytes") == 5
|
||||||
|
else:
|
||||||
|
with pytest.raises(TypeError):
|
||||||
|
m.pass_to_pybind11_str(b"Bytes")
|
||||||
|
assert m.pass_to_pybind11_str(u"Str") == 3
|
||||||
|
|
||||||
|
assert m.pass_to_std_string(b"Bytes") == 5
|
||||||
|
assert m.pass_to_std_string(u"Str") == 3
|
||||||
|
|
||||||
|
malformed_utf8 = b"\x80"
|
||||||
|
if hasattr(m, "PYBIND11_STR_LEGACY_PERMISSIVE"):
|
||||||
|
assert m.pass_to_pybind11_str(malformed_utf8) == 1
|
||||||
|
elif env.PY2:
|
||||||
|
with pytest.raises(UnicodeDecodeError):
|
||||||
|
m.pass_to_pybind11_str(malformed_utf8)
|
||||||
|
else:
|
||||||
|
with pytest.raises(TypeError):
|
||||||
|
m.pass_to_pybind11_str(malformed_utf8)
|
||||||
|
Loading…
Reference in New Issue
Block a user