From 679db2926728b52d0d58107c0919f6a674ffbdd0 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Mon, 5 Jun 2023 17:40:28 -0700 Subject: [PATCH 01/13] Snapshot of PR #4686 @ commit 3631886d337e128c2f64634bea343fd405d5aa09 applied to pywrapcc. --- include/pybind11/stl.h | 139 +++++++++++++++++++++++++++++++++-------- tests/test_stl.cpp | 7 +++ tests/test_stl.py | 74 ++++++++++++++++++++++ 3 files changed, 194 insertions(+), 26 deletions(-) diff --git a/include/pybind11/stl.h b/include/pybind11/stl.h index 39c1c157..9e0aa935 100644 --- a/include/pybind11/stl.h +++ b/include/pybind11/stl.h @@ -36,6 +36,47 @@ PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE) PYBIND11_NAMESPACE_BEGIN(detail) +// +// Begin: Code developed under https://github.com/google/clif/blob/main/clif/python/runtime.cc +// (currently unpublished) +// + +inline bool PyObjectIsInstanceWithOneOfTpNames(PyObject *obj, + std::initializer_list tp_names) { + if (PyType_Check(obj)) { + return false; + } + const char *obj_tp_name = Py_TYPE(obj)->tp_name; + for (const auto *tp_name : tp_names) { + if (strcmp(obj_tp_name, tp_name) == 0) { + return true; + } + } + return false; +} + +inline bool PyObjectTypeIsConvertibleToStdVector(PyObject *obj) { + if (PySequence_Check(obj) != 0) { + return !PyUnicode_Check(obj) && !PyBytes_Check(obj); + } + return (PyGen_Check(obj) != 0) || (PyAnySet_Check(obj) != 0) + || PyObjectIsInstanceWithOneOfTpNames( + obj, {"dict_keys", "dict_values", "dict_items", "map", "zip"}); +} + +inline bool PyObjectTypeIsConvertibleToStdSet(PyObject *obj) { + return (PyAnySet_Check(obj) != 0) || PyObjectIsInstanceWithOneOfTpNames(obj, {"dict_keys"}); +} + +inline bool PyObjectTypeIsConvertibleToStdMap(PyObject *obj) { + return (PyDict_Check(obj) != 0) + || ((PyMapping_Check(obj) != 0) && (PyObject_HasAttrString(obj, "items") != 0)); +} + +// +// End: Code developed under https://github.com/google/clif/blob/main/clif/python/runtime.cc +// + /// Extracts an const lvalue reference or rvalue reference for U based on the type of T (e.g. for /// forwarding a container element). Typically used indirect via forwarded_type(), below. template @@ -62,22 +103,18 @@ struct set_caster { private: template ::value, int> = 0> - void reserve_maybe(const anyset &s, Type *) { + void reserve_maybe(const sequence &s, Type *) { value.reserve(s.size()); } - void reserve_maybe(const anyset &, void *) {} + void reserve_maybe(const sequence &, void *) {} -public: - bool load(handle src, bool convert) { - if (!isinstance(src)) { - return false; - } - auto s = reinterpret_borrow(src); + bool convert_elements(handle seq, bool convert) { + auto s = reinterpret_borrow(seq); value.clear(); reserve_maybe(s, &value); - for (auto entry : s) { + for (auto it : seq) { key_conv conv; - if (!conv.load(entry, convert)) { + if (!conv.load(it, convert)) { return false; } value.insert(cast_op(std::move(conv))); @@ -85,6 +122,25 @@ struct set_caster { return true; } +public: + bool load(handle src, bool convert) { + if (!PyObjectTypeIsConvertibleToStdSet(src.ptr())) { + return false; + } + if (isinstance(src)) { + return convert_elements(src, convert); + } + if (!convert) { + return false; + } + // Designed to be behavior-equivalent to passing tuple(src) from Python: + // The conversion to a tuple will first exhaust the iterator object, to ensure that + // the iterator is not left in an unpredictable (to the caller) partially-consumed + // state. + assert(isinstance(src)); + return convert_elements(tuple(reinterpret_borrow(src)), convert); + } + template static handle cast(T &&src, const return_value_policy_pack &rvpp, handle parent) { return_value_policy_pack rvpp_local = rvpp; @@ -117,12 +173,7 @@ struct map_caster { } void reserve_maybe(const dict &, void *) {} -public: - bool load(handle src, bool convert) { - if (!isinstance(src)) { - return false; - } - auto d = reinterpret_borrow(src); + bool convert_elements(const dict &d, bool convert) { value.clear(); reserve_maybe(d, &value); for (auto it : d) { @@ -136,6 +187,27 @@ struct map_caster { return true; } +public: + bool load(handle src, bool convert) { + if (!PyObjectTypeIsConvertibleToStdMap(src.ptr())) { + return false; + } + if (isinstance(src)) { + return convert_elements(reinterpret_borrow(src), convert); + } + if (!convert) { + return false; + } + // Designed to be behavior-equivalent to passing dict(src.items()) from Python: + // The conversion to a dict will first exhaust the iterator object, to ensure that + // the iterator is not left in an unpredictable (to the caller) partially-consumed + // state. + auto items = src.attr("items")(); + assert(isinstance(items)); + return convert_elements(dict(reinterpret_borrow(items)), convert); + return false; + } + template static handle cast(T &&src, const return_value_policy_pack &rvpp, handle parent) { dict d; @@ -168,13 +240,35 @@ struct list_caster { using value_conv = make_caster; bool load(handle src, bool convert) { - if (!isinstance(src) || isinstance(src) || isinstance(src)) { + if (!PyObjectTypeIsConvertibleToStdVector(src.ptr())) { return false; } - auto s = reinterpret_borrow(src); + if (isinstance(src)) { + return convert_elements(src, convert); + } + if (!convert) { + return false; + } + // Designed to be behavior-equivalent to passing tuple(src) from Python: + // The conversion to a tuple will first exhaust the generator object, to ensure that + // the generator is not left in an unpredictable (to the caller) partially-consumed + // state. + assert(isinstance(src)); + return convert_elements(tuple(reinterpret_borrow(src)), convert); + } + +private: + template ::value, int> = 0> + void reserve_maybe(const sequence &s, Type *) { + value.reserve(s.size()); + } + void reserve_maybe(const sequence &, void *) {} + + bool convert_elements(handle seq, bool convert) { + auto s = reinterpret_borrow(seq); value.clear(); reserve_maybe(s, &value); - for (auto it : s) { + for (auto it : seq) { value_conv conv; if (!conv.load(it, convert)) { return false; @@ -184,13 +278,6 @@ struct list_caster { return true; } -private: - template ::value, int> = 0> - void reserve_maybe(const sequence &s, Type *) { - value.reserve(s.size()); - } - void reserve_maybe(const sequence &, void *) {} - public: template static handle cast(T &&src, const return_value_policy_pack &rvpp, handle parent) { diff --git a/tests/test_stl.cpp b/tests/test_stl.cpp index ee75427c..bfc32402 100644 --- a/tests/test_stl.cpp +++ b/tests/test_stl.cpp @@ -549,6 +549,13 @@ TEST_SUBMODULE(stl, m) { // Without explicitly specifying `take_ownership`, this function leaks. py::return_value_policy::take_ownership); + m.def("pass_std_vector_int", [](const std::vector &vec_int) { return vec_int.size(); }); + m.def("pass_std_vector_pair_int", [](const std::vector> &vec_pair_int) { + return vec_pair_int.size(); + }); + m.def("pass_std_set_int", [](const std::set &set_int) { return set_int.size(); }); + m.def("pass_std_map_int", [](const std::map &map_int) { return map_int.size(); }); + // test return_value_policy::_return_as_bytes m.def( "invalid_utf8_string_array_as_bytes", diff --git a/tests/test_stl.py b/tests/test_stl.py index 4aa00eed..5b7266eb 100644 --- a/tests/test_stl.py +++ b/tests/test_stl.py @@ -378,6 +378,80 @@ def test_return_vector_bool_raw_ptr(): assert len(v) == 4513 +def test_pass_std_vector_int(): + fn = m.pass_std_vector_int + assert fn([1, 2]) == 2 + assert fn((1, 2)) == 2 + assert fn({1, 2}) == 2 + assert fn({"x": 1, "y": 2}.values()) == 2 + assert fn({1: None, 2: None}.keys()) == 2 + assert fn(i for i in range(3)) == 3 + assert fn(map(lambda i: i, range(4))) == 4 # noqa: C417 + with pytest.raises(TypeError): + fn({}) + + +def test_pass_std_vector_pair_int(): + fn = m.pass_std_vector_pair_int + assert fn({1: 2, 3: 4}.items()) == 2 + assert fn(zip([1, 2], [3, 4])) == 2 + + +def test_pass_std_set_int(): + fn = m.pass_std_set_int + assert fn({1, 2}) == 2 + assert fn({1: None, 2: None}.keys()) == 2 + with pytest.raises(TypeError): + fn([1, 2]) + with pytest.raises(TypeError): + fn((1, 2)) + with pytest.raises(TypeError): + fn({}) + with pytest.raises(TypeError): + fn(i for i in range(3)) + + +def test_list_caster_fully_consumes_generator_object(): + def gen_mix(): + yield from [1, 2.0, 3] + + gen_obj = gen_mix() + with pytest.raises(TypeError): + m.pass_std_vector_int(gen_obj) + assert not tuple(gen_obj) + + +class FakePyMappingMissingItems: + def __getitem__(self, _): + raise RuntimeError("Not expected to be called.") + + +class FakePyMappingWithItems(FakePyMappingMissingItems): + def items(self): + return ((1, 2), (3, 4)) + + +class FakePyMappingBadItems(FakePyMappingMissingItems): + def items(self): + return ((1, 2), (3, "x")) + + +def test_pass_std_map_int(): + fn = m.pass_std_map_int + assert fn({1: 2, 3: 4}) == 2 + assert fn(FakePyMappingWithItems()) == 2 + with pytest.raises(TypeError): + fn(FakePyMappingMissingItems()) + with pytest.raises(TypeError): + fn(FakePyMappingBadItems()) + with pytest.raises(TypeError): + fn([]) + + +# TODO(rwgk): test_set_caster_fully_consumes_iterator_object +# TODO(rwgk): test_map_caster_fully_consumes_iterator_object + + def test_return_as_bytes_policy(): expected_return_value = b"\xba\xd0\xba\xd0" assert m.invalid_utf8_string_array_as_bytes() == [expected_return_value] From 139339efd09e544213c192b699ee79f065ab7999 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Tue, 6 Jun 2023 18:06:06 -0700 Subject: [PATCH 02/13] Also use `PyObjectTypeIsConvertibleToStdVector()` in `array_caster`. --- include/pybind11/stl.h | 27 +++++++++++++++++++++------ tests/test_stl.cpp | 2 ++ tests/test_stl.py | 10 ++++++---- 3 files changed, 29 insertions(+), 10 deletions(-) diff --git a/include/pybind11/stl.h b/include/pybind11/stl.h index 9e0aa935..e53300c9 100644 --- a/include/pybind11/stl.h +++ b/include/pybind11/stl.h @@ -327,12 +327,8 @@ struct array_caster { return size == Size; } -public: - bool load(handle src, bool convert) { - if (!isinstance(src)) { - return false; - } - auto l = reinterpret_borrow(src); + bool convert_elements(handle seq, bool convert) { + auto l = reinterpret_borrow(seq); if (!require_size(l.size())) { return false; } @@ -347,6 +343,25 @@ struct array_caster { return true; } +public: + bool load(handle src, bool convert) { + if (!PyObjectTypeIsConvertibleToStdVector(src.ptr())) { + return false; + } + if (isinstance(src)) { + return convert_elements(src, convert); + } + if (!convert) { + return false; + } + // Designed to be behavior-equivalent to passing tuple(src) from Python: + // The conversion to a tuple will first exhaust the generator object, to ensure that + // the generator is not left in an unpredictable (to the caller) partially-consumed + // state. + assert(isinstance(src)); + return convert_elements(tuple(reinterpret_borrow(src)), convert); + } + template static handle cast(T &&src, const return_value_policy_pack &rvpp, handle parent) { list l(src.size()); diff --git a/tests/test_stl.cpp b/tests/test_stl.cpp index bfc32402..57dec16f 100644 --- a/tests/test_stl.cpp +++ b/tests/test_stl.cpp @@ -553,6 +553,8 @@ TEST_SUBMODULE(stl, m) { m.def("pass_std_vector_pair_int", [](const std::vector> &vec_pair_int) { return vec_pair_int.size(); }); + m.def("pass_std_array_int_2", + [](const std::array &arr_int) { return arr_int.size(); }); m.def("pass_std_set_int", [](const std::set &set_int) { return set_int.size(); }); m.def("pass_std_map_int", [](const std::map &map_int) { return map_int.size(); }); diff --git a/tests/test_stl.py b/tests/test_stl.py index 5b7266eb..7458ac1c 100644 --- a/tests/test_stl.py +++ b/tests/test_stl.py @@ -378,15 +378,17 @@ def test_return_vector_bool_raw_ptr(): assert len(v) == 4513 -def test_pass_std_vector_int(): - fn = m.pass_std_vector_int +@pytest.mark.parametrize("fn", [m.pass_std_vector_int, m.pass_std_array_int_2]) +def test_pass_std_vector_int(fn): assert fn([1, 2]) == 2 assert fn((1, 2)) == 2 assert fn({1, 2}) == 2 assert fn({"x": 1, "y": 2}.values()) == 2 assert fn({1: None, 2: None}.keys()) == 2 - assert fn(i for i in range(3)) == 3 - assert fn(map(lambda i: i, range(4))) == 4 # noqa: C417 + assert fn(i for i in range(2)) == 2 + assert fn(map(lambda i: i, range(2))) == 2 # noqa: C417 + with pytest.raises(TypeError): + fn({1: 2, 3: 4}) with pytest.raises(TypeError): fn({}) From 832417f412bbf79a4374cc13fa1f9ff6bf953775 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Fri, 14 Jul 2023 12:53:42 -0700 Subject: [PATCH 03/13] Update comment pointing to clif/python/runtime.cc (code is unchanged). --- include/pybind11/stl.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/include/pybind11/stl.h b/include/pybind11/stl.h index 6ad1d2fc..58facc8b 100644 --- a/include/pybind11/stl.h +++ b/include/pybind11/stl.h @@ -13,6 +13,7 @@ #include "detail/common.h" #include +#include #include #include #include @@ -37,8 +38,8 @@ PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE) PYBIND11_NAMESPACE_BEGIN(detail) // -// Begin: Code developed under https://github.com/google/clif/blob/main/clif/python/runtime.cc -// (currently unpublished) +// Begin: Equivalent of +// https://github.com/google/clif/blob/337b2a106c0552ad85d40cfc3797465756840ea0/clif/python/runtime.cc#L388-L424 // inline bool PyObjectIsInstanceWithOneOfTpNames(PyObject *obj, @@ -74,7 +75,7 @@ inline bool PyObjectTypeIsConvertibleToStdMap(PyObject *obj) { } // -// End: Code developed under https://github.com/google/clif/blob/main/clif/python/runtime.cc +// End: Equivalent of clif/python/runtime.cc // /// Extracts an const lvalue reference or rvalue reference for U based on the type of T (e.g. for From be3bba71ee996b6759ce38996552744423527fb4 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Fri, 14 Jul 2023 15:36:38 -0700 Subject: [PATCH 04/13] Comprehensive test coverage, enhanced set_caster load implementation. --- include/pybind11/stl.h | 30 ++++++------- tests/test_stl.cpp | 39 +++++++++++++---- tests/test_stl.py | 95 +++++++++++++++++++++++++++++------------- 3 files changed, 112 insertions(+), 52 deletions(-) diff --git a/include/pybind11/stl.h b/include/pybind11/stl.h index 58facc8b..99d2918e 100644 --- a/include/pybind11/stl.h +++ b/include/pybind11/stl.h @@ -104,16 +104,13 @@ struct set_caster { private: template ::value, int> = 0> - void reserve_maybe(const sequence &s, Type *) { + void reserve_maybe(const anyset &s, Type *) { value.reserve(s.size()); } - void reserve_maybe(const sequence &, void *) {} + void reserve_maybe(const anyset &, void *) {} - bool convert_elements(handle seq, bool convert) { - auto s = reinterpret_borrow(seq); - value.clear(); - reserve_maybe(s, &value); - for (auto it : seq) { + bool convert_iterable(iterable itbl, bool convert) { + for (auto it : itbl) { key_conv conv; if (!conv.load(it, convert)) { return false; @@ -123,23 +120,27 @@ struct set_caster { return true; } + bool convert_anyset(anyset s, bool convert) { + value.clear(); + reserve_maybe(s, &value); + return convert_iterable(s, convert); + } + public: bool load(handle src, bool convert) { if (!PyObjectTypeIsConvertibleToStdSet(src.ptr())) { return false; } - if (isinstance(src)) { - return convert_elements(src, convert); + if (isinstance(src)) { + value.clear(); + return convert_anyset(reinterpret_borrow(src), convert); } if (!convert) { return false; } - // Designed to be behavior-equivalent to passing tuple(src) from Python: - // The conversion to a tuple will first exhaust the iterator object, to ensure that - // the iterator is not left in an unpredictable (to the caller) partially-consumed - // state. assert(isinstance(src)); - return convert_elements(tuple(reinterpret_borrow(src)), convert); + value.clear(); + return convert_iterable(reinterpret_borrow(src), convert); } template @@ -206,7 +207,6 @@ struct map_caster { auto items = src.attr("items")(); assert(isinstance(items)); return convert_elements(dict(reinterpret_borrow(items)), convert); - return false; } template diff --git a/tests/test_stl.cpp b/tests/test_stl.cpp index 57dec16f..901ab8f7 100644 --- a/tests/test_stl.cpp +++ b/tests/test_stl.cpp @@ -167,6 +167,14 @@ struct type_caster> } // namespace detail } // namespace PYBIND11_NAMESPACE +int pass_std_vector_int(const std::vector &v) { + int zum = 100; + for (const int i : v) { + zum += 2 * i; + } + return zum; +} + TEST_SUBMODULE(stl, m) { // test_vector m.def("cast_vector", []() { return std::vector{1}; }); @@ -549,14 +557,31 @@ TEST_SUBMODULE(stl, m) { // Without explicitly specifying `take_ownership`, this function leaks. py::return_value_policy::take_ownership); - m.def("pass_std_vector_int", [](const std::vector &vec_int) { return vec_int.size(); }); - m.def("pass_std_vector_pair_int", [](const std::vector> &vec_pair_int) { - return vec_pair_int.size(); + m.def("pass_std_vector_int", pass_std_vector_int); + m.def("pass_std_vector_pair_int", [](const std::vector> &v) { + int zum = 0; + for (const auto &ij : v) { + zum += ij.first * 100 + ij.second; + } + return zum; + }); + m.def("pass_std_array_int_2", [](const std::array &a) { + return pass_std_vector_int(std::vector(a.begin(), a.end())) + 1; + }); + m.def("pass_std_set_int", [](const std::set &s) { + int zum = 200; + for (const int i : s) { + zum += 3 * i; + } + return zum; + }); + m.def("pass_std_map_int", [](const std::map &m) { + int zum = 500; + for (const auto &p : m) { + zum += p.first * 1000 + p.second; + } + return zum; }); - m.def("pass_std_array_int_2", - [](const std::array &arr_int) { return arr_int.size(); }); - m.def("pass_std_set_int", [](const std::set &set_int) { return set_int.size(); }); - m.def("pass_std_map_int", [](const std::map &map_int) { return map_int.size(); }); // test return_value_policy::_return_as_bytes m.def( diff --git a/tests/test_stl.py b/tests/test_stl.py index 8051eab4..2bcbd23f 100644 --- a/tests/test_stl.py +++ b/tests/test_stl.py @@ -381,49 +381,62 @@ def test_return_vector_bool_raw_ptr(): assert len(v) == 4513 -@pytest.mark.parametrize("fn", [m.pass_std_vector_int, m.pass_std_array_int_2]) -def test_pass_std_vector_int(fn): - assert fn([1, 2]) == 2 - assert fn((1, 2)) == 2 - assert fn({1, 2}) == 2 - assert fn({"x": 1, "y": 2}.values()) == 2 - assert fn({1: None, 2: None}.keys()) == 2 - assert fn(i for i in range(2)) == 2 - assert fn(map(lambda i: i, range(2))) == 2 # noqa: C417 +@pytest.mark.parametrize( + ("fn", "offset"), [(m.pass_std_vector_int, 0), (m.pass_std_array_int_2, 1)] +) +def test_pass_std_vector_int(fn, offset): + assert fn([7, 13]) == 140 + offset + assert fn({6, 2}) == 116 + offset + assert fn({"x": 8, "y": 11}.values()) == 138 + offset + assert fn({3: None, 9: None}.keys()) == 124 + offset + assert fn(i for i in [4, 17]) == 142 + offset + assert fn(map(lambda i: i * 3, [8, 7])) == 190 + offset # noqa: C417 with pytest.raises(TypeError): - fn({1: 2, 3: 4}) + fn({"x": 0, "y": 1}) with pytest.raises(TypeError): fn({}) def test_pass_std_vector_pair_int(): fn = m.pass_std_vector_pair_int - assert fn({1: 2, 3: 4}.items()) == 2 - assert fn(zip([1, 2], [3, 4])) == 2 + assert fn({1: 2, 3: 4}.items()) == 406 + assert fn(zip([5, 17], [13, 9])) == 2222 + + +def test_list_caster_fully_consumes_generator_object(): + def gen_mix(): + yield from [1, 2.0, 3] + + gen_obj = gen_mix() + with pytest.raises(TypeError): + m.pass_std_vector_int(gen_obj) + assert not tuple(gen_obj) def test_pass_std_set_int(): fn = m.pass_std_set_int - assert fn({1, 2}) == 2 - assert fn({1: None, 2: None}.keys()) == 2 - with pytest.raises(TypeError): - fn([1, 2]) + assert fn({3, 15}) == 254 + assert fn({5: None, 12: None}.keys()) == 251 with pytest.raises(TypeError): - fn((1, 2)) + fn([]) with pytest.raises(TypeError): fn({}) with pytest.raises(TypeError): - fn(i for i in range(3)) - + fn({}.values()) + with pytest.raises(TypeError): + fn(i for i in []) -def test_list_caster_fully_consumes_generator_object(): - def gen_mix(): - yield from [1, 2.0, 3] - gen_obj = gen_mix() +def test_set_caster_dict_keys_failure(): + dict_keys = {1: None, 2.0: None, 3: None}.keys() + # The asserts does not really exercise anything in pybind11, but if one of + # them fails in some future version of Python, the set_caster load + # implementation may need to be revisited. + assert tuple(dict_keys) == (1, 2.0, 3) + assert tuple(dict_keys) == (1, 2.0, 3) with pytest.raises(TypeError): - m.pass_std_vector_int(gen_obj) - assert not tuple(gen_obj) + m.pass_std_set_int(dict_keys) + assert tuple(dict_keys) == (1, 2.0, 3) class FakePyMappingMissingItems: @@ -433,7 +446,7 @@ def __getitem__(self, _): class FakePyMappingWithItems(FakePyMappingMissingItems): def items(self): - return ((1, 2), (3, 4)) + return ((1, 3), (2, 4)) class FakePyMappingBadItems(FakePyMappingMissingItems): @@ -441,10 +454,19 @@ def items(self): return ((1, 2), (3, "x")) +class FakePyMappingGenObj(FakePyMappingMissingItems): + def __init__(self, gen_obj): + super().__init__() + self.gen_obj = gen_obj + + def items(self): + yield from self.gen_obj + + def test_pass_std_map_int(): fn = m.pass_std_map_int - assert fn({1: 2, 3: 4}) == 2 - assert fn(FakePyMappingWithItems()) == 2 + assert fn({1: 2, 3: 4}) == 4506 + assert fn(FakePyMappingWithItems()) == 3507 with pytest.raises(TypeError): fn(FakePyMappingMissingItems()) with pytest.raises(TypeError): @@ -453,8 +475,21 @@ def test_pass_std_map_int(): fn([]) -# TODO(rwgk): test_set_caster_fully_consumes_iterator_object -# TODO(rwgk): test_map_caster_fully_consumes_iterator_object +@pytest.mark.parametrize( + ("items", "expected_exception"), + [ + (((1, 2), (3, "x"), (4, 5)), TypeError), + (((1, 2), (3, 4, 5), (6, 7)), ValueError), + ], +) +def test_map_caster_fully_consumes_generator_object(items, expected_exception): + def gen_mix(): + yield from items + + gen_obj = gen_mix() + with pytest.raises(expected_exception): + m.pass_std_map_int(FakePyMappingGenObj(gen_obj)) + assert not tuple(gen_obj) def test_return_as_bytes_policy(): From 0ea0eef26fdcf6a20d766d3112919fb7afaaebcb Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Fri, 14 Jul 2023 17:45:58 -0700 Subject: [PATCH 05/13] Resolve clang-tidy eror. --- include/pybind11/stl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/pybind11/stl.h b/include/pybind11/stl.h index 99d2918e..14b21e58 100644 --- a/include/pybind11/stl.h +++ b/include/pybind11/stl.h @@ -109,7 +109,7 @@ struct set_caster { } void reserve_maybe(const anyset &, void *) {} - bool convert_iterable(iterable itbl, bool convert) { + bool convert_iterable(const iterable &itbl, bool convert) { for (auto it : itbl) { key_conv conv; if (!conv.load(it, convert)) { From d9f0ba214309b81f0529342ec186e221b703b00f Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Fri, 14 Jul 2023 22:46:36 -0700 Subject: [PATCH 06/13] Add a long C++ comment explaining what led to the `PyObjectTypeIsConvertibleTo*()` implementations. --- include/pybind11/stl.h | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/include/pybind11/stl.h b/include/pybind11/stl.h index 14b21e58..8963ecc5 100644 --- a/include/pybind11/stl.h +++ b/include/pybind11/stl.h @@ -40,7 +40,34 @@ PYBIND11_NAMESPACE_BEGIN(detail) // // Begin: Equivalent of // https://github.com/google/clif/blob/337b2a106c0552ad85d40cfc3797465756840ea0/clif/python/runtime.cc#L388-L424 -// +/* +The three `PyObjectTypeIsConvertibleTo*()` functions below are +the result of converging the behaviors of pybind11 and PyCLIF +(http://github.com/google/clif). + +Originally PyCLIF was extremely far on the permissive side of the spectrum, +while pybind11 was very far on the strict side. Originally PyCLIF accepted any +Python iterable as input for a C++ `vector`/`set`/`map` argument, as long as +the elements were convertible. The obvious (in hindsight) problem was that +any empty Python iterable could be passed to any of these C++ types, e.g. `{}` +was accpeted for C++ `vector`/`set` arguments, or `[]` for C++ `map` arguments. + +The functions below strike a practical permissive-vs-strict compromise, +informed by tens of thousands of use cases in the wild. A main objective is +to prevent accidents and improve readability: + +- Python literals must match the C++ types. + +- For C++ `set`: The potentially reducing conversion from a Python sequence + (e.g. Python `list` or `tuple`) to a C++ `set` must be explicit, by going + through a Python `set`. + +- However, a Python `set` can still be passed to a C++ `vector`. The rationale + is that this conversion is not reducing. Implicit conversions of this kind + are also fairly commonly used, therefore enforcing explicit conversions + would have an unfavorable cost : benefit ratio; more sloppily speaking, + such an enforcement would be more annyoing than helpful. +*/ inline bool PyObjectIsInstanceWithOneOfTpNames(PyObject *obj, std::initializer_list tp_names) { From 8cfdbca650a8cc8750caa19732533a3c4cf36495 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Sat, 15 Jul 2023 01:48:15 -0700 Subject: [PATCH 07/13] Minor function name change in test. --- tests/test_stl.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test_stl.py b/tests/test_stl.py index 2bcbd23f..6db7cf81 100644 --- a/tests/test_stl.py +++ b/tests/test_stl.py @@ -404,10 +404,10 @@ def test_pass_std_vector_pair_int(): def test_list_caster_fully_consumes_generator_object(): - def gen_mix(): + def gen_invalid(): yield from [1, 2.0, 3] - gen_obj = gen_mix() + gen_obj = gen_invalid() with pytest.raises(TypeError): m.pass_std_vector_int(gen_obj) assert not tuple(gen_obj) @@ -483,10 +483,10 @@ def test_pass_std_map_int(): ], ) def test_map_caster_fully_consumes_generator_object(items, expected_exception): - def gen_mix(): + def gen_invalid(): yield from items - gen_obj = gen_mix() + gen_obj = gen_invalid() with pytest.raises(expected_exception): m.pass_std_map_int(FakePyMappingGenObj(gen_obj)) assert not tuple(gen_obj) From 05d4f3f88554ece33879d2ff5d29be8c63df54db Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Mon, 17 Jul 2023 00:27:42 -0700 Subject: [PATCH 08/13] strcmp -> std::strcmp (thanks @Skylion007 for catching this) --- include/pybind11/stl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/pybind11/stl.h b/include/pybind11/stl.h index 8963ecc5..41ad69b0 100644 --- a/include/pybind11/stl.h +++ b/include/pybind11/stl.h @@ -76,7 +76,7 @@ inline bool PyObjectIsInstanceWithOneOfTpNames(PyObject *obj, } const char *obj_tp_name = Py_TYPE(obj)->tp_name; for (const auto *tp_name : tp_names) { - if (strcmp(obj_tp_name, tp_name) == 0) { + if (std::strcmp(obj_tp_name, tp_name) == 0) { return true; } } From 35f435b795e90b0faa6a276283ffcb6a0b70b449 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Mon, 17 Jul 2023 01:04:20 -0700 Subject: [PATCH 09/13] Add `PyCallable_Check(items)` in `PyObjectTypeIsConvertibleToStdMap()` --- include/pybind11/stl.h | 16 ++++++++++++++-- tests/test_stl.py | 17 ++++++++++++++++- 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/include/pybind11/stl.h b/include/pybind11/stl.h index 41ad69b0..2ef2e06f 100644 --- a/include/pybind11/stl.h +++ b/include/pybind11/stl.h @@ -97,8 +97,20 @@ inline bool PyObjectTypeIsConvertibleToStdSet(PyObject *obj) { } inline bool PyObjectTypeIsConvertibleToStdMap(PyObject *obj) { - return (PyDict_Check(obj) != 0) - || ((PyMapping_Check(obj) != 0) && (PyObject_HasAttrString(obj, "items") != 0)); + if (PyDict_Check(obj)) { + return true; + } + if (!PyMapping_Check(obj)) { + return false; + } + PyObject *items = PyObject_GetAttrString(obj, "items"); + if (items == nullptr) { + PyErr_Clear(); + return false; + } + bool is_convertible = (PyCallable_Check(items) != 0); + Py_DECREF(items); + return is_convertible; } // diff --git a/tests/test_stl.py b/tests/test_stl.py index 6db7cf81..2a7e916c 100644 --- a/tests/test_stl.py +++ b/tests/test_stl.py @@ -454,6 +454,17 @@ def items(self): return ((1, 2), (3, "x")) +class FakePyMappingItemsNotCallable(FakePyMappingMissingItems): + @property + def items(self): + return ((1, 2), (3, 4)) + + +class FakePyMappingItemsWithArg(FakePyMappingMissingItems): + def items(self, _): + return ((1, 2), (3, 4)) + + class FakePyMappingGenObj(FakePyMappingMissingItems): def __init__(self, gen_obj): super().__init__() @@ -466,13 +477,17 @@ def items(self): def test_pass_std_map_int(): fn = m.pass_std_map_int assert fn({1: 2, 3: 4}) == 4506 + with pytest.raises(TypeError): + fn([]) assert fn(FakePyMappingWithItems()) == 3507 with pytest.raises(TypeError): fn(FakePyMappingMissingItems()) with pytest.raises(TypeError): fn(FakePyMappingBadItems()) with pytest.raises(TypeError): - fn([]) + fn(FakePyMappingItemsNotCallable()) + with pytest.raises(TypeError): + fn(FakePyMappingItemsWithArg()) @pytest.mark.parametrize( From c6d00f7dec820e025d44b5625d2b5f5251008543 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Mon, 17 Jul 2023 01:29:45 -0700 Subject: [PATCH 10/13] Resolve clang-tidy error --- include/pybind11/stl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/pybind11/stl.h b/include/pybind11/stl.h index 2ef2e06f..5a1820a0 100644 --- a/include/pybind11/stl.h +++ b/include/pybind11/stl.h @@ -100,7 +100,7 @@ inline bool PyObjectTypeIsConvertibleToStdMap(PyObject *obj) { if (PyDict_Check(obj)) { return true; } - if (!PyMapping_Check(obj)) { + if (PyMapping_Check(obj) == 0) { return false; } PyObject *items = PyObject_GetAttrString(obj, "items"); From 5bff85745f3f6a2b3df727293344ec0550116724 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Mon, 17 Jul 2023 12:45:39 -0700 Subject: [PATCH 11/13] Use `PyMapping_Items()` instead of `src.attr("items")()`, to be internally consistent with `PyMapping_Check()` --- include/pybind11/stl.h | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/include/pybind11/stl.h b/include/pybind11/stl.h index 5a1820a0..2d77a91b 100644 --- a/include/pybind11/stl.h +++ b/include/pybind11/stl.h @@ -100,6 +100,9 @@ inline bool PyObjectTypeIsConvertibleToStdMap(PyObject *obj) { if (PyDict_Check(obj)) { return true; } + // Implicit requirement in the conditions below: + // A type with `.__getitem__()` & `.items()` methods must implement these + // to be compatible with https://docs.python.org/3/c-api/mapping.html if (PyMapping_Check(obj) == 0) { return false; } @@ -239,11 +242,10 @@ struct map_caster { if (!convert) { return false; } - // Designed to be behavior-equivalent to passing dict(src.items()) from Python: - // The conversion to a dict will first exhaust the iterator object, to ensure that - // the iterator is not left in an unpredictable (to the caller) partially-consumed - // state. - auto items = src.attr("items")(); + auto items = reinterpret_steal(PyMapping_Items(src.ptr())); + if (!items) { + throw error_already_set(); + } assert(isinstance(items)); return convert_elements(dict(reinterpret_borrow(items)), convert); } From 76da0d27057fc7a8b40ab009c22963e7fe430585 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Mon, 17 Jul 2023 14:12:52 -0700 Subject: [PATCH 12/13] Update link to PyCLIF sources. --- include/pybind11/stl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/pybind11/stl.h b/include/pybind11/stl.h index 2d77a91b..b424bded 100644 --- a/include/pybind11/stl.h +++ b/include/pybind11/stl.h @@ -39,7 +39,7 @@ PYBIND11_NAMESPACE_BEGIN(detail) // // Begin: Equivalent of -// https://github.com/google/clif/blob/337b2a106c0552ad85d40cfc3797465756840ea0/clif/python/runtime.cc#L388-L424 +// https://github.com/google/clif/blob/ae4eee1de07cdf115c0c9bf9fec9ff28efce6f6c/clif/python/runtime.cc#L388-L438 /* The three `PyObjectTypeIsConvertibleTo*()` functions below are the result of converging the behaviors of pybind11 and PyCLIF From 2956aa9da29a5e54ddd80f97391a85b6620c4c23 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Mon, 17 Jul 2023 15:02:04 -0700 Subject: [PATCH 13/13] Fix typo (thanks @wangxf123456 for catching this) --- include/pybind11/stl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/pybind11/stl.h b/include/pybind11/stl.h index b424bded..ae81a70e 100644 --- a/include/pybind11/stl.h +++ b/include/pybind11/stl.h @@ -66,7 +66,7 @@ to prevent accidents and improve readability: is that this conversion is not reducing. Implicit conversions of this kind are also fairly commonly used, therefore enforcing explicit conversions would have an unfavorable cost : benefit ratio; more sloppily speaking, - such an enforcement would be more annyoing than helpful. + such an enforcement would be more annoying than helpful. */ inline bool PyObjectIsInstanceWithOneOfTpNames(PyObject *obj,