diff --git a/Makefile b/Makefile index 595aa1a2..df82f941 100644 --- a/Makefile +++ b/Makefile @@ -44,6 +44,7 @@ ifneq ($(SANITIZE_ADDRESS),0) OPTLEVEL := 0 CXXFLAGS += -fsanitize=address -fno-omit-frame-pointer -static-libasan LDFLAGS += -fsanitize=address -static-libasan + ASAN_OPTIONS=malloc_context_size=50 endif SANITIZE_UNDEFINED ?= 0 @@ -89,7 +90,7 @@ TEST_INCLUDE := -I tests -I $(GTEST_DIR)/include TESTRUNNER := tests/run ALL_SOURCES := $(SOURCES) $(EXAMPLE_SOURCES) $(TEST_SOURCES) -ALL_HEADERS := include/$(LIBRARY)/**.h +ALL_HEADERS := include/libpy/**.h ALL_FLAGS := 'CFLAGS=$(CFLAGS) CXXFLAGS=$(CXXFLAGS) LDFLAGS=$(LDFLAGS)' @@ -130,6 +131,8 @@ example-%: examples/% .PHONY: test test: $(TESTRUNNER) @GTEST_OUTPUT=$(GTEST_OUTPUT) \ + ASAN_OPTIONS=$(ASAN_OPTIONS) \ + LSAN_OPTIONS=$(LSAN_OPTIONS) \ LD_LIBRARY_PATH=. \ LSAN_OPTIONS=$(LSAN_OPTIONS) \ $< --gtest_filter=$(GTEST_FILTER) @@ -139,7 +142,8 @@ gdbtest: $(TESTRUNNER) @LD_LIBRARY_PATH=. GTEST_BREAK_ON_FAILURE=$(GTEST_BREAK) gdb -ex run $< tests/%.o: tests/%.cc .compiler_flags - $(CXX) $(CXXFLAGS) $(INCLUDE) $(TEST_INCLUDE) -MD -fPIC -c $< -o $@ + $(CXX) $(CXXFLAGS) $(INCLUDE) $(TEST_INCLUDE) -DLIBPY_COMPILING_FOR_TESTS \ + -MD -fPIC -c $< -o $@ $(TESTRUNNER): gtest.a $(TEST_OBJECTS) $(SONAME) $(CXX) -o $@ $(TEST_OBJECTS) gtest.a $(TEST_INCLUDE) \ diff --git a/etc/format-all b/etc/format-all deleted file mode 100755 index a382037f..00000000 --- a/etc/format-all +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/bash -set -e - -find -name *.h -exec clang-format -i {} + -find -name *.cc -exec clang-format -i {} + diff --git a/include/libpy/any.h b/include/libpy/any.h index 97c94ff8..aeb491a4 100644 --- a/include/libpy/any.h +++ b/include/libpy/any.h @@ -8,6 +8,8 @@ #include #include "libpy/demangle.h" +#include "libpy/exception.h" +#include "libpy/to_object.h" namespace py { namespace detail { @@ -29,6 +31,7 @@ struct any_vtable_impl { void (*destruct)(void* addr); bool (*ne)(const void* lhs, const void* rhs); bool (*eq)(const void* lhs, const void* rhs); + py::scoped_ref<> (*to_object)(const void* addr); py::util::demangled_cstring (*type_name)(); }; @@ -64,6 +67,18 @@ constexpr any_vtable_impl any_vtable_instance = { [](const void* lhs, const void* rhs) -> bool { return *static_cast(lhs) == *static_cast(rhs); }, + [](const void* addr) -> scoped_ref<> { + if constexpr (py::has_to_object) { + return to_object(*static_cast(addr)); + } + else { + static_cast(addr); + throw py::exception(PyExc_TypeError, + "cannot convert values of type ", + py::util::type_name().get(), + " into Python object"); + } + }, []() { return py::util::type_name(); }, }; } // namespace detail @@ -165,10 +180,14 @@ class any_vtable { return m_impl->ne(lhs, rhs); } - inline bool eq(const void* lhs, const void* rhs) { + inline bool eq(const void* lhs, const void* rhs) const { return m_impl->eq(lhs, rhs); } + inline scoped_ref<> to_object(const void* addr) const { + return m_impl->to_object(addr); + } + inline py::util::demangled_cstring type_name() const { return m_impl->type_name(); } @@ -422,4 +441,15 @@ template any_cref make_any_cref(T& ob) { return {&ob, any_vtable::make()}; } + +namespace dispatch { +/** Convert an any_ref into a Python object. + */ +template<> +struct to_object { + static PyObject* f(const py::any_ref& ref) { + return ref.vtable().to_object(ref.addr()).escape(); + } +}; +} // namespace dispatch } // namespace py diff --git a/include/libpy/call_function.h b/include/libpy/call_function.h index 5a2a8b0b..fe3f4f8f 100644 --- a/include/libpy/call_function.h +++ b/include/libpy/call_function.h @@ -6,9 +6,8 @@ #include #include -#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION -#include +#include "libpy/numpy_utils.h" #include "libpy/scoped_ref.h" #include "libpy/to_object.h" diff --git a/include/libpy/numpy_utils.h b/include/libpy/numpy_utils.h index a04924d7..f99c1444 100644 --- a/include/libpy/numpy_utils.h +++ b/include/libpy/numpy_utils.h @@ -7,6 +7,59 @@ #include #include + +// Numpy expects code is being used in a way that has a 1:1 correspondence from source +// file to Python extension (as a shared object), for example: +// +// // my_extension.c +// #include arrayobject.h +// ... +// PyMODINIT_FUNC PyInit_myextension() { +// import_array(); +// // Rest of module setup. +// } +// +// Normally when writing a C++ library, header files provide declarations for functions +// and types but not the definitions (except for templates and inline functions) and the +// actual definition lives in one or more C++ files that get linked together into a single +// shared object. Code that just wants to consume the library can include the files so +// that the compiler knows the types and signatures provided by the library, but it +// doesn't need to include the definitions in the consumer's compiled output. Instead, +// when the object is loaded, it will also load the shared object and resolve all of the +// symbols to the definitions in the shared object. This allows the implementation to +// changed in backwards compatible ways and ensures that in a single process, all users of +// the library have the same version. One downside is that the linker uses it's own path +// resolution machinery to find the actual shared object file to use. Also, the resolution +// happens when the shared object is loaded, no code can run before the symbols are +// resolved. Numpy doesn't want users to need to deal with C/C++ linker stuff, and just +// wants to be able to use the Python import system to find the numpy shared object(s). To +// do this, numpy uses it's own linking system. The `numpy/arrayobject.h` will put a +// `static void** PyArray_API = nullptr` name into *each* object that includes it. Many if +// not all of the API functions in numpy are actually macros that resolve to something +// like: `((PyArrayAPIObject*) PyArray_API)->function`. The `import_array()` macro will +// import (through Python) the needed numpy extension modules to get the `PyArray_API` out +// of a capsule-like object. `import_array()` is doing something very similar to what a +// linker does, but without special compiler/linker assistance. +// +// This whole system works fine for when a single TU turns into a single object; however, +// the test suite for libpy links all the test files together along with `main.cc` into a +// single program. This has made it very hard to figure out when and how to initialize the +// `PyArray_API` value. Instead, we now set a macro when compiling for the tests +// (`LIBPY_COMPILING_FOR_TESTS`) which will control the `NO_IMPORT_ARRAY` flag. This flag +// tells numpy to declare the `PyArray_API` flag as an `extern "C" void** PyArray_API`, +// meaning we expect to have this symbol defined by another object we are to be linked +// with. In `main.cc` we also set `LIBPY_TEST_MAIN` to disable `NO_IMPORT_ARRAY` which +// causes changes the declaration of `PyArray_API` to change to: `#define PyArray_API +// PY_ARRAY_UNIQUE_SYMBOL` and then `void** PyArray_API`. Importantly, this removes the +// `static` and `extern` causing the symbol to have external linkage. Then, because the +// tests are declaring the same symbol as extern, they will all resolve to the same +// `PyArray_API` instance and we only need to call `import_array` once in `main.cc`. +#if LIBPY_COMPILING_FOR_TESTS +#define PY_ARRAY_UNIQUE_SYMBOL PyArray_API_libpy +#ifndef LIBPY_TEST_MAIN +#define NO_IMPORT_ARRAY +#endif +#endif #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION #include @@ -20,21 +73,29 @@ #include "libpy/to_object.h" namespace py { -class ensure_import_array { -public: - ensure_import_array() { +inline void ensure_import_array() { + // If `NO_IMPORT_ARRAY` is defined, this is a nop. +#ifndef NO_IMPORT_ARRAY + if (PyArray_API) { + // already imported + return; + } + #if PY_MAJOR_VERSION == 2 - import_array(); + []() -> void { import_array(); }(); #else - // this macro returns NULL in Python 3 so we need to put it in a - // function to call it to ignore the return statement - []() -> std::nullptr_t { - import_array(); - return nullptr; - }(); + // this macro returns NULL in Python 3 so we need to put it in a + // function to call it to ignore the return statement + []() -> std::nullptr_t { + import_array(); + return nullptr; + }(); #endif + if (PyErr_Occurred()) { + throw py::exception{}; } -}; +#endif +} /** A strong typedef of npy_bool to not be ambiguous with `unsigned char` but may still be used in a vector without the dreaded `std::vector`. @@ -112,152 +173,53 @@ namespace dispatch { template struct new_dtype; -template<> -struct new_dtype { +template +struct new_dtype_from_typecode { static PyArray_Descr* get() { - auto ob = PyArray_DescrFromType(NPY_INT8); - if (!ob) { - return nullptr; - } - Py_INCREF(ob); - return ob; + PyArray_Descr* out = PyArray_DescrFromType(typecode); + Py_XINCREF(out); + return out; } }; template<> -struct new_dtype { - static PyArray_Descr* get() { - auto ob = PyArray_DescrFromType(NPY_INT16); - if (!ob) { - return nullptr; - } - Py_INCREF(ob); - return ob; - } -}; +struct new_dtype : new_dtype_from_typecode {}; template<> -struct new_dtype { - static PyArray_Descr* get() { - auto ob = PyArray_DescrFromType(NPY_INT32); - if (!ob) { - return nullptr; - } - Py_INCREF(ob); - return ob; - } -}; +struct new_dtype : new_dtype_from_typecode {}; template<> -struct new_dtype { - static PyArray_Descr* get() { - auto ob = PyArray_DescrFromType(NPY_INT64); - if (!ob) { - return nullptr; - } - Py_INCREF(ob); - return ob; - } -}; +struct new_dtype : new_dtype_from_typecode {}; template<> -struct new_dtype { - static PyArray_Descr* get() { - auto ob = PyArray_DescrFromType(NPY_UINT8); - if (!ob) { - return nullptr; - } - Py_INCREF(ob); - return ob; - } -}; +struct new_dtype : new_dtype_from_typecode {}; template<> -struct new_dtype { - static PyArray_Descr* get() { - auto ob = PyArray_DescrFromType(NPY_UINT16); - if (!ob) { - return nullptr; - } - Py_INCREF(ob); - return ob; - } -}; +struct new_dtype : new_dtype_from_typecode {}; template<> -struct new_dtype { - static PyArray_Descr* get() { - auto ob = PyArray_DescrFromType(NPY_UINT32); - if (!ob) { - return nullptr; - } - Py_INCREF(ob); - return ob; - } -}; +struct new_dtype : new_dtype_from_typecode {}; template<> -struct new_dtype { - static PyArray_Descr* get() { - auto ob = PyArray_DescrFromType(NPY_UINT64); - if (!ob) { - return nullptr; - } - Py_INCREF(ob); - return ob; - } -}; +struct new_dtype : new_dtype_from_typecode {}; template<> -struct new_dtype { - static PyArray_Descr* get() { - auto ob = PyArray_DescrFromType(NPY_FLOAT32); - if (!ob) { - return nullptr; - } - Py_INCREF(ob); - return ob; - } -}; +struct new_dtype : new_dtype_from_typecode {}; template<> -struct new_dtype { - static PyArray_Descr* get() { - auto ob = PyArray_DescrFromType(NPY_FLOAT64); - if (!ob) { - return nullptr; - } - Py_INCREF(ob); - return ob; - } -}; +struct new_dtype : new_dtype_from_typecode {}; template<> -struct new_dtype { - static PyArray_Descr* get() { - auto ob = PyArray_DescrFromType(NPY_OBJECT); - if (!ob) { - return nullptr; - } - Py_INCREF(ob); - return ob; - } -}; +struct new_dtype : new_dtype_from_typecode {}; + +template<> +struct new_dtype : new_dtype_from_typecode {}; template struct new_dtype> : new_dtype {}; template<> -struct new_dtype { - static PyArray_Descr* get() { - auto ob = PyArray_DescrFromType(NPY_BOOL); - if (!ob) { - return nullptr; - } - Py_INCREF(ob); - return ob; - } -}; +struct new_dtype : new_dtype_from_typecode {}; template<> struct new_dtype : public new_dtype {}; @@ -517,6 +479,13 @@ struct from_object { } }; +template<> +struct to_object { + static PyObject* f(py_bool v) { + return PyBool_FromLong(v.value); + } +}; + /** Convert a datetime64 in to a numpy array scalar. */ template @@ -527,47 +496,7 @@ struct to_object> { return nullptr; } std::int64_t as_int = static_cast(dt); - return PyArray_Scalar(&as_int, descr.get(), NULL); - } -}; - -/** Convert an any_ref into a numpy array scalar. - */ -template<> -struct to_object { - static PyObject* f(const py::any_ref& ref) { - auto descr = py::dispatch::vtable_to_dtype(ref.vtable()); - if (!descr) { - return nullptr; - } - - // Construct a dimension-0 array from the numpy dtype. - // This is equivalent to np.ndarray((), dtype). - scoped_ref arr(PyArray_NewFromDescr(&PyArray_Type, - descr.get(), - 0, - nullptr, - nullptr, - nullptr, - NPY_ARRAY_CARRAY, - nullptr)); - if (!arr) { - return nullptr; - } - // `PyArray_NewFromDescr` steals a reference to `descr` on success, and we're done - // with it, so release it to avoid double decrefing. - std::move(descr).escape(); - - // Copy data from the any_ref into numpy array. - ref.vtable().copy_construct(PyArray_DATA( - reinterpret_cast(arr.get())), - ref.addr()); - - PyObject* unconverted_out = std::move(arr).escape(); - - // Convert 0-dimensional array into an array scalar with the proper type, - // (e.g. `np.datetime64`). - return PyArray_Return(reinterpret_cast(unconverted_out)); + return PyArray_Scalar(&as_int, descr.get(), nullptr); } }; } // namespace dispatch @@ -695,7 +624,7 @@ scoped_ref<> move_to_numpy_array(std::vector&& values) { return nullptr; } return move_to_numpy_array, 1>(std::move(values), - descr, + std::move(descr), {values.size()}, {sizeof(T)}); } @@ -707,7 +636,7 @@ inline scoped_ref<> move_to_numpy_array(py::any_vector&& values) { return nullptr; } return move_to_numpy_array(std::move(values), - descr, + std::move(descr), {values.size()}, {static_cast( values.vtable().size())}); diff --git a/include/libpy/scoped_ref.h b/include/libpy/scoped_ref.h index d9c589e4..5f925e0d 100644 --- a/include/libpy/scoped_ref.h +++ b/include/libpy/scoped_ref.h @@ -118,4 +118,8 @@ class scoped_ref final { return m_ref != other.get(); } }; +static_assert(std::is_standard_layout>::value, + "scoped_ref<> should be standard layout"); +static_assert(sizeof(scoped_ref<>) == sizeof(PyObject*), + "alias type should be the same size as aliased type"); } // namespace py diff --git a/include/libpy/to_object.h b/include/libpy/to_object.h index 7bd459c7..3631d588 100644 --- a/include/libpy/to_object.h +++ b/include/libpy/to_object.h @@ -26,6 +26,29 @@ template struct to_object; } // namespace dispatch +namespace detail { +template +struct has_to_object { +private: + template + static decltype(py::dispatch::to_object::f(std::declval()), std::true_type{}) + test(int); + + template + static std::false_type test(long); + +public: + static constexpr bool value = std::is_same_v(0)), std::true_type>; +}; +} // namespace detail + +/** Compile time boolean to detect if `to_object` works for a given type. This exists to + make it easier to use `if constexpr` to test this condition instead of using more + complicated SFINAE. + */ +template +constexpr bool has_to_object = detail::has_to_object::value; + /** Convert a C++ object into a Python object recursively. @param ob The object to convert @@ -130,6 +153,13 @@ struct to_object : public detail::int_to_object template<> struct to_object : public detail::int_to_object {}; +template<> +struct to_object { + static PyObject* f(float value) { + return PyFloat_FromDouble(value); + } +}; + template<> struct to_object { static PyObject* f(double value) { diff --git a/testleaks.supp b/testleaks.supp index 1b9b39eb..31932297 100644 --- a/testleaks.supp +++ b/testleaks.supp @@ -1,4 +1,8 @@ leak:libpython +leak:_ctypes +leak:_struct leak:_ctypes.cpython leak:_ctypes.x86_64-linux-gnu.so leak:numpy/core/src/multiarray/arraytypes.c.src +leak:numpy/core/src/multiarray/alloc.c +leak:numpy/core/src/multiarray/ctors.c diff --git a/tests/main.cc b/tests/main.cc index 2e3106b0..757e70aa 100644 --- a/tests/main.cc +++ b/tests/main.cc @@ -1,6 +1,15 @@ #include "gtest/gtest.h" +#include + +#define LIBPY_TEST_MAIN +#include "libpy/numpy_utils.h" int main(int argc, char** argv) { + Py_Initialize(); + py::ensure_import_array(); + testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); + int out = RUN_ALL_TESTS(); + Py_Finalize(); + return out; } diff --git a/tests/test_from_object.cc b/tests/test_from_object.cc index 7a1c6eaf..988cadcc 100644 --- a/tests/test_from_object.cc +++ b/tests/test_from_object.cc @@ -98,8 +98,6 @@ TEST_F(from_object, test_overflow) { } TEST_F(from_object, ndarray_view) { - py::ensure_import_array _; - { auto ndarray = py::move_to_numpy_array(std::vector{0, 1, 2, 3}); auto view = py::from_object>(ndarray); @@ -142,8 +140,6 @@ TEST_F(from_object, ndarray_view) { } TEST_F(from_object, ndarray_view_any_ref) { - py::ensure_import_array _; - { auto ndarray = py::move_to_numpy_array(std::vector{0, 1, 2, 3}); auto view = py::from_object>(ndarray); diff --git a/tests/test_to_object.cc b/tests/test_to_object.cc index fcc2b680..77aee061 100644 --- a/tests/test_to_object.cc +++ b/tests/test_to_object.cc @@ -4,9 +4,11 @@ #include "gtest/gtest.h" +#include "libpy/any.h" #include "libpy/dense_hash_map.h" #include "libpy/itertools.h" #include "libpy/meta.h" +#include "libpy/numpy_utils.h" #include "libpy/to_object.h" #include "test_utils.h" @@ -171,4 +173,51 @@ TEST_F(to_object, test_vector_to_object) { std::apply([&](auto... vec) { (test_vector_to_object_impl(vec), ...); }, vectors); } +TEST_F(to_object, any_ref_of_object_refcnt) { + PyObject* ob = Py_None; + Py_ssize_t baseline_refcnt = Py_REFCNT(ob); + { + Py_INCREF(ob); + py::scoped_ref sr(ob); + ASSERT_EQ(Py_REFCNT(ob), baseline_refcnt + 1); + + py::any_ref ref(&sr, py::any_vtable::make()); + // `any_ref` is *non-owning* so the reference count doesn't change + ASSERT_EQ(Py_REFCNT(ob), baseline_refcnt + 1); + { + auto to_object_result = py::to_object(ref); + ASSERT_TRUE(to_object_result); + // `to_object` returns a new owning reference + ASSERT_EQ(Py_REFCNT(ob), baseline_refcnt + 2); + + EXPECT_EQ(to_object_result.get(), ob); + } + + // `to_object_result` goes out of scope, releasing its reference + ASSERT_EQ(Py_REFCNT(ob), baseline_refcnt + 1); + } + + // `sr` goes out of scope, releasing its reference + ASSERT_EQ(Py_REFCNT(ob), baseline_refcnt); +} + +TEST_F(to_object, any_ref_non_convertible_object) { + // The most simple type which can be put into an `any_ref`. There is no `to_object` + // dispatch, so we expect `to_object(S{})` would throw a runtime exception. + struct S { + bool operator==(const S&) const { + return true; + } + + bool operator!=(const S&) const { + return false; + } + }; + + S value; + py::any_ref ref(&value, py::any_vtable::make()); + + EXPECT_THROW(py::to_object(ref), py::exception); + PyErr_Clear(); +} } // namespace test_to_object diff --git a/tests/test_utils.h b/tests/test_utils.h index 7a1156f1..0aec17b6 100644 --- a/tests/test_utils.h +++ b/tests/test_utils.h @@ -42,9 +42,6 @@ inline std::string format_current_python_exception() { class with_python_interpreter : public testing::Test { public: inline static void SetUpTestCase() { - // initialize the Python interpreter state - Py_Initialize(); - py::scoped_ref sys(PyImport_ImportModule("sys")); if (!sys) { throw py::exception(); @@ -63,11 +60,6 @@ class with_python_interpreter : public testing::Test { } } - inline static void TearDownTestCase() { - // tear down the Python interpreter state - Py_Finalize(); - } - virtual void TearDown() { EXPECT_FALSE(PyErr_Occurred()) << format_current_python_exception(); PyErr_Clear();