From 98ea078189926c24a289d129c0cd3e74d54f93ea Mon Sep 17 00:00:00 2001 From: Aaron Lun Date: Fri, 13 Dec 2024 11:19:51 -0800 Subject: [PATCH] Revert to passing matrices to/from C++ via uintptr_t's. (#35) This was initially motivated by the failure of pybind11's auto-conversion for std::shared_ptr in downstream packages when using a prebuilt wheel of mattress. More generally, this change avoids any reliance on pybind11 in downstream packages; as long as they can take a uintptr_t, they can use mattress. We also introduce a BoundMatrix class inside a mattress.h header that can be used by downstream packages to get the casting correct. GC protection is now fully handled in C++ via BoundMatrix::original, which reduces the risk of the bugs that could arise when memory management was split across C++ and Python. --- CHANGELOG.md | 6 + MANIFEST.in | 1 + README.md | 20 +- lib/CMakeLists.txt | 1 + lib/src/common.cpp | 298 ++++++++++-------- lib/src/compressed_sparse_matrix.cpp | 46 ++- lib/src/def.h | 12 - .../delayed_binary_isometric_operation.cpp | 53 ++-- lib/src/delayed_bind.cpp | 18 +- lib/src/delayed_subset.cpp | 18 +- lib/src/delayed_transpose.cpp | 10 +- ...layed_unary_isometric_operation_simple.cpp | 65 ++-- ...ed_unary_isometric_operation_with_args.cpp | 109 ++++--- lib/src/dense_matrix.cpp | 14 +- lib/src/fragmented_sparse_matrix.cpp | 48 ++- src/mattress/InitializedMatrix.py | 36 +-- src/mattress/__init__.py | 11 + src/mattress/include/mattress.h | 57 ++++ src/mattress/initialize.py | 111 ++----- 19 files changed, 552 insertions(+), 382 deletions(-) create mode 100644 MANIFEST.in delete mode 100644 lib/src/def.h create mode 100644 src/mattress/include/mattress.h diff --git a/CHANGELOG.md b/CHANGELOG.md index df4d9ad..faf89e6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ # Changelog +## Version 0.3.1 + +- Cast to/from `uintptr_t` so that downstream packages aren't forced to rely on **pybind11** converters. 
+- Added a `mattress.h` to ensure developers use the correct types during casting. +- Shift all responsibility for GC protection to C++ via the new `mattress::BoundMatrix` class. + ## Version 0.3.0 - Switch to **pybind11** for the Python/C++ interface, with CMake for the build system. diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..d8905f5 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1 @@ +recursive-include src/mattress/include * diff --git a/README.md b/README.md index c34eed0..327cd59 100644 --- a/README.md +++ b/README.md @@ -32,22 +32,26 @@ pip install mattress The aim is to allow package C++ code to accept [all types of matrix representations](#supported-matrices) without requiring re-compilation of the associated code. To achive this: -1. Add `assorthead.includes()` to the compiler's include path. +1. Add `mattress.includes()` and `assorthead.includes()` to the compiler's include path. This can be done through `include_dirs=` of the `Extension()` definition in `setup.py` or by adding a `target_include_directories()` in CMake, depending on the build system. 2. Call `mattress.initialize()` on a Python matrix object to wrap it in a **tatami**-compatible C++ representation. This returns an `InitializedMatrix` with a `ptr` property that contains a pointer to the C++ matrix. -3. Pass `ptr` to [**pybind11**](https://pybind11.readthedocs.io)-wrapped C++ code as a [shared pointer to a `tatami::Matrix`](lib/src/def.h), +3. Pass `ptr` to C++ code as a `uintptr_t` referencing a `tatami::Matrix`, which can be interrogated as described in the [**tatami** documentation](https://github.com/tatami-inc/tatami). So, for example, the C++ code in our downstream package might look like the code below: ```cpp -int do_something(const std::shared_ptr >& mat) { +#include "mattress.h" + +int do_something(uintptr_t ptr) { + const auto& mat_ptr = mattress::cast(ptr)->ptr; // Do something with the tatami interface. 
return 1; } +// Assuming we're using pybind11, but any framework that can accept a uintptr_t is fine. PYBIND11_MODULE(lib_downstream, m) { m.def("do_something", &do_something); } @@ -64,7 +68,7 @@ def do_something(x): return lib.do_something(tmat.ptr) ``` -See [`lib/src/def.h`](lib/src/def.h) for the exact definitions of the interface types used by **mattress**. +Check out [the included header](src/mattress/include/mattress.h) for more definitions. ## Supported matrices @@ -165,10 +169,10 @@ init2 = initialize(wrapped) This is more efficient as it re-uses the `InitializedMatrix` already generated from `x`. It is also more convenient as we don't have to carry around `x` to generate `init2`. -## Extending `initialize()` +## Extending to custom matrices Developers can extend **mattress** to custom matrix classes by registering new methods with the `initialize()` generic. -This should return a `InitializedMatrix` object containing a shared pointer to a `tatami::Matrix` instance (see [`lib/src/def.h`](lib/src/def.h) for types). +This should return an `InitializedMatrix` object containing a `uintptr_t` cast from a pointer to a `tatami::Matrix` (see [the included header](src/mattress/include/mattress.h)). Once this is done, all calls to `initialize()` will be able to handle matrices of the newly registered types. ```python @@ -178,9 +182,9 @@ import mattress @mattress.initialize.register def _initialize_my_custom_matrix(x: MyCustomMatrix): data = x.some_internal_data - return mattress.InitializedMatrix(lib.initialize_custom(data), objects=[data]) + return mattress.InitializedMatrix(lib.initialize_custom(data)) ``` If the initialized `tatami::Matrix` contains references to Python-managed data, e.g., in NumPy arrays, we must ensure that the data is not garbage-collected during the lifetime of the `tatami::Matrix`. -This is achieved by storing a reference to the data in the `objects=` argument of the `InitializedMatrix`. 
+This is achieved by storing a reference to the data in the `original` member of the `mattress::BoundMatrix`. diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt index bb79f4b..7e8ed7d 100644 --- a/lib/CMakeLists.txt +++ b/lib/CMakeLists.txt @@ -24,6 +24,7 @@ pybind11_add_module(mattress ) target_include_directories(mattress PRIVATE "${ASSORTHEAD_INCLUDE_DIR}") +target_include_directories(mattress PRIVATE "../src/mattress/include") set_property(TARGET mattress PROPERTY CXX_STANDARD 17) diff --git a/lib/src/common.cpp b/lib/src/common.cpp index cccaaa0..ecaaa61 100644 --- a/lib/src/common.cpp +++ b/lib/src/common.cpp @@ -1,4 +1,4 @@ -#include "def.h" +#include "mattress.h" #include "utils.h" #include "pybind11/pybind11.h" @@ -6,30 +6,38 @@ #include "tatami_stats/tatami_stats.hpp" -pybind11::tuple extract_dim(const MatrixPointer& mat) { +void free_mattress(uintptr_t ptr) { + delete mattress::cast(ptr); +} + +pybind11::tuple extract_dim(uintptr_t ptr) { + const auto& mat = mattress::cast(ptr)->ptr; pybind11::tuple output(2); output[0] = mat->nrow(); output[1] = mat->ncol(); return output; } -bool extract_sparse(const MatrixPointer& mat) { +bool extract_sparse(uintptr_t ptr) { + const auto& mat = mattress::cast(ptr)->ptr; return mat->is_sparse(); } -pybind11::array_t extract_row(const MatrixPointer& mat, MatrixIndex r) { - pybind11::array_t output(mat->ncol()); - auto optr = static_cast(output.request().ptr); - auto ext = tatami::consecutive_extractor(mat.get(), true, r, 1); +pybind11::array_t extract_row(uintptr_t ptr, mattress::MatrixIndex r) { + const auto& mat = mattress::cast(ptr)->ptr; + pybind11::array_t output(mat->ncol()); + auto optr = static_cast(output.request().ptr); + auto ext = tatami::consecutive_extractor(mat.get(), true, r, 1); auto out = ext->fetch(optr); tatami::copy_n(out, output.size(), optr); return output; } -pybind11::array_t extract_column(const MatrixPointer& mat, MatrixIndex c) { - pybind11::array_t output(mat->nrow()); - auto optr = 
static_cast(output.request().ptr); - auto ext = tatami::consecutive_extractor(mat.get(), false, c, 1); +pybind11::array_t extract_column(uintptr_t ptr, mattress::MatrixIndex c) { + const auto& mat = mattress::cast(ptr)->ptr; + pybind11::array_t output(mat->nrow()); + auto optr = static_cast(output.request().ptr); + auto ext = tatami::consecutive_extractor(mat.get(), false, c, 1); auto out = ext->fetch(optr); tatami::copy_n(out, output.size(), optr); return output; @@ -37,100 +45,111 @@ pybind11::array_t extract_column(const MatrixPointer& mat, MatrixIn /** Stats **/ -pybind11::array_t compute_column_sums(const MatrixPointer& mat, int num_threads) { - pybind11::array_t output(mat->ncol()); - auto optr = static_cast(output.request().ptr); +pybind11::array_t compute_column_sums(uintptr_t ptr, int num_threads) { + const auto& mat = mattress::cast(ptr)->ptr; + pybind11::array_t output(mat->ncol()); + auto optr = static_cast(output.request().ptr); tatami_stats::sums::Options opt; opt.num_threads = num_threads; tatami_stats::sums::apply(false, mat.get(), optr, opt); return output; } -pybind11::array_t compute_row_sums(const MatrixPointer& mat, int num_threads) { - pybind11::array_t output(mat->nrow()); - auto optr = static_cast(output.request().ptr); +pybind11::array_t compute_row_sums(uintptr_t ptr, int num_threads) { + const auto& mat = mattress::cast(ptr)->ptr; + pybind11::array_t output(mat->nrow()); + auto optr = static_cast(output.request().ptr); tatami_stats::sums::Options opt; opt.num_threads = num_threads; tatami_stats::sums::apply(true, mat.get(), optr, opt); return output; } -pybind11::array_t compute_column_variances(const MatrixPointer& mat, int num_threads) { - pybind11::array_t output(mat->ncol()); - auto optr = static_cast(output.request().ptr); +pybind11::array_t compute_column_variances(uintptr_t ptr, int num_threads) { + const auto& mat = mattress::cast(ptr)->ptr; + pybind11::array_t output(mat->ncol()); + auto optr = static_cast(output.request().ptr); 
tatami_stats::variances::Options opt; opt.num_threads = num_threads; tatami_stats::variances::apply(false, mat.get(), optr, opt); return output; } -pybind11::array_t compute_row_variances(const MatrixPointer& mat, int num_threads) { - pybind11::array_t output(mat->nrow()); - auto optr = static_cast(output.request().ptr); +pybind11::array_t compute_row_variances(uintptr_t ptr, int num_threads) { + const auto& mat = mattress::cast(ptr)->ptr; + pybind11::array_t output(mat->nrow()); + auto optr = static_cast(output.request().ptr); tatami_stats::variances::Options opt; opt.num_threads = num_threads; tatami_stats::variances::apply(true, mat.get(), optr, opt); return output; } -pybind11::array_t compute_column_medians(const MatrixPointer& mat, int num_threads) { - pybind11::array_t output(mat->ncol()); - auto optr = static_cast(output.request().ptr); +pybind11::array_t compute_column_medians(uintptr_t ptr, int num_threads) { + const auto& mat = mattress::cast(ptr)->ptr; + pybind11::array_t output(mat->ncol()); + auto optr = static_cast(output.request().ptr); tatami_stats::medians::Options opt; opt.num_threads = num_threads; tatami_stats::medians::apply(false, mat.get(), optr, opt); return output; } -pybind11::array_t compute_row_medians(const MatrixPointer& mat, int num_threads) { - pybind11::array_t output(mat->nrow()); - auto optr = static_cast(output.request().ptr); +pybind11::array_t compute_row_medians(uintptr_t ptr, int num_threads) { + const auto& mat = mattress::cast(ptr)->ptr; + pybind11::array_t output(mat->nrow()); + auto optr = static_cast(output.request().ptr); tatami_stats::medians::Options opt; opt.num_threads = num_threads; tatami_stats::medians::apply(true, mat.get(), optr, opt); return output; } -pybind11::array_t compute_column_mins(const MatrixPointer& mat, int num_threads) { - pybind11::array_t output(mat->ncol()); - auto optr = static_cast(output.request().ptr); +pybind11::array_t compute_column_mins(uintptr_t ptr, int num_threads) { + const auto& 
mat = mattress::cast(ptr)->ptr; + pybind11::array_t output(mat->ncol()); + auto optr = static_cast(output.request().ptr); tatami_stats::ranges::Options opt; opt.num_threads = num_threads; - tatami_stats::ranges::apply(false, mat.get(), optr, static_cast(NULL), opt); + tatami_stats::ranges::apply(false, mat.get(), optr, static_cast(NULL), opt); return output; } -pybind11::array_t compute_row_mins(const MatrixPointer& mat, int num_threads) { - pybind11::array_t output(mat->nrow()); - auto optr = static_cast(output.request().ptr); +pybind11::array_t compute_row_mins(uintptr_t ptr, int num_threads) { + const auto& mat = mattress::cast(ptr)->ptr; + pybind11::array_t output(mat->nrow()); + auto optr = static_cast(output.request().ptr); tatami_stats::ranges::Options opt; opt.num_threads = num_threads; - tatami_stats::ranges::apply(true, mat.get(), optr, static_cast(NULL), opt); + tatami_stats::ranges::apply(true, mat.get(), optr, static_cast(NULL), opt); return output; } -pybind11::array_t compute_column_maxs(const MatrixPointer& mat, int num_threads) { - pybind11::array_t output(mat->ncol()); - auto optr = static_cast(output.request().ptr); +pybind11::array_t compute_column_maxs(uintptr_t ptr, int num_threads) { + const auto& mat = mattress::cast(ptr)->ptr; + pybind11::array_t output(mat->ncol()); + auto optr = static_cast(output.request().ptr); tatami_stats::ranges::Options opt; opt.num_threads = num_threads; - tatami_stats::ranges::apply(false, mat.get(), static_cast(NULL), optr, opt); + tatami_stats::ranges::apply(false, mat.get(), static_cast(NULL), optr, opt); return output; } -pybind11::array_t compute_row_maxs(const MatrixPointer& mat, int num_threads) { - pybind11::array_t output(mat->nrow()); - auto optr = static_cast(output.request().ptr); +pybind11::array_t compute_row_maxs(uintptr_t ptr, int num_threads) { + const auto& mat = mattress::cast(ptr)->ptr; + pybind11::array_t output(mat->nrow()); + auto optr = static_cast(output.request().ptr); 
tatami_stats::ranges::Options opt; opt.num_threads = num_threads; - tatami_stats::ranges::apply(true, mat.get(), static_cast(NULL), optr, opt); + tatami_stats::ranges::apply(true, mat.get(), static_cast(NULL), optr, opt); return output; } -pybind11::tuple compute_row_ranges(const MatrixPointer& mat, int num_threads) { - pybind11::array_t mnout(mat->nrow()), mxout(mat->nrow()); - auto mnptr = static_cast(mnout.request().ptr); - auto mxptr = static_cast(mxout.request().ptr); +pybind11::tuple compute_row_ranges(uintptr_t ptr, int num_threads) { + const auto& mat = mattress::cast(ptr)->ptr; + pybind11::array_t mnout(mat->nrow()), mxout(mat->nrow()); + auto mnptr = static_cast(mnout.request().ptr); + auto mxptr = static_cast(mxout.request().ptr); tatami_stats::ranges::Options opt; opt.num_threads = num_threads; tatami_stats::ranges::apply(true, mat.get(), mnptr, mxptr, opt); @@ -141,10 +160,11 @@ pybind11::tuple compute_row_ranges(const MatrixPointer& mat, int num_threads) { return output; } -pybind11::tuple compute_column_ranges(const MatrixPointer& mat, int num_threads) { - pybind11::array_t mnout(mat->ncol()), mxout(mat->ncol()); - auto mnptr = static_cast(mnout.request().ptr); - auto mxptr = static_cast(mxout.request().ptr); +pybind11::tuple compute_column_ranges(uintptr_t ptr, int num_threads) { + const auto& mat = mattress::cast(ptr)->ptr; + pybind11::array_t mnout(mat->ncol()), mxout(mat->ncol()); + auto mnptr = static_cast(mnout.request().ptr); + auto mxptr = static_cast(mxout.request().ptr); tatami_stats::ranges::Options opt; opt.num_threads = num_threads; tatami_stats::ranges::apply(false, mat.get(), mnptr, mxptr, opt); @@ -155,18 +175,20 @@ pybind11::tuple compute_column_ranges(const MatrixPointer& mat, int num_threads) return output; } -pybind11::array_t compute_row_nan_counts(const MatrixPointer& mat, int num_threads) { - pybind11::array_t output(mat->nrow()); - auto optr = static_cast(output.request().ptr); +pybind11::array_t 
compute_row_nan_counts(uintptr_t ptr, int num_threads) { + const auto& mat = mattress::cast(ptr)->ptr; + pybind11::array_t output(mat->nrow()); + auto optr = static_cast(output.request().ptr); tatami_stats::counts::nan::Options opt; opt.num_threads = num_threads; tatami_stats::counts::nan::apply(true, mat.get(), optr, opt); return output; } -pybind11::array_t compute_column_nan_counts(const MatrixPointer& mat, int num_threads) { - pybind11::array_t output(mat->ncol()); - auto optr = static_cast(output.request().ptr); +pybind11::array_t compute_column_nan_counts(uintptr_t ptr, int num_threads) { + const auto& mat = mattress::cast(ptr)->ptr; + pybind11::array_t output(mat->ncol()); + auto optr = static_cast(output.request().ptr); tatami_stats::counts::nan::Options opt; opt.num_threads = num_threads; tatami_stats::counts::nan::apply(false, mat.get(), optr, opt); @@ -175,19 +197,21 @@ pybind11::array_t compute_column_nan_counts(const MatrixPointer& ma /** Grouped stats **/ -pybind11::array_t compute_row_sums_by_group(const MatrixPointer& mat, const pybind11::array& grouping, int num_threads) { - auto gptr = check_numpy_array(grouping); +pybind11::array_t compute_row_sums_by_group(uintptr_t ptr, const pybind11::array& grouping, int num_threads) { + const auto& mat = mattress::cast(ptr)->ptr; size_t ncol = mat->ncol(); + size_t nrow = mat->nrow(); + + auto gptr = check_numpy_array(grouping); if (grouping.size() != ncol) { throw std::runtime_error("'grouping' should have length equal to the number of columns"); } size_t ngroups = tatami_stats::total_groups(gptr, ncol); - size_t nrow = mat->nrow(); - pybind11::array_t output({ nrow, ngroups }); + pybind11::array_t output({ nrow, ngroups }); - auto optr = static_cast(output.request().ptr); - std::vector ptrs(ngroups); + auto optr = static_cast(output.request().ptr); + std::vector ptrs(ngroups); for (size_t g = 0; g < ngroups; ++g) { ptrs[g] = optr + g * nrow; } @@ -198,19 +222,21 @@ pybind11::array_t 
compute_row_sums_by_group(const MatrixPointer& ma return output; } -pybind11::array_t compute_column_sums_by_group(const MatrixPointer& mat, const pybind11::array& grouping, int num_threads) { - auto gptr = check_numpy_array(grouping); +pybind11::array_t compute_column_sums_by_group(uintptr_t ptr, const pybind11::array& grouping, int num_threads) { + const auto& mat = mattress::cast(ptr)->ptr; size_t nrow = mat->nrow(); + size_t ncol = mat->ncol(); + + auto gptr = check_numpy_array(grouping); if (grouping.size() != nrow) { throw std::runtime_error("'grouping' should have length equal to the number of rows"); } size_t ngroups = tatami_stats::total_groups(gptr, nrow); - size_t ncol = mat->ncol(); - pybind11::array_t output({ ncol, ngroups }); + pybind11::array_t output({ ncol, ngroups }); - auto optr = static_cast(output.request().ptr); - std::vector ptrs(ngroups); + auto optr = static_cast(output.request().ptr); + std::vector ptrs(ngroups); for (size_t g = 0; g < ngroups; ++g) { ptrs[g] = optr + g * ncol; } @@ -221,20 +247,22 @@ pybind11::array_t compute_column_sums_by_group(const MatrixPointer& return output; } -pybind11::array_t compute_row_variances_by_group(const MatrixPointer& mat, const pybind11::array_t& grouping, int num_threads) { - auto gptr = check_numpy_array(grouping); +pybind11::array_t compute_row_variances_by_group(uintptr_t ptr, const pybind11::array_t& grouping, int num_threads) { + const auto& mat = mattress::cast(ptr)->ptr; size_t ncol = mat->ncol(); + size_t nrow = mat->nrow(); + + auto gptr = check_numpy_array(grouping); if (grouping.size() != ncol) { throw std::runtime_error("'grouping' should have length equal to the number of columns"); } - auto group_sizes = tatami_stats::tabulate_groups(gptr, ncol); + auto group_sizes = tatami_stats::tabulate_groups(gptr, ncol); size_t ngroups = group_sizes.size(); - size_t nrow = mat->nrow(); - pybind11::array_t output({ nrow, ngroups }); + pybind11::array_t output({ nrow, ngroups }); - auto optr = 
static_cast(output.request().ptr); - std::vector ptrs(ngroups); + auto optr = static_cast(output.request().ptr); + std::vector ptrs(ngroups); for (size_t g = 0; g < ngroups; ++g) { ptrs[g] = optr + g * nrow; } @@ -245,20 +273,22 @@ pybind11::array_t compute_row_variances_by_group(const MatrixPointe return output; } -pybind11::array_t compute_column_variances_by_group(const MatrixPointer& mat, const pybind11::array_t& grouping, int num_threads) { - auto gptr = check_numpy_array(grouping); +pybind11::array_t compute_column_variances_by_group(uintptr_t ptr, const pybind11::array_t& grouping, int num_threads) { + const auto& mat = mattress::cast(ptr)->ptr; size_t nrow = mat->nrow(); + size_t ncol = mat->ncol(); + + auto gptr = check_numpy_array(grouping); if (grouping.size() != nrow) { throw std::runtime_error("'grouping' should have length equal to the number of rows"); } - auto group_sizes = tatami_stats::tabulate_groups(gptr, nrow); + auto group_sizes = tatami_stats::tabulate_groups(gptr, nrow); size_t ngroups = group_sizes.size(); - size_t ncol = mat->ncol(); - pybind11::array_t output({ ncol, ngroups }); + pybind11::array_t output({ ncol, ngroups }); - auto optr = static_cast(output.request().ptr); - std::vector ptrs(ngroups); + auto optr = static_cast(output.request().ptr); + std::vector ptrs(ngroups); for (size_t g = 0; g < ngroups; ++g) { ptrs[g] = optr + g * ncol; } @@ -269,20 +299,22 @@ pybind11::array_t compute_column_variances_by_group(const MatrixPoi return output; } -pybind11::array_t compute_row_medians_by_group(const MatrixPointer& mat, const pybind11::array_t& grouping, int num_threads) { - auto gptr = check_numpy_array(grouping); +pybind11::array_t compute_row_medians_by_group(uintptr_t ptr, const pybind11::array_t& grouping, int num_threads) { + const auto& mat = mattress::cast(ptr)->ptr; size_t ncol = mat->ncol(); + size_t nrow = mat->nrow(); + + auto gptr = check_numpy_array(grouping); if (grouping.size() != ncol) { throw 
std::runtime_error("'grouping' should have length equal to the number of columns"); } - auto group_sizes = tatami_stats::tabulate_groups(gptr, ncol); + auto group_sizes = tatami_stats::tabulate_groups(gptr, ncol); size_t ngroups = group_sizes.size(); - size_t nrow = mat->nrow(); - pybind11::array_t output({ nrow, ngroups }); + pybind11::array_t output({ nrow, ngroups }); - auto optr = static_cast(output.request().ptr); - std::vector ptrs(ngroups); + auto optr = static_cast(output.request().ptr); + std::vector ptrs(ngroups); for (size_t g = 0; g < ngroups; ++g) { ptrs[g] = optr + g * nrow; } @@ -293,20 +325,22 @@ pybind11::array_t compute_row_medians_by_group(const MatrixPointer& return output; } -pybind11::array_t compute_column_medians_by_group(const MatrixPointer& mat, const pybind11::array_t& grouping, int num_threads) { - auto gptr = check_numpy_array(grouping); +pybind11::array_t compute_column_medians_by_group(uintptr_t ptr, const pybind11::array_t& grouping, int num_threads) { + const auto& mat = mattress::cast(ptr)->ptr; size_t nrow = mat->nrow(); + size_t ncol = mat->ncol(); + + auto gptr = check_numpy_array(grouping); if (grouping.size() != nrow) { throw std::runtime_error("'grouping' should have length equal to the number of rows"); } - auto group_sizes = tatami_stats::tabulate_groups(gptr, nrow); + auto group_sizes = tatami_stats::tabulate_groups(gptr, nrow); size_t ngroups = group_sizes.size(); - size_t ncol = mat->ncol(); - pybind11::array_t output({ ncol, ngroups }); + pybind11::array_t output({ ncol, ngroups }); - auto optr = static_cast(output.request().ptr); - std::vector ptrs(ngroups); + auto optr = static_cast(output.request().ptr); + std::vector ptrs(ngroups); for (size_t g = 0; g < ngroups; ++g) { ptrs[g] = optr + g * ncol; } @@ -319,52 +353,54 @@ pybind11::array_t compute_column_medians_by_group(const MatrixPoint /** Extraction **/ -pybind11::array_t extract_dense_subset(MatrixPointer mat, bool row_noop, const pybind11::array& row_sub, bool 
col_noop, const pybind11::array& col_sub) { +pybind11::array_t extract_dense_subset(uintptr_t ptr, bool row_noop, const pybind11::array& row_sub, bool col_noop, const pybind11::array& col_sub) { + auto mat = mattress::cast(ptr)->ptr; + if (!row_noop) { - auto rptr = check_numpy_array(row_sub); - auto tmp = tatami::make_DelayedSubset<0>(std::move(mat), tatami::ArrayView(rptr, row_sub.size())); + auto rptr = check_numpy_array(row_sub); + auto tmp = tatami::make_DelayedSubset<0>(std::move(mat), tatami::ArrayView(rptr, row_sub.size())); mat.swap(tmp); } if (!col_noop) { - auto cptr = check_numpy_array(col_sub); - auto tmp = tatami::make_DelayedSubset<1>(std::move(mat), tatami::ArrayView(cptr, col_sub.size())); + auto cptr = check_numpy_array(col_sub); + auto tmp = tatami::make_DelayedSubset<1>(std::move(mat), tatami::ArrayView(cptr, col_sub.size())); mat.swap(tmp); } size_t NR = mat->nrow(), NC = mat->ncol(); - pybind11::array_t output({ NR, NC }); - auto optr = static_cast(output.request().ptr); + pybind11::array_t output({ NR, NC }); + auto optr = static_cast(output.request().ptr); tatami::convert_to_dense(mat.get(), false, optr); return output; } -pybind11::object extract_sparse_subset(MatrixPointer mat, bool row_noop, const pybind11::array& row_sub, bool col_noop, const pybind11::array& col_sub) { +pybind11::object extract_sparse_subset(uintptr_t ptr, bool row_noop, const pybind11::array& row_sub, bool col_noop, const pybind11::array& col_sub) { + auto mat = mattress::cast(ptr)->ptr; + if (!row_noop) { - auto rptr = check_numpy_array(row_sub); - auto tmp = tatami::make_DelayedSubset<0>(std::move(mat), tatami::ArrayView(rptr, row_sub.size())); + auto rptr = check_numpy_array(row_sub); + auto tmp = tatami::make_DelayedSubset<0>(std::move(mat), tatami::ArrayView(rptr, row_sub.size())); mat.swap(tmp); } if (!col_noop) { - auto cptr = check_numpy_array(col_sub); - auto tmp = tatami::make_DelayedSubset<1>(std::move(mat), tatami::ArrayView(cptr, col_sub.size())); + auto 
cptr = check_numpy_array(col_sub); + auto tmp = tatami::make_DelayedSubset<1>(std::move(mat), tatami::ArrayView(cptr, col_sub.size())); mat.swap(tmp); } - int NC = mat->ncol(); - int NR = mat->nrow(); + size_t NR = mat->nrow(), NC = mat->ncol(); pybind11::list content(NC); - if (mat->prefer_rows()) { - std::vector > vcollection(NC); - std::vector > icollection(NC); + std::vector > vcollection(NC); + std::vector > icollection(NC); - auto ext = tatami::consecutive_extractor(mat.get(), true, 0, NR); - std::vector vbuffer(NC); - std::vector ibuffer(NC); + auto ext = tatami::consecutive_extractor(mat.get(), true, 0, NR); + std::vector vbuffer(NC); + std::vector ibuffer(NC); - for (int r = 0; r < NR; ++r) { + for (size_t r = 0; r < NR; ++r) { auto info = ext->fetch(vbuffer.data(), ibuffer.data()); for (int i = 0; i < info.number; ++i) { auto c = info.index[i]; @@ -373,11 +409,11 @@ pybind11::object extract_sparse_subset(MatrixPointer mat, bool row_noop, const p } } - for (int c = 0; c < NC; ++c) { + for (size_t c = 0; c < NC; ++c) { if (vcollection[c].size()) { pybind11::list tmp(2); - tmp[0] = pybind11::array_t(icollection[c].size(), icollection[c].data()); - tmp[1] = pybind11::array_t(vcollection[c].size(), vcollection[c].data()); + tmp[0] = pybind11::array_t(icollection[c].size(), icollection[c].data()); + tmp[1] = pybind11::array_t(vcollection[c].size(), vcollection[c].data()); content[c] = std::move(tmp); } else { content[c] = pybind11::none(); @@ -385,16 +421,16 @@ pybind11::object extract_sparse_subset(MatrixPointer mat, bool row_noop, const p } } else { - auto ext = tatami::consecutive_extractor(mat.get(), false, 0, NC); - std::vector vbuffer(NC); - std::vector ibuffer(NC); + auto ext = tatami::consecutive_extractor(mat.get(), false, 0, NC); + std::vector vbuffer(NC); + std::vector ibuffer(NC); - for (int c = 0; c < NC; ++c) { + for (size_t c = 0; c < NC; ++c) { auto info = ext->fetch(vbuffer.data(), ibuffer.data()); if (info.number) { pybind11::list tmp(2); - 
tmp[0] = pybind11::array_t(info.number, info.index); - tmp[1] = pybind11::array_t(info.number, info.value); + tmp[0] = pybind11::array_t(info.number, info.index); + tmp[1] = pybind11::array_t(info.number, info.value); content[c] = std::move(tmp); } else { content[c] = pybind11::none(); @@ -410,6 +446,8 @@ pybind11::object extract_sparse_subset(MatrixPointer mat, bool row_noop, const p } void init_common(pybind11::module& m) { + m.def("free_mattress", &free_mattress); + m.def("extract_dim", &extract_dim); m.def("extract_sparse", &extract_sparse); @@ -440,6 +478,4 @@ void init_common(pybind11::module& m) { m.def("extract_dense_subset", &extract_dense_subset); m.def("extract_sparse_subset", &extract_sparse_subset); - - pybind11::class_, MatrixPointer>(m, "Matrix"); } diff --git a/lib/src/compressed_sparse_matrix.cpp b/lib/src/compressed_sparse_matrix.cpp index 053823f..5f25a82 100644 --- a/lib/src/compressed_sparse_matrix.cpp +++ b/lib/src/compressed_sparse_matrix.cpp @@ -1,4 +1,4 @@ -#include "def.h" +#include "mattress.h" #include "utils.h" #include "pybind11/pybind11.h" @@ -9,7 +9,14 @@ #include template -MatrixPointer initialize_compressed_sparse_matrix_raw(MatrixIndex nr, MatrixValue nc, const pybind11::array& data, const pybind11::array& index, const pybind11::array& indptr, bool byrow) { +uintptr_t initialize_compressed_sparse_matrix_raw( + mattress::MatrixIndex nr, + mattress::MatrixValue nc, + const pybind11::array& data, + const pybind11::array& index, + const pybind11::array& indptr, + bool byrow) +{ size_t expected = (byrow ? 
nr : nc); if (indptr.size() != expected + 1) { throw std::runtime_error("unexpected length for the 'indptr' array"); @@ -27,12 +34,28 @@ MatrixPointer initialize_compressed_sparse_matrix_raw(MatrixIndex nr, MatrixValu } tatami::ArrayView iview(check_contiguous_numpy_array(index), nz); - typedef tatami::CompressedSparseMatrix Spmat; - return MatrixPointer(new Spmat(nr, nc, std::move(dview), std::move(iview), std::move(pview), byrow)); + auto tmp = std::make_unique(); + typedef tatami::CompressedSparseMatrix Spmat; + tmp->ptr.reset(new Spmat(nr, nc, std::move(dview), std::move(iview), std::move(pview), byrow)); + + pybind11::tuple objects(3); + objects[0] = data; + objects[1] = index; + objects[2] = indptr; + tmp->original = std::move(objects); + + return mattress::cast(tmp.release()); } template -MatrixPointer initialize_compressed_sparse_matrix_itype(MatrixIndex nr, MatrixValue nc, const pybind11::array& data, const pybind11::array& index, const pybind11::array& indptr, bool byrow) { +uintptr_t initialize_compressed_sparse_matrix_itype( + mattress::MatrixIndex nr, + mattress::MatrixValue nc, + const pybind11::array& data, + const pybind11::array& index, + const pybind11::array& indptr, + bool byrow) +{ auto dtype = index.dtype(); if (dtype.is(pybind11::dtype::of())) { @@ -54,10 +77,17 @@ MatrixPointer initialize_compressed_sparse_matrix_itype(MatrixIndex nr, MatrixVa } throw std::runtime_error("unrecognized index type '" + std::string(dtype.kind(), 1) + std::to_string(dtype.itemsize()) + "' for compressed sparse matrix initialization"); - return MatrixPointer(); + return 0; } -MatrixPointer initialize_compressed_sparse_matrix(MatrixIndex nr, MatrixValue nc, const pybind11::array& data, const pybind11::array& index, const pybind11::array& indptr, bool byrow) { +uintptr_t initialize_compressed_sparse_matrix( + mattress::MatrixIndex nr, + mattress::MatrixValue nc, + const pybind11::array& data, + const pybind11::array& index, + const pybind11::array& indptr, + bool 
byrow) +{ auto dtype = data.dtype(); if (dtype.is(pybind11::dtype::of())) { @@ -83,7 +113,7 @@ MatrixPointer initialize_compressed_sparse_matrix(MatrixIndex nr, MatrixValue nc } throw std::runtime_error("unrecognized data type '" + std::string(dtype.kind(), 1) + std::to_string(dtype.itemsize()) + "' for compressed sparse matrix initialization"); - return MatrixPointer(); + return 0; } void init_compressed_sparse_matrix(pybind11::module& m) { diff --git a/lib/src/def.h b/lib/src/def.h deleted file mode 100644 index 5ed256d..0000000 --- a/lib/src/def.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef DEF_H -#define DEF_H - -#include "tatami/tatami.hpp" -#include -#include - -typedef double MatrixValue; -typedef uint32_t MatrixIndex; -typedef std::shared_ptr > MatrixPointer; - -#endif diff --git a/lib/src/delayed_binary_isometric_operation.cpp b/lib/src/delayed_binary_isometric_operation.cpp index 0bf6f6e..f70d90e 100644 --- a/lib/src/delayed_binary_isometric_operation.cpp +++ b/lib/src/delayed_binary_isometric_operation.cpp @@ -1,4 +1,4 @@ -#include "def.h" +#include "mattress.h" #include "pybind11/pybind11.h" #include "pybind11/numpy.h" @@ -8,45 +8,60 @@ #include #include -MatrixPointer initialize_delayed_binary_isometric_operation(MatrixPointer left, MatrixPointer right, const std::string& op) { +template +uintptr_t convert(uintptr_t left, uintptr_t right, Operation_ op) { + auto lbound = mattress::cast(left); + auto rbound = mattress::cast(right); + + auto tmp = std::make_unique(); + tmp->ptr = tatami::make_DelayedBinaryIsometricOperation(lbound->ptr, rbound->ptr, std::move(op)); + + pybind11::tuple original(2); + original[0] = lbound->original; + original[1] = rbound->original; + tmp->original = std::move(original); + return mattress::cast(tmp.release()); +} + +uintptr_t initialize_delayed_binary_isometric_operation(uintptr_t left, uintptr_t right, const std::string& op) { if (op == "add") { - return (tatami::make_DelayedBinaryIsometricOperation(std::move(left), 
std::move(right), tatami::make_DelayedBinaryIsometricAdd())); + return convert(left, right, tatami::make_DelayedBinaryIsometricAdd()); } else if (op == "subtract") { - return (tatami::make_DelayedBinaryIsometricOperation(std::move(left), std::move(right), tatami::make_DelayedBinaryIsometricSubtract())); + return convert(left, right, tatami::make_DelayedBinaryIsometricSubtract()); } else if (op == "multiply") { - return (tatami::make_DelayedBinaryIsometricOperation(std::move(left), std::move(right), tatami::make_DelayedBinaryIsometricMultiply())); + return convert(left, right, tatami::make_DelayedBinaryIsometricMultiply()); } else if (op == "divide") { - return (tatami::make_DelayedBinaryIsometricOperation(std::move(left), std::move(right), tatami::make_DelayedBinaryIsometricDivide())); + return convert(left, right, tatami::make_DelayedBinaryIsometricDivide()); } else if (op == "remainder") { - return (tatami::make_DelayedBinaryIsometricOperation(std::move(left), std::move(right), tatami::make_DelayedBinaryIsometricModulo())); + return convert(left, right, tatami::make_DelayedBinaryIsometricModulo()); } else if (op == "floor_divide") { - return (tatami::make_DelayedBinaryIsometricOperation(std::move(left), std::move(right), tatami::make_DelayedBinaryIsometricIntegerDivide())); + return convert(left, right, tatami::make_DelayedBinaryIsometricIntegerDivide()); } else if (op == "power") { - return (tatami::make_DelayedBinaryIsometricOperation(std::move(left), std::move(right), tatami::make_DelayedBinaryIsometricPower())); + return convert(left, right, tatami::make_DelayedBinaryIsometricPower()); } else if (op == "equal") { - return (tatami::make_DelayedBinaryIsometricOperation(std::move(left), std::move(right), tatami::make_DelayedBinaryIsometricEqual())); + return convert(left, right, tatami::make_DelayedBinaryIsometricEqual()); } else if (op == "not_equal") { - return (tatami::make_DelayedBinaryIsometricOperation(std::move(left), std::move(right), 
tatami::make_DelayedBinaryIsometricNotEqual())); + return convert(left, right, tatami::make_DelayedBinaryIsometricNotEqual()); } else if (op == "greater") { - return (tatami::make_DelayedBinaryIsometricOperation(std::move(left), std::move(right), tatami::make_DelayedBinaryIsometricGreaterThan())); + return convert(left, right, tatami::make_DelayedBinaryIsometricGreaterThan()); } else if (op == "greater_equal") { - return (tatami::make_DelayedBinaryIsometricOperation(std::move(left), std::move(right), tatami::make_DelayedBinaryIsometricGreaterThanOrEqual())); + return convert(left, right, tatami::make_DelayedBinaryIsometricGreaterThanOrEqual()); } else if (op == "less") { - return (tatami::make_DelayedBinaryIsometricOperation(std::move(left), std::move(right), tatami::make_DelayedBinaryIsometricLessThan())); + return convert(left, right, tatami::make_DelayedBinaryIsometricLessThan()); } else if (op == "less_equal") { - return (tatami::make_DelayedBinaryIsometricOperation(std::move(left), std::move(right), tatami::make_DelayedBinaryIsometricLessThanOrEqual())); + return convert(left, right, tatami::make_DelayedBinaryIsometricLessThanOrEqual()); } else if (op == "logical_and") { - return (tatami::make_DelayedBinaryIsometricOperation(std::move(left), std::move(right), tatami::make_DelayedBinaryIsometricBooleanAnd())); + return convert(left, right, tatami::make_DelayedBinaryIsometricBooleanAnd()); } else if (op == "logical_or") { - return (tatami::make_DelayedBinaryIsometricOperation(std::move(left), std::move(right), tatami::make_DelayedBinaryIsometricBooleanOr())); + return convert(left, right, tatami::make_DelayedBinaryIsometricBooleanOr()); } else if (op == "logical_xor") { - return (tatami::make_DelayedBinaryIsometricOperation(std::move(left), std::move(right), tatami::make_DelayedBinaryIsometricBooleanXor())); + return convert(left, right, tatami::make_DelayedBinaryIsometricBooleanXor()); } throw std::runtime_error("unknown binary isometric operation '" + op + 
"'"); - return MatrixPointer(); + return 0; } void init_delayed_binary_isometric_operation(pybind11::module& m) { diff --git a/lib/src/delayed_bind.cpp b/lib/src/delayed_bind.cpp index ffed7f9..2bd17df 100644 --- a/lib/src/delayed_bind.cpp +++ b/lib/src/delayed_bind.cpp @@ -1,4 +1,4 @@ -#include "def.h" +#include "mattress.h" #include "tatami/tatami.hpp" @@ -6,13 +6,21 @@ #include -MatrixPointer initialize_delayed_bind(pybind11::list inputs, int along) { - std::vector combined; +uintptr_t initialize_delayed_bind(const pybind11::list& inputs, int along) { + std::vector > > combined; combined.reserve(inputs.size()); + pybind11::tuple originals(inputs.size()); + for (size_t i = 0, n = inputs.size(); i < n; ++i) { - combined.push_back(inputs[i].cast()); + auto bound = mattress::cast(inputs[i].cast()); + combined.push_back(bound->ptr); + originals[i] = bound->original; } - return tatami::make_DelayedBind(std::move(combined), along == 0); + + auto tmp = std::make_unique(); + tmp->ptr = tatami::make_DelayedBind(std::move(combined), along == 0); + tmp->original = std::move(originals); + return mattress::cast(tmp.release()); } void init_delayed_bind(pybind11::module& m) { diff --git a/lib/src/delayed_subset.cpp b/lib/src/delayed_subset.cpp index ec71733..1f17547 100644 --- a/lib/src/delayed_subset.cpp +++ b/lib/src/delayed_subset.cpp @@ -1,4 +1,4 @@ -#include "def.h" +#include "mattress.h" #include "utils.h" #include "pybind11/pybind11.h" @@ -7,9 +7,19 @@ #include #include -MatrixPointer initialize_delayed_subset(MatrixPointer mat, const pybind11::array& subset, bool byrow) { - auto sptr = check_numpy_array(subset); - return tatami::make_DelayedSubset(std::move(mat), tatami::ArrayView(sptr, subset.size()), byrow); +uintptr_t initialize_delayed_subset(uintptr_t ptr, const pybind11::array& subset, bool byrow) { + auto bound = mattress::cast(ptr); + auto sptr = check_numpy_array(subset); + + auto tmp = std::make_unique(); + tmp->ptr = tatami::make_DelayedSubset(bound->ptr, 
tatami::ArrayView(sptr, subset.size()), byrow); + + pybind11::tuple original(2); + original[0] = bound->original; + original[1] = subset; + tmp->original = std::move(original); + + return mattress::cast(tmp.release()); } void init_delayed_subset(pybind11::module& m) { diff --git a/lib/src/delayed_transpose.cpp b/lib/src/delayed_transpose.cpp index 934a4e8..75a41e9 100644 --- a/lib/src/delayed_transpose.cpp +++ b/lib/src/delayed_transpose.cpp @@ -1,4 +1,4 @@ -#include "def.h" +#include "mattress.h" #include "pybind11/pybind11.h" #include "pybind11/numpy.h" @@ -6,8 +6,12 @@ #include #include -MatrixPointer initialize_delayed_transpose(MatrixPointer mat) { - return tatami::make_DelayedTranspose(std::move(mat)); +uintptr_t initialize_delayed_transpose(uintptr_t ptr) { + auto bound = mattress::cast(ptr); + auto tmp = std::make_unique(); + tmp->ptr = tatami::make_DelayedTranspose(bound->ptr); + tmp->original = bound->original; + return mattress::cast(tmp.release()); } void init_delayed_transpose(pybind11::module& m) { diff --git a/lib/src/delayed_unary_isometric_operation_simple.cpp b/lib/src/delayed_unary_isometric_operation_simple.cpp index 8f5b03d..42b8c5f 100644 --- a/lib/src/delayed_unary_isometric_operation_simple.cpp +++ b/lib/src/delayed_unary_isometric_operation_simple.cpp @@ -1,4 +1,4 @@ -#include "def.h" +#include "mattress.h" #include "pybind11/pybind11.h" #include "pybind11/numpy.h" @@ -8,69 +8,78 @@ #include #include -MatrixPointer initialize_delayed_unary_isometric_operation_simple(MatrixPointer ptr, const std::string& op) { +template +uintptr_t convert(uintptr_t ptr, Operation_ op) { + auto bound = mattress::cast(ptr); + auto tmp = std::make_unique(); + tmp->ptr = tatami::make_DelayedUnaryIsometricOperation(bound->ptr, std::move(op)); + tmp->original = bound->original; + return mattress::cast(tmp.release()); +} + +uintptr_t initialize_delayed_unary_isometric_operation_simple(uintptr_t ptr, const std::string& op) { if (op == "abs") { - return 
tatami::make_DelayedUnaryIsometricOperation(std::move(ptr), tatami::DelayedUnaryIsometricAbs<>()); + return convert(ptr, tatami::DelayedUnaryIsometricAbs<>()); } else if (op == "sign") { - return tatami::make_DelayedUnaryIsometricOperation(std::move(ptr), tatami::DelayedUnaryIsometricSign<>()); + return convert(ptr, tatami::DelayedUnaryIsometricSign<>()); } else if (op == "log") { - return tatami::make_DelayedUnaryIsometricOperation(std::move(ptr), tatami::DelayedUnaryIsometricLog<>()); + return convert(ptr, tatami::DelayedUnaryIsometricLog<>()); } else if (op == "log2") { - return tatami::make_DelayedUnaryIsometricOperation(std::move(ptr), tatami::DelayedUnaryIsometricLog(2.0)); + return convert(ptr, tatami::DelayedUnaryIsometricLog(2.0)); } else if (op == "log10") { - return tatami::make_DelayedUnaryIsometricOperation(std::move(ptr), tatami::DelayedUnaryIsometricLog(10.0)); + return convert(ptr, tatami::DelayedUnaryIsometricLog(10.0)); } else if (op == "log1p") { - return tatami::make_DelayedUnaryIsometricOperation(std::move(ptr), tatami::DelayedUnaryIsometricLog1p<>()); + return convert(ptr, tatami::DelayedUnaryIsometricLog1p<>()); } else if (op == "sqrt") { - return tatami::make_DelayedUnaryIsometricOperation(std::move(ptr), tatami::DelayedUnaryIsometricSqrt<>()); + return convert(ptr, tatami::DelayedUnaryIsometricSqrt<>()); } else if (op == "ceil") { - return tatami::make_DelayedUnaryIsometricOperation(std::move(ptr), tatami::DelayedUnaryIsometricCeiling<>()); + return convert(ptr, tatami::DelayedUnaryIsometricCeiling<>()); } else if (op == "floor") { - return tatami::make_DelayedUnaryIsometricOperation(std::move(ptr), tatami::DelayedUnaryIsometricFloor<>()); + return convert(ptr, tatami::DelayedUnaryIsometricFloor<>()); } else if (op == "trunc") { - return tatami::make_DelayedUnaryIsometricOperation(std::move(ptr), tatami::DelayedUnaryIsometricTrunc<>()); + return convert(ptr, tatami::DelayedUnaryIsometricTrunc<>()); } else if (op == "round") { - return 
tatami::make_DelayedUnaryIsometricOperation(std::move(ptr), tatami::DelayedUnaryIsometricRound<>()); + return convert(ptr, tatami::DelayedUnaryIsometricRound<>()); } else if (op == "exp") { - return tatami::make_DelayedUnaryIsometricOperation(std::move(ptr), tatami::DelayedUnaryIsometricExp<>()); + return convert(ptr, tatami::DelayedUnaryIsometricExp<>()); } else if (op == "expm1") { - return tatami::make_DelayedUnaryIsometricOperation(std::move(ptr), tatami::DelayedUnaryIsometricExpm1<>()); + return convert(ptr, tatami::DelayedUnaryIsometricExpm1<>()); } else if (op == "cos") { - return tatami::make_DelayedUnaryIsometricOperation(std::move(ptr), tatami::DelayedUnaryIsometricCos<>()); + return convert(ptr, tatami::DelayedUnaryIsometricCos<>()); } else if (op == "sin") { - return tatami::make_DelayedUnaryIsometricOperation(std::move(ptr), tatami::DelayedUnaryIsometricSin<>()); + return convert(ptr, tatami::DelayedUnaryIsometricSin<>()); } else if (op == "tan") { - return tatami::make_DelayedUnaryIsometricOperation(std::move(ptr), tatami::DelayedUnaryIsometricTan<>()); + return convert(ptr, tatami::DelayedUnaryIsometricTan<>()); } else if (op == "cosh") { - return tatami::make_DelayedUnaryIsometricOperation(std::move(ptr), tatami::DelayedUnaryIsometricCosh<>()); + return convert(ptr, tatami::DelayedUnaryIsometricCosh<>()); } else if (op == "sinh") { - return tatami::make_DelayedUnaryIsometricOperation(std::move(ptr), tatami::DelayedUnaryIsometricSinh<>()); + return convert(ptr, tatami::DelayedUnaryIsometricSinh<>()); } else if (op == "tanh") { - return tatami::make_DelayedUnaryIsometricOperation(std::move(ptr), tatami::DelayedUnaryIsometricTanh<>()); + return convert(ptr, tatami::DelayedUnaryIsometricTanh<>()); } else if (op == "arccos") { - return tatami::make_DelayedUnaryIsometricOperation(std::move(ptr), tatami::DelayedUnaryIsometricAcos<>()); + return convert(ptr, tatami::DelayedUnaryIsometricAcos<>()); } else if (op == "arcsin") { - return 
tatami::make_DelayedUnaryIsometricOperation(std::move(ptr), tatami::DelayedUnaryIsometricAsin<>()); + return convert(ptr, tatami::DelayedUnaryIsometricAsin<>()); } else if (op == "arctan") { - return tatami::make_DelayedUnaryIsometricOperation(std::move(ptr), tatami::DelayedUnaryIsometricAtan<>()); + return convert(ptr, tatami::DelayedUnaryIsometricAtan<>()); } else if (op == "arccosh") { - return tatami::make_DelayedUnaryIsometricOperation(std::move(ptr), tatami::DelayedUnaryIsometricAcosh<>()); + return convert(ptr, tatami::DelayedUnaryIsometricAcosh<>()); } else if (op == "arcsinh") { - return tatami::make_DelayedUnaryIsometricOperation(std::move(ptr), tatami::DelayedUnaryIsometricAsinh<>()); + return convert(ptr, tatami::DelayedUnaryIsometricAsinh<>()); } else if (op == "arctanh") { - return tatami::make_DelayedUnaryIsometricOperation(std::move(ptr), tatami::DelayedUnaryIsometricAtanh<>()); + return convert(ptr, tatami::DelayedUnaryIsometricAtanh<>()); } throw std::runtime_error("unknown binary isometric operation '" + op + "'"); - return MatrixPointer(); + return 0; } void init_delayed_unary_isometric_operation_simple(pybind11::module& m) { diff --git a/lib/src/delayed_unary_isometric_operation_with_args.cpp b/lib/src/delayed_unary_isometric_operation_with_args.cpp index e68141a..1d39119 100644 --- a/lib/src/delayed_unary_isometric_operation_with_args.cpp +++ b/lib/src/delayed_unary_isometric_operation_with_args.cpp @@ -1,4 +1,4 @@ -#include "def.h" +#include "mattress.h" #include "utils.h" #include "pybind11/pybind11.h" @@ -9,110 +9,131 @@ #include #include +template +uintptr_t convert(const mattress::BoundMatrix* bound, const pybind11::array& arg, Operation_ op) { + auto tmp = std::make_unique(); + tmp->ptr = tatami::make_DelayedUnaryIsometricOperation(bound->ptr, std::move(op)); + pybind11::tuple original(2); + original[0] = bound->original; + original[1] = arg; + tmp->original = std::move(original); + return mattress::cast(tmp.release()); +} + template 
-MatrixPointer initialize_delayed_unary_isometric_operation_with_vector_internal(MatrixPointer mat, const std::string& op, bool by_row, const pybind11::array& arg) { +uintptr_t initialize_delayed_unary_isometric_operation_with_vector_internal(uintptr_t ptr, const std::string& op, bool by_row, const pybind11::array& arg) { + auto bound = mattress::cast(ptr); auto aptr = check_numpy_array(arg); - size_t expected = by_row ? mat->nrow() : mat->ncol(); + size_t expected = by_row ? bound->ptr->nrow() : bound->ptr->ncol(); if (expected != arg.size()) { throw std::runtime_error("unexpected length of array for isometric unary operation"); } tatami::ArrayView aview(aptr, expected); if (op == "add") { - return tatami::make_DelayedUnaryIsometricOperation(std::move(mat), tatami::make_DelayedUnaryIsometricAddVector(std::move(aview), by_row)); + return convert(bound, arg, tatami::make_DelayedUnaryIsometricAddVector(std::move(aview), by_row)); } else if (op == "subtract") { - return tatami::make_DelayedUnaryIsometricOperation(std::move(mat), tatami::make_DelayedUnaryIsometricSubtractVector(std::move(aview), by_row)); + return convert(bound, arg, tatami::make_DelayedUnaryIsometricSubtractVector(std::move(aview), by_row)); } else if (op == "multiply") { - return tatami::make_DelayedUnaryIsometricOperation(std::move(mat), tatami::make_DelayedUnaryIsometricMultiplyVector(std::move(aview), by_row)); + return convert(bound, arg, tatami::make_DelayedUnaryIsometricMultiplyVector(std::move(aview), by_row)); } else if (op == "divide") { - return tatami::make_DelayedUnaryIsometricOperation(std::move(mat), tatami::make_DelayedUnaryIsometricDivideVector(std::move(aview), by_row)); + return convert(bound, arg, tatami::make_DelayedUnaryIsometricDivideVector(std::move(aview), by_row)); } else if (op == "remainder") { - return tatami::make_DelayedUnaryIsometricOperation(std::move(mat), tatami::make_DelayedUnaryIsometricModuloVector(std::move(aview), by_row)); + return convert(bound, arg, 
tatami::make_DelayedUnaryIsometricModuloVector(std::move(aview), by_row)); } else if (op == "floor_divide") { - return tatami::make_DelayedUnaryIsometricOperation(std::move(mat), tatami::make_DelayedUnaryIsometricIntegerDivideVector(std::move(aview), by_row)); + return convert(bound, arg, tatami::make_DelayedUnaryIsometricIntegerDivideVector(std::move(aview), by_row)); } else if (op == "power") { - return tatami::make_DelayedUnaryIsometricOperation(std::move(mat), tatami::make_DelayedUnaryIsometricPowerVector(std::move(aview), by_row)); + return convert(bound, arg, tatami::make_DelayedUnaryIsometricPowerVector(std::move(aview), by_row)); } else if (op == "equal") { - return tatami::make_DelayedUnaryIsometricOperation(std::move(mat), tatami::make_DelayedUnaryIsometricEqualVector(std::move(aview), by_row)); + return convert(bound, arg, tatami::make_DelayedUnaryIsometricEqualVector(std::move(aview), by_row)); } else if (op == "not_equal") { - return tatami::make_DelayedUnaryIsometricOperation(std::move(mat), tatami::make_DelayedUnaryIsometricNotEqualVector(std::move(aview), by_row)); + return convert(bound, arg, tatami::make_DelayedUnaryIsometricNotEqualVector(std::move(aview), by_row)); } else if ((right_ && op == "greater") || (!right_ && op == "less")) { - return tatami::make_DelayedUnaryIsometricOperation(std::move(mat), tatami::make_DelayedUnaryIsometricGreaterThanVector(std::move(aview), by_row)); + return convert(bound, arg, tatami::make_DelayedUnaryIsometricGreaterThanVector(std::move(aview), by_row)); } else if ((right_ && op == "greater_equal") || (!right_ && op == "less_equal")) { - return tatami::make_DelayedUnaryIsometricOperation(std::move(mat), tatami::make_DelayedUnaryIsometricGreaterThanOrEqualVector(std::move(aview), by_row)); + return convert(bound, arg, tatami::make_DelayedUnaryIsometricGreaterThanOrEqualVector(std::move(aview), by_row)); } else if ((right_ && op == "less") || (!right_ && op == "greater")) { - return 
tatami::make_DelayedUnaryIsometricOperation(std::move(mat), tatami::make_DelayedUnaryIsometricLessThanVector(std::move(aview), by_row)); + return convert(bound, arg, tatami::make_DelayedUnaryIsometricLessThanVector(std::move(aview), by_row)); } else if ((right_ && op == "less_equal") || (!right_ && op == "greater_equal")) { - return tatami::make_DelayedUnaryIsometricOperation(std::move(mat), tatami::make_DelayedUnaryIsometricLessThanOrEqualVector(std::move(aview), by_row)); + return convert(bound, arg, tatami::make_DelayedUnaryIsometricLessThanOrEqualVector(std::move(aview), by_row)); } else if (op == "logical_and") { - return tatami::make_DelayedUnaryIsometricOperation(std::move(mat), tatami::make_DelayedUnaryIsometricBooleanAndVector(std::move(aview), by_row)); + return convert(bound, arg, tatami::make_DelayedUnaryIsometricBooleanAndVector(std::move(aview), by_row)); } else if (op == "logical_or") { - return tatami::make_DelayedUnaryIsometricOperation(std::move(mat), tatami::make_DelayedUnaryIsometricBooleanOrVector(std::move(aview), by_row)); + return convert(bound, arg, tatami::make_DelayedUnaryIsometricBooleanOrVector(std::move(aview), by_row)); } else if (op == "logical_xor") { - return tatami::make_DelayedUnaryIsometricOperation(std::move(mat), tatami::make_DelayedUnaryIsometricBooleanXorVector(std::move(aview), by_row)); + return convert(bound, arg, tatami::make_DelayedUnaryIsometricBooleanXorVector(std::move(aview), by_row)); } throw std::runtime_error("unknown unary isometric vector operation '" + op + "'"); - return MatrixPointer(); + return 0; } -MatrixPointer initialize_delayed_unary_isometric_operation_with_vector(MatrixPointer mat, const std::string& op, bool right, bool by_row, const pybind11::array& args) { +uintptr_t initialize_delayed_unary_isometric_operation_with_vector(uintptr_t ptr, const std::string& op, bool right, bool by_row, const pybind11::array& args) { if (right) { - return 
initialize_delayed_unary_isometric_operation_with_vector_internal(std::move(mat), op, by_row, args); + return initialize_delayed_unary_isometric_operation_with_vector_internal(ptr, op, by_row, args); } else { - return initialize_delayed_unary_isometric_operation_with_vector_internal(std::move(mat), op, by_row, args); + return initialize_delayed_unary_isometric_operation_with_vector_internal(ptr, op, by_row, args); } } +template +uintptr_t convert(uintptr_t ptr, Operation_ op) { + auto bound = mattress::cast(ptr); + auto tmp = std::make_unique(); + tmp->ptr = tatami::make_DelayedUnaryIsometricOperation(bound->ptr, std::move(op)); + tmp->original = bound->original; + return mattress::cast(tmp.release()); +} + template -MatrixPointer initialize_delayed_unary_isometric_operation_with_scalar_internal(MatrixPointer mat, const std::string& op, double arg) { +uintptr_t initialize_delayed_unary_isometric_operation_with_scalar_internal(uintptr_t ptr, const std::string& op, double arg) { if (op == "add") { - return tatami::make_DelayedUnaryIsometricOperation(std::move(mat), tatami::make_DelayedUnaryIsometricAddScalar(arg)); + return convert(ptr, tatami::make_DelayedUnaryIsometricAddScalar(arg)); } else if (op == "subtract") { - return tatami::make_DelayedUnaryIsometricOperation(std::move(mat), tatami::make_DelayedUnaryIsometricSubtractScalar(arg)); + return convert(ptr, tatami::make_DelayedUnaryIsometricSubtractScalar(arg)); } else if (op == "multiply") { - return tatami::make_DelayedUnaryIsometricOperation(std::move(mat), tatami::make_DelayedUnaryIsometricMultiplyScalar(arg)); + return convert(ptr, tatami::make_DelayedUnaryIsometricMultiplyScalar(arg)); } else if (op == "divide") { - return tatami::make_DelayedUnaryIsometricOperation(std::move(mat), tatami::make_DelayedUnaryIsometricDivideScalar(arg)); + return convert(ptr, tatami::make_DelayedUnaryIsometricDivideScalar(arg)); } else if (op == "remainder") { - return 
tatami::make_DelayedUnaryIsometricOperation(std::move(mat), tatami::make_DelayedUnaryIsometricModuloScalar(arg)); + return convert(ptr, tatami::make_DelayedUnaryIsometricModuloScalar(arg)); } else if (op == "floor_divide") { - return tatami::make_DelayedUnaryIsometricOperation(std::move(mat), tatami::make_DelayedUnaryIsometricIntegerDivideScalar(arg)); + return convert(ptr, tatami::make_DelayedUnaryIsometricIntegerDivideScalar(arg)); } else if (op == "power") { - return tatami::make_DelayedUnaryIsometricOperation(std::move(mat), tatami::make_DelayedUnaryIsometricPowerScalar(arg)); + return convert(ptr, tatami::make_DelayedUnaryIsometricPowerScalar(arg)); } else if (op == "equal") { - return tatami::make_DelayedUnaryIsometricOperation(std::move(mat), tatami::make_DelayedUnaryIsometricEqualScalar(arg)); + return convert(ptr, tatami::make_DelayedUnaryIsometricEqualScalar(arg)); } else if (op == "not_equal") { - return tatami::make_DelayedUnaryIsometricOperation(std::move(mat), tatami::make_DelayedUnaryIsometricNotEqualScalar(arg)); + return convert(ptr, tatami::make_DelayedUnaryIsometricNotEqualScalar(arg)); } else if ((right_ && op == "greater") || (!right_ && op == "less")) { - return tatami::make_DelayedUnaryIsometricOperation(std::move(mat), tatami::make_DelayedUnaryIsometricGreaterThanScalar(arg)); + return convert(ptr, tatami::make_DelayedUnaryIsometricGreaterThanScalar(arg)); } else if ((right_ && op == "greater_equal") || (!right_ && op == "less_equal")) { - return tatami::make_DelayedUnaryIsometricOperation(std::move(mat), tatami::make_DelayedUnaryIsometricGreaterThanOrEqualScalar(arg)); + return convert(ptr, tatami::make_DelayedUnaryIsometricGreaterThanOrEqualScalar(arg)); } else if ((right_ && op == "less") || (!right_ && op == "greater")) { - return tatami::make_DelayedUnaryIsometricOperation(std::move(mat), tatami::make_DelayedUnaryIsometricLessThanScalar(arg)); + return convert(ptr, tatami::make_DelayedUnaryIsometricLessThanScalar(arg)); } else if 
((right_ && op == "less_equal") || (!right_ && op == "greater_equal")) { - return tatami::make_DelayedUnaryIsometricOperation(std::move(mat), tatami::make_DelayedUnaryIsometricLessThanOrEqualScalar(arg)); + return convert(ptr, tatami::make_DelayedUnaryIsometricLessThanOrEqualScalar(arg)); } else if (op == "logical_and") { - return tatami::make_DelayedUnaryIsometricOperation(std::move(mat), tatami::make_DelayedUnaryIsometricBooleanAndScalar(arg)); + return convert(ptr, tatami::make_DelayedUnaryIsometricBooleanAndScalar(arg)); } else if (op == "logical_or") { - return tatami::make_DelayedUnaryIsometricOperation(std::move(mat), tatami::make_DelayedUnaryIsometricBooleanOrScalar(arg)); + return convert(ptr, tatami::make_DelayedUnaryIsometricBooleanOrScalar(arg)); } else if (op == "logical_xor") { - return tatami::make_DelayedUnaryIsometricOperation(std::move(mat), tatami::make_DelayedUnaryIsometricBooleanXorScalar(arg)); + return convert(ptr, tatami::make_DelayedUnaryIsometricBooleanXorScalar(arg)); } throw std::runtime_error("unknown unary isometric scalar operation '" + op + "'"); - return MatrixPointer(); + return 0; } -MatrixPointer initialize_delayed_unary_isometric_operation_with_scalar(MatrixPointer mat, const std::string& op, bool right, double arg) { +uintptr_t initialize_delayed_unary_isometric_operation_with_scalar(uintptr_t ptr, const std::string& op, bool right, double arg) { if (right) { - return initialize_delayed_unary_isometric_operation_with_scalar_internal(std::move(mat), op, arg); + return initialize_delayed_unary_isometric_operation_with_scalar_internal(ptr, op, arg); } else { - return initialize_delayed_unary_isometric_operation_with_scalar_internal(std::move(mat), op, arg); + return initialize_delayed_unary_isometric_operation_with_scalar_internal(ptr, op, arg); } } diff --git a/lib/src/dense_matrix.cpp b/lib/src/dense_matrix.cpp index ec1ce74..08d69cf 100644 --- a/lib/src/dense_matrix.cpp +++ b/lib/src/dense_matrix.cpp @@ -1,4 +1,4 @@ -#include 
"def.h" +#include "mattress.h" #include "utils.h" #include "tatami/tatami.hpp" @@ -11,7 +11,7 @@ #include template -MatrixPointer initialize_dense_matrix_internal(MatrixIndex nr, MatrixIndex nc, const pybind11::array& buffer) { +uintptr_t initialize_dense_matrix_internal(mattress::MatrixIndex nr, mattress::MatrixIndex nc, const pybind11::array& buffer) { size_t expected = static_cast(nr) * static_cast(nc); if (buffer.size() != expected) { throw std::runtime_error("unexpected size for the dense matrix buffer"); @@ -27,12 +27,16 @@ MatrixPointer initialize_dense_matrix_internal(MatrixIndex nr, MatrixIndex nc, c throw std::runtime_error("numpy array contents should be contiguous"); } + auto tmp = std::make_unique(); auto ptr = get_numpy_array_data(buffer); tatami::ArrayView view(ptr, expected); - return MatrixPointer(new tatami::DenseMatrix(nr, nc, std::move(view), byrow)); + tmp->ptr.reset(new tatami::DenseMatrix(nr, nc, std::move(view), byrow)); + tmp->original = buffer; + + return mattress::cast(tmp.release()); } -MatrixPointer initialize_dense_matrix(MatrixIndex nr, MatrixIndex nc, const pybind11::array& buffer) { +uintptr_t initialize_dense_matrix(mattress::MatrixIndex nr, mattress::MatrixIndex nc, const pybind11::array& buffer) { // Don't make any kind of copy of buffer to coerce the type or storage // order, as this should be handled by the caller; we don't provide any // protection from GC for the arrays referenced by the views. 
@@ -61,7 +65,7 @@ MatrixPointer initialize_dense_matrix(MatrixIndex nr, MatrixIndex nc, const pybi } throw std::runtime_error("unrecognized array type '" + std::string(dtype.kind(), 1) + std::to_string(dtype.itemsize()) + "' for dense matrix initialization"); - return MatrixPointer(); + return 0; } void init_dense_matrix(pybind11::module& m) { diff --git a/lib/src/fragmented_sparse_matrix.cpp b/lib/src/fragmented_sparse_matrix.cpp index 2fe962d..bd06a8c 100644 --- a/lib/src/fragmented_sparse_matrix.cpp +++ b/lib/src/fragmented_sparse_matrix.cpp @@ -1,4 +1,4 @@ -#include "def.h" +#include "mattress.h" #include "utils.h" #include "pybind11/pybind11.h" @@ -8,14 +8,20 @@ #include template -MatrixPointer initialize_fragmented_sparse_matrix_raw(MatrixIndex nr, MatrixValue nc, const pybind11::list& data, const pybind11::list& indices, bool byrow) { - MatrixIndex nvec = byrow ? nr : nc; +uintptr_t initialize_fragmented_sparse_matrix_raw( + mattress::MatrixIndex nr, + mattress::MatrixValue nc, + const pybind11::list& data, + const pybind11::list& indices, + bool byrow) +{ + mattress::MatrixIndex nvec = byrow ? 
nr : nc; std::vector > data_vec; data_vec.reserve(nvec); std::vector > idx_vec; idx_vec.reserve(nvec); - for (MatrixIndex i = 0; i < nvec; ++i) { + for (mattress::MatrixIndex i = 0; i < nvec; ++i) { auto curdata = data[i]; if (pybind11::isinstance(curdata)) { data_vec.emplace_back(static_cast(NULL), 0); @@ -35,11 +41,27 @@ MatrixPointer initialize_fragmented_sparse_matrix_raw(MatrixIndex nr, MatrixValu idx_vec.emplace_back(check_numpy_array(castidx), castidx.size()); } - return MatrixPointer(new tatami::FragmentedSparseMatrix(nr, nc, std::move(data_vec), std::move(idx_vec), byrow, false)); + auto tmp = std::make_unique(); + typedef tatami::FragmentedSparseMatrix SpMat; + tmp->ptr.reset(new SpMat(nr, nc, std::move(data_vec), std::move(idx_vec), byrow, /* check = */ false)); + + pybind11::tuple original(2); + original[0] = data; + original[1] = indices; + tmp->original = std::move(original); + + return mattress::cast(tmp.release()); } template -MatrixPointer initialize_fragmented_sparse_matrix_itype(MatrixIndex nr, MatrixValue nc, const pybind11::list& data, const pybind11::list& indices, bool byrow, const pybind11::dtype& index_type) { +uintptr_t initialize_fragmented_sparse_matrix_itype( + mattress::MatrixIndex nr, + mattress::MatrixValue nc, + const pybind11::list& data, + const pybind11::list& indices, + bool byrow, + const pybind11::dtype& index_type) +{ if (index_type.is(pybind11::dtype::of())) { return initialize_fragmented_sparse_matrix_raw(nr, nc, data, indices, byrow); } else if (index_type.is(pybind11::dtype::of())) { @@ -59,10 +81,18 @@ MatrixPointer initialize_fragmented_sparse_matrix_itype(MatrixIndex nr, MatrixVa } throw std::runtime_error("unrecognized index type '" + std::string(index_type.kind(), 1) + std::to_string(index_type.itemsize()) + "' for fragmented sparse matrix initialization"); - return MatrixPointer(); + return 0; } -MatrixPointer initialize_fragmented_sparse_matrix(MatrixIndex nr, MatrixValue nc, const pybind11::list& data, const 
pybind11::list& indices, bool byrow, const pybind11::dtype& data_type, const pybind11::dtype& index_type) { +uintptr_t initialize_fragmented_sparse_matrix( + mattress::MatrixIndex nr, + mattress::MatrixValue nc, + const pybind11::list& data, + const pybind11::list& indices, + bool byrow, + const pybind11::dtype& data_type, + const pybind11::dtype& index_type) +{ if (data_type.is(pybind11::dtype::of())) { return initialize_fragmented_sparse_matrix_itype(nr, nc, data, indices, byrow, index_type); } else if (data_type.is(pybind11::dtype::of())) { @@ -86,7 +116,7 @@ MatrixPointer initialize_fragmented_sparse_matrix(MatrixIndex nr, MatrixValue nc } throw std::runtime_error("unrecognized data type '" + std::string(data_type.kind(), 1) + std::to_string(data_type.itemsize()) + "' for fragmented sparse matrix initialization"); - return MatrixPointer(); + return 0; } void init_fragmented_sparse_matrix(pybind11::module& m) { diff --git a/src/mattress/InitializedMatrix.py b/src/mattress/InitializedMatrix.py index 270cb85..989fba9 100644 --- a/src/mattress/InitializedMatrix.py +++ b/src/mattress/InitializedMatrix.py @@ -24,27 +24,25 @@ def _factorize(group): class InitializedMatrix: """Pointer to an initialized ``tatami::matrix``, for use in C++ code. Instances of this class should only be created by developers and used - within package functions; this is done by fetching the :py:attr:`~ptr` - attribute and passing it as a ``std::shared_ptr >`` in C++ code, e.g., via pybind11. All ``InitializedMatrix`` - instances are expected to be transient within a Python session; they should - not be serialized, nor should they be visible to end users. Each instance - will automatically free the C++-allocated memory upon garbage collection. + within package functions; this is done by passing the :py:attr:`~ptr` + address to C++ and casting it to a ``mattress::BoundMatrix``. 
All + ``InitializedMatrix`` instances are expected to be transient within a + Python session; they should not be serialized, nor should they be visible + to end users. Each instance will automatically free the C++-allocated + memory upon garbage collection. """ - def __init__(self, ptr, objects: list): + def __init__(self, ptr: int): """ Args: ptr: - Shared pointer to a ``tatami::Matrix`` - instance, created and wrapped by pybind11. - - objects: - List of Python objects (typically NumPy arrays) to protect from - garbage collection, as their data is referenced by ``ptr``. + Address of a ``mattress::BoundMatrix`` instance. """ self._ptr = ptr - self._objects = objects + + def __del__(self): + """Free the instance at :py:attr:`~ptr`.""" + lib.free_mattress(self._ptr) def nrow(self) -> int: """Get number of rows. @@ -69,16 +67,10 @@ def shape(self) -> Tuple[int, int]: @property def ptr(self): - """Shared pointer to a ``tatami::Matrix`` instance, - to be passed to C++ code via pybind11.""" + """An address to a ``mattress::BoundMatrix`` instance, to be passed + as a ``uintptr_t`` to C++ for casting.""" return self._ptr - @property - def objects(self) -> list: - """List of objects to protect from garbage collection as they are - referenced by :py:attr:`~ptr`.""" - return self._objects - @property def dtype(self) -> numpy.dtype: """Type of the matrix, to masquerade as a NumPy-like object.""" diff --git a/src/mattress/__init__.py b/src/mattress/__init__.py index 23c85a5..c5c9e6a 100644 --- a/src/mattress/__init__.py +++ b/src/mattress/__init__.py @@ -17,3 +17,14 @@ from .initialize import initialize from .InitializedMatrix import InitializedMatrix + +def includes() -> str: + """Provides access to mattress C++ headers. + + Returns: + str: Path to a directory containing the mattress header. 
+ """ + import os + import inspect + dirname = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) + return os.path.join(dirname, "include") diff --git a/src/mattress/include/mattress.h b/src/mattress/include/mattress.h new file mode 100644 index 0000000..312ac30 --- /dev/null +++ b/src/mattress/include/mattress.h @@ -0,0 +1,57 @@ +#ifndef RTATAMI_H +#define RTATAMI_H + +#include "pybind11/pybind11.h" +#include "tatami/tatami.hpp" +#include + +namespace mattress { + +/** + * Type of the matrix value. + */ +typedef double MatrixValue; + +/** + * Type of the matrix index. + */ +typedef uint32_t MatrixIndex; + +/** + * @brief Pointer to a **tatami** matrix. + * + * The `tatami::Matrix` is allowed to hold views on Python-owned data, to avoid copies when moving from Python to C++. + * However, if garbage collection occurs on the Python-owned data, the use of that data in C++ becomes invalid. + * To avoid this, we hold the original Python objects to protect them from the garbage collector until this object is also destroyed. + */ +struct BoundMatrix { + /** + * Pointer to a `tatami::Matrix`. + */ + std::shared_ptr > ptr; + + /** + * Python object containing the data referenced by `ptr`. + */ + pybind11::object original; +}; + +/** + * @param ptr A stored pointer. + * @return Pointer to a `BoundMatrix`. + */ +inline BoundMatrix* cast(uintptr_t ptr) { + return static_cast(reinterpret_cast(ptr)); +} + +/** + * @param ptr Pointer to a `BoundMatrix`. + * @return A stored pointer. 
+ */ +inline uintptr_t cast(const BoundMatrix* ptr) { + return reinterpret_cast(static_cast(const_cast(ptr))); +} + +} + +#endif diff --git a/src/mattress/initialize.py b/src/mattress/initialize.py index 17ca3e1..3fe3745 100644 --- a/src/mattress/initialize.py +++ b/src/mattress/initialize.py @@ -44,24 +44,19 @@ def _initialize_numpy(x: numpy.ndarray) -> InitializedMatrix: if len(x.shape) != 2: raise ValueError("'x' should be a 2-dimensional array") x = _contiguify(x) - return InitializedMatrix( - ptr=lib.initialize_dense_matrix(x.shape[0], x.shape[1], x), - objects=[x] - ) + return InitializedMatrix(lib.initialize_dense_matrix(x.shape[0], x.shape[1], x)) if is_package_installed("scipy"): import scipy.sparse + @initialize.register def _initialize_sparse_csr_array(x: scipy.sparse.csr_array) -> InitializedMatrix: dtmp = _contiguify(x.data) itmp = _contiguify(x.indices) indtmp = x.indptr.astype(numpy.uint64, copy=False, order="A") - return InitializedMatrix( - ptr=lib.initialize_compressed_sparse_matrix(x.shape[0], x.shape[1], dtmp, itmp, indtmp, True), - objects=[dtmp, itmp, indtmp], - ) + return InitializedMatrix(lib.initialize_compressed_sparse_matrix(x.shape[0], x.shape[1], dtmp, itmp, indtmp, True)) @initialize.register @@ -74,10 +69,7 @@ def _initialize_sparse_csc_array(x: scipy.sparse.csc_array) -> InitializedMatrix dtmp = _contiguify(x.data) itmp = _contiguify(x.indices) indtmp = x.indptr.astype(numpy.uint64, copy=False, order="A") - return InitializedMatrix( - ptr=lib.initialize_compressed_sparse_matrix(x.shape[0], x.shape[1], dtmp, itmp, indtmp, False), - objects=[dtmp, itmp, indtmp], - ) + return InitializedMatrix(lib.initialize_compressed_sparse_matrix(x.shape[0], x.shape[1], dtmp, itmp, indtmp, False)) @initialize.register @@ -107,117 +99,68 @@ def _initialize_SparseNdarray(x: delayedarray.SparseNdarray) -> InitializedMatri dvecs = [None] * nc ivecs = [None] * nc - return InitializedMatrix( - ptr=lib.initialize_fragmented_sparse_matrix(x.shape[0], 
x.shape[1], dvecs, ivecs, False, x.dtype, x.index_dtype), - objects=[dvecs, ivecs] - ) + return InitializedMatrix(lib.initialize_fragmented_sparse_matrix(x.shape[0], x.shape[1], dvecs, ivecs, False, x.dtype, x.index_dtype)) @initialize.register -def _initialize_delayed_unary_isometric_operation_simple( - x: delayedarray.UnaryIsometricOpSimple, -) -> InitializedMatrix: +def _initialize_delayed_unary_isometric_operation_simple(x: delayedarray.UnaryIsometricOpSimple) -> InitializedMatrix: components = initialize(x.seed) - ptr = lib.initialize_delayed_unary_isometric_operation_simple( - components.ptr, x.operation.encode("UTF-8") - ) - return InitializedMatrix(ptr, components.objects) + ptr = lib.initialize_delayed_unary_isometric_operation_simple(components.ptr, x.operation) + return InitializedMatrix(ptr) @initialize.register -def _initialize_delayed_unary_isometric_operation_with_args( - x: delayedarray.UnaryIsometricOpWithArgs, -) -> InitializedMatrix: +def _initialize_delayed_unary_isometric_operation_with_args(x: delayedarray.UnaryIsometricOpWithArgs) -> InitializedMatrix: components = initialize(x.seed) - obj = components.objects if isinstance(x.value, numpy.ndarray): contents = x.value.astype(numpy.float64, copy=False, order="A") - ptr = lib.initialize_delayed_unary_isometric_operation_with_vector( - components.ptr, x.operation.encode("UTF-8"), x.right, (x.along == 0), contents - ) - obj.append(contents) + ptr = lib.initialize_delayed_unary_isometric_operation_with_vector(components.ptr, x.operation, x.right, (x.along == 0), contents) else: - ptr = lib.initialize_delayed_unary_isometric_operation_with_scalar( - components.ptr, x.operation.encode("UTF-8"), x.right, x.value - ) + ptr = lib.initialize_delayed_unary_isometric_operation_with_scalar(components.ptr, x.operation, x.right, x.value) - return InitializedMatrix(ptr, obj) + return InitializedMatrix(ptr) @initialize.register -def _initialize_delayed_subset( - x: delayedarray.Subset, -) -> InitializedMatrix: 
+def _initialize_delayed_subset(x: delayedarray.Subset) -> InitializedMatrix: components = initialize(x.seed) - for dim in range(2): current = x.subset[dim] noop, current = _sanitize_subset(current, x.shape[dim]) if not noop: - ptr = lib.initialize_delayed_subset( - components.ptr, current, dim == 0 - ) - components = InitializedMatrix(ptr, components.objects + [current]) - + ptr = lib.initialize_delayed_subset(components.ptr, current, dim == 0) + components = InitializedMatrix(ptr) return components @initialize.register -def _initialize_delayed_bind( - x: delayedarray.Combine, -) -> InitializedMatrix: - collected = [] - objects = [] - for i, s in enumerate(x.seeds): - components = initialize(s) - collected.append(components.ptr) - objects += components.objects - - ptr = lib.initialize_delayed_bind(collected, x.along) - return InitializedMatrix(ptr, objects) +def _initialize_delayed_bind(x: delayedarray.Combine) -> InitializedMatrix: + collected = [initialize(s) for s in x.seeds] + return InitializedMatrix(lib.initialize_delayed_bind([s.ptr for s in collected], x.along)) @initialize.register -def _initialize_delayed_transpose( - x: delayedarray.Transpose, -) -> InitializedMatrix: +def _initialize_delayed_transpose(x: delayedarray.Transpose) -> InitializedMatrix: components = initialize(x.seed) - if x.perm == (1, 0): ptr = lib.initialize_delayed_transpose(components.ptr) - components = InitializedMatrix(ptr, components.objects) - + components = InitializedMatrix(ptr) return components @initialize.register -def _initialize_delayed_binary_isometric_operation( - x: delayedarray.BinaryIsometricOp, -) -> InitializedMatrix: +def _initialize_delayed_binary_isometric_operation(x: delayedarray.BinaryIsometricOp) -> InitializedMatrix: lcomponents = initialize(x.left) rcomponents = initialize(x.right) - - ptr = lib.initialize_delayed_binary_isometric_operation( - lcomponents.ptr, rcomponents.ptr, x.operation.encode("UTF-8") - ) - - return InitializedMatrix(ptr, 
lcomponents.objects + rcomponents.objects) + ptr = lib.initialize_delayed_binary_isometric_operation(lcomponents.ptr, rcomponents.ptr, x.operation) + return InitializedMatrix(ptr) @initialize.register -def _initialize_delayed_round( - x: delayedarray.Round, -) -> InitializedMatrix: +def _initialize_delayed_round(x: delayedarray.Round) -> InitializedMatrix: components = initialize(x.seed) - if x.decimals != 0: - raise NotImplementedError( - "non-zero decimals in 'delayedarray.Round' are not yet supported" - ) - - ptr = lib.initialize_delayed_unary_isometric_operation_simple( - components.ptr, "round".encode("UTF-8") - ) - - return InitializedMatrix(ptr, components.objects) + raise NotImplementedError("non-zero decimals in 'delayedarray.Round' are not yet supported") + ptr = lib.initialize_delayed_unary_isometric_operation_simple(components.ptr, "round") + return InitializedMatrix(ptr)