From 25b25c7020af96ee3f7995cfb364eb53f2ba5f44 Mon Sep 17 00:00:00 2001 From: Adrian-Diaz Date: Sat, 8 Feb 2025 20:12:04 -0700 Subject: [PATCH 1/6] BUG: excessive prints --- examples/test_tpetra_carray.cpp | 30 ++++++++++++++++++-------- examples/test_tpetra_farray.cpp | 38 +++++++++++++++++++++++---------- 2 files changed, 48 insertions(+), 20 deletions(-) diff --git a/examples/test_tpetra_carray.cpp b/examples/test_tpetra_carray.cpp index 405df559..ec90355d 100644 --- a/examples/test_tpetra_carray.cpp +++ b/examples/test_tpetra_carray.cpp @@ -77,7 +77,9 @@ int main(int argc, char* argv[]) void TpetraCArrayOneDimensionExample() { - + + int process_rank; + MPI_Comm_rank(MPI_COMM_WORLD, &process_rank); int n = 20; //global dimension //distributed dual array with layout left @@ -87,7 +89,8 @@ void TpetraCArrayOneDimensionExample() int nlocal = myarray.size(); // set values on host copy of data - printf("Printing host copy of data (should be global ids):\n"); + if(process_rank==0) + printf("Printing host copy of data (should be global ids):\n"); for (int i = 0; i < nlocal; i++) { //set each array element to the corresponding global index //we get global indices using a partition map member in the array @@ -106,7 +109,8 @@ void TpetraCArrayOneDimensionExample() }); myarray.update_host(); Kokkos::fence(); - printf("---Data multiplied by 2 on device---\n"); + if(process_rank==0) + printf("---Data multiplied by 2 on device---\n"); // Print host copy of data myarray.print(); @@ -115,7 +119,9 @@ void TpetraCArrayOneDimensionExample() void TpetraCArrayTwoDimensionExample() { - + + int process_rank; + MPI_Comm_rank(MPI_COMM_WORLD, &process_rank); int nx = 20; //global dimension int ny = 5; @@ -126,7 +132,8 @@ void TpetraCArrayTwoDimensionExample() int nxlocal = myarray.dims(0); // set values on host copy of data - printf("Printing host copy of data (should be global ids):\n"); + if(process_rank==0) + printf("Printing host copy of data (should be global ids):\n"); for (int i = 0; i < nxlocal; i++) { for (int j = 0; j < ny; j++){ //set each array element to a computed global degree of freedom index @@ -148,7 +155,8 @@ void TpetraCArrayTwoDimensionExample() }); myarray.update_host(); Kokkos::fence(); - printf("---Data multiplied by 2 on device---\n"); + if(process_rank==0) + printf("---Data multiplied by 2 on device---\n"); // Print host copy of data myarray.print(); @@ -157,7 +165,9 @@ void TpetraCArrayTwoDimensionExample() void TpetraCArraySevenDimensionExample() { - + + int process_rank; + MPI_Comm_rank(MPI_COMM_WORLD, &process_rank); int nx = 20; //global dimension int ny = 3; int nz = 3; @@ -173,7 +183,8 @@ void TpetraCArraySevenDimensionExample() int nxlocal = myarray.dims(0); // set values on host copy of data - printf("Printing host copy of data (should be global ids):\n"); + if(process_rank==0) + printf("Printing host copy of data (should be global ids):\n"); for (int i = 0; i < nxlocal; i++) { for (int j = 0; j < ny; j++){ for (int k = 0; k < nz; k++){ @@ -216,7 +227,8 @@ void TpetraCArraySevenDimensionExample() }); myarray.update_host(); Kokkos::fence(); - printf("---Data multiplied by 2 on device---\n"); + if(process_rank==0) + printf("---Data multiplied by 2 on device---\n"); // Print host copy of data myarray.print(); diff --git a/examples/test_tpetra_farray.cpp b/examples/test_tpetra_farray.cpp index 58bc6a68..e6c1f011 100644 --- a/examples/test_tpetra_farray.cpp +++ b/examples/test_tpetra_farray.cpp @@ -67,8 +67,12 @@ int main(int argc, char* argv[]) } void TpetraFArrayOneDimensionExample() -{ - printf("\n====================Running 1D TpetraFarray example====================\n"); +{ + int process_rank; + MPI_Comm_rank(MPI_COMM_WORLD, &process_rank); + + if(process_rank==0) + printf("\n====================Running 1D TpetraFarray example====================\n"); int n = 20; //global dimension @@ -79,7 +83,8 @@ void TpetraFArrayOneDimensionExample() int nlocal = myarray.size(); // set values on host copy of data - printf("Printing host copy of data (should be global ids):\n"); + if(process_rank==0) + printf("Printing host copy of data (should be global ids):\n"); for (int i = 0; i < nlocal; i++) { //set each array element to the corresponding global index //we get global indices using a partition map member in the array @@ -98,7 +103,8 @@ void TpetraFArrayOneDimensionExample() }); myarray.update_host(); Kokkos::fence(); - printf("---Data multiplied by 2 on device---\n"); + if(process_rank==0) + printf("---Data multiplied by 2 on device---\n"); // Print host copy of data myarray.print(); @@ -106,8 +112,11 @@ void TpetraFArrayOneDimensionExample() } void TpetraFArrayTwoDimensionExample() -{ - printf("\n====================Running 2D TpetraFarray example====================\n"); +{ + int process_rank; + MPI_Comm_rank(MPI_COMM_WORLD, &process_rank); + if(process_rank==0) + printf("\n====================Running 2D TpetraFarray example====================\n"); int nx = 20; //global dimension int ny = 5; @@ -119,7 +128,8 @@ void TpetraFArrayTwoDimensionExample() int nxlocal = myarray.dims(0); // set values on host copy of data - printf("Printing host copy of data (should be global ids):\n"); + if(process_rank==0) + printf("Printing host copy of data (should be global ids):\n"); for (int i = 0; i < nxlocal; i++) { for (int j = 0; j < ny; j++){ //set each array element to a computed global degree of freedom index @@ -141,7 +151,8 @@ void TpetraFArrayTwoDimensionExample() }); myarray.update_host(); Kokkos::fence(); - printf("---Data multiplied by 2 on device---\n"); + if(process_rank==0) + printf("---Data multiplied by 2 on device---\n"); // Print host copy of data myarray.print(); @@ -150,7 +161,10 @@ void TpetraFArrayTwoDimensionExample() void TpetraFArraySevenDimensionExample() { - printf("\n====================Running 7D TpetraFarray example====================\n"); + int process_rank; + MPI_Comm_rank(MPI_COMM_WORLD, &process_rank); + if(process_rank==0) + printf("\n====================Running 7D TpetraFarray example====================\n"); int nx = 20; //global dimension int ny = 3; @@ -167,7 +181,8 @@ void TpetraFArraySevenDimensionExample() int nxlocal = myarray.dims(0); // set values on host copy of data - printf("Printing host copy of data (should be global ids):\n"); + if(process_rank==0) + printf("Printing host copy of data (should be global ids):\n"); for (int i = 0; i < nxlocal; i++) { for (int j = 0; j < ny; j++){ for (int k = 0; k < nz; k++){ @@ -210,7 +225,8 @@ void TpetraFArraySevenDimensionExample() }); myarray.update_host(); Kokkos::fence(); - printf("---Data multiplied by 2 on device---\n"); + if(process_rank==0) + printf("---Data multiplied by 2 on device---\n"); // Print host copy of data myarray.print(); From ae4f8ed8ce37d787f38bab24ff2b49045d4055c4 Mon Sep 17 00:00:00 2001 From: Adrian-Diaz Date: Sat, 8 Feb 2025 23:35:24 -0700 Subject: [PATCH 2/6] WIP: Tpetra CRS matrix --- examples/CMakeLists.txt | 3 + examples/test_tpetra_crs.cpp | 104 ++++++++++++++++++++++++ src/include/tpetra_wrapper_types.h | 124 ++++++++++------------------- 3 files changed, 150 insertions(+), 81 deletions(-) create mode 100644 examples/test_tpetra_crs.cpp diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 66b589a3..c5bbd634 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -126,6 +126,9 @@ if (KOKKOS) add_executable(test_tpetra_carray test_tpetra_carray.cpp) target_link_libraries(test_tpetra_carray ${LINKING_LIBRARIES}) + add_executable(test_tpetra_crs test_tpetra_crs.cpp) + target_link_libraries(test_tpetra_crs ${LINKING_LIBRARIES}) + add_executable(test_tpetra_mesh test_tpetra_mesh.cpp) target_link_libraries(test_tpetra_mesh ${LINKING_LIBRARIES}) endif() diff --git a/examples/test_tpetra_crs.cpp b/examples/test_tpetra_crs.cpp new file mode 100644 index 00000000..80c71970 --- /dev/null +++ b/examples/test_tpetra_crs.cpp @@ -0,0 +1,104 @@ +/********************************************************************************************** + � 2020. Triad National Security, LLC. All rights reserved. + This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos + National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. + Department of Energy/National Nuclear Security Administration. All rights in the program are + reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear + Security Administration. The Government is granted for itself and others acting on its behalf a + nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare + derivative works, distribute copies to the public, perform publicly and display publicly, and + to permit others to do so. + This program is open source under the BSD-3 License. + Redistribution and use in source and binary forms, with or without modification, are permitted + provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior + written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + **********************************************************************************************/ +#include +#include + +#include "matar.h" +#include "Kokkos_DualView.hpp" + +using namespace mtr; // matar namespace + +void TpetraCRSMatrixExample(); + +int main(int argc, char* argv[]) +{ + MPI_Init(&argc, &argv); + int process_rank; + MPI_Comm_rank(MPI_COMM_WORLD, &process_rank); + Kokkos::initialize(); + { + // Run TpetraFArray 1D example + TpetraCRSMatrixExample(); + } // end of kokkos scope + Kokkos::finalize(); + MPI_Barrier(MPI_COMM_WORLD); + if(process_rank==0) + printf("\nfinished\n\n"); + MPI_Finalize(); +} + +void TpetraCRSMatrixExample() +{ + int process_rank; + MPI_Comm_rank(MPI_COMM_WORLD, &process_rank); + + if(process_rank==0) + printf("\n====================Running TpetraCRSMatrix example====================\n"); + + int n = 20; //global dimension + + //distributed dual array with layout left + TpetraCRSMatrix mymatrix(); + + // //local size + // int nlocal = myarray.size(); + + // // set values on host copy of data + // if(process_rank==0) + // printf("Printing host copy of data (should be global ids):\n"); + // for (int i = 0; i < nlocal; i++) { + // //set each array element to the corresponding global index + // //we get global indices using a partition map member in the array + // myarray.host(i) = myarray.pmap.getGlobalIndex(i); + // } + + // myarray.update_device(); + + // // Print host copy of data + // myarray.print(); + // Kokkos::fence(); + + // // Manupulate data on device and update host + // FOR_ALL(i, 0, nlocal,{ + // myarray(i) = 2*myarray(i); + // }); + // myarray.update_host(); + // Kokkos::fence(); + // if(process_rank==0) + // printf("---Data multiplied by 2 on device---\n"); + + // // Print host copy of data + // myarray.print(); + // Kokkos::fence(); +} diff --git a/src/include/tpetra_wrapper_types.h b/src/include/tpetra_wrapper_types.h index 23796d4b..b2069fb6 100644 --- a/src/include/tpetra_wrapper_types.h +++ b/src/include/tpetra_wrapper_types.h @@ -2860,14 +2860,10 @@ class TpetraCRSMatrix { // //CRS row distributed matrix constructor for rectangular matrix // TpetraCRSMatrix(size_t global_dim1, size_t dim2, const std::string& tag_string = DEFAULTSTRINGARRAY, MPI_Comm mpi_comm = MPI_COMM_WORLD); - // TpetraCRSMatrix(size_t dim1, input_row_map_type input_strides, DCArrayKokkos crs_graph, - // const std::string& tag_string = DEFAULTSTRINGARRAY, MPI_Comm mpi_comm = MPI_COMM_WORLD); + // Constructor that takes local data in a matar ragged type + TpetraCRSMatrix(size_t dim1, input_row_map_type input_strides, DCArrayKokkos crs_graph, + TArray1D input_values, const std::string& tag_string = DEFAULTSTRINGARRAY, MPI_Comm mpi_comm = MPI_COMM_WORLD); - //CRS matrix constructor with arbitrary row graph and column map supplied - TpetraCRSMatrix(TpetraPartitionMap &input_pmap, size_t dim1, const std::string& tag_string = DEFAULTSTRINGARRAY); - - //CRS matric constructor with arbitrary row graph; builds column map for you and thus one less arg - TpetraCRSMatrix(Teuchos::RCP> input_pmap, size_t dim1, const std::string& tag_string = DEFAULTSTRINGARRAY); KOKKOS_INLINE_FUNCTION T& operator()(size_t i, size_t j) const; @@ -2994,89 +2990,53 @@ TpetraCRSMatrix::TpetraCRSMatrix(): tpetra_pmap // } // Constructor that takes local data in a matar ragged type -// template -// TpetraCRSMatrix::TpetraCRSMatrix(size_t dim0, input_row_map_type input_strides, DCArrayKokkos crs_graph, -// TArray1D input_values, const std::string& tag_string, MPI_Comm mpi_comm) { -// mpi_comm_ = mpi_comm; -// global_dim1_ = dim0; -// Teuchos::RCP> teuchos_comm = Teuchos::rcp(new Teuchos::MpiComm(mpi_comm_)); -// tpetra_pmap = Teuchos::rcp(new Tpetra::Map((long long int) dim0, 0, teuchos_comm)); -// pmap = TpetraPartitionMap(tpetra_pmap); -// dim1_ = tpetra_pmap->getLocalNumElements(); -// mystrides_ = input_strides; -// this_array_ = input_values; -// global_indices_array input_crs_graph = crs_graph.get_kokkos_dual_view().d_view; +template +TpetraCRSMatrix::TpetraCRSMatrix(size_t dim1, input_row_map_type input_strides, DCArrayKokkos crs_graph, + TArray1D input_values, const std::string& tag_string, MPI_Comm mpi_comm) { + mpi_comm_ = mpi_comm; + global_dim1_ = dim1; + Teuchos::RCP> teuchos_comm = Teuchos::rcp(new Teuchos::MpiComm(mpi_comm_)); + tpetra_pmap = Teuchos::rcp(new Tpetra::Map((long long int) dim1, 0, teuchos_comm)); + pmap = TpetraPartitionMap(tpetra_pmap); + dim1_ = tpetra_pmap->getLocalNumElements(); + mystrides_ = input_strides; + this_array_ = input_values; + global_indices_array input_crs_graph = crs_graph.get_kokkos_dual_view().d_view; -// //build column map for the global conductivity matrix -// Teuchos::RCP > colmap; -// const Teuchos::RCP > dommap = tpetra_pmap; + //build column map for the global conductivity matrix + Teuchos::RCP > colmap; + const Teuchos::RCP > dommap = tpetra_pmap; -// Tpetra::Details::makeColMap(colmap, tpetra_pmap, input_crs_graph.get_kokkos_dual_view().d_view, nullptr); -// tpetra_column_pmap = colmap; -// size_t nnz = input_crs_graph.size(); + Tpetra::Details::makeColMap(colmap, tpetra_pmap, input_crs_graph.get_kokkos_dual_view().d_view, nullptr); + tpetra_column_pmap = colmap; + size_t nnz = input_crs_graph.size(); -// //debug print -// //std::cout << "DOF GRAPH SIZE ON RANK " << myrank << " IS " << nnz << std::endl; + //debug print + //std::cout << "DOF GRAPH SIZE ON RANK " << myrank << " IS " << nnz << std::endl; -// //local indices in the graph using the constructed column map -// crs_local_indices_ = indices_array("crs_local_indices", nnz); + //local indices in the graph using the constructed column map + crs_local_indices_ = indices_array("crs_local_indices", nnz); -// //row offsets with compatible template arguments -// row_map_type row_offsets_pass("row_offsets", dim1_ + 1); -// for(int ipass = 0; ipass < dim1_ + 1; ipass++){ -// row_offsets_pass(ipass) = input_values.start_index_(ipass); -// } + //row offsets with compatible template arguments + row_map_type row_offsets_pass("row_offsets", dim1_ + 1); + for(int ipass = 0; ipass < dim1_ + 1; ipass++){ + row_offsets_pass(ipass) = input_values.start_index_(ipass); + } -// size_t entrycount = 0; -// for(int irow = 0; irow < dim1_; irow++){ -// for(int istride = 0; istride < mystrides_(irow); istride++){ -// crs_local_indices_(entrycount) = tpetra_column_pmap->getLocalElement(crs_graph(entrycount)); -// entrycount++; -// } -// } + size_t entrycount = 0; + for(int irow = 0; irow < dim1_; irow++){ + for(int istride = 0; istride < mystrides_(irow); istride++){ + crs_local_indices_(entrycount) = tpetra_column_pmap->getLocalElement(crs_graph(entrycount)); + entrycount++; + } + } -// //sort values and indices -// Tpetra::Import_Util::sortCrsEntries(row_offsets_pass, crs_local_indices_.d_view, this_array_.get_kokkos_view()); + //sort values and indices + Tpetra::Import_Util::sortCrsEntries(row_offsets_pass, crs_local_indices_.d_view, this_array_.get_kokkos_view()); -// tpetra_crs_matrix = Teuchos::rcp(new MAT(tpetra_pmap, tpetra_column_pmap, start_index_.d_view, crs_local_indices_.d_view, this_array_.get_kokkos_view())); -// tpetra_crs_matrix->fillComplete(); -// } - -// Overloaded 2D constructor where you provide a partition map -template -TpetraCRSMatrix::TpetraCRSMatrix(TpetraPartitionMap &input_pmap, - size_t dim1, const std::string& tag_string) { - // mpi_comm_ = input_pmap.mpi_comm_; - // global_dim1_ = input_pmap.num_global_; - // tpetra_pmap = input_pmap.tpetra_map; - // pmap = input_pmap; - // dims_[0] = tpetra_pmap->getLocalNumElements(); - // dims_[1] = dim1; - // order_ = 2; - // length_ = (dims_[0] * dims_[1]); - // // Create host ViewCArray - // set_mpi_type(); - // this_array_ = TArray1D(tag_string, dims_[0], dim1); - // tpetra_vector = Teuchos::rcp(new MV(tpetra_pmap, this_array_)); -} - -// Overloaded 2D constructor taking an RPC pointer to a Tpetra Map -template -TpetraCRSMatrix::TpetraCRSMatrix(Teuchos::RCP> input_pmap, - size_t dim1, const std::string& tag_string) { - - // global_dim1_ = input_pmap->getGlobalNumElements(); - // dims_[0] = input_pmap->getLocalNumElements(); - // dims_[1] = dim1; - // tpetra_pmap = input_pmap; - // pmap = TpetraPartitionMap(tpetra_pmap); - // order_ = 2; - // length_ = (dims_[0] * dims_[1]); - // // Create host ViewCArray - // set_mpi_type(); - // this_array_ = TArray1D(tag_string, dims_[0], dim1); - // tpetra_vector = Teuchos::rcp(new MV(tpetra_pmap, this_array_)); + tpetra_crs_matrix = Teuchos::rcp(new MAT(tpetra_pmap, tpetra_column_pmap, start_index_.d_view, crs_local_indices_.d_view, this_array_.get_kokkos_view())); + tpetra_crs_matrix->fillComplete(); } template @@ -3170,8 +3130,10 @@ TpetraCRSMatrix& TpetraCRSMatrix Date: Mon, 10 Feb 2025 01:00:13 -0700 Subject: [PATCH 3/6] WIP: Tpetra CRS matrix --- examples/test_tpetra_crs.cpp | 32 +++++++++++-- src/include/tpetra_wrapper_types.h | 75 ++++++++++++++++++++++++++---- 2 files changed, 95 insertions(+), 12 deletions(-) diff --git a/examples/test_tpetra_crs.cpp b/examples/test_tpetra_crs.cpp index 80c71970..bdf0453d 100644 --- a/examples/test_tpetra_crs.cpp +++ b/examples/test_tpetra_crs.cpp @@ -65,11 +65,37 @@ void TpetraCRSMatrixExample() if(process_rank==0) printf("\n====================Running TpetraCRSMatrix example====================\n"); + + //construct a row map over MPI ranks + long long int n = 100; //global dimension + TpetraPartitionMap<> input_pmap(n); + int nlocal = input_pmap.size(); + + //construct strides, index graph, and values arrays + DCArrayKokkos matrix_strides(nlocal, "matrix_strides"); + //set strides; map is contiguous so Trilinos leaves device view of map empty (BE WARNED) + const long long int min_global_index = input_pmap.getMinGlobalIndex(); + FOR_ALL(i, 0, nlocal,{ + matrix_strides(i) = (min_global_index+i) + 1; + }); - int n = 20; //global dimension + //global indices array + RaggedRightArrayKokkos input_csr(matrix_strides); + FOR_ALL(i, 0, nlocal,{ + for(int j = 0; j < matrix_strides(i); j++){ + input_csr(i,j) = j; + } + }); - //distributed dual array with layout left - TpetraCRSMatrix mymatrix(); + //values array + RaggedRightArrayKokkos input_values(matrix_strides); + FOR_ALL(i, 0, nlocal,{ + for(int j = 0; j < matrix_strides(i); j++){ + input_values(i,j) = 3*(min_global_index+j); + } + }); + TpetraCRSMatrix mymatrix(input_pmap, matrix_strides, input_csr, input_values); + mymatrix.print(); // //local size // int nlocal = myarray.size(); diff --git a/src/include/tpetra_wrapper_types.h b/src/include/tpetra_wrapper_types.h index b2069fb6..21d45709 100644 --- a/src/include/tpetra_wrapper_types.h +++ b/src/include/tpetra_wrapper_types.h @@ -106,11 +106,11 @@ class TpetraPartitionMap { size_t length_; MPI_Datatype mpi_datatype_; TArray1D_host host; - TArray1D_dev device; public: - + + TArray1D_dev device; //pointer to wrapped Tpetra map Teuchos::RCP> tpetra_map; // map of node indices @@ -134,7 +134,7 @@ class TpetraPartitionMap { TpetraPartitionMap(Teuchos::RCP> input_tpetra_map, const std::string& tag_string = DEFAULTSTRINGARRAY); KOKKOS_INLINE_FUNCTION - long long int& operator()(size_t i) const; + const long long int& operator()(size_t i) const; KOKKOS_INLINE_FUNCTION TpetraPartitionMap& operator=(const TpetraPartitionMap& temp); @@ -223,7 +223,7 @@ TpetraPartitionMap::TpetraPartitionMap(Teuchos::RCP KOKKOS_INLINE_FUNCTION -long long int& TpetraPartitionMap::operator()(size_t i) const { +const long long int& TpetraPartitionMap::operator()(size_t i) const { assert(order_ == 1 && "Tensor order (rank) does not match constructor in TpetraPartitionMap 1D!"); assert(i >= 0 && i < dims_[0] && "i is out of bounds in TpetraPartitionMap 1D!"); return device(i); @@ -2796,10 +2796,10 @@ template ; - using TArray1D_Host = RaggedRightArrayKokkos; - using row_map_type = Kokkos::View; - using input_row_map_type = DCArrayKokkos; + using TArray1D = RaggedRightArrayKokkos; + using TArray1D_Host = RaggedRightArrayKokkos; + using row_map_type = Kokkos::View; + using input_row_map_type = DCArrayKokkos; using values_array = Kokkos::View; using global_indices_array = Kokkos::View; using indices_array = Kokkos::View; @@ -2860,10 +2860,15 @@ class TpetraCRSMatrix { // //CRS row distributed matrix constructor for rectangular matrix // TpetraCRSMatrix(size_t global_dim1, size_t dim2, const std::string& tag_string = DEFAULTSTRINGARRAY, MPI_Comm mpi_comm = MPI_COMM_WORLD); - // Constructor that takes local data in a matar ragged type + // Constructor that takes local data in a matar ragged type and build map from rows of that (unfinished) TpetraCRSMatrix(size_t dim1, input_row_map_type input_strides, DCArrayKokkos crs_graph, TArray1D input_values, const std::string& tag_string = DEFAULTSTRINGARRAY, MPI_Comm mpi_comm = MPI_COMM_WORLD); + // Constructor that takes local data in a matar ragged type and a row partition map (avoids multiple copies partition map data) + TpetraCRSMatrix(TpetraPartitionMap input_pmap, input_row_map_type input_strides, + DCArrayKokkos crs_graph, TArray1D input_values, + const std::string& tag_string = DEFAULTSTRINGARRAY); + KOKKOS_INLINE_FUNCTION T& operator()(size_t i, size_t j) const; @@ -3039,6 +3044,58 @@ TpetraCRSMatrix::TpetraCRSMatrix(size_t dim1, i tpetra_crs_matrix->fillComplete(); } +// Constructor that takes local data in a matar ragged type +template +TpetraCRSMatrix::TpetraCRSMatrix(TpetraPartitionMap input_pmap, input_row_map_type input_strides, + DCArrayKokkos crs_graph, TArray1D input_values, + const std::string& tag_string) { + mpi_comm_ = input_pmap.mpi_comm_; + global_dim1_ = input_pmap.num_global_; + Teuchos::RCP> teuchos_comm = Teuchos::rcp(new Teuchos::MpiComm(mpi_comm_)); + tpetra_pmap = input_pmap.tpetra_map; + pmap = input_pmap; + dim1_ = tpetra_pmap->getLocalNumElements(); + mystrides_ = input_strides; + this_array_ = input_values; + global_indices_array input_crs_graph = crs_graph.get_kokkos_dual_view().d_view; + + + //build column map for the global conductivity matrix + Teuchos::RCP > colmap; + const Teuchos::RCP > dommap = tpetra_pmap; + + Tpetra::Details::makeColMap(colmap, tpetra_pmap, input_crs_graph.get_kokkos_dual_view().d_view, nullptr); + tpetra_column_pmap = colmap; + size_t nnz = input_crs_graph.size(); + + //debug print + //std::cout << "DOF GRAPH SIZE ON RANK " << myrank << " IS " << nnz << std::endl; + + //local indices in the graph using the constructed column map + crs_local_indices_ = indices_array("crs_local_indices", nnz); + + //row offsets with compatible template arguments + row_map_type row_offsets_pass("row_offsets", dim1_ + 1); + for(int ipass = 0; ipass < dim1_ + 1; ipass++){ + row_offsets_pass(ipass) = input_values.start_index_(ipass); + } + + size_t entrycount = 0; + for(int irow = 0; irow < dim1_; irow++){ + for(int istride = 0; istride < mystrides_(irow); istride++){ + crs_local_indices_(entrycount) = tpetra_column_pmap->getLocalElement(crs_graph(entrycount)); + entrycount++; + } + } + + //sort values and indices + Tpetra::Import_Util::sortCrsEntries(row_offsets_pass, crs_local_indices_.d_view, this_array_.get_kokkos_view()); + + tpetra_crs_matrix = Teuchos::rcp(new MAT(tpetra_pmap, tpetra_column_pmap, start_index_.d_view, crs_local_indices_.d_view, this_array_.get_kokkos_view())); + tpetra_crs_matrix->fillComplete(); +} + +//select MPI datatype template void TpetraCRSMatrix::set_mpi_type() { if (typeid(T).name() == typeid(bool).name()) { From 393ed2e3122194f85b7d31c893493c3d2a47b9fa Mon Sep 17 00:00:00 2001 From: Adrian-Diaz Date: Mon, 10 Feb 2025 13:22:20 -0700 Subject: [PATCH 4/6] WIP: Tpetra CRS matrix wrapper --- examples/test_tpetra_crs.cpp | 9 ++++---- src/include/tpetra_wrapper_types.h | 35 +++++++++++++++--------------- 2 files changed, 23 insertions(+), 21 deletions(-) diff --git a/examples/test_tpetra_crs.cpp b/examples/test_tpetra_crs.cpp index bdf0453d..0bd3e852 100644 --- a/examples/test_tpetra_crs.cpp +++ b/examples/test_tpetra_crs.cpp @@ -80,21 +80,22 @@ void TpetraCRSMatrixExample() }); //global indices array - RaggedRightArrayKokkos input_csr(matrix_strides); + RaggedRightArrayKokkos input_crs(matrix_strides,"graph_indices"); FOR_ALL(i, 0, nlocal,{ for(int j = 0; j < matrix_strides(i); j++){ - input_csr(i,j) = j; + input_crs(i,j) = j; } }); //values array - RaggedRightArrayKokkos input_values(matrix_strides); + RaggedRightArrayKokkos input_values(matrix_strides,"ragged_values"); FOR_ALL(i, 0, nlocal,{ for(int j = 0; j < matrix_strides(i); j++){ input_values(i,j) = 3*(min_global_index+j); } }); - TpetraCRSMatrix mymatrix(input_pmap, matrix_strides, input_csr, input_values); + TpetraCRSMatrix mymatrix(input_pmap, matrix_strides, input_crs, input_values); + //TpetraCRSMatrix mymatrix(input_pmap, matrix_strides); mymatrix.print(); // //local size diff --git a/src/include/tpetra_wrapper_types.h b/src/include/tpetra_wrapper_types.h index 21d45709..b7e426b3 100644 --- a/src/include/tpetra_wrapper_types.h +++ b/src/include/tpetra_wrapper_types.h @@ -2796,9 +2796,10 @@ template ; - using TArray1D_Host = RaggedRightArrayKokkos; + using TArray1D = RaggedRightArrayKokkos; + using TArray1D_Host = RaggedRightArrayKokkos; using row_map_type = Kokkos::View; + using input_row_graph_type = RaggedRightArrayKokkos; using input_row_map_type = DCArrayKokkos; using values_array = Kokkos::View; using global_indices_array = Kokkos::View; @@ -2840,9 +2841,9 @@ class TpetraCRSMatrix { TpetraPartitionMap pmap; TpetraPartitionMap column_pmap; TpetraPartitionMap comm_pmap; - Teuchos::RCP> tpetra_pmap; - Teuchos::RCP> tpetra_column_pmap; - Teuchos::RCP> tpetra_comm_pmap; + Teuchos::RCP> tpetra_pmap; + Teuchos::RCP> tpetra_column_pmap; + Teuchos::RCP> tpetra_comm_pmap; Teuchos::RCP tpetra_crs_matrix; TpetraCRSMatrix(); @@ -2865,8 +2866,8 @@ class TpetraCRSMatrix { TArray1D input_values, const std::string& tag_string = DEFAULTSTRINGARRAY, MPI_Comm mpi_comm = MPI_COMM_WORLD); // Constructor that takes local data in a matar ragged type and a row partition map (avoids multiple copies partition map data) - TpetraCRSMatrix(TpetraPartitionMap input_pmap, input_row_map_type input_strides, - DCArrayKokkos crs_graph, TArray1D input_values, + TpetraCRSMatrix(TpetraPartitionMap &input_pmap, input_row_map_type input_strides, + input_row_graph_type crs_graph, TArray1D input_values, const std::string& tag_string = DEFAULTSTRINGARRAY); @@ -3046,27 +3047,27 @@ TpetraCRSMatrix::TpetraCRSMatrix(size_t dim1, i // Constructor that takes local data in a matar ragged type template -TpetraCRSMatrix::TpetraCRSMatrix(TpetraPartitionMap input_pmap, input_row_map_type input_strides, - DCArrayKokkos crs_graph, TArray1D input_values, +TpetraCRSMatrix::TpetraCRSMatrix(TpetraPartitionMap &input_pmap, input_row_map_type input_strides, + input_row_graph_type crs_graph, TArray1D input_values, const std::string& tag_string) { mpi_comm_ = input_pmap.mpi_comm_; global_dim1_ = input_pmap.num_global_; Teuchos::RCP> teuchos_comm = Teuchos::rcp(new Teuchos::MpiComm(mpi_comm_)); - tpetra_pmap = input_pmap.tpetra_map; pmap = input_pmap; + tpetra_pmap = pmap.tpetra_map; dim1_ = tpetra_pmap->getLocalNumElements(); - mystrides_ = input_strides; + mystrides_ = input_strides.get_kokkos_dual_view().d_view; this_array_ = input_values; - global_indices_array input_crs_graph = crs_graph.get_kokkos_dual_view().d_view; + global_indices_array input_crs_graph = crs_graph.get_kokkos_view(); //build column map for the global conductivity matrix Teuchos::RCP > colmap; const Teuchos::RCP > dommap = tpetra_pmap; - Tpetra::Details::makeColMap(colmap, tpetra_pmap, input_crs_graph.get_kokkos_dual_view().d_view, nullptr); + Tpetra::Details::makeColMap(colmap, tpetra_pmap, input_crs_graph, nullptr); tpetra_column_pmap = colmap; - size_t nnz = input_crs_graph.size(); + size_t nnz = crs_graph.size(); //debug print //std::cout << "DOF GRAPH SIZE ON RANK " << myrank << " IS " << nnz << std::endl; @@ -3083,15 +3084,15 @@ TpetraCRSMatrix::TpetraCRSMatrix(TpetraPartitio size_t entrycount = 0; for(int irow = 0; irow < dim1_; irow++){ for(int istride = 0; istride < mystrides_(irow); istride++){ - crs_local_indices_(entrycount) = tpetra_column_pmap->getLocalElement(crs_graph(entrycount)); + crs_local_indices_(entrycount) = tpetra_column_pmap->getLocalElement(input_crs_graph(entrycount)); entrycount++; } } //sort values and indices - Tpetra::Import_Util::sortCrsEntries(row_offsets_pass, crs_local_indices_.d_view, this_array_.get_kokkos_view()); + Tpetra::Import_Util::sortCrsEntries(row_offsets_pass, crs_local_indices_, this_array_.get_kokkos_view()); - tpetra_crs_matrix = Teuchos::rcp(new MAT(tpetra_pmap, tpetra_column_pmap, start_index_.d_view, crs_local_indices_.d_view, this_array_.get_kokkos_view())); + tpetra_crs_matrix = Teuchos::rcp(new MAT(tpetra_pmap, tpetra_column_pmap, input_values.start_index_, crs_local_indices_, this_array_.get_kokkos_view())); tpetra_crs_matrix->fillComplete(); } From 9f416a416b6cc11cf209bedb5eb10ccffb360b6c Mon Sep 17 00:00:00 2001 From: Adrian-Diaz Date: Tue, 11 Feb 2025 22:48:03 -0700 Subject: [PATCH 5/6] WIP: CRS matrix wrapper --- src/include/kokkos_types.h | 12 ++++----- src/include/tpetra_wrapper_types.h | 40 ++++++++++++------------------ 2 files changed, 22 insertions(+), 30 deletions(-) diff --git a/src/include/kokkos_types.h b/src/include/kokkos_types.h index a54c02c7..1c90cb14 100644 --- a/src/include/kokkos_types.h +++ b/src/include/kokkos_types.h @@ -7740,8 +7740,8 @@ RaggedRightArrayKokkos::RaggedRightArra template void RaggedRightArrayKokkos::data_setup(const std::string& tag_string) { //allocate start indices - std::string append_indices_string("start_indices"); - std::string append_array_string("array"); + std::string append_indices_string("_start_indices"); + std::string append_array_string("_array"); std::string temp_copy_string = tag_string; std::string start_index_tag_string = temp_copy_string.append(append_indices_string); temp_copy_string = tag_string; @@ -8171,8 +8171,8 @@ template ::data_setup(const std::string& tag_string) { //allocate start indices - std::string append_indices_string("start_indices"); - std::string append_array_string("array"); + std::string append_indices_string("_start_indices"); + std::string append_array_string("_array"); std::string temp_copy_string = tag_string; std::string start_index_tag_string = temp_copy_string.append(append_indices_string); temp_copy_string = tag_string; @@ -8488,8 +8488,8 @@ RaggedDownArrayKokkos::RaggedDownArrayK template void RaggedDownArrayKokkos::data_setup(const std::string& tag_string) { //allocate start indices - std::string append_indices_string("start_indices"); - std::string append_array_string("array"); + std::string append_indices_string("_start_indices"); + std::string append_array_string("_array"); std::string temp_copy_string = tag_string; std::string start_index_tag_string = temp_copy_string.append(append_indices_string); temp_copy_string = tag_string; diff --git a/src/include/tpetra_wrapper_types.h b/src/include/tpetra_wrapper_types.h index b7e426b3..0340908f 100644 --- a/src/include/tpetra_wrapper_types.h +++ b/src/include/tpetra_wrapper_types.h @@ -2804,6 +2804,7 @@ class TpetraCRSMatrix { using values_array = Kokkos::View; using global_indices_array = Kokkos::View; using indices_array = Kokkos::View; + using indices_array_dview = Kokkos::DualView; size_t dim1_; size_t global_dim1_; @@ -2814,7 +2815,6 @@ class TpetraCRSMatrix { TArray1D this_array_; row_map_type mystrides_; row_map_type start_index_; - indices_array crs_local_indices_; // Trilinos type definitions typedef Tpetra::CrsMatrix MAT; //stands for matrix @@ -3022,26 +3022,22 @@ TpetraCRSMatrix::TpetraCRSMatrix(size_t dim1, i //std::cout << "DOF GRAPH SIZE ON RANK " << myrank << " IS " << nnz << std::endl; //local indices in the graph using the constructed column map - crs_local_indices_ = indices_array("crs_local_indices", nnz); - - //row offsets with compatible template arguments - row_map_type row_offsets_pass("row_offsets", dim1_ + 1); - for(int ipass = 0; ipass < dim1_ + 1; ipass++){ - row_offsets_pass(ipass) = input_values.start_index_(ipass); - } + indices_array_dview crs_local_indices_ = indices_array_dview("crs_local_indices", nnz); size_t entrycount = 0; for(int irow = 0; irow < dim1_; irow++){ - for(int istride = 0; istride < mystrides_(irow); istride++){ - crs_local_indices_(entrycount) = tpetra_column_pmap->getLocalElement(crs_graph(entrycount)); + for(int istride = 0; istride < input_strides.get_kokkos_dual_view().h_view(irow); istride++){ + crs_local_indices_.h_view(entrycount) = tpetra_column_pmap->getLocalElement(crs_graph(entrycount)); entrycount++; } } + crs_local_indices_.template modify(); + crs_local_indices_.template sync(); //sort values and indices - Tpetra::Import_Util::sortCrsEntries(row_offsets_pass, crs_local_indices_.d_view, this_array_.get_kokkos_view()); + Tpetra::Import_Util::sortCrsEntries(input_values.start_index_, crs_local_indices_.d_view, this_array_.get_kokkos_view()); - tpetra_crs_matrix = Teuchos::rcp(new MAT(tpetra_pmap, tpetra_column_pmap, start_index_.d_view, crs_local_indices_.d_view, this_array_.get_kokkos_view())); + tpetra_crs_matrix = Teuchos::rcp(new MAT(tpetra_pmap, tpetra_column_pmap, input_values.start_index_, crs_local_indices_.d_view, this_array_.get_kokkos_view())); tpetra_crs_matrix->fillComplete(); } @@ -3073,26 +3069,23 @@ TpetraCRSMatrix::TpetraCRSMatrix(TpetraPartitio //std::cout << "DOF GRAPH SIZE ON RANK " << myrank << " IS " << nnz << std::endl; //local indices in the graph using the constructed column map - crs_local_indices_ = indices_array("crs_local_indices", nnz); - - //row offsets with compatible template arguments - row_map_type row_offsets_pass("row_offsets", dim1_ + 1); - for(int ipass = 0; ipass < dim1_ + 1; ipass++){ - row_offsets_pass(ipass) = input_values.start_index_(ipass); - } + indices_array_dview crs_local_indices_ = indices_array_dview("crs_local_indices", nnz); size_t entrycount = 0; for(int irow = 0; irow < dim1_; irow++){ - for(int istride = 0; istride < mystrides_(irow); istride++){ - crs_local_indices_(entrycount) = tpetra_column_pmap->getLocalElement(input_crs_graph(entrycount)); + for(int istride = 0; istride < input_strides.get_kokkos_dual_view().h_view(irow); istride++){ + crs_local_indices_.h_view(entrycount) = tpetra_column_pmap->getLocalElement(input_crs_graph(entrycount)); entrycount++; } } + + crs_local_indices_.template modify(); + crs_local_indices_.template sync(); //sort values and indices - Tpetra::Import_Util::sortCrsEntries(row_offsets_pass, crs_local_indices_, this_array_.get_kokkos_view()); + Tpetra::Import_Util::sortCrsEntries(input_values.start_index_, crs_local_indices_.d_view, this_array_.get_kokkos_view()); - tpetra_crs_matrix = Teuchos::rcp(new MAT(tpetra_pmap, tpetra_column_pmap, input_values.start_index_, crs_local_indices_, this_array_.get_kokkos_view())); + tpetra_crs_matrix = Teuchos::rcp(new MAT(tpetra_pmap, tpetra_column_pmap, input_values.start_index_, crs_local_indices_.d_view, this_array_.get_kokkos_view())); tpetra_crs_matrix->fillComplete(); } @@ -3179,7 +3172,6 @@ TpetraCRSMatrix& TpetraCRSMatrix Date: Fri, 14 Feb 2025 10:57:59 -0700 Subject: [PATCH 6/6] ENH: Trilinos 16 version use --- scripts/trilinos-install.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/trilinos-install.sh b/scripts/trilinos-install.sh index 13f5b253..7c630665 100644 --- a/scripts/trilinos-install.sh +++ b/scripts/trilinos-install.sh @@ -12,7 +12,7 @@ echo "Trilinos Kokkos Build Type: $kokkos_build_type" if [ ! -d "${TRILINOS_SOURCE_DIR}" ] then echo "Directory Trilinos does not exist, downloading Trilinos...." - git clone --depth 1 https://github.com/trilinos/Trilinos.git ${TRILINOS_SOURCE_DIR} + git clone --depth 1 --branch trilinos-release-16-0-branch https://github.com/trilinos/Trilinos.git ${TRILINOS_SOURCE_DIR} fi #check if Trilinos build directory exists, create Trilinos/build if it doesn't