From 606305390ba1d0dca6d9d85cf8de3f22c73c8f9b Mon Sep 17 00:00:00 2001 From: Nathaniel Morgan Date: Mon, 7 Oct 2024 11:48:06 -0600 Subject: [PATCH 1/5] updated to FOR_REDUCE --- examples/CSCKokkos.cpp | 14 ++-- examples/CSRKokkos.cpp | 22 +++---- examples/kokkos_for.cpp | 64 +++++++++---------- examples/laplaceMPI/laplace_mpi.cpp | 8 +-- examples/main_kokkos.cpp | 24 +++---- examples/mtr-kokkos-simple.cpp | 6 +- .../srcMacros/local_free_energy.cpp | 10 +-- examples/phaseField/srcMacros/outputs.cpp | 26 ++++---- examples/phaseFieldMPI/system.cpp | 10 +-- examples/sparsetests/powerIter.cpp | 14 ++-- examples/sparsetests/sparsePowerIter.cpp | 8 +-- examples/test_for.cpp | 42 ++++++------ examples/watt-graph/kokkos_floyd.cpp | 4 +- src/include/macros.h | 18 +++--- 14 files changed, 135 insertions(+), 135 deletions(-) diff --git a/examples/CSCKokkos.cpp b/examples/CSCKokkos.cpp index e46c0ed6..f3f95beb 100644 --- a/examples/CSCKokkos.cpp +++ b/examples/CSCKokkos.cpp @@ -1,5 +1,5 @@ /********************************************************************************************** - © 2020. Triad National Security, LLC. All rights reserved. + � 2020. Triad National Security, LLC. All rights reserved. This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. Department of Energy/National Nuclear Security Administration. All rights in the program are @@ -98,21 +98,21 @@ int main(int argc, char* argv[]) int loc_total = 0; loc_total += 0; // Get rid of warning - REDUCE_SUM(i, 0, nnz, - loc_total, { + FOR_REDUCE_SUM(i, 0, nnz, + loc_total, { loc_total += values[i]; }, total); printf("Sum of nnz from pointer method %d\n", total); total = 0; - REDUCE_SUM(i, 0, nnz, - loc_total, { + FOR_REDUCE_SUM(i, 0, nnz, + loc_total, { loc_total += a_start[i]; }, total); printf("Sum of start indices form .get_starts() %d\n", total); total = 0; - REDUCE_SUM(i, 0, dim1, - j, 0, dim2 - 1, + FOR_REDUCE_SUM(i, 0, dim1, + j, 0, dim2 - 1, loc_total, { loc_total += A(i, j); }, total); diff --git a/examples/CSRKokkos.cpp b/examples/CSRKokkos.cpp index 2bcb3499..7dab444d 100644 --- a/examples/CSRKokkos.cpp +++ b/examples/CSRKokkos.cpp @@ -1,5 +1,5 @@ /********************************************************************************************** - © 2020. Triad National Security, LLC. All rights reserved. + � 2020. Triad National Security, LLC. All rights reserved. This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. Department of Energy/National Nuclear Security Administration. All rights in the program are @@ -96,22 +96,22 @@ int main(int argc, char* argv[]) printf("And has %ld non zero elements\n", A.nnz()); }); - REDUCE_SUM(i, 0, nnz, - loc_total, { - loc_total += res[i]; + FOR_REDUCE_SUM(i, 0, nnz, + loc_total, { + loc_total += res[i]; }, total); printf("Sum of nnz from pointer method %d\n", total); total = 0; - REDUCE_SUM(i, 0, nnz, - loc_total, { - loc_total += a_start[i]; + FOR_REDUCE_SUM(i, 0, nnz, + loc_total, { + loc_total += a_start[i]; }, total); printf("Sum of start indices form .get_starts() %d\n", total); total = 0; - REDUCE_SUM(i, 0, dim1, - j, 0, dim2, - loc_total, { - loc_total += A(i,j); + FOR_REDUCE_SUM(i, 0, dim1, + j, 0, dim2, + loc_total, { + loc_total += A(i,j); }, total); printf("Sum of nnz in array notation %d\n", total); auto ss = A.begin(0); diff --git a/examples/kokkos_for.cpp b/examples/kokkos_for.cpp index 0026211a..9ae42235 100644 --- a/examples/kokkos_for.cpp +++ b/examples/kokkos_for.cpp @@ -1,5 +1,5 @@ /********************************************************************************************** - © 2020. Triad National Security, LLC. All rights reserved. + � 2020. Triad National Security, LLC. All rights reserved. This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. Department of Energy/National Nuclear Security Administration. All rights in the program are @@ -60,24 +60,24 @@ int main() // Kokkos::View arr_3D("ARR_3D", 10,10,10); CArrayKokkos arr_3D(10, 10, 10); FOR_ALL(i, 0, 10, - j, 0, 10, - k, 0, 10, { + j, 0, 10, + k, 0, 10, { arr_3D(i, j, k) = k * 10 * 10 + j * 10 + i; }); int loc_sum = 0; int result = 0; - REDUCE_SUM(i, 0, 10, - loc_sum, { + FOR_REDUCE_SUM(i, 0, 10, + loc_sum, { loc_sum += arr(i) * arr(i); }, result); printf("1D reduce sum: %i vs. 985960\n", result); loc_sum = 0; result = 0; - REDUCE_SUM(i, 0, 10, - j, 0, 10, - loc_sum, { + FOR_REDUCE_SUM(i, 0, 10, + j, 0, 10, + loc_sum, { loc_sum += arr_2D(i, j) * arr_2D(i, j); }, result); @@ -85,10 +85,10 @@ int main() loc_sum = 0; result = 0; - REDUCE_SUM(i, 0, 10, - j, 0, 10, - k, 0, 10, - loc_sum, { + FOR_REDUCE_SUM(i, 0, 10, + j, 0, 10, + k, 0, 10, + loc_sum, { loc_sum += arr_3D(i, j, k) * arr_3D(i, j, k); }, result); @@ -96,10 +96,10 @@ int main() result = 0; int loc_max = 2000; - REDUCE_MAX(i, 0, 10, - j, 0, 10, - k, 0, 10, - loc_max, { + FOR_REDUCE_MAX(i, 0, 10, + j, 0, 10, + k, 0, 10, + loc_max, { if (loc_max < arr_3D(i, j, k)) { loc_max = arr_3D(i, j, k); } @@ -122,10 +122,10 @@ int main() result = 0; int loc_min = 2000; - REDUCE_MIN(i, 0, 10, - j, 0, 10, - k, 0, 10, - loc_min, { + FOR_REDUCE_MIN(i, 0, 10, + j, 0, 10, + k, 0, 10, + loc_min, { if (loc_min > arr_3D(i, j, k)) { loc_min = arr_3D(i, j, k); } @@ -171,7 +171,7 @@ int main() }); // end parallel do DO_REDUCE_MAX(i, 1, 10, - loc_max, { + loc_max, { if (loc_max < matrix1D(i)) { loc_max = matrix1D(i); } @@ -180,8 +180,8 @@ int main() printf("result max 1D matrix = %i\n", result); DO_REDUCE_MAX(j, 1, 10, - i, 1, 10, - loc_max, { + i, 1, 10, + loc_max, { if (loc_max < matrix2D(i, j)) { loc_max = matrix2D(i, j); } @@ -189,9 +189,9 @@ int main() printf("result max 2D matrix = %i\n", result); DO_REDUCE_MAX(k, 1, 10, - j, 1, 10, - i, 1, 10, - loc_max, { + j, 1, 10, + i, 1, 10, + loc_max, { if (loc_max < matrix3D(i, j, k)) { loc_max = matrix3D(i, j, k); } @@ -199,7 +199,7 @@ int main() printf("result max 3D matrix = %i\n", result); DO_REDUCE_MIN(i, 1, 10, - loc_min, { + loc_min, { if (loc_min > matrix1D(i)) { loc_min = matrix1D(i); } @@ -207,8 +207,8 @@ int main() printf("result min 1D matrix = %i\n", result); DO_REDUCE_MIN(j, 1, 10, - i, 1, 10, - loc_min, { + i, 1, 10, + loc_min, { if (loc_min > matrix2D(i, j)) { loc_min = matrix2D(i, j); } @@ -216,9 +216,9 @@ int main() printf("result min 2D matrix = %i\n", result); DO_REDUCE_MIN(k, 1, 10, - j, 1, 10, - i, 1, 10, - loc_min, { + j, 1, 10, + i, 1, 10, + loc_min, { if (loc_min > matrix3D(i, j, k)) { loc_min = matrix3D(i, j, k); } diff --git a/examples/laplaceMPI/laplace_mpi.cpp b/examples/laplaceMPI/laplace_mpi.cpp index b5ddfc87..3cb407f3 100644 --- a/examples/laplaceMPI/laplace_mpi.cpp +++ b/examples/laplaceMPI/laplace_mpi.cpp @@ -1,5 +1,5 @@ /********************************************************************************************** - © 2020. Triad National Security, LLC. All rights reserved. + � 2020. Triad National Security, LLC. All rights reserved. This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. Department of Energy/National Nuclear Security Administration. All rights in the program are @@ -292,9 +292,9 @@ int main(int argc, char* argv[]) // calculate max difference between temperature and temperature_previous double loc_max_value = 100.0; - REDUCE_MAX(i, height_index_start, height_index_end, - j, 1, width_loc - 1, - loc_max_value, { + FOR_REDUCE_MAX(i, height_index_start, height_index_end, + j, 1, width_loc - 1, + loc_max_value, { double value = fabs(temperature_loc(i, j) - temperature_previous_loc(i, j)); if (value > loc_max_value) { loc_max_value = value; diff --git a/examples/main_kokkos.cpp b/examples/main_kokkos.cpp index 2dd5dc6f..d6d918d1 100644 --- a/examples/main_kokkos.cpp +++ b/examples/main_kokkos.cpp @@ -146,10 +146,10 @@ class ModelA // NOTE: if private vars are accessed, requires REDUCE_SUM_CLASS // do summation in parallel on GPU - REDUCE_SUM_CLASS(k, 1, 6, - j, 1, 5, - i, 1, 4, - loc_sum, { + FOR_REDUCE_SUM_CLASS(k, 1, 6, + j, 1, 5, + i, 1, 4, + loc_sum, { loc_sum += matrix(i, j, k, 1); }, val); @@ -182,10 +182,10 @@ class ModelB // NOTE: if private vars are accessed, requires REDUCE_SUM_CLASS // do summation in parallel on GPU - REDUCE_SUM_CLASS(k, 1, 6, - j, 1, 5, - i, 1, 4, - loc_sum, { + FOR_REDUCE_SUM_CLASS(k, 1, 6, + j, 1, 5, + i, 1, 4, + loc_sum, { loc_sum += matrix(i, j, k, 1); }, val); @@ -1064,10 +1064,10 @@ void pass_by_ref_two(const FMatrixKokkos& matrix) int val = 0; // do summation in parallel on GPU - REDUCE_SUM(k, 1, 6, - j, 1, 5, - i, 1, 4, - loc_sum, { + FOR_REDUCE_SUM(k, 1, 6, + j, 1, 5, + i, 1, 4, + loc_sum, { loc_sum += matrix(i, j, k, 1); }, val); diff --git a/examples/mtr-kokkos-simple.cpp b/examples/mtr-kokkos-simple.cpp index b6a9d11b..916339e6 100644 --- a/examples/mtr-kokkos-simple.cpp +++ b/examples/mtr-kokkos-simple.cpp @@ -257,8 +257,8 @@ int main(int argc, char *argv[]) { int result; // calculate dot product if(k0){ - REDUCE_SUM(j, 0, i-1, + FOR_REDUCE_SUM(j, 0, i-1, loc_sum, { loc_sum += L(i,j)*x(j); }, result); diff --git a/examples/phaseField/srcMacros/local_free_energy.cpp b/examples/phaseField/srcMacros/local_free_energy.cpp index a26ad265..ea76cc0a 100644 --- a/examples/phaseField/srcMacros/local_free_energy.cpp +++ b/examples/phaseField/srcMacros/local_free_energy.cpp @@ -1,5 +1,5 @@ /********************************************************************************************** - © 2020. Triad National Security, LLC. All rights reserved. + � 2020. Triad National Security, LLC. All rights reserved. This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. Department of Energy/National Nuclear Security Administration. All rights in the program are @@ -49,10 +49,10 @@ double calculate_total_free_energy(int* nn, double* delta, double kappa, DCArray // double total_energy = 0.0; double loc_sum = 0.0; - REDUCE_SUM(i, 1, nx - 1, - j, 1, ny - 1, - k, 1, nz - 1, - loc_sum, { + FOR_REDUCE_SUM(i, 1, nx - 1, + j, 1, ny - 1, + k, 1, nz - 1, + loc_sum, { // central difference spatial derivative of comp double dcdx = (comp(i + 1, j, k) - comp(i - 1, j, k)) / (2.0 * dx); double dcdy = (comp(i, j + 1, k) - comp(i, j - 1, k)) / (2.0 * dy); diff --git a/examples/phaseField/srcMacros/outputs.cpp b/examples/phaseField/srcMacros/outputs.cpp index a323f3cf..6f863953 100644 --- a/examples/phaseField/srcMacros/outputs.cpp +++ b/examples/phaseField/srcMacros/outputs.cpp @@ -1,5 +1,5 @@ /********************************************************************************************** - © 2020. Triad National Security, LLC. All rights reserved. + � 2020. Triad National Security, LLC. All rights reserved. This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. Department of Energy/National Nuclear Security Administration. All rights in the program are @@ -47,20 +47,20 @@ void track_progress(int iter, int* nn, DCArrayKokkos& comp) // sum of comp field double sum_comp = 0.0; double loc_sum = 0.0; - REDUCE_SUM(i, 0, nx, - j, 0, ny, - k, 0, nz, - loc_sum, { + FOR_REDUCE_SUM(i, 0, nx, + j, 0, ny, + k, 0, nz, + loc_sum, { loc_sum += comp(i, j, k); }, sum_comp); // max of comp field double max_comp; double loc_max; - REDUCE_MAX(i, 0, nx, - j, 0, ny, - k, 0, nz, - loc_max, { + FOR_REDUCE_MAX(i, 0, nx, + j, 0, ny, + k, 0, nz, + loc_max, { if (loc_max < comp(i, j, k)) { loc_max = comp(i, j, k); } @@ -70,10 +70,10 @@ void track_progress(int iter, int* nn, DCArrayKokkos& comp) // min of comp field double min_comp; double loc_min; - REDUCE_MIN(i, 0, nx, - j, 0, ny, - k, 0, nz, - loc_min, { + FOR_REDUCE_MIN(i, 0, nx, + j, 0, ny, + k, 0, nz, + loc_min, { if (loc_min > comp(i, j, k)) { loc_min = comp(i, j, k); } diff --git a/examples/phaseFieldMPI/system.cpp b/examples/phaseFieldMPI/system.cpp index 7c3f940c..7967b7be 100644 --- a/examples/phaseFieldMPI/system.cpp +++ b/examples/phaseFieldMPI/system.cpp @@ -137,7 +137,7 @@ double System::calculate_total_free_energy() #if 0 // bulk free energy + interfacial energy - REDUCE_SUM(k, 1, ga.comp.dims(0) - 1, + FOR_REDUCE_SUM(k, 1, ga.comp.dims(0) - 1, j, 1, ga.comp.dims(1) - 1, i, 1, ga.comp.dims(2) - 1, loc_sum, { @@ -152,7 +152,7 @@ double System::calculate_total_free_energy() #endif // bulk free energy only - REDUCE_SUM(k, 0, ga.comp.dims(0), + FOR_REDUCE_SUM(k, 0, ga.comp.dims(0), j, 0, ga.comp.dims(1), i, 0, ga.comp.dims(2), loc_sum, { @@ -200,7 +200,7 @@ void System::track_progress(int iter) // sum of comp field double sum_comp = 0.0; double loc_sum = 0.0; - REDUCE_SUM(k, 0, ga.comp.dims(0), + FOR_REDUCE_SUM(k, 0, ga.comp.dims(0), j, 0, ga.comp.dims(1), i, 0, ga.comp.dims(2), loc_sum, { @@ -210,7 +210,7 @@ void System::track_progress(int iter) // max of comp field double max_comp; double loc_max; - REDUCE_MAX(k, 0, ga.comp.dims(0), + FOR_REDUCE_MAX(k, 0, ga.comp.dims(0), j, 0, ga.comp.dims(1), i, 0, ga.comp.dims(2), loc_max, { @@ -222,7 +222,7 @@ void System::track_progress(int iter) // min of comp field double min_comp; double loc_min; - REDUCE_MIN(k, 0, ga.comp.dims(0), + FOR_REDUCE_MIN(k, 0, ga.comp.dims(0), j, 0, ga.comp.dims(1), i, 0, ga.comp.dims(2), loc_min, { diff --git a/examples/sparsetests/powerIter.cpp b/examples/sparsetests/powerIter.cpp index 270be239..66877ddc 100644 --- a/examples/sparsetests/powerIter.cpp +++ b/examples/sparsetests/powerIter.cpp @@ -1,5 +1,5 @@ /********************************************************************************************** - © 2020. Triad National Security, LLC. All rights reserved. + � 2020. Triad National Security, LLC. All rights reserved. This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. Department of Energy/National Nuclear Security Administration. All rights in the program are @@ -72,7 +72,7 @@ void renorm(CArrayKokkos& b) double loc_total = 0; int n = b.dims(0); int i = 0; - REDUCE_SUM(i, 0, n, + FOR_REDUCE_SUM(i, 0, n, loc_total, { loc_total += b(i) * b(i); } , total); total = 1 / sqrt(total); @@ -98,7 +98,7 @@ double innerProd(CArrayKokkos& a, CArrayKokkos& b) double total = 0; double loc_total = 0; int n = b.dims(0); - REDUCE_SUM(i, 0, n, + FOR_REDUCE_SUM(i, 0, n, loc_total, { loc_total += a(i) * b(i); }, total); @@ -110,7 +110,7 @@ double l1Change(CArrayKokkos& a, CArrayKokkos& b) double total = 0; double loc_total = 0; int n = b.dims(0); - REDUCE_SUM(i, 0, n, + FOR_REDUCE_SUM(i, 0, n, loc_total, { loc_total += abs(a(i) - b(i)); }, total); @@ -151,7 +151,7 @@ void renormSp(CArrayKokkos& b) double loc_total = 0; int n = b.dims(0); int i = 0; - REDUCE_SUM(i, 0, n, + FOR_REDUCE_SUM(i, 0, n, loc_total, { loc_total += b(i) * b(i); } , total); total = 1 / sqrt(total); @@ -177,7 +177,7 @@ double innerProdSp(CArrayKokkos& a, CArrayKokkos& b) double total = 0; double loc_total = 0; int n = b.dims(0); - REDUCE_SUM(i, 0, n, + FOR_REDUCE_SUM(i, 0, n, loc_total, { loc_total += a(i) * b(i); }, total); @@ -189,7 +189,7 @@ double l1ChangeSp(CArrayKokkos& a, CArrayKokkos& b) double total = 0; double loc_total = 0; int n = b.dims(0); - REDUCE_SUM(i, 0, n, + FOR_REDUCE_SUM(i, 0, n, loc_total, { loc_total += abs(a(i) - b(i)); }, total); diff --git a/examples/sparsetests/sparsePowerIter.cpp b/examples/sparsetests/sparsePowerIter.cpp index 5d066f2b..72be7bc9 100644 --- a/examples/sparsetests/sparsePowerIter.cpp +++ b/examples/sparsetests/sparsePowerIter.cpp @@ -1,5 +1,5 @@ /********************************************************************************************** - © 2020. Triad National Security, LLC. All rights reserved. + � 2020. Triad National Security, LLC. All rights reserved. This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. Department of Energy/National Nuclear Security Administration. All rights in the program are @@ -61,7 +61,7 @@ void renormSp(CArrayKokkos& b) double loc_total = 0; int n = b.dims(0); int i = 0; - REDUCE_SUM(i, 0, n, + FOR_REDUCE_SUM(i, 0, n, loc_total, { loc_total += b(i) * b(i); } , total); total = 1 / sqrt(total); @@ -87,7 +87,7 @@ double innerProdSp(CArrayKokkos& a, CArrayKokkos& b) double total = 0; double loc_total = 0; int n = b.dims(0); - REDUCE_SUM(i, 0, n, + FOR_REDUCE_SUM(i, 0, n, loc_total, { loc_total += a(i) * b(i); }, total); @@ -99,7 +99,7 @@ double l1ChangeSp(CArrayKokkos& a, CArrayKokkos& b) double total = 0; double loc_total = 0; int n = b.dims(0); - REDUCE_SUM(i, 0, n, + FOR_REDUCE_SUM(i, 0, n, loc_total, { loc_total += abs(a(i) - b(i)); }, total); diff --git a/examples/test_for.cpp b/examples/test_for.cpp index a4acab89..4aac408e 100644 --- a/examples/test_for.cpp +++ b/examples/test_for.cpp @@ -1,5 +1,5 @@ /********************************************************************************************** - © 2020. Triad National Security, LLC. All rights reserved. + � 2020. Triad National Security, LLC. All rights reserved. This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. Department of Energy/National Nuclear Security Administration. All rights in the program are @@ -66,7 +66,7 @@ int main() int loc_sum = 0; int result = 0; - REDUCE_SUM(i, 0, 10, + FOR_REDUCE_SUM(i, 0, 10, loc_sum, { loc_sum += arr(i) * arr(i); }, result); @@ -80,9 +80,9 @@ int main() loc_sum = 0; result = 0; - REDUCE_SUM(i, 0, 10, - j, 0, 10, - loc_sum, { + FOR_REDUCE_SUM(i, 0, 10, + j, 0, 10, + loc_sum, { loc_sum += arr_2D(i, j) * arr_2D(i, j); }, result); @@ -97,10 +97,10 @@ int main() loc_sum = 0; result = 0; - REDUCE_SUM(i, 0, 10, - j, 0, 10, - k, 0, 10, - loc_sum, { + FOR_REDUCE_SUM(i, 0, 10, + j, 0, 10, + k, 0, 10, + loc_sum, { loc_sum += arr_3D(i, j, k) * arr_3D(i, j, k); }, result); @@ -116,29 +116,29 @@ int main() std::cout << "3D reduce : " << result << " vs. " << loc_sum << " \n"; int loc_max; - REDUCE_MAX(i, 0, 10, - j, 0, 10, - k, 0, 10, - loc_max, { + FOR_REDUCE_MAX(i, 0, 10, + j, 0, 10, + k, 0, 10, + loc_max, { loc_max = std::max(arr_3D(i, j, k), loc_max); }, result); std::cout << "3D reduce MAX : " << result << " \n"; int loc_min; - REDUCE_MIN(i, 0, 10, - j, 0, 10, - k, 0, 10, - loc_min, { + FOR_REDUCE_MIN(i, 0, 10, + j, 0, 10, + k, 0, 10, + loc_min, { loc_min = std::min(arr_3D(i, j, k), loc_min); }, result); std::cout << "3D reduce MIN : " << result << " \n"; - REDUCE_MIN_CLASS(i, 0, 10, - j, 0, 10, - k, 0, 10, - loc_min, { + FOR_REDUCE_MIN_CLASS(i, 0, 10, + j, 0, 10, + k, 0, 10, + loc_min, { loc_min = std::min(arr_3D(i, j, k), loc_min); }, result); diff --git a/examples/watt-graph/kokkos_floyd.cpp b/examples/watt-graph/kokkos_floyd.cpp index 16f8d409..6f8dfb5f 100644 --- a/examples/watt-graph/kokkos_floyd.cpp +++ b/examples/watt-graph/kokkos_floyd.cpp @@ -1,5 +1,5 @@ /********************************************************************************************** - © 2020. Triad National Security, LLC. All rights reserved. + � 2020. Triad National Security, LLC. All rights reserved. This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. Department of Energy/National Nuclear Security Administration. All rights in the program are @@ -109,7 +109,7 @@ double averageDistance(CArrayKokkos G, int n) { double total = 0; double loc_sum; - REDUCE_SUM(i, 0, n, + FOR_REDUCE_SUM(i, 0, n, j, 0, n, loc_sum, { loc_sum += ((double) G(i, j)) / n; diff --git a/src/include/macros.h b/src/include/macros.h index 3fafe5f8..9baf643f 100644 --- a/src/include/macros.h +++ b/src/include/macros.h @@ -194,7 +194,7 @@ (result) ) #define \ - REDUCE_SUM(...) \ + FOR_REDUCE_SUM(...) \ GET_MACRO(__VA_ARGS__, _13, RSUM3D, _11, _10, RSUM2D, _8, _7, RSUM1D)(__VA_ARGS__) @@ -246,7 +246,7 @@ Kokkos::Max< decltype(result) > ( (result) ) ) #define \ - REDUCE_MAX(...) \ + FOR_REDUCE_MAX(...) \ GET_MACRO(__VA_ARGS__, _13, RMAX3D, _11, _10, RMAX2D, _8, _7, RMAX1D)(__VA_ARGS__) @@ -301,7 +301,7 @@ Kokkos::Min< decltype(result) >(result) ) #define \ - REDUCE_MIN(...) \ + FOR_REDUCE_MIN(...) \ GET_MACRO(__VA_ARGS__, _13, RMIN3D, _11, _10, RMIN2D, _8, _7, RMIN1D)(__VA_ARGS__) @@ -377,7 +377,7 @@ Kokkos::parallel_reduce( \ (result) ) #define \ -REDUCE_SUM_CLASS(...) \ +FOR_REDUCE_SUM_CLASS(...) \ GET_MACRO(__VA_ARGS__, _13, RSUMCLASS3D, _11, _10, RSUMCLASS2D, _8, _7, RSUMCLASS1D)(__VA_ARGS__) @@ -406,7 +406,7 @@ Kokkos::parallel_reduce( \ Kokkos::Max< decltype(result) > ( (result) ) ) #define \ -REDUCE_MAX_CLASS(...) \ +FOR_REDUCE_MAX_CLASS(...) \ GET_MACRO(__VA_ARGS__, _13, RMAXCLASS3D, _11, _10, RMAXCLASS2D, _8, _7, RMAXCLASS1D)(__VA_ARGS__) @@ -433,7 +433,7 @@ Kokkos::parallel_reduce( \ Kokkos::Min< decltype(result) >(result) ) #define \ -REDUCE_MIN_CLASS(...) \ +FOR_REDUCE_MIN_CLASS(...) \ GET_MACRO(__VA_ARGS__, _13, RMINCLASS3D, _11, _10, RMINCLASS2D, _8, _7, RMINCLASS1D)(__VA_ARGS__) #define \ @@ -889,7 +889,7 @@ void reduce_max (int i_start, int i_end, (result) ) #define \ - REDUCE_SUM(...) \ + FOR_REDUCE_SUM(...) \ GET_MACRO(__VA_ARGS__, _13, RSUM3D, _11, _10, RSUM2D, _8, _7, RSUM1D)(__VA_ARGS__) @@ -933,7 +933,7 @@ void reduce_max (int i_start, int i_end, (result) ) #define \ - REDUCE_MAX(...) \ + FOR_REDUCE_MAX(...) \ GET_MACRO(__VA_ARGS__, _13, RMAX3D, _11, _10, RMAX2D, _8, _7, RMAX1D)(__VA_ARGS__) @@ -977,7 +977,7 @@ void reduce_max (int i_start, int i_end, (result) ) #define \ - REDUCE_MIN(...) \ + FOR_REDUCE_MIN(...) \ GET_MACRO(__VA_ARGS__, _13, RMIN3D, _11, _10, RMIN2D, _8, _7, RMIN1D)(__VA_ARGS__) From cec7dd28acde1f4751453453fa403388b4a2c2b2 Mon Sep 17 00:00:00 2001 From: Nathaniel Morgan Date: Mon, 7 Oct 2024 12:16:56 -0600 Subject: [PATCH 2/5] added nested parallelism to mtr simple --- examples/mtr-kokkos-simple.cpp | 50 ++++++++++++++++++++-------------- 1 file changed, 29 insertions(+), 21 deletions(-) diff --git a/examples/mtr-kokkos-simple.cpp b/examples/mtr-kokkos-simple.cpp index 916339e6..ae4c6348 100644 --- a/examples/mtr-kokkos-simple.cpp +++ b/examples/mtr-kokkos-simple.cpp @@ -1,5 +1,5 @@ /********************************************************************************************** - © 2020. Triad National Security, LLC. All rights reserved. + © 2020. Triad National Security, LLC. All rights reserved. This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. Department of Energy/National Nuclear Security Administration. All rights in the program are @@ -134,11 +134,11 @@ int main(int argc, char *argv[]) { FMatrixDevice matrix3D; // declare variable and allocate sizes and dimensions later - matrix3D = FMatrixDevice (10,10,10); // allocate dimensions and sizes + matrix3D = FMatrixDevice (10,10,10, "mat3d"); // allocate dimensions and sizes // Array example following the Fortran index convention, // indicies go from 0 to less than N, first index varies the fastest - FArrayDevice arr3D(10,10,10); + FArrayDevice arr3D(10,10,10, "arr3d"); // Initialize matrix1D @@ -186,15 +186,15 @@ int main(int argc, char *argv[]) { // A 2D array example following the C index convention // indicies go from 0 to less than N, last index varies the fastest - CArrayDevice A(N,N); // dense array - CArrayDevice B(N,N); - CArrayDevice C(N,N); - CArrayDevice D(N,N); + CArrayDevice A(N,N, "A"); // dense array + CArrayDevice B(N,N, "B"); + CArrayDevice C(N,N, "C"); + CArrayDevice D(N,N, "D"); - CArrayDevice L(N,N); // lower triangular array - CArrayDevice U(N,N); // upper triangular array - CArrayDevice x(N); - CArrayDevice y(N); + CArrayDevice L(N,N, "L"); // lower triangular array + CArrayDevice U(N,N, "U"); // upper triangular array + CArrayDevice x(N, "x"); + CArrayDevice y(N, "y"); @@ -249,7 +249,10 @@ int main(int argc, char *argv[]) { }); // end parallel for // backwards substitution - for (int k = N-1; k>=0; k--){ + //for (int k = N-1; k>=0; k--){ + FOR_FIRST(id, 0, N,{ + + int k = ((N-1) - id); // make it count backwards x(k) = y(k); @@ -257,31 +260,32 @@ int main(int argc, char *argv[]) { int result; // calculate dot product if(k0){ - FOR_REDUCE_SUM(j, 0, i-1, + FOR_REDUCE_SUM_SECOND(j, 0, i-1, loc_sum, { loc_sum += L(i,j)*x(j); }, result); } // end if x(i) = (y(i)- result)/U(i,i); - } // end for i + }); // end for i @@ -321,8 +325,8 @@ int main(int argc, char *argv[]) { int length = 20; // Parallel Jacobi solver for steady 2D heat transfer - CArrayDevice Temp(length+2, length+2); - CArrayDevice Temp_previous(length+2, length+2); + CArrayDual Temp(length+2, length+2, "Temp"); + CArrayDevice Temp_previous(length+2, length+2, "Temp_old"); // heat source is bottom right corner of mesh, T=100 in that corner // temperature of left wall is T_cold=0. @@ -423,10 +427,14 @@ int main(int argc, char *argv[]) { printf("\n"); printf("Temperature profile\n"); - // print temperature result + + // copy values to the CPU, if on a GPU + Temp.update_host(); + + // print temperature result on CPU for(int i=length+1; i>=0; i--){ for (int j=0; j<=length+1; j++){ - printf(" %5.2f ", Temp(i,j)); + printf(" %5.2f ", Temp.host(i,j)); } // for j printf("\n"); }; // for i From 64f31f2acaa7bfe44bb4b7b444e9282e56fc160c Mon Sep 17 00:00:00 2001 From: Nathaniel Morgan Date: Mon, 7 Oct 2024 12:24:37 -0600 Subject: [PATCH 3/5] updated formatting --- examples/mtr-kokkos-simple.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/mtr-kokkos-simple.cpp b/examples/mtr-kokkos-simple.cpp index ae4c6348..ad319281 100644 --- a/examples/mtr-kokkos-simple.cpp +++ b/examples/mtr-kokkos-simple.cpp @@ -250,7 +250,7 @@ int main(int argc, char *argv[]) { // backwards substitution //for (int k = N-1; k>=0; k--){ - FOR_FIRST(id, 0, N,{ + FOR_FIRST(id, 0, N,{ int k = ((N-1) - id); // make it count backwards @@ -267,12 +267,12 @@ int main(int argc, char *argv[]) { } // end if x(k) -= result; x(k) /= U(k,k); - }); // end for k backwards + }); // end for k backwards // forward substitution //for (int i = 0; i Date: Mon, 7 Oct 2024 12:27:34 -0600 Subject: [PATCH 4/5] updated array sizes in mtr simple --- examples/mtr-kokkos-simple.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/mtr-kokkos-simple.cpp b/examples/mtr-kokkos-simple.cpp index ad319281..1690cb4c 100644 --- a/examples/mtr-kokkos-simple.cpp +++ b/examples/mtr-kokkos-simple.cpp @@ -182,7 +182,7 @@ int main(int argc, char *argv[]) { // =============== - int N=20; // array dimensions are NxN + int N=200; // array dimensions are NxN // A 2D array example following the C index convention // indicies go from 0 to less than N, last index varies the fastest From a63df38d32e974bda59a2a351ef88325d35b06c8 Mon Sep 17 00:00:00 2001 From: Nathaniel Morgan Date: Mon, 7 Oct 2024 15:27:12 -0600 Subject: [PATCH 5/5] renamed to pop_back(), matching std::vector --- src/include/kokkos_types.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/include/kokkos_types.h b/src/include/kokkos_types.h index 3a923b36..b7e1ef05 100644 --- a/src/include/kokkos_types.h +++ b/src/include/kokkos_types.h @@ -6729,7 +6729,7 @@ class DynamicArrayKokkos { void push_back(T value); - void pop(); + void pop_back(); // Methods returns the raw pointer (most likely GPU) of the Kokkos View KOKKOS_INLINE_FUNCTION @@ -7040,7 +7040,7 @@ size_t DynamicArrayKokkos::order() const { } template -void DynamicArrayKokkos::pop() { +void DynamicArrayKokkos::pop_back() { dims_actual_size_[0]--; } @@ -7175,7 +7175,7 @@ class DynamicMatrixKokkos { void push_back(T value); - void pop(); + void pop_back(); // Methods returns the raw pointer (most likely GPU) of the Kokkos View KOKKOS_INLINE_FUNCTION @@ -7486,7 +7486,7 @@ size_t DynamicMatrixKokkos::order() const { } template -void DynamicMatrixKokkos::pop() { +void DynamicMatrixKokkos::pop_back() { dims_actual_size_[0]--; }