From 6e93b7794471ee02034329dc71c75b519fa7711d Mon Sep 17 00:00:00 2001 From: AlexanderSinn Date: Wed, 10 Apr 2024 19:58:05 +0200 Subject: [PATCH 1/3] TinyProfiler shorten output into other section --- Src/Base/AMReX_TinyProfiler.H | 2 + Src/Base/AMReX_TinyProfiler.cpp | 69 +++++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+) diff --git a/Src/Base/AMReX_TinyProfiler.H b/Src/Base/AMReX_TinyProfiler.H index 38e328d0127..6d6a95be203 100644 --- a/Src/Base/AMReX_TinyProfiler.H +++ b/Src/Base/AMReX_TinyProfiler.H @@ -102,6 +102,7 @@ private: double dtexmin{std::numeric_limits::max()}; double dtexavg{0.0}, dtexmax{0.0}; bool usesCUPTI{false}; + bool do_print{true}; std::string fname; static bool compex (const ProcStats& lhs, const ProcStats& rhs) { return lhs.dtexmax > rhs.dtexmax; @@ -153,6 +154,7 @@ private: static int device_synchronize_around_region; static int n_print_tabs; static int verbose; + static double print_threshold; static void PrintStats (std::map& regstats, double dt_max); static void PrintMemStats (std::map& memstats, diff --git a/Src/Base/AMReX_TinyProfiler.cpp b/Src/Base/AMReX_TinyProfiler.cpp index 955d4edb50c..eed6da0aa64 100644 --- a/Src/Base/AMReX_TinyProfiler.cpp +++ b/Src/Base/AMReX_TinyProfiler.cpp @@ -42,6 +42,7 @@ double TinyProfiler::t_init = std::numeric_limits::max(); int TinyProfiler::device_synchronize_around_region = 0; int TinyProfiler::n_print_tabs = 0; int TinyProfiler::verbose = 0; +double TinyProfiler::print_threshold = 1.; namespace { constexpr char mainregion[] = "main"; @@ -393,6 +394,9 @@ TinyProfiler::Initialize () noexcept pp.queryAdd("device_synchronize_around_region", device_synchronize_around_region); pp.queryAdd("verbose", verbose); pp.queryAdd("v", verbose); + // Specify the maximum percentage of inclusive time + // that the "Other" section in the output can have (defaultĀ 1%) + pp.queryAdd("print_threshold", print_threshold); } } @@ -613,8 +617,61 @@ TinyProfiler::PrintStats (std::map& regstats, double dt_max) #ifdef AMREX_USE_CUPTI const std::string hlinehlf((maxfnamelen+wnc+2+(wt+2)*3+wp+2)/2-12,'-'); #endif + + ProcStats other_procstat; + bool print_other_procstat = false; + + // try to combine low-performance impact functions into "Other" to clean up the output + if (print_threshold > 0.) { + // initialize other_procstat to zero + other_procstat.nmin = 0; + other_procstat.dtinmin = 0.; + other_procstat.dtexmin = 0.; + other_procstat.fname = "Other"; + int num_procstats_in_other = 0; + + // sort by exclusive time and iterate backwards over the profiled funcitons + std::sort(allprocstats.begin(), allprocstats.end(), ProcStats::compin); + for (int i = allprocstats.size()-1; i >= 0; --i) { + // include function in "Other" if together they are below the threshold + if ((other_procstat.dtinmax + allprocstats[i].dtinmax)*(100.0/dt_max) + < print_threshold) { + allprocstats[i].do_print = false; + ++num_procstats_in_other; + + // add time for function to "Other" + // for min and max this is not exact but produces an upper limit + other_procstat.nmin += allprocstats[i].nmin; + other_procstat.navg += allprocstats[i].navg; + other_procstat.nmax += allprocstats[i].nmax; + + other_procstat.dtinmin += allprocstats[i].dtinmin; + other_procstat.dtinavg += allprocstats[i].dtinavg; + other_procstat.dtinmax += allprocstats[i].dtinmax; + + other_procstat.dtexmin += allprocstats[i].dtexmin; + other_procstat.dtexavg += allprocstats[i].dtexavg; + other_procstat.dtexmax += allprocstats[i].dtexmax; + } else { + break; + } + } + + if (num_procstats_in_other == 1) { + // if only one funciton would be included in "Other" + // the output would not get shorter + allprocstats[allprocstats.size() - 1].do_print = true; + } else if (num_procstats_in_other >= 2) { + print_other_procstat = true; + } + } + // Exclusive time std::sort(allprocstats.begin(), allprocstats.end(), ProcStats::compex); + if (print_other_procstat) { + // make sure "Other" is printed at the end of the list + allprocstats.push_back(other_procstat); + } amrex::OutStream() << "\n" << hline << "\n"; amrex::OutStream() << std::left << std::setw(maxfnamelen) << "Name" @@ -627,6 +684,7 @@ TinyProfiler::PrintStats (std::map& regstats, double dt_max) << "\n" << hline << "\n"; for (const auto & allprocstat : allprocstats) { + if (!allprocstat.do_print) continue; #ifdef AMREX_USE_CUPTI if (it->usesCUPTI) { @@ -663,9 +721,16 @@ TinyProfiler::PrintStats (std::map& regstats, double dt_max) #endif } amrex::OutStream() << hline << "\n"; + if (print_other_procstat) { + allprocstats.pop_back(); + } // Inclusive time std::sort(allprocstats.begin(), allprocstats.end(), ProcStats::compin); + if (print_other_procstat) { + // make sure "Other" is printed at the end of the list + allprocstats.push_back(other_procstat); + } amrex::OutStream() << "\n" << hline << "\n"; amrex::OutStream() << std::left << std::setw(maxfnamelen) << "Name" @@ -678,6 +743,7 @@ TinyProfiler::PrintStats (std::map& regstats, double dt_max) << "\n" << hline << "\n"; for (const auto & allprocstat : allprocstats) { + if (!allprocstat.do_print) continue; #ifdef AMREX_USE_CUPTI if (it->usesCUPTI) { @@ -713,6 +779,9 @@ TinyProfiler::PrintStats (std::map& regstats, double dt_max) #endif } amrex::OutStream() << hline << "\n\n"; + if (print_other_procstat) { + allprocstats.pop_back(); + } } } From cc539701b812f4404bb0274e1885620f01b0301e Mon Sep 17 00:00:00 2001 From: AlexanderSinn Date: Wed, 10 Apr 2024 21:20:25 +0200 Subject: [PATCH 2/3] add suggestions --- Src/Base/AMReX_TinyProfiler.cpp | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/Src/Base/AMReX_TinyProfiler.cpp b/Src/Base/AMReX_TinyProfiler.cpp index eed6da0aa64..57e01f7c5e9 100644 --- a/Src/Base/AMReX_TinyProfiler.cpp +++ b/Src/Base/AMReX_TinyProfiler.cpp @@ -630,7 +630,7 @@ TinyProfiler::PrintStats (std::map& regstats, double dt_max) other_procstat.fname = "Other"; int num_procstats_in_other = 0; - // sort by exclusive time and iterate backwards over the profiled funcitons + // sort by exclusive time and iterate backwards over the profiled functions std::sort(allprocstats.begin(), allprocstats.end(), ProcStats::compin); for (int i = allprocstats.size()-1; i >= 0; --i) { // include function in "Other" if together they are below the threshold @@ -658,9 +658,9 @@ TinyProfiler::PrintStats (std::map& regstats, double dt_max) } if (num_procstats_in_other == 1) { - // if only one funciton would be included in "Other" + // if only one function would be included in "Other" // the output would not get shorter - allprocstats[allprocstats.size() - 1].do_print = true; + allprocstats.back().do_print = true; } else if (num_procstats_in_other >= 2) { print_other_procstat = true; } @@ -779,9 +779,6 @@ TinyProfiler::PrintStats (std::map& regstats, double dt_max) #endif } amrex::OutStream() << hline << "\n\n"; - if (print_other_procstat) { - allprocstats.pop_back(); - } } } From 9bc223f642e1eb3ecb8bf1078094c2c8bceffbdb Mon Sep 17 00:00:00 2001 From: AlexanderSinn Date: Tue, 16 Apr 2024 12:39:29 +0200 Subject: [PATCH 3/3] fix clang-tidy --- Src/Base/AMReX_TinyProfiler.cpp | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/Src/Base/AMReX_TinyProfiler.cpp b/Src/Base/AMReX_TinyProfiler.cpp index 212bcabb300..1cffe78e75a 100644 --- a/Src/Base/AMReX_TinyProfiler.cpp +++ b/Src/Base/AMReX_TinyProfiler.cpp @@ -511,7 +511,7 @@ TinyProfiler::PrintStats (std::map& regstats, double dt_max) // sort by exclusive time and iterate backwards over the profiled functions std::sort(allprocstats.begin(), allprocstats.end(), ProcStats::compin); - for (int i = allprocstats.size()-1; i >= 0; --i) { + for (Long i = static_cast(allprocstats.size())-1; i >= 0; --i) { // include function in "Other" if together they are below the threshold if ((other_procstat.dtinmax + allprocstats[i].dtinmax)*(100.0/dt_max) < print_threshold) { @@ -563,7 +563,9 @@ TinyProfiler::PrintStats (std::map& regstats, double dt_max) << "\n" << hline << "\n"; for (const auto & allprocstat : allprocstats) { - if (!allprocstat.do_print) continue; + if (!allprocstat.do_print) { + continue; + } amrex::OutStream() << std::setprecision(4) << std::left << std::setw(maxfnamelen) << allprocstat.fname << std::right @@ -599,7 +601,9 @@ TinyProfiler::PrintStats (std::map& regstats, double dt_max) << "\n" << hline << "\n"; for (const auto & allprocstat : allprocstats) { - if (!allprocstat.do_print) continue; + if (!allprocstat.do_print) { + continue; + } amrex::OutStream() << std::setprecision(4) << std::left << std::setw(maxfnamelen) << allprocstat.fname << std::right