diff --git a/Src/Base/AMReX_TinyProfiler.H b/Src/Base/AMReX_TinyProfiler.H index 1a1674d584e..4ddbb05ea7e 100644 --- a/Src/Base/AMReX_TinyProfiler.H +++ b/Src/Base/AMReX_TinyProfiler.H @@ -95,6 +95,7 @@ private: double dtinavg{0.0}, dtinmax{0.0}; double dtexmin{std::numeric_limits::max()}; double dtexavg{0.0}, dtexmax{0.0}; + bool do_print{true}; std::string fname; static bool compex (const ProcStats& lhs, const ProcStats& rhs) { return lhs.dtexmax > rhs.dtexmax; @@ -145,6 +146,7 @@ private: static int device_synchronize_around_region; static int n_print_tabs; static int verbose; + static double print_threshold; static void PrintStats (std::map& regstats, double dt_max); static void PrintMemStats (std::map& memstats, diff --git a/Src/Base/AMReX_TinyProfiler.cpp b/Src/Base/AMReX_TinyProfiler.cpp index 2047df6daec..1cffe78e75a 100644 --- a/Src/Base/AMReX_TinyProfiler.cpp +++ b/Src/Base/AMReX_TinyProfiler.cpp @@ -37,6 +37,7 @@ double TinyProfiler::t_init = std::numeric_limits::max(); int TinyProfiler::device_synchronize_around_region = 0; int TinyProfiler::n_print_tabs = 0; int TinyProfiler::verbose = 0; +double TinyProfiler::print_threshold = 1.; namespace { constexpr char mainregion[] = "main"; @@ -278,6 +279,9 @@ TinyProfiler::Initialize () noexcept pp.queryAdd("device_synchronize_around_region", device_synchronize_around_region); pp.queryAdd("verbose", verbose); pp.queryAdd("v", verbose); + // Specify the maximum percentage of inclusive time + // that the "Other" section in the output can have (defaultĀ 1%) + pp.queryAdd("print_threshold", print_threshold); } } @@ -492,8 +496,61 @@ TinyProfiler::PrintStats (std::map& regstats, double dt_max) wp = std::max(wp, int(std::string("Max %").size())); const std::string hline(maxfnamelen+wnc+2+(wt+2)*3+wp+2,'-'); + + ProcStats other_procstat; + bool print_other_procstat = false; + + // try to combine low-performance impact functions into "Other" to clean up the output + if (print_threshold > 0.) { + // initialize other_procstat to zero + other_procstat.nmin = 0; + other_procstat.dtinmin = 0.; + other_procstat.dtexmin = 0.; + other_procstat.fname = "Other"; + int num_procstats_in_other = 0; + + // sort by exclusive time and iterate backwards over the profiled functions + std::sort(allprocstats.begin(), allprocstats.end(), ProcStats::compin); + for (Long i = static_cast(allprocstats.size())-1; i >= 0; --i) { + // include function in "Other" if together they are below the threshold + if ((other_procstat.dtinmax + allprocstats[i].dtinmax)*(100.0/dt_max) + < print_threshold) { + allprocstats[i].do_print = false; + ++num_procstats_in_other; + + // add time for function to "Other" + // for min and max this is not exact but produces an upper limit + other_procstat.nmin += allprocstats[i].nmin; + other_procstat.navg += allprocstats[i].navg; + other_procstat.nmax += allprocstats[i].nmax; + + other_procstat.dtinmin += allprocstats[i].dtinmin; + other_procstat.dtinavg += allprocstats[i].dtinavg; + other_procstat.dtinmax += allprocstats[i].dtinmax; + + other_procstat.dtexmin += allprocstats[i].dtexmin; + other_procstat.dtexavg += allprocstats[i].dtexavg; + other_procstat.dtexmax += allprocstats[i].dtexmax; + } else { + break; + } + } + + if (num_procstats_in_other == 1) { + // if only one function would be included in "Other" + // the output would not get shorter + allprocstats.back().do_print = true; + } else if (num_procstats_in_other >= 2) { + print_other_procstat = true; + } + } + // Exclusive time std::sort(allprocstats.begin(), allprocstats.end(), ProcStats::compex); + if (print_other_procstat) { + // make sure "Other" is printed at the end of the list + allprocstats.push_back(other_procstat); + } amrex::OutStream() << "\n" << hline << "\n"; amrex::OutStream() << std::left << std::setw(maxfnamelen) << "Name" @@ -506,6 +563,9 @@ TinyProfiler::PrintStats (std::map& regstats, double dt_max) << "\n" << hline << "\n"; for (const auto & allprocstat : allprocstats) { + if (!allprocstat.do_print) { + continue; + } amrex::OutStream() << std::setprecision(4) << std::left << std::setw(maxfnamelen) << allprocstat.fname << std::right @@ -519,9 +579,16 @@ TinyProfiler::PrintStats (std::map& regstats, double dt_max) amrex::OutStream() << "\n"; } amrex::OutStream() << hline << "\n"; + if (print_other_procstat) { + allprocstats.pop_back(); + } // Inclusive time std::sort(allprocstats.begin(), allprocstats.end(), ProcStats::compin); + if (print_other_procstat) { + // make sure "Other" is printed at the end of the list + allprocstats.push_back(other_procstat); + } amrex::OutStream() << "\n" << hline << "\n"; amrex::OutStream() << std::left << std::setw(maxfnamelen) << "Name" @@ -534,6 +601,9 @@ TinyProfiler::PrintStats (std::map& regstats, double dt_max) << "\n" << hline << "\n"; for (const auto & allprocstat : allprocstats) { + if (!allprocstat.do_print) { + continue; + } amrex::OutStream() << std::setprecision(4) << std::left << std::setw(maxfnamelen) << allprocstat.fname << std::right