Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TinyProfiler: shorten output into "Other" section #3885

Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Src/Base/AMReX_TinyProfiler.H
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ private:
double dtexmin{std::numeric_limits<double>::max()};
double dtexavg{0.0}, dtexmax{0.0};
bool usesCUPTI{false};
bool do_print{true};
std::string fname;
static bool compex (const ProcStats& lhs, const ProcStats& rhs) {
return lhs.dtexmax > rhs.dtexmax;
Expand Down Expand Up @@ -153,6 +154,7 @@ private:
static int device_synchronize_around_region;
static int n_print_tabs;
static int verbose;
static double print_threshold;

static void PrintStats (std::map<std::string,Stats>& regstats, double dt_max);
static void PrintMemStats (std::map<std::string, MemStat>& memstats,
Expand Down
69 changes: 69 additions & 0 deletions Src/Base/AMReX_TinyProfiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ double TinyProfiler::t_init = std::numeric_limits<double>::max();
int TinyProfiler::device_synchronize_around_region = 0;
int TinyProfiler::n_print_tabs = 0;
int TinyProfiler::verbose = 0;
double TinyProfiler::print_threshold = 1.;

namespace {
constexpr char mainregion[] = "main";
Expand Down Expand Up @@ -393,6 +394,9 @@ TinyProfiler::Initialize () noexcept
pp.queryAdd("device_synchronize_around_region", device_synchronize_around_region);
pp.queryAdd("verbose", verbose);
pp.queryAdd("v", verbose);
// Specify the maximum percentage of inclusive time
// that the "Other" section in the output can have (default 1%)
pp.queryAdd("print_threshold", print_threshold);
}
}

Expand Down Expand Up @@ -613,8 +617,61 @@ TinyProfiler::PrintStats (std::map<std::string,Stats>& regstats, double dt_max)
#ifdef AMREX_USE_CUPTI
const std::string hlinehlf((maxfnamelen+wnc+2+(wt+2)*3+wp+2)/2-12,'-');
#endif

ProcStats other_procstat;
bool print_other_procstat = false;

// try to combine low-performance impact functions into "Other" to clean up the output
if (print_threshold > 0.) {
// initialize other_procstat to zero
other_procstat.nmin = 0;
other_procstat.dtinmin = 0.;
other_procstat.dtexmin = 0.;
other_procstat.fname = "Other";
int num_procstats_in_other = 0;

// sort by exclusive time and iterate backwards over the profiled funcitons
std::sort(allprocstats.begin(), allprocstats.end(), ProcStats::compin);
for (int i = allprocstats.size()-1; i >= 0; --i) {
// include function in "Other" if together they are below the threshold
if ((other_procstat.dtinmax + allprocstats[i].dtinmax)*(100.0/dt_max)
< print_threshold) {
allprocstats[i].do_print = false;
++num_procstats_in_other;

// add time for function to "Other"
// for min and max this is not exact but produces an upper limit
other_procstat.nmin += allprocstats[i].nmin;
other_procstat.navg += allprocstats[i].navg;
other_procstat.nmax += allprocstats[i].nmax;

other_procstat.dtinmin += allprocstats[i].dtinmin;
other_procstat.dtinavg += allprocstats[i].dtinavg;
other_procstat.dtinmax += allprocstats[i].dtinmax;

other_procstat.dtexmin += allprocstats[i].dtexmin;
other_procstat.dtexavg += allprocstats[i].dtexavg;
other_procstat.dtexmax += allprocstats[i].dtexmax;
} else {
break;
}
}

if (num_procstats_in_other == 1) {
// if only one funciton would be included in "Other"
// the output would not get shorter
allprocstats[allprocstats.size() - 1].do_print = true;
} else if (num_procstats_in_other >= 2) {
print_other_procstat = true;
}
}

// Exclusive time
std::sort(allprocstats.begin(), allprocstats.end(), ProcStats::compex);
if (print_other_procstat) {
// make sure "Other" is printed at the end of the list
allprocstats.push_back(other_procstat);
}
amrex::OutStream() << "\n" << hline << "\n";
amrex::OutStream() << std::left
<< std::setw(maxfnamelen) << "Name"
Expand All @@ -627,6 +684,7 @@ TinyProfiler::PrintStats (std::map<std::string,Stats>& regstats, double dt_max)
<< "\n" << hline << "\n";
for (const auto & allprocstat : allprocstats)
{
if (!allprocstat.do_print) continue;
#ifdef AMREX_USE_CUPTI
if (it->usesCUPTI)
{
Expand Down Expand Up @@ -663,9 +721,16 @@ TinyProfiler::PrintStats (std::map<std::string,Stats>& regstats, double dt_max)
#endif
}
amrex::OutStream() << hline << "\n";
if (print_other_procstat) {
allprocstats.pop_back();
}

// Inclusive time
std::sort(allprocstats.begin(), allprocstats.end(), ProcStats::compin);
if (print_other_procstat) {
// make sure "Other" is printed at the end of the list
allprocstats.push_back(other_procstat);
}
amrex::OutStream() << "\n" << hline << "\n";
amrex::OutStream() << std::left
<< std::setw(maxfnamelen) << "Name"
Expand All @@ -678,6 +743,7 @@ TinyProfiler::PrintStats (std::map<std::string,Stats>& regstats, double dt_max)
<< "\n" << hline << "\n";
for (const auto & allprocstat : allprocstats)
{
if (!allprocstat.do_print) continue;
#ifdef AMREX_USE_CUPTI
if (it->usesCUPTI)
{
Expand Down Expand Up @@ -713,6 +779,9 @@ TinyProfiler::PrintStats (std::map<std::string,Stats>& regstats, double dt_max)
#endif
}
amrex::OutStream() << hline << "\n\n";
if (print_other_procstat) {
allprocstats.pop_back();
}
}
}

Expand Down
Loading