Skip to content

Commit

Permalink
Merge pull request #75 from mkstoyanov/custom_nruns
Browse files Browse the repository at this point in the history
Adds `-nruns` option that allows for an arbitrary number of runs in the benchmark.
  • Loading branch information
mkstoyanov authored Feb 12, 2025
2 parents b7afa48 + fd6cf4c commit dbf14e6
Show file tree
Hide file tree
Showing 2 changed files with 72 additions and 37 deletions.
82 changes: 49 additions & 33 deletions benchmarks/speed3d.h
Original file line number Diff line number Diff line change
Expand Up @@ -215,9 +215,16 @@ void benchmark_fft(std::array<int,3> size_fft, std::deque<std::string> const &ar

// Print results
if(me==0){
t_max = t_max / (2.0 * ntest);
double const fftsize = static_cast<double>(world.count());
double const floprate = 5.0 * batch_size * fftsize * std::log(fftsize) * 1e-9 / std::log(2.0) / t_max;
double const fftsize = static_cast<double>(world.count());

double floprate = 0.0;
if (ntest > 0) { // something was tested
t_max /= (2.0 * ntest); // time per test, 2 transforms forward/backward
floprate = 5.0 * batch_size * fftsize * std::log(fftsize) * 1e-9 / std::log(2.0) / t_max;
} else {
t_max = 0.0; // nothing was tested
}

long long mem_usage = static_cast<long long>(fft.size_inbox()) + static_cast<long long>(fft.size_outbox())
+ static_cast<long long>(fft.size_workspace());
mem_usage *= sizeof(output_type);
Expand All @@ -232,6 +239,7 @@ void benchmark_fft(std::array<int,3> size_fft, std::deque<std::string> const &ar
for(int i=0; i<5; i++)
print_proc_grid(i);
cout << "\n";
cout << "Num runs: " << ntest << '\n';
cout << "Time per run: " << t_max << " (s)\n";
cout << "Performance: " << floprate << " GFlops/s\n";
cout << "Memory usage: " << mem_usage << "MB/rank\n";
Expand Down Expand Up @@ -322,42 +330,50 @@ int main(int argc, char *argv[]){

if (argc < 6){
if (mpi::world_rank(0)){
cout << "\nUsage:\n mpirun -np x " << bench_executable << " <backend> <precision> <size-x> <size-y> <size-z> <args>\n\n"
<< " options\n"
<< " backend is the 1-D FFT library\n"
<< " available options for this build: " << backends << "\n"
<< " precision is either float or double\n"
<< " use float-long or double-long to enable 64-bit indexing\n"
<< " size-x/y/z are the 3D array dimensions \n\n"
<< " args is a set of optional arguments that define algorithmic tweaks and variations\n"
<< " -reorder: reorder the elements of the arrays so that each 1-D FFT will use contiguous data\n"
<< " -no-reorder: some of the 1-D will be strided (non contiguous)\n"
<< " -a2a: use MPI_Alltoall() communication method\n"
<< " -a2av: use MPI_Alltoallv() communication method (default)\n"
<< " -p2p: use MPI_Send() and MPI_Irecv() communication methods\n"
<< " -p2p_pl: use MPI_Isend() and MPI_Irecv() communication methods\n"
<< " -no-gpu-aware: move the data to the cpu before doing gpu operations (gpu backends only)\n"
<< " -pencils: use pencil reshape logic\n"
<< " -slabs: use slab reshape logic\n"
<< " -io_pencils: if input and output proc grids are pencils, useful for comparison with other libraries \n"
<< " -ingrid x y z: specifies the processor grid to use in the input, x y z must be integers \n"
<< " -outgrid x y z: specifies the processor grid to use in the output, x y z must be integers \n"
<< " -subcomm num_ranks: specifies the number of ranks to use in intermediate reshapes\n"
<< " -batch batch_size: specifies the size of the batch to use in the benchmark\n"
<< " -r2c_dir dir: specifies the r2c direction for the r2c tests, dir must be 0 1 or 2 \n"
<< " -mps: for the cufft backend and multiple gpus, associate the mpi ranks with different cuda devices\n"
<< " -nX: number of times to repeat the run, accepted variants are -n5 (default), -n1, -n10, -n50\n"
#ifdef BENCH_R2R
<< "Examples:\n"
cout << "\nUsage:\n mpirun -np x " << bench_executable << " <backend> <precision> <size-x> <size-y> <size-z> <args>\n";
cout << R"help1(
options
<backend> is the 1-D FFT library
)help1";
cout << " available backends for this build: " << backends;
cout << R"help2(
<precision> is either float or double
use float-long or double-long to enable 64-bit indexing
<size-x/y/z> are the 3D array dimensions
<args> is a set of optional arguments that define algorithmic tweaks and variations
-reorder: reorder the elements of the arrays so that each 1-D FFT will use contiguous data
-no-reorder: some of the 1-D will be strided (non contiguous)
-a2a: use MPI_Alltoall() communication method
-a2av: use MPI_Alltoallv() communication method (default)
-p2p: use MPI_Send() and MPI_Irecv() communication methods
-p2p_pl: use MPI_Isend() and MPI_Irecv() communication methods
-no-gpu-aware: move the data to the cpu before doing gpu operations (gpu backends only)
-pencils: use pencil reshape logic
-slabs: use slab reshape logic
-io_pencils: if input and output proc grids are pencils, useful for comparison with other libraries
-ingrid x y z: specifies the processor grid to use in the input, x y z must be integers
-outgrid x y z: specifies the processor grid to use in the output, x y z must be integers
-subcomm num_ranks: specifies the number of ranks to use in intermediate reshapes
-batch batch_size: specifies the size of the batch to use in the benchmark
-r2c_dir dir: specifies the r2c direction for the r2c tests, dir must be 0 1 or 2
-mps: for the cufft backend and multiple gpus, associate the mpi ranks with different cuda devices
-nX: number of times to repeat the run, accepted variants are -n5 (default), -n1, -n10, -n50
-nrunsXYZ: same as -n but allows for a custom number, XYZ must be a non-negative integer, e.g., -nruns17
)help2";

#ifdef BENCH_R2R
cout << "Examples:\n"
<< " mpirun -np 4 " << bench_executable << " fftw-cos double 128 128 128 -p2p\n"
<< " mpirun -np 8 " << bench_executable << " cufft-cos float 256 256 256\n"
<< " mpirun -np 12 " << bench_executable << " fftw-sin double 512 512 512 -slabs\n\n";
#else
<< "Examples:\n"
#else
cout << "Examples:\n"
<< " mpirun -np 4 " << bench_executable << " fftw double 128 128 128 -no-reorder\n"
<< " mpirun -np 8 " << bench_executable << " cufft float 256 256 256\n"
<< " mpirun -np 12 " << bench_executable << " fftw double 512 512 512 -p2p -slabs\n\n";
#endif
#endif
}

MPI_Finalize();
Expand Down
27 changes: 23 additions & 4 deletions test/test_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -356,7 +356,7 @@ bool has_option(std::deque<std::string> const &args, std::string const &opt){
return false;
}
//! \brief Takes the three arguments after \b opt and converts them to an array of ints; throws std::runtime_error if \b opt or its arguments are missing, or if the arguments cannot be converted.
std::array<int, 3> get_grid(std::deque<std::string> const &args, std::string const &opt){
inline std::array<int, 3> get_grid(std::deque<std::string> const &args, std::string const &opt){
auto iopt = args.begin();
while(iopt != args.end()){
if (*iopt == opt){ // found the argument, take the next three entries
Expand All @@ -374,7 +374,7 @@ std::array<int, 3> get_grid(std::deque<std::string> const &args, std::string con
throw std::runtime_error(opt + " not found");
}

int get_int_arg(std::string const &name, std::deque<std::string> const &args, int default_value = -1){
inline int get_int_arg(std::string const &name, std::deque<std::string> const &args, int default_value = -1){
auto iopt = args.begin();
while(iopt != args.end()){
if (*iopt == name){
Expand All @@ -388,8 +388,27 @@ int get_int_arg(std::string const &name, std::deque<std::string> const &args, in
}
return default_value;
}

int nruns(std::deque<std::string> const &args){
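//! \brief Returns the number of runs selected in \b args; an entry that starts with "-nruns" is handled first, the text after that prefix is converted with std::stoi (its exception is reported to std::cerr and rethrown), and a negative value throws std::runtime_error.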
inline int nruns(std::deque<std::string> const &args){
for(auto &s : args) {
std::string::size_type nr = s.find("-nruns");
if (nr != 0)
continue;
// found a string with -nruns, get the number
int num = 0;
try {
num = std::stoi(s.substr(6));
} catch(std::invalid_argument &) {
std::cerr << "cannot convert '" << s.substr(6) << "' to 'int'\n";
throw;
} catch(std::out_of_range &) {
std::cerr << "provided integer '" << s.substr(6) << "' is too large\n";
throw;
}
if (num < 0)
throw std::runtime_error("the number of of runs has to be non-negative");
return num;
}
for(auto &s : args)
if (s == "-n1")
return 1;
Expand Down

0 comments on commit dbf14e6

Please sign in to comment.