Skip to content

Commit

Permalink
Remove step size constraints from performance measurement
Browse files: browse the repository at this point in the history
  • Loading branch information
niermann999 committed Feb 7, 2025
1 parent bb63381 commit 7a5833a
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 20 deletions.
25 changes: 13 additions & 12 deletions benchmarks/common/benchmarks/toy_detector_benchmark.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ class ToyDetectorBenchmark : public benchmark::Fixture {
traccc::seedfinder_config seeding_cfg;
traccc::seedfilter_config filter_cfg;
traccc::spacepoint_grid_config grid_cfg{seeding_cfg};
traccc::finding_config finding_cfg = get_trk_finding_config();
traccc::finding_config finding_cfg;
traccc::fitting_config fitting_cfg;

static constexpr std::array<float, 2> phi_range{
Expand Down Expand Up @@ -81,6 +81,10 @@ class ToyDetectorBenchmark : public benchmark::Fixture {
"the simulation data."
<< std::endl;

// Apply correct propagation config
apply_propagation_config(finding_cfg.propagation);
apply_propagation_config(fitting_cfg.propagation);

// Use deterministic random number generator for testing
using uniform_gen_t = detray::detail::random_numbers<
traccc::scalar, std::uniform_real_distribution<traccc::scalar>>;
Expand Down Expand Up @@ -126,11 +130,11 @@ class ToyDetectorBenchmark : public benchmark::Fixture {
detray::muon<traccc::scalar>(), n_events, det, field,
std::move(generator), std::move(smearer_writer_cfg), full_path);

// Same propagation configuration for sim and reco
apply_propagation_config(sim.get_config().propagation);
// Set constrained step size to 1 mm
sim.get_config().propagation.stepping.step_constraint =
1.f * detray::unit<float>::mm;
// Otherwise same propagation configuration for sim and reco
sim.get_config().propagation = finding_cfg.propagation;

sim.run();

Expand All @@ -156,16 +160,13 @@ class ToyDetectorBenchmark : public benchmark::Fixture {
return toy_cfg;
}

traccc::finding_config get_trk_finding_config() const {

traccc::finding_config finding_cfg{};

void apply_propagation_config(detray::propagation::config& cfg) const {
// Configure the propagation for the toy detector
finding_cfg.propagation.navigation.search_window = {3, 3};
finding_cfg.propagation.navigation.overstep_tolerance =
-300.f * detray::unit<traccc::scalar>::um;

return finding_cfg;
cfg.navigation.search_window = {3, 3};
cfg.navigation.overstep_tolerance = -300.f * detray::unit<float>::um;
cfg.navigation.min_mask_tolerance = 1e-5f * detray::unit<float>::mm;
cfg.navigation.max_mask_tolerance = 3.f * detray::unit<float>::mm;
cfg.navigation.mask_tolerance_scalor = 0.05f;
}

void SetUp(::benchmark::State& /*state*/) {
Expand Down
2 changes: 1 addition & 1 deletion benchmarks/cpu/toy_detector_cpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ BENCHMARK_F(ToyDetectorBenchmark, CPU)(benchmark::State& state) {
for (auto _ : state) {

// Iterate over events
#pragma omp parallel for
#pragma omp parallel for schedule(dynamic)
for (unsigned int i_evt = 0; i_evt < n_events; i_evt++) {

auto& spacepoints_per_event = spacepoints[i_evt];
Expand Down
14 changes: 7 additions & 7 deletions benchmarks/cuda/toy_detector_cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@
// VecMem include(s).
#include <vecmem/memory/cuda/device_memory_resource.hpp>
#include <vecmem/memory/cuda/host_memory_resource.hpp>
#include <vecmem/memory/cuda/managed_memory_resource.hpp>
#include <vecmem/memory/host_memory_resource.hpp>
#include <vecmem/utils/cuda/async_copy.hpp>
#include <vecmem/utils/cuda/copy.hpp>
Expand All @@ -56,7 +55,6 @@ BENCHMARK_F(ToyDetectorBenchmark, CUDA)(benchmark::State& state) {
vecmem::cuda::host_memory_resource cuda_host_mr;
vecmem::cuda::device_memory_resource device_mr;
traccc::memory_resource mr{device_mr, &cuda_host_mr};
vecmem::cuda::managed_memory_resource mng_mr;

// Copy and stream
vecmem::copy host_copy;
Expand All @@ -65,9 +63,9 @@ BENCHMARK_F(ToyDetectorBenchmark, CUDA)(benchmark::State& state) {
vecmem::cuda::async_copy async_copy{stream.cudaStream()};

// Read back detector file
host_detector_type det{mng_mr};
host_detector_type det{cuda_host_mr};
traccc::io::read_detector(
det, mng_mr, sim_dir + "toy_detector_geometry.json",
det, cuda_host_mr, sim_dir + "toy_detector_geometry.json",
sim_dir + "toy_detector_homogeneous_material.json",
sim_dir + "toy_detector_surface_grids.json");

Expand All @@ -84,8 +82,10 @@ BENCHMARK_F(ToyDetectorBenchmark, CUDA)(benchmark::State& state) {
traccc::cuda::fitting_algorithm<device_fitter_type> device_fitting(
fitting_cfg, mr, async_copy, stream);

// Copy detector to device
auto det_buffer = detray::get_buffer(det, device_mr, copy);
// Detector view object
auto det_view = detray::get_data(det);
auto det_view = detray::get_data(det_buffer);

// D2H copy object
traccc::device::container_d2h_copy_alg<traccc::track_state_container_types>
Expand Down Expand Up @@ -147,7 +147,7 @@ BENCHMARK_F(ToyDetectorBenchmark, CUDA)(benchmark::State& state) {
det_view, field, track_candidates_cuda_buffer);

// Create a temporary buffer that will receive the device memory.
auto size = track_states_cuda_buffer.headers.size();
/*auto size = track_states_cuda_buffer.headers.size();
std::vector<std::size_t> capacities(size, 0);
std::transform(track_states_cuda_buffer.items.host_ptr(),
track_states_cuda_buffer.items.host_ptr() + size,
Expand All @@ -156,7 +156,7 @@ BENCHMARK_F(ToyDetectorBenchmark, CUDA)(benchmark::State& state) {
// Copy the track states back to the host.
traccc::track_state_container_types::host track_states_host =
track_state_d2h(track_states_cuda_buffer);
track_state_d2h(track_states_cuda_buffer);*/
}
}

Expand Down

0 comments on commit 7a5833a

Please sign in to comment.