From 7a5833ac9f54943fa9952b9c3fa2a79185ce4517 Mon Sep 17 00:00:00 2001 From: Joana Niermann Date: Tue, 24 Sep 2024 04:12:48 +0200 Subject: [PATCH] Remove step size constraints from performance measurement --- .../benchmarks/toy_detector_benchmark.hpp | 25 ++++++++++--------- benchmarks/cpu/toy_detector_cpu.cpp | 2 +- benchmarks/cuda/toy_detector_cuda.cpp | 14 +++++------ 3 files changed, 21 insertions(+), 20 deletions(-) diff --git a/benchmarks/common/benchmarks/toy_detector_benchmark.hpp b/benchmarks/common/benchmarks/toy_detector_benchmark.hpp index 11bfde1fd..9a9054f3f 100644 --- a/benchmarks/common/benchmarks/toy_detector_benchmark.hpp +++ b/benchmarks/common/benchmarks/toy_detector_benchmark.hpp @@ -53,7 +53,7 @@ class ToyDetectorBenchmark : public benchmark::Fixture { traccc::seedfinder_config seeding_cfg; traccc::seedfilter_config filter_cfg; traccc::spacepoint_grid_config grid_cfg{seeding_cfg}; - traccc::finding_config finding_cfg = get_trk_finding_config(); + traccc::finding_config finding_cfg; traccc::fitting_config fitting_cfg; static constexpr std::array phi_range{ @@ -81,6 +81,10 @@ class ToyDetectorBenchmark : public benchmark::Fixture { "the simulation data." << std::endl; + // Apply correct propagation config + apply_propagation_config(finding_cfg.propagation); + apply_propagation_config(fitting_cfg.propagation); + // Use deterministic random number generator for testing using uniform_gen_t = detray::detail::random_numbers< traccc::scalar, std::uniform_real_distribution>; @@ -126,11 +130,11 @@ class ToyDetectorBenchmark : public benchmark::Fixture { detray::muon(), n_events, det, field, std::move(generator), std::move(smearer_writer_cfg), full_path); + // Same propagation configuration for sim and reco + apply_propagation_config(sim.get_config().propagation); // Set constrained step size to 1 mm sim.get_config().propagation.stepping.step_constraint = 1.f * detray::unit::mm; - // Otherwise same propagation configuration for sim and reco - sim.get_config().propagation = finding_cfg.propagation; sim.run(); @@ -156,16 +160,13 @@ class ToyDetectorBenchmark : public benchmark::Fixture { return toy_cfg; } - traccc::finding_config get_trk_finding_config() const { - - traccc::finding_config finding_cfg{}; - + void apply_propagation_config(detray::propagation::config& cfg) const { // Configure the propagation for the toy detector - finding_cfg.propagation.navigation.search_window = {3, 3}; - finding_cfg.propagation.navigation.overstep_tolerance = - -300.f * detray::unit::um; - - return finding_cfg; + cfg.navigation.search_window = {3, 3}; + cfg.navigation.overstep_tolerance = -300.f * detray::unit::um; + cfg.navigation.min_mask_tolerance = 1e-5f * detray::unit::mm; + cfg.navigation.max_mask_tolerance = 3.f * detray::unit::mm; + cfg.navigation.mask_tolerance_scalor = 0.05f; } void SetUp(::benchmark::State& /*state*/) { diff --git a/benchmarks/cpu/toy_detector_cpu.cpp b/benchmarks/cpu/toy_detector_cpu.cpp index 7f1acfd80..b89ca800f 100644 --- a/benchmarks/cpu/toy_detector_cpu.cpp +++ b/benchmarks/cpu/toy_detector_cpu.cpp @@ -62,7 +62,7 @@ BENCHMARK_F(ToyDetectorBenchmark, CPU)(benchmark::State& state) { for (auto _ : state) { // Iterate over events -#pragma omp parallel for +#pragma omp parallel for schedule(dynamic) for (unsigned int i_evt = 0; i_evt < n_events; i_evt++) { auto& spacepoints_per_event = spacepoints[i_evt]; diff --git a/benchmarks/cuda/toy_detector_cuda.cpp b/benchmarks/cuda/toy_detector_cuda.cpp index ce1593600..a5b3112fc 100644 --- a/benchmarks/cuda/toy_detector_cuda.cpp +++ b/benchmarks/cuda/toy_detector_cuda.cpp @@ -32,7 +32,6 @@ // VecMem include(s). #include #include -#include #include #include #include @@ -56,7 +55,6 @@ BENCHMARK_F(ToyDetectorBenchmark, CUDA)(benchmark::State& state) { vecmem::cuda::host_memory_resource cuda_host_mr; vecmem::cuda::device_memory_resource device_mr; traccc::memory_resource mr{device_mr, &cuda_host_mr}; - vecmem::cuda::managed_memory_resource mng_mr; // Copy and stream vecmem::copy host_copy; @@ -65,9 +63,9 @@ BENCHMARK_F(ToyDetectorBenchmark, CUDA)(benchmark::State& state) { vecmem::cuda::async_copy async_copy{stream.cudaStream()}; // Read back detector file - host_detector_type det{mng_mr}; + host_detector_type det{cuda_host_mr}; traccc::io::read_detector( - det, mng_mr, sim_dir + "toy_detector_geometry.json", + det, cuda_host_mr, sim_dir + "toy_detector_geometry.json", sim_dir + "toy_detector_homogeneous_material.json", sim_dir + "toy_detector_surface_grids.json"); @@ -84,8 +82,10 @@ BENCHMARK_F(ToyDetectorBenchmark, CUDA)(benchmark::State& state) { traccc::cuda::fitting_algorithm device_fitting( fitting_cfg, mr, async_copy, stream); + // Copy detector to device + auto det_buffer = detray::get_buffer(det, device_mr, copy); // Detector view object - auto det_view = detray::get_data(det); + auto det_view = detray::get_data(det_buffer); // D2H copy object traccc::device::container_d2h_copy_alg @@ -147,7 +147,7 @@ BENCHMARK_F(ToyDetectorBenchmark, CUDA)(benchmark::State& state) { det_view, field, track_candidates_cuda_buffer); // Create a temporary buffer that will receive the device memory. - auto size = track_states_cuda_buffer.headers.size(); + /*auto size = track_states_cuda_buffer.headers.size(); std::vector capacities(size, 0); std::transform(track_states_cuda_buffer.items.host_ptr(), track_states_cuda_buffer.items.host_ptr() + size, @@ -156,7 +156,7 @@ BENCHMARK_F(ToyDetectorBenchmark, CUDA)(benchmark::State& state) { // Copy the track states back to the host. traccc::track_state_container_types::host track_states_host = - track_state_d2h(track_states_cuda_buffer); + track_state_d2h(track_states_cuda_buffer);*/ } }