From b9857b99e52c162eaf9487ea1c9d932348fd525f Mon Sep 17 00:00:00 2001 From: John Bauman Date: Tue, 17 Dec 2024 22:44:03 +0000 Subject: [PATCH] #0: Switch to google benchmark for pgm dispatch tests --- .clang-format-ignore | 1 + .../fast-dispatch-frequent-tests-impl.yaml | 8 +- dependencies/CMakeLists.txt | 15 + .../perf_microbenchmark/CMakeLists.txt | 2 + .../dispatch/compare_pgm_dispatch_perf_ci.py | 66 + .../dispatch/pgm_dispatch_golden.json | 2223 +++++++++++++++++ .../dispatch/test_pgm_dispatch.cpp | 238 +- 7 files changed, 2501 insertions(+), 52 deletions(-) create mode 100755 tests/tt_metal/tt_metal/perf_microbenchmark/dispatch/compare_pgm_dispatch_perf_ci.py create mode 100644 tests/tt_metal/tt_metal/perf_microbenchmark/dispatch/pgm_dispatch_golden.json diff --git a/.clang-format-ignore b/.clang-format-ignore index bcf448b7fc2..81fa1f84e2c 100644 --- a/.clang-format-ignore +++ b/.clang-format-ignore @@ -108,6 +108,7 @@ tests/tt_metal/test_utils/env_vars.hpp tests/tt_metal/tt_metal/api/allocator/test_free_list_opt_allocator.cpp tests/tt_metal/tt_metal/api/test_global_semaphores.cpp tests/tt_metal/tt_metal/dispatch/sub_device_test_utils.hpp +tests/tt_metal/tt_metal/perf_microbenchmark/dispatch/pgm_dispatch_golden.json tests/tt_metal/tt_metal/perf_microbenchmark/10_dram_read_remote_cb_sync/test_dram_read_remote_cb.cpp tests/tt_metal/tt_metal/perf_microbenchmark/11_remote_cb_sync_matmul_single_core/test_remote_cb_sync_matmul.cpp tests/tt_metal/tt_metal/perf_microbenchmark/routing/kernels/traffic_gen_rx.cpp diff --git a/.github/workflows/fast-dispatch-frequent-tests-impl.yaml b/.github/workflows/fast-dispatch-frequent-tests-impl.yaml index 2dbc84d446b..9d66dfb9c85 100644 --- a/.github/workflows/fast-dispatch-frequent-tests-impl.yaml +++ b/.github/workflows/fast-dispatch-frequent-tests-impl.yaml @@ -21,7 +21,7 @@ jobs: name: "WH N300 pgm dispatch nightly", arch: wormhole_b0, runs-on: ["cloud-virtual-machine", "N300", "in-service"], - cmd: 
./tests/tt_metal/tt_metal/perf_microbenchmark/dispatch/compare_pgm_dispatch_perf_ci.sh, + cmd: ./tests/tt_metal/tt_metal/perf_microbenchmark/dispatch/compare_pgm_dispatch_perf_ci.py, timeout: 10 }, ] @@ -57,3 +57,9 @@ jobs: path: | generated/test_reports/ prefix: "test_reports_" + - uses: ./.github/actions/upload-artifact-with-job-uuid + if: ${{ !cancelled() }} + with: + path: | + bench.json + prefix: "bench_json_" diff --git a/dependencies/CMakeLists.txt b/dependencies/CMakeLists.txt index 3064d846fe5..be143670e63 100644 --- a/dependencies/CMakeLists.txt +++ b/dependencies/CMakeLists.txt @@ -111,3 +111,18 @@ CPMAddPackage( OPTIONS "XTENSOR_ENABLE_TESTS OFF" ) + +############################################################################################################################ +# benchmark : https://github.com/google/benchmark +############################################################################################################################ + +CPMAddPackage(NAME benchmark GITHUB_REPOSITORY google/benchmark GIT_TAG v1.9.1) + +if(benchmark_ADDED) + set_target_properties( + benchmark + PROPERTIES + LIBRARY_OUTPUT_DIRECTORY + "${CMAKE_BINARY_DIR}/lib" + ) +endif() diff --git a/tests/tt_metal/tt_metal/perf_microbenchmark/CMakeLists.txt b/tests/tt_metal/tt_metal/perf_microbenchmark/CMakeLists.txt index 680e8f10459..ca84e37275c 100644 --- a/tests/tt_metal/tt_metal/perf_microbenchmark/CMakeLists.txt +++ b/tests/tt_metal/tt_metal/perf_microbenchmark/CMakeLists.txt @@ -77,4 +77,6 @@ foreach(TEST_SRC ${PERF_MICROBENCH_TESTS_SRCS}) list(APPEND PERF_MICROBENCH_TEST_TARGETS ${TEST_TARGET}) endforeach() +target_link_libraries(test_pgm_dispatch PUBLIC benchmark::benchmark) + add_custom_target(metal_perf_microbenchmark_tests DEPENDS ${PERF_MICROBENCH_TEST_TARGETS}) diff --git a/tests/tt_metal/tt_metal/perf_microbenchmark/dispatch/compare_pgm_dispatch_perf_ci.py b/tests/tt_metal/tt_metal/perf_microbenchmark/dispatch/compare_pgm_dispatch_perf_ci.py new file mode 
100755
index 00000000000..ddfd1cd70be
--- /dev/null
+++ b/tests/tt_metal/tt_metal/perf_microbenchmark/dispatch/compare_pgm_dispatch_perf_ci.py
@@ -0,0 +1,77 @@
+#!/usr/bin/python3
+"""Run test_pgm_dispatch and compare its timings against the checked-in golden JSON.
+
+Exits non-zero when the benchmark binary fails, when a benchmark is more than THRESHOLD
+percent slower than its golden time, when a benchmark reports an error the golden run did
+not, or when the golden and result benchmark sets differ.
+"""
+
+import json
+import os
+import sys
+
+os.chdir(os.getenv("TT_METAL_HOME"))
+with open("tests/tt_metal/tt_metal/perf_microbenchmark/dispatch/pgm_dispatch_golden.json", "r") as golden_file:
+    golden = json.load(golden_file)
+
+# Allowed deviation from the golden time, in percent.
+THRESHOLD = 4
+status = os.system(
+    "build/test/tt_metal/perf_microbenchmark/dispatch/test_pgm_dispatch --benchmark_out_format=json --benchmark_out=bench.json"
+)
+if status != 0:
+    print(f"Test failed with error code {status}")
+    # os.system() returns a wait status on Unix (exit code 1 arrives as 256). Handing that to
+    # sys.exit() keeps only the low 8 bits, so a failing run could exit 0; always exit 1.
+    sys.exit(1)
+
+with open("bench.json", "r") as result_file:
+    results = json.load(result_file)
+
+# Index both runs by benchmark name.
+golden_benchmarks = {benchmark["name"]: benchmark for benchmark in golden["benchmarks"]}
+result_benchmarks = {benchmark["name"]: benchmark for benchmark in results["benchmarks"]}
+
+exit_code = 0
+
+for name, benchmark in golden_benchmarks.items():
+    if name not in result_benchmarks:
+        print(f"Golden benchmark {name} missing from results")
+        exit_code = 1
+        continue
+    result = result_benchmarks[name]
+
+    # The golden run recorded an error: nothing to compare, only note if it now passes.
+    if "error_occurred" in benchmark:
+        if "error_occurred" not in result:
+            result_time = result["IterationTime"] * 1000000
+            print(f"Error in {name} was fixed in result (with time {result_time:.2f}us). Consider adjusting baselines.")
+        continue
+
+    if "error_occurred" in result:
+        if "error_occurred" not in benchmark:
+            print(f"Benchmark {name} gave unexpected error: {result['error_message']}")
+            exit_code = 1
+        continue
+
+    # IterationTime is scaled by 1e6; the messages below print it with a "us" suffix.
+    golden_time = benchmark["IterationTime"] * 1000000
+    result_time = result["IterationTime"] * 1000000
+    # Slower than golden by more than THRESHOLD percent: fail.
+    if result_time / golden_time > (1 + THRESHOLD / 100):
+        print(f"Test {name} expected value {golden_time:.2f}us but got {result_time:.2f}us")
+        exit_code = 1
+    # Faster than golden by more than THRESHOLD percent: only suggest new baselines.
+    if golden_time / result_time > (1 + THRESHOLD / 100):
+        print(
+            f"Test {name} got value {result_time:.2f}us but expected {golden_time:.2f}us. Consider adjusting baselines"
+        )
+
+for name in result_benchmarks:
+    if name not in golden_benchmarks:
+        print(f"Result benchmark {name} missing from goldens")
+        exit_code = 1
+
+if exit_code == 0:
+    print("Test successful")
+sys.exit(exit_code)
diff --git a/tests/tt_metal/tt_metal/perf_microbenchmark/dispatch/pgm_dispatch_golden.json b/tests/tt_metal/tt_metal/perf_microbenchmark/dispatch/pgm_dispatch_golden.json new file mode 100644 index 00000000000..6f26276ff20 --- /dev/null +++ b/tests/tt_metal/tt_metal/perf_microbenchmark/dispatch/pgm_dispatch_golden.json @@ -0,0 +1,2223 @@ +{ + "context": { + "date": "2024-12-19T05:39:52+00:00", + "host_name": "yyzc-wh-03-special-jbauman-for-reservation-4834988", + "executable": "build/test/tt_metal/perf_microbenchmark/dispatch/test_pgm_dispatch", + "num_cpus": 128, + "mhz_per_cpu": 3291, + "cpu_scaling_enabled": false, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32768, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 524288, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 16777216, + "num_sharing": 4 + } + ], + "load_avg": [0.703613,2.13721,2.0166], + "library_version": "v1.9.1", + "library_build_type": "debug", + "json_schema_version": 1 + }, + "benchmarks": [ + { + "name": "BM_pgm_dispatch/brisc_only_trace/256/manual_time", + "family_index": 0, + "per_family_instance_index": 0, + "run_name": "BM_pgm_dispatch/brisc_only_trace/256/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 27, + "real_time": 2.5835666666666660e+07, + "cpu_time": 6.7519740740740803e+04, + "time_unit": "ns", + "IterationTime": 2.5835666666666663e-06 + }, + { + "name": "BM_pgm_dispatch/brisc_only_trace/512/manual_time", + "family_index": 0, + "per_family_instance_index": 1, + "run_name": "BM_pgm_dispatch/brisc_only_trace/512/manual_time", +
"run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 27, + "real_time": 2.5922888888888884e+07, + "cpu_time": 6.7360333333331975e+04, + "time_unit": "ns", + "IterationTime": 2.5922888888888883e-06 + }, + { + "name": "BM_pgm_dispatch/brisc_only_trace/1024/manual_time", + "family_index": 0, + "per_family_instance_index": 2, + "run_name": "BM_pgm_dispatch/brisc_only_trace/1024/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 27, + "real_time": 2.6136259259259257e+07, + "cpu_time": 6.5716074074073651e+04, + "time_unit": "ns", + "IterationTime": 2.6136259259259258e-06 + }, + { + "name": "BM_pgm_dispatch/brisc_only_trace/2048/manual_time", + "family_index": 0, + "per_family_instance_index": 3, + "run_name": "BM_pgm_dispatch/brisc_only_trace/2048/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 26, + "real_time": 2.6920846153846148e+07, + "cpu_time": 6.6131076923078203e+04, + "time_unit": "ns", + "IterationTime": 2.6920846153846150e-06 + }, + { + "name": "BM_pgm_dispatch/brisc_only_trace/4096/manual_time", + "family_index": 0, + "per_family_instance_index": 4, + "run_name": "BM_pgm_dispatch/brisc_only_trace/4096/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 23, + "real_time": 2.9802695652173925e+07, + "cpu_time": 6.5853652173911265e+04, + "time_unit": "ns", + "IterationTime": 2.9802695652173924e-06 + }, + { + "name": "BM_pgm_dispatch/brisc_only_trace/8192/manual_time", + "family_index": 0, + "per_family_instance_index": 5, + "run_name": "BM_pgm_dispatch/brisc_only_trace/8192/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 21, + "real_time": 3.2766952380952381e+07, + "cpu_time": 6.7301904761906699e+04, + "time_unit": "ns", + "IterationTime": 
3.2766952380952380e-06 + }, + { + "name": "BM_pgm_dispatch/brisc_only_trace/12288/manual_time", + "family_index": 0, + "per_family_instance_index": 6, + "run_name": "BM_pgm_dispatch/brisc_only_trace/12288/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 20, + "real_time": 3.5436299999999993e+07, + "cpu_time": 6.6283600000000006e+04, + "time_unit": "ns", + "IterationTime": 3.5436299999999994e-06 + }, + { + "name": "BM_pgm_dispatch/ncrisc_only_trace/256/manual_time", + "family_index": 1, + "per_family_instance_index": 0, + "run_name": "BM_pgm_dispatch/ncrisc_only_trace/256/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 27, + "real_time": 2.5836888888888884e+07, + "cpu_time": 6.5608148148146967e+04, + "time_unit": "ns", + "IterationTime": 2.5836888888888885e-06 + }, + { + "name": "BM_pgm_dispatch/ncrisc_only_trace/512/manual_time", + "family_index": 1, + "per_family_instance_index": 1, + "run_name": "BM_pgm_dispatch/ncrisc_only_trace/512/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 27, + "real_time": 2.5922777777777776e+07, + "cpu_time": 6.7188592592590139e+04, + "time_unit": "ns", + "IterationTime": 2.5922777777777777e-06 + }, + { + "name": "BM_pgm_dispatch/ncrisc_only_trace/1024/manual_time", + "family_index": 1, + "per_family_instance_index": 2, + "run_name": "BM_pgm_dispatch/ncrisc_only_trace/1024/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 27, + "real_time": 2.6136074074074078e+07, + "cpu_time": 6.5595407407408813e+04, + "time_unit": "ns", + "IterationTime": 2.6136074074074082e-06 + }, + { + "name": "BM_pgm_dispatch/ncrisc_only_trace/2048/manual_time", + "family_index": 1, + "per_family_instance_index": 3, + "run_name": "BM_pgm_dispatch/ncrisc_only_trace/2048/manual_time", + 
"run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 26, + "real_time": 2.6921923076923072e+07, + "cpu_time": 6.6122730769233240e+04, + "time_unit": "ns", + "IterationTime": 2.6921923076923071e-06 + }, + { + "name": "BM_pgm_dispatch/ncrisc_only_trace/4096/manual_time", + "family_index": 1, + "per_family_instance_index": 4, + "run_name": "BM_pgm_dispatch/ncrisc_only_trace/4096/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 23, + "real_time": 2.9802000000000004e+07, + "cpu_time": 6.6417956521735992e+04, + "time_unit": "ns", + "IterationTime": 2.9802000000000003e-06 + }, + { + "name": "BM_pgm_dispatch/ncrisc_only_trace/8192/manual_time", + "family_index": 1, + "per_family_instance_index": 5, + "run_name": "BM_pgm_dispatch/ncrisc_only_trace/8192/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 21, + "real_time": 3.2766380952380951e+07, + "cpu_time": 6.5010000000005552e+04, + "time_unit": "ns", + "IterationTime": 3.2766380952380949e-06 + }, + { + "name": "BM_pgm_dispatch/ncrisc_only_trace/12288/manual_time", + "family_index": 1, + "per_family_instance_index": 6, + "run_name": "BM_pgm_dispatch/ncrisc_only_trace/12288/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 20, + "real_time": 3.5437600000000000e+07, + "cpu_time": 6.5835400000002759e+04, + "time_unit": "ns", + "IterationTime": 3.5437600000000000e-06 + }, + { + "name": "BM_pgm_dispatch/trisc_only_trace/256/manual_time", + "family_index": 2, + "per_family_instance_index": 0, + "run_name": "BM_pgm_dispatch/trisc_only_trace/256/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 24, + "real_time": 2.9092916666666672e+07, + "cpu_time": 6.5832291666660196e+04, + "time_unit": "ns", + "IterationTime": 
2.9092916666666673e-06 + }, + { + "name": "BM_pgm_dispatch/trisc_only_trace/512/manual_time", + "family_index": 2, + "per_family_instance_index": 1, + "run_name": "BM_pgm_dispatch/trisc_only_trace/512/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 24, + "real_time": 2.9236916666666660e+07, + "cpu_time": 6.8015250000006781e+04, + "time_unit": "ns", + "IterationTime": 2.9236916666666659e-06 + }, + { + "name": "BM_pgm_dispatch/trisc_only_trace/1024/manual_time", + "family_index": 2, + "per_family_instance_index": 2, + "run_name": "BM_pgm_dispatch/trisc_only_trace/1024/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 23, + "real_time": 3.0122434782608699e+07, + "cpu_time": 6.5719347826089492e+04, + "time_unit": "ns", + "IterationTime": 3.0122434782608697e-06 + }, + { + "name": "BM_pgm_dispatch/trisc_only_trace/2048/manual_time", + "family_index": 2, + "per_family_instance_index": 3, + "run_name": "BM_pgm_dispatch/trisc_only_trace/2048/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 22, + "real_time": 3.2263000000000004e+07, + "cpu_time": 6.7434772727273637e+04, + "time_unit": "ns", + "IterationTime": 3.2263000000000008e-06 + }, + { + "name": "BM_pgm_dispatch/trisc_only_trace/4096/manual_time", + "family_index": 2, + "per_family_instance_index": 4, + "run_name": "BM_pgm_dispatch/trisc_only_trace/4096/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 19, + "real_time": 3.6278684210526310e+07, + "cpu_time": 6.7209421052632184e+04, + "time_unit": "ns", + "IterationTime": 3.6278684210526310e-06 + }, + { + "name": "BM_pgm_dispatch/trisc_only_trace/8192/manual_time", + "family_index": 2, + "per_family_instance_index": 5, + "run_name": "BM_pgm_dispatch/trisc_only_trace/8192/manual_time", + "run_type": 
"iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 16, + "real_time": 4.5065187500000007e+07, + "cpu_time": 6.7605312499996464e+04, + "time_unit": "ns", + "IterationTime": 4.5065187500000004e-06 + }, + { + "name": "BM_pgm_dispatch/trisc_only_trace/12288/manual_time", + "family_index": 2, + "per_family_instance_index": 6, + "run_name": "BM_pgm_dispatch/trisc_only_trace/12288/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 13, + "real_time": 5.3967153846153863e+07, + "cpu_time": 6.4075307692282950e+04, + "time_unit": "ns", + "IterationTime": 5.3967153846153866e-06 + }, + { + "name": "BM_pgm_dispatch/brisc_trisc_only_trace/256/manual_time", + "family_index": 3, + "per_family_instance_index": 0, + "run_name": "BM_pgm_dispatch/brisc_trisc_only_trace/256/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 23, + "real_time": 3.0262826086956516e+07, + "cpu_time": 6.5417130434783736e+04, + "time_unit": "ns", + "IterationTime": 3.0262826086956514e-06 + }, + { + "name": "BM_pgm_dispatch/brisc_trisc_only_trace/512/manual_time", + "family_index": 3, + "per_family_instance_index": 1, + "run_name": "BM_pgm_dispatch/brisc_trisc_only_trace/512/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 23, + "real_time": 3.0604304347826093e+07, + "cpu_time": 6.6773695652179027e+04, + "time_unit": "ns", + "IterationTime": 3.0604304347826093e-06 + }, + { + "name": "BM_pgm_dispatch/brisc_trisc_only_trace/1024/manual_time", + "family_index": 3, + "per_family_instance_index": 2, + "run_name": "BM_pgm_dispatch/brisc_trisc_only_trace/1024/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 22, + "real_time": 3.2259818181818176e+07, + "cpu_time": 6.4693772727270567e+04, + "time_unit": "ns", + 
"IterationTime": 3.2259818181818178e-06 + }, + { + "name": "BM_pgm_dispatch/brisc_trisc_only_trace/2048/manual_time", + "family_index": 3, + "per_family_instance_index": 3, + "run_name": "BM_pgm_dispatch/brisc_trisc_only_trace/2048/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 21, + "real_time": 3.3864666666666664e+07, + "cpu_time": 6.6690047619029297e+04, + "time_unit": "ns", + "IterationTime": 3.3864666666666663e-06 + }, + { + "name": "BM_pgm_dispatch/brisc_trisc_only_trace/4096/manual_time", + "family_index": 3, + "per_family_instance_index": 4, + "run_name": "BM_pgm_dispatch/brisc_trisc_only_trace/4096/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 18, + "real_time": 3.9313944444444448e+07, + "cpu_time": 6.3973000000001113e+04, + "time_unit": "ns", + "IterationTime": 3.9313944444444449e-06 + }, + { + "name": "BM_pgm_dispatch/brisc_trisc_only_trace/8192/manual_time", + "family_index": 3, + "per_family_instance_index": 5, + "run_name": "BM_pgm_dispatch/brisc_trisc_only_trace/8192/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 14, + "real_time": 5.0917357142857142e+07, + "cpu_time": 6.4715142857123858e+04, + "time_unit": "ns", + "IterationTime": 5.0917357142857149e-06 + }, + { + "name": "BM_pgm_dispatch/brisc_trisc_only_trace/12288/manual_time", + "family_index": 3, + "per_family_instance_index": 6, + "run_name": "BM_pgm_dispatch/brisc_trisc_only_trace/12288/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 11, + "real_time": 6.2550818181818180e+07, + "cpu_time": 6.5086090909110608e+04, + "time_unit": "ns", + "IterationTime": 6.2550818181818168e-06 + }, + { + "name": "BM_pgm_dispatch/all_processors_trace/256/manual_time", + "family_index": 4, + "per_family_instance_index": 0, + 
"run_name": "BM_pgm_dispatch/all_processors_trace/256/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 22, + "real_time": 3.1691272727272723e+07, + "cpu_time": 6.4922318181811788e+04, + "time_unit": "ns", + "IterationTime": 3.1691272727272723e-06 + }, + { + "name": "BM_pgm_dispatch/all_processors_trace/512/manual_time", + "family_index": 4, + "per_family_instance_index": 1, + "run_name": "BM_pgm_dispatch/all_processors_trace/512/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 22, + "real_time": 3.2287681818181809e+07, + "cpu_time": 7.1162954545446089e+04, + "time_unit": "ns", + "IterationTime": 3.2287681818181809e-06 + }, + { + "name": "BM_pgm_dispatch/all_processors_trace/1024/manual_time", + "family_index": 4, + "per_family_instance_index": 2, + "run_name": "BM_pgm_dispatch/all_processors_trace/1024/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 21, + "real_time": 3.3887523809523806e+07, + "cpu_time": 6.6400571428582552e+04, + "time_unit": "ns", + "IterationTime": 3.3887523809523807e-06 + }, + { + "name": "BM_pgm_dispatch/all_processors_trace/2048/manual_time", + "family_index": 4, + "per_family_instance_index": 3, + "run_name": "BM_pgm_dispatch/all_processors_trace/2048/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 19, + "real_time": 3.5949157894736849e+07, + "cpu_time": 6.5734631578954373e+04, + "time_unit": "ns", + "IterationTime": 3.5949157894736850e-06 + }, + { + "name": "BM_pgm_dispatch/all_processors_trace/4096/manual_time", + "family_index": 4, + "per_family_instance_index": 4, + "run_name": "BM_pgm_dispatch/all_processors_trace/4096/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 16, + "real_time": 
4.2890874999999993e+07, + "cpu_time": 6.5087687500020584e+04, + "time_unit": "ns", + "IterationTime": 4.2890874999999993e-06 + }, + { + "name": "BM_pgm_dispatch/all_processors_trace/8192/manual_time", + "family_index": 4, + "per_family_instance_index": 5, + "run_name": "BM_pgm_dispatch/all_processors_trace/8192/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 12, + "real_time": 5.7888250000000000e+07, + "cpu_time": 6.8142500000016138e+04, + "time_unit": "ns", + "IterationTime": 5.7888249999999996e-06 + }, + { + "name": "BM_pgm_dispatch/all_processors_trace/12288/manual_time", + "family_index": 4, + "per_family_instance_index": 6, + "run_name": "BM_pgm_dispatch/all_processors_trace/12288/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 10, + "real_time": 7.1583599999999985e+07, + "cpu_time": 6.3426200000016310e+04, + "time_unit": "ns", + "IterationTime": 7.1583599999999988e-06 + }, + { + "name": "BM_pgm_dispatch/all_processors_all_cores/256/manual_time", + "family_index": 5, + "per_family_instance_index": 0, + "run_name": "BM_pgm_dispatch/all_processors_all_cores/256/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 22, + "real_time": 3.1998727272727273e+07, + "cpu_time": 6.6219636363642276e+04, + "time_unit": "ns", + "IterationTime": 3.1998727272727272e-06 + }, + { + "name": "BM_pgm_dispatch/all_processors_all_cores/512/manual_time", + "family_index": 5, + "per_family_instance_index": 1, + "run_name": "BM_pgm_dispatch/all_processors_all_cores/512/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 22, + "real_time": 3.2290545454545461e+07, + "cpu_time": 6.6973454545457309e+04, + "time_unit": "ns", + "IterationTime": 3.2290545454545457e-06 + }, + { + "name": 
"BM_pgm_dispatch/all_processors_all_cores/1024/manual_time", + "family_index": 5, + "per_family_instance_index": 2, + "run_name": "BM_pgm_dispatch/all_processors_all_cores/1024/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 21, + "real_time": 3.4136238095238097e+07, + "cpu_time": 6.6572523809530336e+04, + "time_unit": "ns", + "IterationTime": 3.4136238095238095e-06 + }, + { + "name": "BM_pgm_dispatch/all_processors_all_cores/2048/manual_time", + "family_index": 5, + "per_family_instance_index": 3, + "run_name": "BM_pgm_dispatch/all_processors_all_cores/2048/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 19, + "real_time": 3.6371052631578945e+07, + "cpu_time": 6.4882473684179618e+04, + "time_unit": "ns", + "IterationTime": 3.6371052631578950e-06 + }, + { + "name": "BM_pgm_dispatch/all_processors_all_cores/4096/manual_time", + "family_index": 5, + "per_family_instance_index": 4, + "run_name": "BM_pgm_dispatch/all_processors_all_cores/4096/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 16, + "real_time": 4.3394937500000000e+07, + "cpu_time": 6.8412999999989406e+04, + "time_unit": "ns", + "IterationTime": 4.3394937500000001e-06 + }, + { + "name": "BM_pgm_dispatch/all_processors_all_cores/8192/manual_time", + "family_index": 5, + "per_family_instance_index": 5, + "run_name": "BM_pgm_dispatch/all_processors_all_cores/8192/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 12, + "real_time": 5.9896083333333336e+07, + "cpu_time": 6.5426583333305643e+04, + "time_unit": "ns", + "IterationTime": 5.9896083333333327e-06 + }, + { + "name": "BM_pgm_dispatch/all_processors_all_cores/12288/manual_time", + "family_index": 5, + "per_family_instance_index": 6, + "run_name": 
"BM_pgm_dispatch/all_processors_all_cores/12288/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 9, + "real_time": 7.4717333333333328e+07, + "cpu_time": 6.3937777777726500e+04, + "time_unit": "ns", + "IterationTime": 7.4717333333333334e-06 + }, + { + "name": "BM_pgm_dispatch/all_processors_all_cores_1cb/256/manual_time", + "family_index": 6, + "per_family_instance_index": 0, + "run_name": "BM_pgm_dispatch/all_processors_all_cores_1cb/256/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 20, + "real_time": 3.4878750000000007e+07, + "cpu_time": 6.6201050000014307e+04, + "time_unit": "ns", + "IterationTime": 3.4878750000000009e-06 + }, + { + "name": "BM_pgm_dispatch/all_processors_all_cores_1cb/512/manual_time", + "family_index": 6, + "per_family_instance_index": 1, + "run_name": "BM_pgm_dispatch/all_processors_all_cores_1cb/512/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 20, + "real_time": 3.5514100000000015e+07, + "cpu_time": 6.8068000000032218e+04, + "time_unit": "ns", + "IterationTime": 3.5514100000000011e-06 + }, + { + "name": "BM_pgm_dispatch/all_processors_all_cores_1cb/1024/manual_time", + "family_index": 6, + "per_family_instance_index": 2, + "run_name": "BM_pgm_dispatch/all_processors_all_cores_1cb/1024/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 19, + "real_time": 3.7336894736842111e+07, + "cpu_time": 6.4786473684230157e+04, + "time_unit": "ns", + "IterationTime": 3.7336894736842118e-06 + }, + { + "name": "BM_pgm_dispatch/all_processors_all_cores_1cb/2048/manual_time", + "family_index": 6, + "per_family_instance_index": 3, + "run_name": "BM_pgm_dispatch/all_processors_all_cores_1cb/2048/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + 
"threads": 1, + "iterations": 18, + "real_time": 3.9390666666666672e+07, + "cpu_time": 6.6516944444434193e+04, + "time_unit": "ns", + "IterationTime": 3.9390666666666676e-06 + }, + { + "name": "BM_pgm_dispatch/all_processors_all_cores_1cb/4096/manual_time", + "family_index": 6, + "per_family_instance_index": 4, + "run_name": "BM_pgm_dispatch/all_processors_all_cores_1cb/4096/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 15, + "real_time": 4.6557800000000007e+07, + "cpu_time": 6.9815666666646808e+04, + "time_unit": "ns", + "IterationTime": 4.6557799999999998e-06 + }, + { + "name": "BM_pgm_dispatch/all_processors_all_cores_1cb/8192/manual_time", + "family_index": 6, + "per_family_instance_index": 5, + "run_name": "BM_pgm_dispatch/all_processors_all_cores_1cb/8192/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 11, + "real_time": 6.4940636363636374e+07, + "cpu_time": 6.8326090909080638e+04, + "time_unit": "ns", + "IterationTime": 6.4940636363636389e-06 + }, + { + "name": "BM_pgm_dispatch/all_processors_all_cores_32cb/256/manual_time", + "family_index": 7, + "per_family_instance_index": 0, + "run_name": "BM_pgm_dispatch/all_processors_all_cores_32cb/256/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 20, + "real_time": 3.5180650000000007e+07, + "cpu_time": 6.6074049999986790e+04, + "time_unit": "ns", + "IterationTime": 3.5180650000000012e-06 + }, + { + "name": "BM_pgm_dispatch/all_processors_all_cores_32cb/512/manual_time", + "family_index": 7, + "per_family_instance_index": 1, + "run_name": "BM_pgm_dispatch/all_processors_all_cores_32cb/512/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 20, + "real_time": 3.5598450000000007e+07, + "cpu_time": 6.6408549999996685e+04, + "time_unit": "ns", + 
"IterationTime": 3.5598450000000004e-06 + }, + { + "name": "BM_pgm_dispatch/all_processors_all_cores_32cb/1024/manual_time", + "family_index": 7, + "per_family_instance_index": 2, + "run_name": "BM_pgm_dispatch/all_processors_all_cores_32cb/1024/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 19, + "real_time": 3.7828526315789476e+07, + "cpu_time": 6.6148315789467466e+04, + "time_unit": "ns", + "IterationTime": 3.7828526315789475e-06 + }, + { + "name": "BM_pgm_dispatch/all_processors_all_cores_32cb/2048/manual_time", + "family_index": 7, + "per_family_instance_index": 3, + "run_name": "BM_pgm_dispatch/all_processors_all_cores_32cb/2048/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 18, + "real_time": 3.9599777777777776e+07, + "cpu_time": 6.4136388888888810e+04, + "time_unit": "ns", + "IterationTime": 3.9599777777777774e-06 + }, + { + "name": "BM_pgm_dispatch/all_processors_all_cores_32cb/4096/manual_time", + "family_index": 7, + "per_family_instance_index": 4, + "run_name": "BM_pgm_dispatch/all_processors_all_cores_32cb/4096/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 15, + "real_time": 4.6655066666666664e+07, + "cpu_time": 6.6896933333341716e+04, + "time_unit": "ns", + "IterationTime": 4.6655066666666673e-06 + }, + { + "name": "BM_pgm_dispatch/all_processors_all_cores_32cb/8192/manual_time", + "family_index": 7, + "per_family_instance_index": 5, + "run_name": "BM_pgm_dispatch/all_processors_all_cores_32cb/8192/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 9, + "real_time": 7.4992555555555552e+07, + "cpu_time": 6.5846888888899790e+04, + "time_unit": "ns", + "IterationTime": 7.4992555555555547e-06 + }, + { + "name": "BM_pgm_dispatch/all_processors_1_core_1_rta/256/manual_time", + 
"family_index": 8, + "per_family_instance_index": 0, + "run_name": "BM_pgm_dispatch/all_processors_1_core_1_rta/256/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 20, + "real_time": 3.4590199999999993e+07, + "cpu_time": 6.6592949999977340e+04, + "time_unit": "ns", + "IterationTime": 3.4590199999999995e-06 + }, + { + "name": "BM_pgm_dispatch/all_processors_1_core_1_rta/512/manual_time", + "family_index": 8, + "per_family_instance_index": 1, + "run_name": "BM_pgm_dispatch/all_processors_1_core_1_rta/512/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 20, + "real_time": 3.5121300000000007e+07, + "cpu_time": 6.6272249999999040e+04, + "time_unit": "ns", + "IterationTime": 3.5121300000000002e-06 + }, + { + "name": "BM_pgm_dispatch/all_processors_1_core_1_rta/1024/manual_time", + "family_index": 8, + "per_family_instance_index": 2, + "run_name": "BM_pgm_dispatch/all_processors_1_core_1_rta/1024/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 19, + "real_time": 3.6807473684210524e+07, + "cpu_time": 6.8370684210500011e+04, + "time_unit": "ns", + "IterationTime": 3.6807473684210529e-06 + }, + { + "name": "BM_pgm_dispatch/all_processors_1_core_1_rta/2048/manual_time", + "family_index": 8, + "per_family_instance_index": 3, + "run_name": "BM_pgm_dispatch/all_processors_1_core_1_rta/2048/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 18, + "real_time": 3.8756833333333336e+07, + "cpu_time": 6.6079222222212615e+04, + "time_unit": "ns", + "IterationTime": 3.8756833333333333e-06 + }, + { + "name": "BM_pgm_dispatch/all_processors_1_core_1_rta/4096/manual_time", + "family_index": 8, + "per_family_instance_index": 4, + "run_name": "BM_pgm_dispatch/all_processors_1_core_1_rta/4096/manual_time", + "run_type": 
"iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 15, + "real_time": 4.5730666666666664e+07, + "cpu_time": 6.4522933333321933e+04, + "time_unit": "ns", + "IterationTime": 4.5730666666666669e-06 + }, + { + "name": "BM_pgm_dispatch/all_processors_1_core_1_rta/8192/manual_time", + "family_index": 8, + "per_family_instance_index": 5, + "run_name": "BM_pgm_dispatch/all_processors_1_core_1_rta/8192/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 12, + "real_time": 6.0021916666666664e+07, + "cpu_time": 6.1123999999986489e+04, + "time_unit": "ns", + "IterationTime": 6.0021916666666665e-06 + }, + { + "name": "BM_pgm_dispatch/one_processor_all_cores_128_rta/256/manual_time", + "family_index": 9, + "per_family_instance_index": 0, + "run_name": "BM_pgm_dispatch/one_processor_all_cores_128_rta/256/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 11, + "real_time": 6.4911090909090906e+07, + "cpu_time": 6.6037272727309013e+04, + "time_unit": "ns", + "IterationTime": 6.4911090909090918e-06 + }, + { + "name": "BM_pgm_dispatch/one_processor_all_cores_128_rta/512/manual_time", + "family_index": 9, + "per_family_instance_index": 1, + "run_name": "BM_pgm_dispatch/one_processor_all_cores_128_rta/512/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 11, + "real_time": 6.5020818181818180e+07, + "cpu_time": 6.1886181818076642e+04, + "time_unit": "ns", + "IterationTime": 6.5020818181818180e-06 + }, + { + "name": "BM_pgm_dispatch/one_processor_all_cores_128_rta/1024/manual_time", + "family_index": 9, + "per_family_instance_index": 2, + "run_name": "BM_pgm_dispatch/one_processor_all_cores_128_rta/1024/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 11, + "real_time": 
6.5199909090909079e+07, + "cpu_time": 6.5176181818186888e+04, + "time_unit": "ns", + "IterationTime": 6.5199909090909089e-06 + }, + { + "name": "BM_pgm_dispatch/one_processor_all_cores_128_rta/2048/manual_time", + "family_index": 9, + "per_family_instance_index": 3, + "run_name": "BM_pgm_dispatch/one_processor_all_cores_128_rta/2048/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 11, + "real_time": 6.5815454545454547e+07, + "cpu_time": 6.3826818181925402e+04, + "time_unit": "ns", + "IterationTime": 6.5815454545454548e-06 + }, + { + "name": "BM_pgm_dispatch/one_processor_all_cores_128_rta/4096/manual_time", + "family_index": 9, + "per_family_instance_index": 4, + "run_name": "BM_pgm_dispatch/one_processor_all_cores_128_rta/4096/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 10, + "real_time": 6.9217100000000000e+07, + "cpu_time": 6.1268199999986449e+04, + "time_unit": "ns", + "IterationTime": 6.9217100000000000e-06 + }, + { + "name": "BM_pgm_dispatch/one_processor_all_cores_128_rta/8192/manual_time", + "family_index": 9, + "per_family_instance_index": 5, + "run_name": "BM_pgm_dispatch/one_processor_all_cores_128_rta/8192/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 10, + "real_time": 7.2028100000000000e+07, + "cpu_time": 6.2019299999960254e+04, + "time_unit": "ns", + "IterationTime": 7.2028099999999997e-06 + }, + { + "name": "BM_pgm_dispatch/one_processors_all_cores_1_rta/256/manual_time", + "family_index": 10, + "per_family_instance_index": 0, + "run_name": "BM_pgm_dispatch/one_processors_all_cores_1_rta/256/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 16, + "real_time": 4.4310000000000000e+07, + "cpu_time": 6.6404062499914042e+04, + "time_unit": "ns", + "IterationTime": 
4.4310000000000004e-06 + }, + { + "name": "BM_pgm_dispatch/one_processors_all_cores_1_rta/512/manual_time", + "family_index": 10, + "per_family_instance_index": 1, + "run_name": "BM_pgm_dispatch/one_processors_all_cores_1_rta/512/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 16, + "real_time": 4.4408062500000000e+07, + "cpu_time": 7.0484500000023152e+04, + "time_unit": "ns", + "IterationTime": 4.4408062500000003e-06 + }, + { + "name": "BM_pgm_dispatch/one_processors_all_cores_1_rta/1024/manual_time", + "family_index": 10, + "per_family_instance_index": 2, + "run_name": "BM_pgm_dispatch/one_processors_all_cores_1_rta/1024/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 16, + "real_time": 4.4574750000000000e+07, + "cpu_time": 6.4276312499833562e+04, + "time_unit": "ns", + "IterationTime": 4.4574750000000000e-06 + }, + { + "name": "BM_pgm_dispatch/one_processors_all_cores_1_rta/2048/manual_time", + "family_index": 10, + "per_family_instance_index": 3, + "run_name": "BM_pgm_dispatch/one_processors_all_cores_1_rta/2048/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 16, + "real_time": 4.4934375000000000e+07, + "cpu_time": 6.4844687500009713e+04, + "time_unit": "ns", + "IterationTime": 4.4934375000000001e-06 + }, + { + "name": "BM_pgm_dispatch/one_processors_all_cores_1_rta/4096/manual_time", + "family_index": 10, + "per_family_instance_index": 4, + "run_name": "BM_pgm_dispatch/one_processors_all_cores_1_rta/4096/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 15, + "real_time": 4.5797733333333343e+07, + "cpu_time": 6.5104799999933974e+04, + "time_unit": "ns", + "IterationTime": 4.5797733333333340e-06 + }, + { + "name": "BM_pgm_dispatch/one_processors_all_cores_1_rta/8192/manual_time", + 
"family_index": 10, + "per_family_instance_index": 5, + "run_name": "BM_pgm_dispatch/one_processors_all_cores_1_rta/8192/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 15, + "real_time": 4.7226600000000007e+07, + "cpu_time": 6.8267599999908649e+04, + "time_unit": "ns", + "IterationTime": 4.7226600000000011e-06 + }, + { + "name": "BM_pgm_dispatch/all_processors_all_cores_1_rta/256/manual_time", + "family_index": 11, + "per_family_instance_index": 0, + "run_name": "BM_pgm_dispatch/all_processors_all_cores_1_rta/256/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 14, + "real_time": 4.8304642857142866e+07, + "cpu_time": 6.6643785714351878e+04, + "time_unit": "ns", + "IterationTime": 4.8304642857142865e-06 + }, + { + "name": "BM_pgm_dispatch/all_processors_all_cores_1_rta/512/manual_time", + "family_index": 11, + "per_family_instance_index": 1, + "run_name": "BM_pgm_dispatch/all_processors_all_cores_1_rta/512/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 14, + "real_time": 4.8547071428571425e+07, + "cpu_time": 6.6522785714237136e+04, + "time_unit": "ns", + "IterationTime": 4.8547071428571433e-06 + }, + { + "name": "BM_pgm_dispatch/all_processors_all_cores_1_rta/1024/manual_time", + "family_index": 11, + "per_family_instance_index": 2, + "run_name": "BM_pgm_dispatch/all_processors_all_cores_1_rta/1024/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 14, + "real_time": 4.8982357142857142e+07, + "cpu_time": 6.6423642857265411e+04, + "time_unit": "ns", + "IterationTime": 4.8982357142857149e-06 + }, + { + "name": "BM_pgm_dispatch/all_processors_all_cores_1_rta/2048/manual_time", + "family_index": 11, + "per_family_instance_index": 3, + "run_name": 
"BM_pgm_dispatch/all_processors_all_cores_1_rta/2048/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 14, + "real_time": 5.0782000000000000e+07, + "cpu_time": 6.6462214285729307e+04, + "time_unit": "ns", + "IterationTime": 5.0781999999999999e-06 + }, + { + "name": "BM_pgm_dispatch/all_processors_all_cores_1_rta/4096/manual_time", + "family_index": 11, + "per_family_instance_index": 4, + "run_name": "BM_pgm_dispatch/all_processors_all_cores_1_rta/4096/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 13, + "real_time": 5.4430846153846145e+07, + "cpu_time": 6.6340692307529884e+04, + "time_unit": "ns", + "IterationTime": 5.4430846153846142e-06 + }, + { + "name": "BM_pgm_dispatch/all_processors_all_cores_1_rta/8192/manual_time", + "family_index": 11, + "per_family_instance_index": 5, + "run_name": "BM_pgm_dispatch/all_processors_all_cores_1_rta/8192/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 8, + "real_time": 8.4718750000000015e+07, + "cpu_time": 6.7267749999988519e+04, + "time_unit": "ns", + "IterationTime": 8.4718750000000009e-06 + }, + { + "name": "BM_pgm_dispatch/all_processors_all_cores_32_rta/256/manual_time", + "family_index": 12, + "per_family_instance_index": 0, + "run_name": "BM_pgm_dispatch/all_processors_all_cores_32_rta/256/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 12, + "real_time": 5.9192333333333321e+07, + "cpu_time": 6.2472916666500569e+04, + "time_unit": "ns", + "IterationTime": 5.9192333333333326e-06 + }, + { + "name": "BM_pgm_dispatch/all_processors_all_cores_32_rta/512/manual_time", + "family_index": 12, + "per_family_instance_index": 1, + "run_name": "BM_pgm_dispatch/all_processors_all_cores_32_rta/512/manual_time", + "run_type": "iteration", + "repetitions": 1, + 
"repetition_index": 0, + "threads": 1, + "iterations": 12, + "real_time": 5.9924333333333336e+07, + "cpu_time": 6.3760666666468074e+04, + "time_unit": "ns", + "IterationTime": 5.9924333333333321e-06 + }, + { + "name": "BM_pgm_dispatch/all_processors_all_cores_32_rta/1024/manual_time", + "family_index": 12, + "per_family_instance_index": 2, + "run_name": "BM_pgm_dispatch/all_processors_all_cores_32_rta/1024/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 11, + "real_time": 6.2028363636363626e+07, + "cpu_time": 6.2572454545449902e+04, + "time_unit": "ns", + "IterationTime": 6.2028363636363623e-06 + }, + { + "name": "BM_pgm_dispatch/all_processors_all_cores_32_rta/2048/manual_time", + "family_index": 12, + "per_family_instance_index": 3, + "run_name": "BM_pgm_dispatch/all_processors_all_cores_32_rta/2048/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 11, + "real_time": 6.3189363636363633e+07, + "cpu_time": 6.1813181818011610e+04, + "time_unit": "ns", + "IterationTime": 6.3189363636363636e-06 + }, + { + "name": "BM_pgm_dispatch/all_processors_all_cores_32_rta/4096/manual_time", + "family_index": 12, + "per_family_instance_index": 4, + "run_name": "BM_pgm_dispatch/all_processors_all_cores_32_rta/4096/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 10, + "real_time": 6.9267000000000015e+07, + "cpu_time": 6.8087600000055201e+04, + "time_unit": "ns", + "IterationTime": 6.9267000000000010e-06 + }, + { + "name": "BM_pgm_dispatch/all_processors_all_cores_32_rta/8192/manual_time", + "family_index": 12, + "per_family_instance_index": 5, + "run_name": "BM_pgm_dispatch/all_processors_all_cores_32_rta/8192/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 7, + "real_time": 9.3737714285714284e+07, + 
"cpu_time": 6.5212571428219242e+04, + "time_unit": "ns", + "IterationTime": 9.3737714285714280e-06 + }, + { + "name": "BM_pgm_dispatch/all_processors_all_cores_128_rta/256/manual_time", + "family_index": 13, + "per_family_instance_index": 0, + "run_name": "BM_pgm_dispatch/all_processors_all_cores_128_rta/256/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "error_occurred": true, + "error_message": "Test failed", + "iterations": 0, + "real_time": 0.0000000000000000e+00, + "cpu_time": 0.0000000000000000e+00, + "time_unit": "ns" + }, + { + "name": "BM_pgm_dispatch/all_processors_all_cores_128_rta/512/manual_time", + "family_index": 13, + "per_family_instance_index": 1, + "run_name": "BM_pgm_dispatch/all_processors_all_cores_128_rta/512/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "error_occurred": true, + "error_message": "Test failed", + "iterations": 0, + "real_time": 0.0000000000000000e+00, + "cpu_time": 0.0000000000000000e+00, + "time_unit": "ns" + }, + { + "name": "BM_pgm_dispatch/all_processors_all_cores_128_rta/1024/manual_time", + "family_index": 13, + "per_family_instance_index": 2, + "run_name": "BM_pgm_dispatch/all_processors_all_cores_128_rta/1024/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "error_occurred": true, + "error_message": "Test failed", + "iterations": 0, + "real_time": 0.0000000000000000e+00, + "cpu_time": 0.0000000000000000e+00, + "time_unit": "ns" + }, + { + "name": "BM_pgm_dispatch/all_processors_all_cores_128_rta/2048/manual_time", + "family_index": 13, + "per_family_instance_index": 3, + "run_name": "BM_pgm_dispatch/all_processors_all_cores_128_rta/2048/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 6, + "real_time": 1.2468000000000000e+08, + "cpu_time": 7.6853500000121727e+04, + "time_unit": "ns", 
+ "IterationTime": 1.2467999999999998e-05 + }, + { + "name": "BM_pgm_dispatch/all_processors_all_cores_128_rta/4096/manual_time", + "family_index": 13, + "per_family_instance_index": 4, + "run_name": "BM_pgm_dispatch/all_processors_all_cores_128_rta/4096/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 5, + "real_time": 1.3172700000000003e+08, + "cpu_time": 7.3567600000501450e+04, + "time_unit": "ns", + "IterationTime": 1.3172700000000002e-05 + }, + { + "name": "BM_pgm_dispatch/all_processors_all_cores_128_rta/8192/manual_time", + "family_index": 13, + "per_family_instance_index": 5, + "run_name": "BM_pgm_dispatch/all_processors_all_cores_128_rta/8192/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 5, + "real_time": 1.4582400000000000e+08, + "cpu_time": 7.1963799999252849e+04, + "time_unit": "ns", + "IterationTime": 1.4582399999999999e-05 + }, + { + "name": "BM_pgm_dispatch/all_processors_1_sem_1_core_1_processor_trace/256/manual_time", + "family_index": 14, + "per_family_instance_index": 0, + "run_name": "BM_pgm_dispatch/all_processors_1_sem_1_core_1_processor_trace/256/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "error_occurred": true, + "error_message": "Test failed", + "iterations": 0, + "real_time": 0.0000000000000000e+00, + "cpu_time": 0.0000000000000000e+00, + "time_unit": "ns" + }, + { + "name": "BM_pgm_dispatch/all_processors_1_sem_1_core_1_processor_trace/512/manual_time", + "family_index": 14, + "per_family_instance_index": 1, + "run_name": "BM_pgm_dispatch/all_processors_1_sem_1_core_1_processor_trace/512/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "error_occurred": true, + "error_message": "Test failed", + "iterations": 0, + "real_time": 0.0000000000000000e+00, + "cpu_time": 0.0000000000000000e+00, 
+ "time_unit": "ns" + }, + { + "name": "BM_pgm_dispatch/all_processors_1_sem_1_core_1_processor_trace/1024/manual_time", + "family_index": 14, + "per_family_instance_index": 2, + "run_name": "BM_pgm_dispatch/all_processors_1_sem_1_core_1_processor_trace/1024/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "error_occurred": true, + "error_message": "Test failed", + "iterations": 0, + "real_time": 0.0000000000000000e+00, + "cpu_time": 0.0000000000000000e+00, + "time_unit": "ns" + }, + { + "name": "BM_pgm_dispatch/all_processors_1_sem_1_core_1_processor_trace/2048/manual_time", + "family_index": 14, + "per_family_instance_index": 3, + "run_name": "BM_pgm_dispatch/all_processors_1_sem_1_core_1_processor_trace/2048/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "error_occurred": true, + "error_message": "Test failed", + "iterations": 0, + "real_time": 0.0000000000000000e+00, + "cpu_time": 0.0000000000000000e+00, + "time_unit": "ns" + }, + { + "name": "BM_pgm_dispatch/all_processors_1_sem_1_core_1_processor_trace/4096/manual_time", + "family_index": 14, + "per_family_instance_index": 4, + "run_name": "BM_pgm_dispatch/all_processors_1_sem_1_core_1_processor_trace/4096/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "error_occurred": true, + "error_message": "Test failed", + "iterations": 0, + "real_time": 0.0000000000000000e+00, + "cpu_time": 0.0000000000000000e+00, + "time_unit": "ns" + }, + { + "name": "BM_pgm_dispatch/all_processors_1_sem_1_core_1_processor_trace/8192/manual_time", + "family_index": 14, + "per_family_instance_index": 5, + "run_name": "BM_pgm_dispatch/all_processors_1_sem_1_core_1_processor_trace/8192/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "error_occurred": true, + "error_message": "Test failed", + "iterations": 0, + 
"real_time": 0.0000000000000000e+00, + "cpu_time": 0.0000000000000000e+00, + "time_unit": "ns" + }, + { + "name": "BM_pgm_dispatch/all_processors_1_sem_all_cores_1_processor_trace/256/manual_time", + "family_index": 15, + "per_family_instance_index": 0, + "run_name": "BM_pgm_dispatch/all_processors_1_sem_all_cores_1_processor_trace/256/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "error_occurred": true, + "error_message": "Test failed", + "iterations": 0, + "real_time": 0.0000000000000000e+00, + "cpu_time": 0.0000000000000000e+00, + "time_unit": "ns" + }, + { + "name": "BM_pgm_dispatch/all_processors_1_sem_all_cores_1_processor_trace/512/manual_time", + "family_index": 15, + "per_family_instance_index": 1, + "run_name": "BM_pgm_dispatch/all_processors_1_sem_all_cores_1_processor_trace/512/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "error_occurred": true, + "error_message": "Test failed", + "iterations": 0, + "real_time": 0.0000000000000000e+00, + "cpu_time": 0.0000000000000000e+00, + "time_unit": "ns" + }, + { + "name": "BM_pgm_dispatch/all_processors_1_sem_all_cores_1_processor_trace/1024/manual_time", + "family_index": 15, + "per_family_instance_index": 2, + "run_name": "BM_pgm_dispatch/all_processors_1_sem_all_cores_1_processor_trace/1024/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "error_occurred": true, + "error_message": "Test failed", + "iterations": 0, + "real_time": 0.0000000000000000e+00, + "cpu_time": 0.0000000000000000e+00, + "time_unit": "ns" + }, + { + "name": "BM_pgm_dispatch/all_processors_1_sem_all_cores_1_processor_trace/2048/manual_time", + "family_index": 15, + "per_family_instance_index": 3, + "run_name": "BM_pgm_dispatch/all_processors_1_sem_all_cores_1_processor_trace/2048/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + 
"threads": 1, + "error_occurred": true, + "error_message": "Test failed", + "iterations": 0, + "real_time": 0.0000000000000000e+00, + "cpu_time": 0.0000000000000000e+00, + "time_unit": "ns" + }, + { + "name": "BM_pgm_dispatch/all_processors_1_sem_all_cores_1_processor_trace/4096/manual_time", + "family_index": 15, + "per_family_instance_index": 4, + "run_name": "BM_pgm_dispatch/all_processors_1_sem_all_cores_1_processor_trace/4096/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "error_occurred": true, + "error_message": "Test failed", + "iterations": 0, + "real_time": 0.0000000000000000e+00, + "cpu_time": 0.0000000000000000e+00, + "time_unit": "ns" + }, + { + "name": "BM_pgm_dispatch/all_processors_1_sem_all_cores_1_processor_trace/8192/manual_time", + "family_index": 15, + "per_family_instance_index": 5, + "run_name": "BM_pgm_dispatch/all_processors_1_sem_all_cores_1_processor_trace/8192/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "error_occurred": true, + "error_message": "Test failed", + "iterations": 0, + "real_time": 0.0000000000000000e+00, + "cpu_time": 0.0000000000000000e+00, + "time_unit": "ns" + }, + { + "name": "BM_pgm_dispatch/maxed_config_params_trace/256/manual_time", + "family_index": 16, + "per_family_instance_index": 0, + "run_name": "BM_pgm_dispatch/maxed_config_params_trace/256/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 5, + "real_time": 1.5446500000000000e+08, + "cpu_time": 6.8045400000471534e+04, + "time_unit": "ns", + "IterationTime": 1.5446500000000002e-05 + }, + { + "name": "BM_pgm_dispatch/maxed_config_params_trace/512/manual_time", + "family_index": 16, + "per_family_instance_index": 1, + "run_name": "BM_pgm_dispatch/maxed_config_params_trace/512/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + 
"iterations": 5, + "real_time": 1.5482120000000000e+08, + "cpu_time": 7.2377399999368208e+04, + "time_unit": "ns", + "IterationTime": 1.5482120000000000e-05 + }, + { + "name": "BM_pgm_dispatch/maxed_config_params_trace/1024/manual_time", + "family_index": 16, + "per_family_instance_index": 2, + "run_name": "BM_pgm_dispatch/maxed_config_params_trace/1024/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4, + "real_time": 1.5666750000000003e+08, + "cpu_time": 6.9431249999141190e+04, + "time_unit": "ns", + "IterationTime": 1.5666750000000004e-05 + }, + { + "name": "BM_pgm_dispatch/maxed_config_params_trace/2048/manual_time", + "family_index": 16, + "per_family_instance_index": 3, + "run_name": "BM_pgm_dispatch/maxed_config_params_trace/2048/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4, + "real_time": 1.5921049999999997e+08, + "cpu_time": 7.0151750000135842e+04, + "time_unit": "ns", + "IterationTime": 1.5921049999999996e-05 + }, + { + "name": "BM_pgm_dispatch/maxed_config_params_trace/4096/manual_time", + "family_index": 16, + "per_family_instance_index": 4, + "run_name": "BM_pgm_dispatch/maxed_config_params_trace/4096/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4, + "real_time": 1.6594250000000000e+08, + "cpu_time": 7.4581749998969826e+04, + "time_unit": "ns", + "IterationTime": 1.6594250000000001e-05 + }, + { + "name": "BM_pgm_dispatch/maxed_config_params_trace/8192/manual_time", + "family_index": 16, + "per_family_instance_index": 5, + "run_name": "BM_pgm_dispatch/maxed_config_params_trace/8192/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4, + "real_time": 1.7984125000000000e+08, + "cpu_time": 7.0153999999433843e+04, + "time_unit": "ns", + "IterationTime": 1.7984125000000001e-05 + 
}, + { + "name": "BM_pgm_dispatch/kernel_groups_trace/256/manual_time", + "family_index": 17, + "per_family_instance_index": 0, + "run_name": "BM_pgm_dispatch/kernel_groups_trace/256/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 5, + "real_time": 1.2802379999999997e+08, + "cpu_time": 1.0889069620000100e+08, + "time_unit": "ns", + "IterationTime": 1.2802379999999998e-05 + }, + { + "name": "BM_pgm_dispatch/kernel_groups_trace/512/manual_time", + "family_index": 17, + "per_family_instance_index": 1, + "run_name": "BM_pgm_dispatch/kernel_groups_trace/512/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 5, + "real_time": 1.2877700000000000e+08, + "cpu_time": 1.0954924860000119e+08, + "time_unit": "ns", + "IterationTime": 1.2877700000000000e-05 + }, + { + "name": "BM_pgm_dispatch/kernel_groups_trace/1024/manual_time", + "family_index": 17, + "per_family_instance_index": 2, + "run_name": "BM_pgm_dispatch/kernel_groups_trace/1024/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 5, + "real_time": 1.3046200000000000e+08, + "cpu_time": 1.1100781959999892e+08, + "time_unit": "ns", + "IterationTime": 1.3046199999999999e-05 + }, + { + "name": "BM_pgm_dispatch/kernel_groups_trace/2048/manual_time", + "family_index": 17, + "per_family_instance_index": 3, + "run_name": "BM_pgm_dispatch/kernel_groups_trace/2048/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 5, + "real_time": 1.3564680000000000e+08, + "cpu_time": 1.1545340979999992e+08, + "time_unit": "ns", + "IterationTime": 1.3564679999999998e-05 + }, + { + "name": "BM_pgm_dispatch/kernel_groups_trace/4096/manual_time", + "family_index": 17, + "per_family_instance_index": 4, + "run_name": "BM_pgm_dispatch/kernel_groups_trace/4096/manual_time", + 
"run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3, + "real_time": 1.8512733333333334e+08, + "cpu_time": 1.5813299466666800e+08, + "time_unit": "ns", + "IterationTime": 1.8512733333333333e-05 + }, + { + "name": "BM_pgm_dispatch/kernel_groups_trace/8192/manual_time", + "family_index": 17, + "per_family_instance_index": 5, + "run_name": "BM_pgm_dispatch/kernel_groups_trace/8192/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2, + "real_time": 3.1612800000000000e+08, + "cpu_time": 2.7089095999999559e+08, + "time_unit": "ns", + "IterationTime": 3.1612800000000003e-05 + }, + { + "name": "BM_pgm_dispatch/kernel_groups_4_shadow/256/manual_time", + "family_index": 18, + "per_family_instance_index": 0, + "run_name": "BM_pgm_dispatch/kernel_groups_4_shadow/256/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 5, + "real_time": 1.3127100000000000e+08, + "cpu_time": 1.2722860659999923e+08, + "time_unit": "ns", + "IterationTime": 1.3127100000000000e-05 + }, + { + "name": "BM_pgm_dispatch/kernel_groups_4_shadow/512/manual_time", + "family_index": 18, + "per_family_instance_index": 1, + "run_name": "BM_pgm_dispatch/kernel_groups_4_shadow/512/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 5, + "real_time": 1.3318980000000000e+08, + "cpu_time": 1.2908235440000054e+08, + "time_unit": "ns", + "IterationTime": 1.3318980000000001e-05 + }, + { + "name": "BM_pgm_dispatch/kernel_groups_4_shadow/1024/manual_time", + "family_index": 18, + "per_family_instance_index": 2, + "run_name": "BM_pgm_dispatch/kernel_groups_4_shadow/1024/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 5, + "real_time": 1.3365799999999997e+08, + "cpu_time": 1.2955394899999817e+08, + 
"time_unit": "ns", + "IterationTime": 1.3365799999999997e-05 + }, + { + "name": "BM_pgm_dispatch/kernel_groups_4_shadow/2048/manual_time", + "family_index": 18, + "per_family_instance_index": 3, + "run_name": "BM_pgm_dispatch/kernel_groups_4_shadow/2048/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 5, + "real_time": 1.4412060000000003e+08, + "cpu_time": 1.3974434579999977e+08, + "time_unit": "ns", + "IterationTime": 1.4412060000000001e-05 + }, + { + "name": "BM_pgm_dispatch/kernel_groups_4_shadow/4096/manual_time", + "family_index": 18, + "per_family_instance_index": 4, + "run_name": "BM_pgm_dispatch/kernel_groups_4_shadow/4096/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3, + "real_time": 1.9911533333333328e+08, + "cpu_time": 1.9309829833332989e+08, + "time_unit": "ns", + "IterationTime": 1.9911533333333327e-05 + }, + { + "name": "BM_pgm_dispatch/kernel_groups_4_shadow/8192/manual_time", + "family_index": 18, + "per_family_instance_index": 5, + "run_name": "BM_pgm_dispatch/kernel_groups_4_shadow/8192/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2, + "real_time": 3.0395600000000000e+08, + "cpu_time": 2.9476834249999940e+08, + "time_unit": "ns", + "IterationTime": 3.0395599999999997e-05 + }, + { + "name": "BM_pgm_dispatch/kernel_groups_5_shadow/256/manual_time", + "family_index": 19, + "per_family_instance_index": 0, + "run_name": "BM_pgm_dispatch/kernel_groups_5_shadow/256/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 5, + "real_time": 1.3626280000000000e+08, + "cpu_time": 1.3275913499999774e+08, + "time_unit": "ns", + "IterationTime": 1.3626280000000001e-05 + }, + { + "name": "BM_pgm_dispatch/kernel_groups_5_shadow/512/manual_time", + "family_index": 19, + 
"per_family_instance_index": 1, + "run_name": "BM_pgm_dispatch/kernel_groups_5_shadow/512/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 5, + "real_time": 1.3687300000000000e+08, + "cpu_time": 1.3335982400000148e+08, + "time_unit": "ns", + "IterationTime": 1.3687299999999999e-05 + }, + { + "name": "BM_pgm_dispatch/kernel_groups_5_shadow/1024/manual_time", + "family_index": 19, + "per_family_instance_index": 2, + "run_name": "BM_pgm_dispatch/kernel_groups_5_shadow/1024/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 5, + "real_time": 1.4039160000000003e+08, + "cpu_time": 1.3680257439999932e+08, + "time_unit": "ns", + "IterationTime": 1.4039160000000002e-05 + }, + { + "name": "BM_pgm_dispatch/kernel_groups_5_shadow/2048/manual_time", + "family_index": 19, + "per_family_instance_index": 3, + "run_name": "BM_pgm_dispatch/kernel_groups_5_shadow/2048/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4, + "real_time": 1.5477800000000000e+08, + "cpu_time": 1.5087014749999738e+08, + "time_unit": "ns", + "IterationTime": 1.5477799999999999e-05 + }, + { + "name": "BM_pgm_dispatch/kernel_groups_5_shadow/4096/manual_time", + "family_index": 19, + "per_family_instance_index": 4, + "run_name": "BM_pgm_dispatch/kernel_groups_5_shadow/4096/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3, + "real_time": 2.0902100000000000e+08, + "cpu_time": 2.0375469066667053e+08, + "time_unit": "ns", + "IterationTime": 2.0902100000000004e-05 + }, + { + "name": "BM_pgm_dispatch/kernel_groups_5_shadow/8192/manual_time", + "family_index": 19, + "per_family_instance_index": 5, + "run_name": "BM_pgm_dispatch/kernel_groups_5_shadow/8192/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + 
"threads": 1, + "iterations": 2, + "real_time": 3.1403350000000000e+08, + "cpu_time": 3.0612891250000018e+08, + "time_unit": "ns", + "IterationTime": 3.1403350000000001e-05 + }, + { + "name": "BM_pgm_dispatch/eth_dispatch/256/manual_time", + "family_index": 20, + "per_family_instance_index": 0, + "run_name": "BM_pgm_dispatch/eth_dispatch/256/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "error_occurred": true, + "error_message": "Program creation failed", + "iterations": 0, + "real_time": 0.0000000000000000e+00, + "cpu_time": 0.0000000000000000e+00, + "time_unit": "ns" + }, + { + "name": "BM_pgm_dispatch/eth_dispatch/512/manual_time", + "family_index": 20, + "per_family_instance_index": 1, + "run_name": "BM_pgm_dispatch/eth_dispatch/512/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "error_occurred": true, + "error_message": "Program creation failed", + "iterations": 0, + "real_time": 0.0000000000000000e+00, + "cpu_time": 0.0000000000000000e+00, + "time_unit": "ns" + }, + { + "name": "BM_pgm_dispatch/eth_dispatch/1024/manual_time", + "family_index": 20, + "per_family_instance_index": 2, + "run_name": "BM_pgm_dispatch/eth_dispatch/1024/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "error_occurred": true, + "error_message": "Program creation failed", + "iterations": 0, + "real_time": 0.0000000000000000e+00, + "cpu_time": 0.0000000000000000e+00, + "time_unit": "ns" + }, + { + "name": "BM_pgm_dispatch/eth_dispatch/2048/manual_time", + "family_index": 20, + "per_family_instance_index": 3, + "run_name": "BM_pgm_dispatch/eth_dispatch/2048/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "error_occurred": true, + "error_message": "Program creation failed", + "iterations": 0, + "real_time": 0.0000000000000000e+00, + "cpu_time": 
0.0000000000000000e+00, + "time_unit": "ns" + }, + { + "name": "BM_pgm_dispatch/eth_dispatch/4096/manual_time", + "family_index": 20, + "per_family_instance_index": 4, + "run_name": "BM_pgm_dispatch/eth_dispatch/4096/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "error_occurred": true, + "error_message": "Program creation failed", + "iterations": 0, + "real_time": 0.0000000000000000e+00, + "cpu_time": 0.0000000000000000e+00, + "time_unit": "ns" + }, + { + "name": "BM_pgm_dispatch/eth_dispatch/8192/manual_time", + "family_index": 20, + "per_family_instance_index": 5, + "run_name": "BM_pgm_dispatch/eth_dispatch/8192/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "error_occurred": true, + "error_message": "Program creation failed", + "iterations": 0, + "real_time": 0.0000000000000000e+00, + "cpu_time": 0.0000000000000000e+00, + "time_unit": "ns" + }, + { + "name": "BM_pgm_dispatch/tensix_eth_2/256/manual_time", + "family_index": 21, + "per_family_instance_index": 0, + "run_name": "BM_pgm_dispatch/tensix_eth_2/256/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 5, + "real_time": 1.3839739999999997e+08, + "cpu_time": 1.1774922120000044e+08, + "time_unit": "ns", + "IterationTime": 1.3839739999999998e-05 + }, + { + "name": "BM_pgm_dispatch/tensix_eth_2/512/manual_time", + "family_index": 21, + "per_family_instance_index": 1, + "run_name": "BM_pgm_dispatch/tensix_eth_2/512/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 5, + "real_time": 1.3916380000000000e+08, + "cpu_time": 1.1841296220000005e+08, + "time_unit": "ns", + "IterationTime": 1.3916380000000000e-05 + }, + { + "name": "BM_pgm_dispatch/tensix_eth_2/1024/manual_time", + "family_index": 21, + "per_family_instance_index": 2, + "run_name": 
"BM_pgm_dispatch/tensix_eth_2/1024/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4, + "real_time": 1.4189649999999997e+08, + "cpu_time": 1.2090796049999852e+08, + "time_unit": "ns", + "IterationTime": 1.4189649999999997e-05 + }, + { + "name": "BM_pgm_dispatch/tensix_eth_2/2048/manual_time", + "family_index": 21, + "per_family_instance_index": 3, + "run_name": "BM_pgm_dispatch/tensix_eth_2/2048/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4, + "real_time": 1.4588925000000000e+08, + "cpu_time": 1.2433331975000073e+08, + "time_unit": "ns", + "IterationTime": 1.4588924999999999e-05 + }, + { + "name": "BM_pgm_dispatch/tensix_eth_2/4096/manual_time", + "family_index": 21, + "per_family_instance_index": 4, + "run_name": "BM_pgm_dispatch/tensix_eth_2/4096/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4, + "real_time": 1.7947000000000003e+08, + "cpu_time": 1.5296343174999905e+08, + "time_unit": "ns", + "IterationTime": 1.7947000000000003e-05 + }, + { + "name": "BM_pgm_dispatch/tensix_eth_2/8192/manual_time", + "family_index": 21, + "per_family_instance_index": 5, + "run_name": "BM_pgm_dispatch/tensix_eth_2/8192/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2, + "real_time": 2.9675700000000000e+08, + "cpu_time": 2.5447190999999946e+08, + "time_unit": "ns", + "IterationTime": 2.9675700000000001e-05 + }, + { + "name": "BM_pgm_dispatch/tensix_eth_2_4_shadow/256/manual_time", + "family_index": 22, + "per_family_instance_index": 0, + "run_name": "BM_pgm_dispatch/tensix_eth_2_4_shadow/256/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4, + "real_time": 1.5604900000000000e+08, + "cpu_time": 1.5127179150000104e+08, + "time_unit": 
"ns", + "IterationTime": 1.5604900000000000e-05 + }, + { + "name": "BM_pgm_dispatch/tensix_eth_2_4_shadow/512/manual_time", + "family_index": 22, + "per_family_instance_index": 1, + "run_name": "BM_pgm_dispatch/tensix_eth_2_4_shadow/512/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4, + "real_time": 1.5676550000000000e+08, + "cpu_time": 1.5197227674999779e+08, + "time_unit": "ns", + "IterationTime": 1.5676549999999998e-05 + }, + { + "name": "BM_pgm_dispatch/tensix_eth_2_4_shadow/1024/manual_time", + "family_index": 22, + "per_family_instance_index": 2, + "run_name": "BM_pgm_dispatch/tensix_eth_2_4_shadow/1024/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4, + "real_time": 1.5828975000000000e+08, + "cpu_time": 1.5345843274999994e+08, + "time_unit": "ns", + "IterationTime": 1.5828974999999999e-05 + }, + { + "name": "BM_pgm_dispatch/tensix_eth_2_4_shadow/2048/manual_time", + "family_index": 22, + "per_family_instance_index": 3, + "run_name": "BM_pgm_dispatch/tensix_eth_2_4_shadow/2048/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 4, + "real_time": 1.7030375000000000e+08, + "cpu_time": 1.6515184825000161e+08, + "time_unit": "ns", + "IterationTime": 1.7030375000000000e-05 + }, + { + "name": "BM_pgm_dispatch/tensix_eth_2_4_shadow/4096/manual_time", + "family_index": 22, + "per_family_instance_index": 4, + "run_name": "BM_pgm_dispatch/tensix_eth_2_4_shadow/4096/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 3, + "real_time": 2.2415500000000000e+08, + "cpu_time": 2.1738842433333144e+08, + "time_unit": "ns", + "IterationTime": 2.2415499999999999e-05 + }, + { + "name": "BM_pgm_dispatch/tensix_eth_2_4_shadow/8192/manual_time", + "family_index": 22, + "per_family_instance_index": 5, + 
"run_name": "BM_pgm_dispatch/tensix_eth_2_4_shadow/8192/manual_time", + "run_type": "iteration", + "repetitions": 1, + "repetition_index": 0, + "threads": 1, + "iterations": 2, + "real_time": 3.3141250000000000e+08, + "cpu_time": 3.2145265750000364e+08, + "time_unit": "ns", + "IterationTime": 3.3141249999999997e-05 + } + ] +} diff --git a/tests/tt_metal/tt_metal/perf_microbenchmark/dispatch/test_pgm_dispatch.cpp b/tests/tt_metal/tt_metal/perf_microbenchmark/dispatch/test_pgm_dispatch.cpp index 88ba15ff003..2a9a44b8c6d 100644 --- a/tests/tt_metal/tt_metal/perf_microbenchmark/dispatch/test_pgm_dispatch.cpp +++ b/tests/tt_metal/tt_metal/perf_microbenchmark/dispatch/test_pgm_dispatch.cpp @@ -8,6 +8,7 @@ #include "tt_metal/impl/dispatch/command_queue.hpp" #include "tt_metal/impl/device/device.hpp" #include "tt_metal/llrt/rtoptions.hpp" +#include constexpr uint32_t DEFAULT_ITERATIONS = 10000; constexpr uint32_t DEFAULT_WARMUP_ITERATIONS = 100; @@ -47,8 +48,26 @@ bool use_global_g; bool use_trace_g; bool dispatch_from_eth_g; -void init(int argc, char** argv) { - std::vector input_args(argv, argv + argc); +std::tuple get_core_count() { + uint32_t core_x = 0; + uint32_t core_y = 0; + + std::string arch_name{getenv("ARCH_NAME")}; + if (arch_name == "grayskull") { + core_x = 11; + core_y = 8; + } else if (arch_name == "wormhole_b0") { + core_x = 7; + core_y = 6; + } else if (arch_name == "blackhole") { + core_x = 12; + core_y = 9; + } + return std::make_tuple(core_x, core_y); +} + +void init(std::vector input_args) { + auto core_count = get_core_count(); if (test_args::has_command_option(input_args, "-h") || test_args::has_command_option(input_args, "--help")) { log_info(LogTest, "Usage:"); @@ -80,11 +99,22 @@ void init(int argc, char** argv) { log_info(LogTest, " -z: enable dispatch lazy mode (default disabled)"); log_info(LogTest, " -tr: enable trace (default disabled)"); log_info(LogTest, " -de: dispatch from eth cores (default tensix)"); + log_info( + LogTest, + " -ac: 
use all viable worker cores (default {}x{})", + std::get<0>(core_count), + std::get<1>(core_count)); exit(0); } uint32_t core_x = test_args::get_command_option_uint32(input_args, "-x", 0); uint32_t core_y = test_args::get_command_option_uint32(input_args, "-y", 0); + + if (test_args::has_command_option(input_args, "-ac")) { + core_x = std::get<0>(core_count); + core_y = std::get<1>(core_count); + } + warmup_iterations_g = test_args::get_command_option_uint32(input_args, "-w", DEFAULT_WARMUP_ITERATIONS); iterations_g = test_args::get_command_option_uint32(input_args, "-i", DEFAULT_ITERATIONS); kernel_size_g = test_args::get_command_option_uint32(input_args, "-s", DEFAULT_KERNEL_SIZE_K * 1024); @@ -153,7 +183,7 @@ void set_runtime_args( } } -void initialize_program(tt_metal::Device* device, tt_metal::Program& program, uint32_t run_cycles) { +bool initialize_program(tt_metal::Device* device, tt_metal::Program& program, uint32_t run_cycles) { program = tt_metal::CreateProgram(); std::map defines = {{"KERNEL_BYTES", std::to_string(kernel_size_g)}}; @@ -231,7 +261,7 @@ void initialize_program(tt_metal::Device* device, tt_metal::Program& program, ui "Requested number of erisc cores {} exceeds actual erisc core count {}", erisc_count_g, erisc_cores.size()); - exit(0); + return false; } auto erisc_core = erisc_cores.begin(); for (uint32_t i = 0; i < erisc_count_g; i++, erisc_core++) { @@ -248,10 +278,47 @@ void initialize_program(tt_metal::Device* device, tt_metal::Program& program, ui tt_metal::SetCommonRuntimeArgs(program, eth_kernel, common_args); } } + return true; } -int main(int argc, char** argv) { - init(argc, argv); +static void BM_pgm_dispatch(benchmark::State& state, std::string args) { + std::vector strs; + char ch = ' '; + size_t current_pos = args.find(ch); + size_t prev_pos = 0; + + while (current_pos != std::string::npos) { + strs.push_back(args.substr(prev_pos, current_pos - prev_pos)); + prev_pos = current_pos + 1; + + current_pos = args.find(ch, 
prev_pos); + } + + strs.push_back(args.substr(prev_pos)); + + init(strs); + kernel_size_g = state.range(0); + if (use_trace_g) { + log_info(LogTest, "Running with trace enabled"); + } + log_info(LogTest, "Warmup iterations: {}", warmup_iterations_g); + log_info(LogTest, "Iterations: {}", iterations_g); + log_info( + LogTest, "Grid: ({}-{}) ({} cores)", workers_g.start_coord.str(), workers_g.end_coord.str(), workers_g.size()); + log_info(LogTest, "Kernel size: {}", kernel_size_g); + if (nfast_kernels_g != 0) { + log_info(LogTest, "Fast kernel cycles: {}", fast_kernel_cycles_g); + log_info(LogTest, "Slow kernel cycles: {}", slow_kernel_cycles_g); + log_info(LogTest, "{} fast kernels between slow kernels", nfast_kernels_g); + } else { + log_info(LogTest, "Kernel cycles: {}", slow_kernel_cycles_g); + } + log_info(LogTest, "KGs: {}", n_kgs_g); + log_info(LogTest, "CBs: {}", n_cbs_g); + log_info(LogTest, "UniqueRTArgs: {}", n_args_g); + log_info(LogTest, "CommonRTArgs: {}", n_common_args_g); + log_info(LogTest, "Sems: {}", n_sems_g); + log_info(LogTest, "Lazy: {}", lazy_g); tt::llrt::RunTimeOptions::get_instance().set_kernels_nullified(true); @@ -264,8 +331,14 @@ int main(int argc, char** argv) { CommandQueue& cq = device->command_queue(); tt_metal::Program program[2]; - initialize_program(device, program[0], slow_kernel_cycles_g); - initialize_program(device, program[1], fast_kernel_cycles_g); + if (!initialize_program(device, program[0], slow_kernel_cycles_g)) { + state.SkipWithError("Program creation failed"); + return; + } + if (!initialize_program(device, program[0], fast_kernel_cycles_g)) { + state.SkipWithError("Program creation failed"); + return; + } // Cache stuff for (int i = 0; i < warmup_iterations_g; i++) { @@ -295,48 +368,31 @@ int main(int argc, char** argv) { // Does this do anything? 
tt_metal::detail::SetLazyCommandQueueMode(true); } - - auto start = std::chrono::system_clock::now(); - if (use_trace_g) { - EnqueueTrace(cq, tid, false); - } else { - main_program_loop(); - } - if (time_just_finish_g) { - start = std::chrono::system_clock::now(); - } - Finish(cq); - auto end = std::chrono::system_clock::now(); - - if (use_trace_g) { - log_info(LogTest, "Running with trace enabled"); - } - log_info(LogTest, "Warmup iterations: {}", warmup_iterations_g); - log_info(LogTest, "Iterations: {}", iterations_g); - log_info( - LogTest, - "Grid: ({}-{}) ({} cores)", - workers_g.start_coord.str(), - workers_g.end_coord.str(), - workers_g.size()); - log_info(LogTest, "Kernel size: {}", kernel_size_g); - if (nfast_kernels_g != 0) { - log_info(LogTest, "Fast kernel cycles: {}", fast_kernel_cycles_g); - log_info(LogTest, "Slow kernel cycles: {}", slow_kernel_cycles_g); - log_info(LogTest, "{} fast kernels between slow kernels", nfast_kernels_g); - } else { - log_info(LogTest, "Kernel cycles: {}", slow_kernel_cycles_g); + // uint64_t total_iterations = 0; + + for (auto _ : state) { + auto start = std::chrono::system_clock::now(); + if (use_trace_g) { + EnqueueTrace(cq, tid, false); + } else { + main_program_loop(); + } + if (time_just_finish_g) { + start = std::chrono::system_clock::now(); + } + Finish(cq); + auto end = std::chrono::system_clock::now(); + auto elapsed_seconds = std::chrono::duration_cast>(end - start); + + state.SetIterationTime(elapsed_seconds.count()); + // total_iterations += iterations_g; + std::chrono::duration elapsed_seconds2 = (end - start); + log_info(LogTest, "Ran in {}us", elapsed_seconds2.count() * 1000 * 1000); + log_info(LogTest, "Ran in {}us per iteration", elapsed_seconds2.count() * 1000 * 1000 / iterations_g); } - log_info(LogTest, "KGs: {}", n_kgs_g); - log_info(LogTest, "CBs: {}", n_cbs_g); - log_info(LogTest, "UniqueRTArgs: {}", n_args_g); - log_info(LogTest, "CommonRTArgs: {}", n_common_args_g); - log_info(LogTest, "Sems: 
{}", n_sems_g); - log_info(LogTest, "Lazy: {}", lazy_g); - std::chrono::duration elapsed_seconds = (end - start); - log_info(LogTest, "Ran in {}us", elapsed_seconds.count() * 1000 * 1000); - log_info(LogTest, "Ran in {}us per iteration", elapsed_seconds.count() * 1000 * 1000 / iterations_g); + state.counters["IterationTime"] = benchmark::Counter( + iterations_g, benchmark::Counter::kIsIterationInvariantRate | benchmark::Counter::kInvert); pass &= tt_metal::CloseDevice(device); } catch (const std::exception& e) { @@ -348,9 +404,89 @@ int main(int argc, char** argv) { if (pass) { log_info(LogTest, "Test Passed"); - return 0; } else { - log_fatal(LogTest, "Test Failed\n"); - return 1; + state.SkipWithError("Test failed"); + } +} + +static void Max12288Args(benchmark::internal::Benchmark* b) { + b->Arg(256)->Arg(512)->Arg(1024)->Arg(2048)->Arg(4096)->Arg(8192)->Arg(12288); +} + +static void Max8192Args(benchmark::internal::Benchmark* b) { + b->Arg(256)->Arg(512)->Arg(1024)->Arg(2048)->Arg(4096)->Arg(8192); +} + +BENCHMARK_CAPTURE(BM_pgm_dispatch, brisc_only_trace, "-w 5000 -n -t -tr")->Apply(Max12288Args)->UseManualTime(); +BENCHMARK_CAPTURE(BM_pgm_dispatch, ncrisc_only_trace, "-w 5000 -b -t -tr")->Apply(Max12288Args)->UseManualTime(); +BENCHMARK_CAPTURE(BM_pgm_dispatch, trisc_only_trace, "-w 5000 -b -n -tr")->Apply(Max12288Args)->UseManualTime(); +BENCHMARK_CAPTURE(BM_pgm_dispatch, brisc_trisc_only_trace, "-w 5000 -n -tr")->Apply(Max12288Args)->UseManualTime(); +BENCHMARK_CAPTURE(BM_pgm_dispatch, all_processors_trace, "-w 5000 -tr")->Apply(Max12288Args)->UseManualTime(); +BENCHMARK_CAPTURE(BM_pgm_dispatch, all_processors_all_cores, "-w 5000 -ac -tr")->Apply(Max12288Args)->UseManualTime(); +BENCHMARK_CAPTURE(BM_pgm_dispatch, all_processors_all_cores_1cb, "-w 5000 -ac -c 1 -tr") + ->Apply(Max8192Args) + ->UseManualTime(); +BENCHMARK_CAPTURE(BM_pgm_dispatch, all_processors_all_cores_32cb, "-w 5000 -ac -c 32 -tr") + ->Apply(Max8192Args) + ->UseManualTime(); 
+BENCHMARK_CAPTURE(BM_pgm_dispatch, all_processors_1_core_1_rta, "-w 5000 -a 1 -tr") + ->Apply(Max8192Args) + ->UseManualTime(); +BENCHMARK_CAPTURE(BM_pgm_dispatch, one_processor_all_cores_128_rta, "-w 5000 -n -t -ac -a 128 -tr") + ->Apply(Max8192Args) + ->UseManualTime(); +BENCHMARK_CAPTURE(BM_pgm_dispatch, one_processors_all_cores_1_rta, "-w 5000 -n -t -ac -a 1 -tr") + ->Apply(Max8192Args) + ->UseManualTime(); +BENCHMARK_CAPTURE(BM_pgm_dispatch, all_processors_all_cores_1_rta, "-w 5000 -ac -a 1 -tr") + ->Apply(Max8192Args) + ->UseManualTime(); +BENCHMARK_CAPTURE(BM_pgm_dispatch, all_processors_all_cores_32_rta, "-w 5000 -ac -a 32 -tr") + ->Apply(Max8192Args) + ->UseManualTime(); +BENCHMARK_CAPTURE(BM_pgm_dispatch, all_processors_all_cores_128_rta, "-w 5000 -ac -a 128 -tr") + ->Apply(Max8192Args) + ->UseManualTime(); +BENCHMARK_CAPTURE(BM_pgm_dispatch, all_processors_1_sem_1_core_1_processor_trace, "-w 5000 -n -t -S 4 -tr") + ->Apply(Max8192Args) + ->UseManualTime(); +BENCHMARK_CAPTURE(BM_pgm_dispatch, all_processors_1_sem_all_cores_1_processor_trace, "-w 5000 -ac -n -t -S 4 -tr") + ->Apply(Max8192Args) + ->UseManualTime(); +BENCHMARK_CAPTURE(BM_pgm_dispatch, maxed_config_params_trace, "-w 5000 -ac -S 4 -c 32 -a 128 -tr") + ->Apply(Max8192Args) + ->UseManualTime(); +BENCHMARK_CAPTURE(BM_pgm_dispatch, kernel_groups_trace, "-w 5000 -ac -kg 8-tr")->Apply(Max8192Args)->UseManualTime(); + +int main(int argc, char** argv) { + auto core_count = get_core_count(); + + std::string max_kg = std::to_string(std::get<0>(core_count)); + benchmark::RegisterBenchmark( + "BM_pgm_dispatch/kernel_groups_4_shadow", BM_pgm_dispatch, "-w 5000 -ac -kg " + max_kg + " -rs 40000 -nf 4") + ->Apply(Max8192Args) + ->UseManualTime(); + benchmark::RegisterBenchmark( + "BM_pgm_dispatch/kernel_groups_5_shadow", BM_pgm_dispatch, "-w 5000 -ac -kg " + max_kg + " -rs 40000 -nf 5") + ->Apply(Max8192Args) + ->UseManualTime(); + if (getenv("ARCH_NAME") == std::string("wormhole_b0")) { + 
benchmark::RegisterBenchmark("BM_pgm_dispatch/eth_dispatch", BM_pgm_dispatch, "-w 5000 -b -n -t +e") + ->Apply(Max8192Args) + ->UseManualTime(); + benchmark::RegisterBenchmark( + "BM_pgm_dispatch/tensix_eth_2", BM_pgm_dispatch, "-w 5000 -ac -kg " + max_kg + " -+e -a 16") + ->Apply(Max8192Args) + ->UseManualTime(); + benchmark::RegisterBenchmark( + "BM_pgm_dispatch/tensix_eth_2_4_shadow", + BM_pgm_dispatch, + "-w 5000 -ac -kg " + max_kg + " -+e -a 16 -rs 40000 -nf 4") + ->Apply(Max8192Args) + ->UseManualTime(); } + benchmark::Initialize(&argc, argv); + benchmark::RunSpecifiedBenchmarks(); + benchmark::Shutdown(); + return 0; }