Skip to content

Commit

Permalink
#16541 - device profiler noc tracing feature
Browse files Browse the repository at this point in the history
  • Loading branch information
bgrady-tt committed Mar 5, 2025
1 parent 30fffa9 commit 71a7de3
Show file tree
Hide file tree
Showing 22 changed files with 976 additions and 63 deletions.
3 changes: 3 additions & 0 deletions .clangd
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,6 @@
CompileFlags:
  Add:
    # Every entry must be a complete compiler flag including its leading dash;
    # without the dash, clang does not parse "std=c++20" as the -std option.
    - "-std=c++20"
    # Macro definitions passed to clangd (presumably so that code guarded by
    # these macros is parsed in the editor; confirm against the build flags).
    - "-DPROFILE_NOC_EVENTS=1"
    - "-DPROFILE_KERNEL=1"
    - "-DCOMPILE_FOR_BRISC=1"
18 changes: 18 additions & 0 deletions tests/tt_metal/tools/profiler/test_device_profiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -351,6 +351,24 @@ def test_timestamped_events():
assert eventCount in REF_COUNT_DICT[ENV_VAR_ARCH_NAME], "Wrong event count"


def test_noc_event_profiler():
    """Run the NoC event profiler example and validate the JSON trace it emits.

    The example binary is launched from TT_METAL_HOME with
    TT_METAL_DEVICE_PROFILER_NOC_EVENTS=1 prefixed to the command. The test
    passes when the command exits with status 0, the trace file
    noc_trace_dev0_ID0.json exists under PROFILER_LOGS_DIR, and the parsed
    JSON document has exactly 8 top-level entries.
    """
    arch_name = os.getenv("ARCH_NAME")
    assert arch_name in ["grayskull", "wormhole_b0", "blackhole"]

    binary_path = f"build/{PROG_EXMP_DIR}/test_noc_event_profiler"
    # Clear profiler runtime artifacts before launching the example.
    clear_profiler_runtime_artifacts()
    env_prefix = "TT_METAL_DEVICE_PROFILER_NOC_EVENTS=1"
    exit_status = os.system(f"cd {TT_METAL_HOME} && {env_prefix} {binary_path}")
    assert exit_status == 0

    trace_path = f"{PROFILER_LOGS_DIR}/noc_trace_dev0_ID0.json"
    assert os.path.isfile(trace_path)

    with open(trace_path, "r") as trace_file:
        trace_entries = json.load(trace_file)
        # The example is expected to yield exactly 8 top-level trace entries.
        assert len(trace_entries) == 8


def test_sub_device_profiler():
ARCH_NAME = os.getenv("ARCH_NAME")
run_gtest_profiler_test(
Expand Down
72 changes: 66 additions & 6 deletions tt_metal/api/tt-metalium/profiler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,12 @@
#include "program_impl.hpp"
#include "profiler_state.hpp"
#include "common.hpp"
#include "profiler_optional_metadata.hpp"
#include "tracy/TracyTTDevice.hpp"
#include "common/TracyTTDeviceData.hpp"

#include <nlohmann/json.hpp>

using std::chrono::duration;
using std::chrono::duration_cast;
using std::chrono::nanoseconds;
Expand Down Expand Up @@ -61,10 +64,24 @@ class DeviceProfiler {
// Iterate through all zone source locations and generate hash
void generateZoneSourceLocationsHashes();

// Serializes all NoC trace data into per-op JSON trace files.
//   noc_trace_json_log: the NoC trace JSON log to serialize (read-only).
//   output_dir:         filesystem path for the output (presumably the directory
//                       the trace files are written into; confirm in the definition).
//   device_id:          id of the device the trace data belongs to.
void serializeJsonNocTraces(
const nlohmann::ordered_json& noc_trace_json_log, const std::filesystem::path& output_dir, int device_id);

// Emits the CSV header to log_file_ofs. Does not modify the profiler (const).
// NOTE(review): device_architecture and device_core_frequency are presumably
// written as part of the header; the definition is not visible here, confirm.
void emitCSVHeader(
std::ofstream& log_file_ofs, const tt::ARCH& device_architecture, int device_core_frequency) const;

// Translates a core coordinate recorded on the device, which may be a virtual
// coordinate, into a physical coordinate.
//   device: device the coordinate was recorded on.
//   c:      the (potentially virtual) coordinate to translate.
// Returns the resulting coordinate by value; does not modify the profiler (const).
CoreCoord getPhysicalAddressFromVirtual(const IDevice* device, const CoreCoord& c) const;

// Dumping profile result to file
void dumpResultToFile(
void logPacketData(
const IDevice* device,
std::ofstream& log_file_ofs,
nlohmann::ordered_json& noc_trace_json_log,
uint32_t runID,
uint32_t runHostID,
const std::string& opname,
int device_id,
CoreCoord core,
int core_flat,
Expand All @@ -73,9 +90,51 @@ class DeviceProfiler {
uint32_t timer_id,
uint64_t timestamp);

// Logs one profiler packet's data to the CSV file.
//   device:                              device associated with the packet
//   log_file_ofs:                        output stream of the CSV log
//   device_id, core_x, core_y, risc_name: where the packet was recorded
//   timer_id, timestamp, data:           packet fields
//   run_id, run_host_id, opname:         run / op the packet is attributed to
//   zone_name, source_line, source_file: zone and source-location information
//   packet_type:                         packet kind (kernel_profiler::PacketTypes)
// NOTE(review): the per-parameter meanings above are inferred from the parameter
// names; the definition is not visible here, confirm before relying on them.
void logPacketDataToCSV(
const IDevice* device,
std::ofstream& log_file_ofs,
int device_id,
int core_x,
int core_y,
const std::string_view risc_name,
uint32_t timer_id,
uint64_t timestamp,
uint64_t data,
uint32_t run_id,
uint32_t run_host_id,
const std::string_view opname,
const std::string_view zone_name,
kernel_profiler::PacketTypes packet_type,
uint64_t source_line,
const std::string_view source_file);

// Dumps NoC-trace-related profile data for one packet into the JSON log.
//   device:                              device associated with the packet
//   noc_trace_json_log:                  JSON log that receives the data (taken by
//                                        non-const reference, so it may be modified)
//   device_id, core_x, core_y, risc_name: where the packet was recorded
//   timer_id, timestamp, data:           packet fields
//   run_id, run_host_id, opname:         run / op the packet is attributed to
//   zone_name, source_line, source_file: zone and source-location information
//   packet_type:                         packet kind (kernel_profiler::PacketTypes)
// NOTE(review): the per-parameter meanings above are inferred from the parameter
// names; the definition is not visible here, confirm before relying on them.
void logNocTracePacketDataToJson(
const IDevice* device,
nlohmann::ordered_json& noc_trace_json_log,
int device_id,
int core_x,
int core_y,
const std::string_view risc_name,
uint32_t timer_id,
uint64_t timestamp,
uint64_t data,
uint32_t run_id,
uint32_t run_host_id,
const std::string_view opname,
const std::string_view zone_name,
kernel_profiler::PacketTypes packet_type,
uint64_t source_line,
const std::string_view source_file);

// Helper function for reading risc profile results
void readRiscProfilerResults(
IDevice* device, CoreCoord& worker_core);
IDevice* device,
const CoreCoord& worker_core,
const std::optional<ProfilerOptionalMetadata>& metadata,
std::ofstream& log_file_ofs,
nlohmann::ordered_json& noc_trace_json_log);

// Pushes the device results to Tracy. Takes no arguments and returns nothing.
void pushTracyDeviceResults();
Expand All @@ -100,17 +159,17 @@ class DeviceProfiler {
// DRAM Vector
std::vector<uint32_t> profile_buffer;

//Device events
// Device events
std::set<tracy::TTDeviceEvent> device_events;

std::set<tracy::TTDeviceEvent> device_sync_events;

std::set<tracy::TTDeviceEvent> device_sync_new_events;

//shift
// shift
int64_t shift = 0;

//frequency scale
// frequency scale
double freqScale = 1.0;

uint32_t my_device_id = 0;
Expand All @@ -128,7 +187,8 @@ class DeviceProfiler {
void dumpResults(
IDevice* device,
const std::vector<CoreCoord>& worker_cores,
ProfilerDumpState state = ProfilerDumpState::NORMAL);
ProfilerDumpState state = ProfilerDumpState::NORMAL,
const std::optional<ProfilerOptionalMetadata>& metadata = {});
};

} // namespace tt_metal
Expand Down
29 changes: 29 additions & 0 deletions tt_metal/api/tt-metalium/profiler_optional_metadata.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
// SPDX-FileCopyrightText: © 2025 Tenstorrent Inc.
//
// SPDX-License-Identifier: Apache-2.0

#pragma once

#include <cstdint>
#include <map>
#include <string>
#include <utility>

// Optional metadata handed to the device profiler: a lookup table from a
// (device id, runtime id) pair to the name of the op that ran under that id.
class ProfilerOptionalMetadata {
    using DeviceID = uint32_t;
    using RuntimeID = uint32_t;

public:
    // Takes ownership of `runtime_map`; the argument is moved into this object.
    ProfilerOptionalMetadata(std::map<std::pair<DeviceID, RuntimeID>, std::string>&& runtime_map) :
        runtime_id_to_opname(std::move(runtime_map)) {}

    // Returns the op name stored for (device_id, runtime_id). When the pair has
    // no entry, a reference to a shared empty string is returned instead, so the
    // result is always a valid reference.
    const std::string& getOpName(DeviceID device_id, RuntimeID runtime_id) const {
        static const std::string empty_string;
        const auto found = runtime_id_to_opname.find({device_id, runtime_id});
        return found != runtime_id_to_opname.end() ? found->second : empty_string;
    }

private:
    // Keyed by (device id, runtime id); the value is the op name.
    std::map<std::pair<DeviceID, RuntimeID>, std::string> runtime_id_to_opname;
};
4 changes: 4 additions & 0 deletions tt_metal/api/tt-metalium/rtoptions.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,8 @@ class RunTimeOptions {
bool profile_dispatch_cores = false;
bool profiler_sync_enabled = false;
bool profiler_buffer_usage_enabled = false;
// Whether device-profiler NoC event tracing is enabled; defaults to off.
bool profiler_noc_events_enabled = false;
// Report path for NoC event output; default-constructed (empty).
// NOTE(review): how an empty path is interpreted is not visible here, confirm.
std::string profiler_noc_events_report_path;

bool null_kernels = false;

Expand Down Expand Up @@ -285,6 +287,8 @@ class RunTimeOptions {
// Simple accessors: each getter returns the current value of the matching field.
inline bool get_profiler_do_dispatch_cores() { return profile_dispatch_cores; }
inline bool get_profiler_sync_enabled() { return profiler_sync_enabled; }
inline bool get_profiler_buffer_usage_enabled() { return profiler_buffer_usage_enabled; }
// Returns the value of profiler_noc_events_enabled.
inline bool get_profiler_noc_events_enabled() { return profiler_noc_events_enabled; }
// Returns profiler_noc_events_report_path by value (a copy of the stored string).
inline std::string get_profiler_noc_events_report_path() { return profiler_noc_events_report_path; }

// Setter / getter pair for the null_kernels flag.
inline void set_kernels_nullified(bool v) { null_kernels = v; }
inline bool get_kernels_nullified() { return null_kernels; }
Expand Down
8 changes: 6 additions & 2 deletions tt_metal/api/tt-metalium/tt_metal.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include "buffer.hpp"
#include "profiler.hpp"
#include "llrt/tt_cluster.hpp"
#include "profiler_optional_metadata.hpp"

namespace tt::tt_metal {
inline namespace v0 {
Expand Down Expand Up @@ -220,7 +221,10 @@ void ProfilerSync(ProfilerSyncState state);
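* | state | The profiler dump state | ProfilerDumpState | | False |
* | metadata | Optional profiler metadata (e.g. a runtime ID to op name mapping) | const std::optional<ProfilerOptionalMetadata>& | | False |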
* */
void DumpDeviceProfileResults(
IDevice* device, std::vector<CoreCoord>& worker_cores, ProfilerDumpState = ProfilerDumpState::NORMAL);
IDevice* device,
std::vector<CoreCoord>& worker_cores,
ProfilerDumpState = ProfilerDumpState::NORMAL,
const std::optional<ProfilerOptionalMetadata>& metadata = {});

/**
* Traverse all cores and read device side profiler data and dump results into device side CSV log
Expand All @@ -232,7 +236,7 @@ void DumpDeviceProfileResults(
* | device | The device holding the program being profiled. | Device * | | True |
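* | state | The profiler dump state | ProfilerDumpState | | False |
* | metadata | Optional profiler metadata (e.g. a runtime ID to op name mapping) | const std::optional<ProfilerOptionalMetadata>& | | False |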
* */
void DumpDeviceProfileResults(IDevice* device, ProfilerDumpState = ProfilerDumpState::NORMAL);
void DumpDeviceProfileResults(IDevice* device, ProfilerDumpState = ProfilerDumpState::NORMAL, const std::optional<ProfilerOptionalMetadata>& metadata = {});

/**
* Set the directory for device-side CSV logs produced by the profiler instance in the tt-metal module
Expand Down
1 change: 1 addition & 0 deletions tt_metal/hw/firmware/src/brisc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include <cstdint>

// clang-format off
#undef PROFILE_NOC_EVENTS
#include "risc_common.h"
#include "tensix.h"
#include "tensix_types.h"
Expand Down
Loading

0 comments on commit 71a7de3

Please sign in to comment.