Skip to content

Commit

Permalink
#16541 - device profiler noc tracing feature
Browse files Browse the repository at this point in the history
  • Loading branch information
bgrady-tt committed Mar 4, 2025
1 parent 30fffa9 commit cc919af
Show file tree
Hide file tree
Showing 20 changed files with 835 additions and 63 deletions.
3 changes: 3 additions & 0 deletions .clangd
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,6 @@
# Extra flags clangd adds to the compile command of every file it parses.
CompileFlags:
  Add:
    # Entries are passed through as compiler arguments, so each option needs its
    # leading '-' (a bare "std=c++20" is not recognised as the -std option).
    - "-std=c++20"
    # Profiler-related macros; presumably defined so clangd also parses the code
    # guarded by them — confirm against the kernel build flags.
    - "-DPROFILE_NOC_EVENTS=1"
    - "-DPROFILE_KERNEL=1"
    - "-DCOMPILE_FOR_BRISC=1"
10 changes: 10 additions & 0 deletions cmake/tracy.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,16 @@ target_compile_definitions(TracyClient PUBLIC TRACY_ENABLE)
target_compile_options(TracyClient PUBLIC -fno-omit-frame-pointer)
target_link_options(TracyClient PUBLIC -rdynamic)

# Allow changing the max number of profiler events trackable in device DRAM for
# each Tensix core. 1000 events ~ 5MB of total DRAM overhead.
#
# The value is read from the environment at configure time. A variable that is
# set but empty is ignored: it would otherwise be forwarded as an empty macro
# definition (-DDEVICE_PROFILER_OP_SUPPORT_COUNT_OVERRIDE=), which is "defined"
# yet has no value to use.
if(
    DEFINED ENV{TT_METAL_DEVICE_PROFILER_OP_SUPPORT_COUNT_OVERRIDE}
    AND NOT "$ENV{TT_METAL_DEVICE_PROFILER_OP_SUPPORT_COUNT_OVERRIDE}" STREQUAL ""
)
    target_compile_definitions(
        TracyClient
        PUBLIC
            DEVICE_PROFILER_OP_SUPPORT_COUNT_OVERRIDE=$ENV{TT_METAL_DEVICE_PROFILER_OP_SUPPORT_COUNT_OVERRIDE}
    )
endif()

# Our current fork of tracy does not have CMake support for these subdirectories
# Once we update, we can change this
include(ExternalProject)
Expand Down
72 changes: 66 additions & 6 deletions tt_metal/api/tt-metalium/profiler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,12 @@
#include "program_impl.hpp"
#include "profiler_state.hpp"
#include "common.hpp"
#include "profiler_optional_metadata.hpp"
#include "tracy/TracyTTDevice.hpp"
#include "common/TracyTTDeviceData.hpp"

#include <nlohmann/json.hpp>

using std::chrono::duration;
using std::chrono::duration_cast;
using std::chrono::nanoseconds;
Expand Down Expand Up @@ -61,10 +64,24 @@ class DeviceProfiler {
// Iterate through all zone source locations and generate hash
void generateZoneSourceLocationsHashes();

// Serializes all NoC trace data into per-op JSON trace files.
//   noc_trace_json_log - accumulated NoC trace records; taken by const reference.
//   output_dir         - presumably the directory the trace files are written to;
//                        confirm against the definition.
//   device_id          - presumably identifies the device the traces belong to;
//                        confirm how it is used (e.g. in file names).
void serializeJsonNocTraces(
const nlohmann::ordered_json& noc_trace_json_log, const std::filesystem::path& output_dir, int device_id);

// Presumably writes the header of the device-side CSV log to log_file_ofs.
// NOTE(review): only the declaration is visible here; how device_architecture and
// device_core_frequency appear in the header should be confirmed against the definition.
// Declared const, so it does not modify the profiler's own (non-mutable) state.
void emitCSVHeader(
std::ofstream& log_file_ofs, const tt::ARCH& device_architecture, int device_core_frequency) const;

// Translates a core coordinate recorded on the device, which may be a virtual
// coordinate, into a physical coordinate.
//   device - device used for the translation; not modified (pointer to const).
//   c      - the recorded, potentially-virtual coordinate.
// Returns the corresponding physical coordinate by value.
CoreCoord getPhysicalAddressFromVirtual(const IDevice* device, const CoreCoord& c) const;

// Dumping profile result to file
void dumpResultToFile(
void logPacketData(
const IDevice* device,
std::ofstream& log_file_ofs,
nlohmann::ordered_json& noc_trace_json_log,
uint32_t runID,
uint32_t runHostID,
const std::string& opname,
int device_id,
CoreCoord core,
int core_flat,
Expand All @@ -73,9 +90,51 @@ class DeviceProfiler {
uint32_t timer_id,
uint64_t timestamp);

// Logs the data of one profiler packet to the CSV file.
// NOTE(review): only the declaration is visible here. The notes below are read off the
// signature and should be confirmed against the definition.
//   log_file_ofs             - CSV output stream (non-const reference).
//   core_x, core_y           - core coordinates; whether they are virtual or physical
//                              at this point is not visible here.
//   timer_id, timestamp, data - fields of the packet being logged.
//   run_id, run_host_id, opname, zone_name, packet_type, source_line, source_file
//                            - further attributes logged with the packet.
// The parameter list after `device` and the output sink is the same as that of
// logNocTracePacketDataToJson.
void logPacketDataToCSV(
const IDevice* device,
std::ofstream& log_file_ofs,
int device_id,
int core_x,
int core_y,
const std::string_view risc_name,
uint32_t timer_id,
uint64_t timestamp,
uint64_t data,
uint32_t run_id,
uint32_t run_host_id,
const std::string_view opname,
const std::string_view zone_name,
kernel_profiler::PacketTypes packet_type,
uint64_t source_line,
const std::string_view source_file);

// Dumps NoC-trace-related profile data for one packet into the JSON log.
// NOTE(review): only the declaration is visible here. The notes below are read off the
// signature and should be confirmed against the definition.
//   noc_trace_json_log       - JSON log object (non-const reference), presumably
//                              appended to by this call.
//   core_x, core_y           - core coordinates; whether they are virtual or physical
//                              at this point is not visible here.
//   timer_id, timestamp, data - fields of the packet being logged.
//   run_id, run_host_id, opname, zone_name, packet_type, source_line, source_file
//                            - further attributes available for the record.
// The parameter list after `device` and the output sink is the same as that of
// logPacketDataToCSV.
void logNocTracePacketDataToJson(
const IDevice* device,
nlohmann::ordered_json& noc_trace_json_log,
int device_id,
int core_x,
int core_y,
const std::string_view risc_name,
uint32_t timer_id,
uint64_t timestamp,
uint64_t data,
uint32_t run_id,
uint32_t run_host_id,
const std::string_view opname,
const std::string_view zone_name,
kernel_profiler::PacketTypes packet_type,
uint64_t source_line,
const std::string_view source_file);

// Helper function for reading risc profile results
void readRiscProfilerResults(
IDevice* device, CoreCoord& worker_core);
IDevice* device,
const CoreCoord& worker_core,
const std::optional<ProfilerOptionalMetadata>& metadata,
std::ofstream& log_file_ofs,
nlohmann::ordered_json& noc_trace_json_log);

// Push device results to tracy
void pushTracyDeviceResults();
Expand All @@ -100,17 +159,17 @@ class DeviceProfiler {
// DRAM Vector
std::vector<uint32_t> profile_buffer;

//Device events
// Device events
std::set<tracy::TTDeviceEvent> device_events;

std::set<tracy::TTDeviceEvent> device_sync_events;

std::set<tracy::TTDeviceEvent> device_sync_new_events;

//shift
// shift
int64_t shift = 0;

//frequency scale
// frequency scale
double freqScale = 1.0;

uint32_t my_device_id = 0;
Expand All @@ -128,7 +187,8 @@ class DeviceProfiler {
void dumpResults(
IDevice* device,
const std::vector<CoreCoord>& worker_cores,
ProfilerDumpState state = ProfilerDumpState::NORMAL);
ProfilerDumpState state = ProfilerDumpState::NORMAL,
const std::optional<ProfilerOptionalMetadata>& metadata = {});
};

} // namespace tt_metal
Expand Down
29 changes: 29 additions & 0 deletions tt_metal/api/tt-metalium/profiler_optional_metadata.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
// SPDX-FileCopyrightText: © 2025 Tenstorrent Inc.
//
// SPDX-License-Identifier: Apache-2.0

#pragma once

#include <cstdint>
#include <map>
#include <string>
#include <utility>

// Optional, caller-supplied metadata for a profiler dump: associates each
// (device ID, runtime ID) pair with an op name.
class ProfilerOptionalMetadata {
    using DeviceID = uint32_t;
    using RuntimeID = uint32_t;

public:
    // Takes over the contents of `runtime_map`; the map is moved into this object.
    ProfilerOptionalMetadata(std::map<std::pair<DeviceID, RuntimeID>, std::string>&& runtime_map) :
        runtime_id_to_opname(std::move(runtime_map)) {}

    // Returns the op name stored for (device_id, runtime_id). When the pair has
    // no entry, returns a reference to a shared empty string instead.
    const std::string& getOpName(DeviceID device_id, RuntimeID runtime_id) const {
        static const std::string no_opname;
        const auto match = runtime_id_to_opname.find({device_id, runtime_id});
        return match == runtime_id_to_opname.end() ? no_opname : match->second;
    }

private:
    // Op names keyed by (device ID, runtime ID).
    std::map<std::pair<DeviceID, RuntimeID>, std::string> runtime_id_to_opname;
};
4 changes: 4 additions & 0 deletions tt_metal/api/tt-metalium/rtoptions.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,8 @@ class RunTimeOptions {
bool profile_dispatch_cores = false;
bool profiler_sync_enabled = false;
bool profiler_buffer_usage_enabled = false;
bool profiler_noc_events_enabled = false;
std::string profiler_noc_events_report_path;

bool null_kernels = false;

Expand Down Expand Up @@ -285,6 +287,8 @@ class RunTimeOptions {
inline bool get_profiler_do_dispatch_cores() { return profile_dispatch_cores; }
inline bool get_profiler_sync_enabled() { return profiler_sync_enabled; }
inline bool get_profiler_buffer_usage_enabled() { return profiler_buffer_usage_enabled; }
// Returns the current value of the profiler_noc_events_enabled flag.
inline bool get_profiler_noc_events_enabled() { return profiler_noc_events_enabled; }
// Returns a copy of profiler_noc_events_report_path (returned by value).
inline std::string get_profiler_noc_events_report_path() { return profiler_noc_events_report_path; }

inline void set_kernels_nullified(bool v) { null_kernels = v; }
inline bool get_kernels_nullified() { return null_kernels; }
Expand Down
8 changes: 6 additions & 2 deletions tt_metal/api/tt-metalium/tt_metal.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include "buffer.hpp"
#include "profiler.hpp"
#include "llrt/tt_cluster.hpp"
#include "profiler_optional_metadata.hpp"

namespace tt::tt_metal {
inline namespace v0 {
Expand Down Expand Up @@ -220,7 +221,10 @@ void ProfilerSync(ProfilerSyncState state);
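* | state | Profiler dump state | ProfilerDumpState | | False |
* | metadata | Optional profiler metadata (see ProfilerOptionalMetadata) | const std::optional<ProfilerOptionalMetadata>& | | False |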
* */
void DumpDeviceProfileResults(
IDevice* device, std::vector<CoreCoord>& worker_cores, ProfilerDumpState = ProfilerDumpState::NORMAL);
IDevice* device,
std::vector<CoreCoord>& worker_cores,
ProfilerDumpState = ProfilerDumpState::NORMAL,
const std::optional<ProfilerOptionalMetadata>& metadata = {});

/**
* Traverse all cores and read device side profiler data and dump results into device side CSV log
Expand All @@ -232,7 +236,7 @@ void DumpDeviceProfileResults(
* | device | The device holding the program being profiled. | Device * | | True |
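* | state | Profiler dump state | ProfilerDumpState | | False |
* | metadata | Optional profiler metadata (see ProfilerOptionalMetadata) | const std::optional<ProfilerOptionalMetadata>& | | False |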
* */
void DumpDeviceProfileResults(IDevice* device, ProfilerDumpState = ProfilerDumpState::NORMAL);
void DumpDeviceProfileResults(IDevice* device, ProfilerDumpState = ProfilerDumpState::NORMAL, const std::optional<ProfilerOptionalMetadata>& metadata = {});

/**
* Set the directory for device-side CSV logs produced by the profiler instance in the tt-metal module
Expand Down
5 changes: 5 additions & 0 deletions tt_metal/hostdevcommon/api/hostdevcommon/profiler_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,12 @@ constexpr static std::uint32_t PROFILER_L1_BUFFER_SIZE = PROFILER_L1_VECTOR_SIZE

} // namespace kernel_profiler

// PROFILER_OP_SUPPORT_COUNT defaults to 1000. It takes the value of
// DEVICE_PROFILER_OP_SUPPORT_COUNT_OVERRIDE instead only when both TRACY_ENABLE
// and that override macro are defined.
#if !defined(TRACY_ENABLE) || !defined(DEVICE_PROFILER_OP_SUPPORT_COUNT_OVERRIDE)
constexpr static std::uint32_t PROFILER_OP_SUPPORT_COUNT = 1000;
#else
constexpr static std::uint32_t PROFILER_OP_SUPPORT_COUNT = DEVICE_PROFILER_OP_SUPPORT_COUNT_OVERRIDE;
#endif

constexpr static std::uint32_t PROFILER_FULL_HOST_VECTOR_SIZE_PER_RISC =
kernel_profiler::PROFILER_L1_MARKER_UINT32_SIZE *
(kernel_profiler::PROFILER_L1_PROGRAM_ID_COUNT + kernel_profiler::PROFILER_L1_GUARANTEED_MARKER_COUNT +
Expand Down
1 change: 1 addition & 0 deletions tt_metal/hw/firmware/src/brisc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include <cstdint>

// clang-format off
#undef PROFILE_NOC_EVENTS
#include "risc_common.h"
#include "tensix.h"
#include "tensix_types.h"
Expand Down
Loading

0 comments on commit cc919af

Please sign in to comment.