diff --git a/include/Makefile.am b/include/Makefile.am
index 687a16e9f..3262d2c8d 100644
--- a/include/Makefile.am
+++ b/include/Makefile.am
@@ -27,6 +27,7 @@ noinst_HEADERS = \
 	nccl_ofi_ofiutils.h \
 	nccl_ofi_tracepoint.h \
 	tracing_impl/lttng.h \
+	tracing_impl/nvtx.h \
 	nccl-headers/net.h \
 	nccl-headers/error.h \
 	nccl-headers/nvidia/err.h \
diff --git a/include/nccl_ofi_tracepoint.h b/include/nccl_ofi_tracepoint.h
index 17f569cab..511354c98 100644
--- a/include/nccl_ofi_tracepoint.h
+++ b/include/nccl_ofi_tracepoint.h
@@ -6,55 +6,69 @@
 #pragma once
 
 #include "config.h"
+#include "tracing_impl/nvtx.h"
 #include "tracing_impl/lttng.h"
 
 #define NCCL_OFI_TRACE_SEND(dev, size, comm, msg_seq_num, request, nccl_req) do { \
 	lttng_ust_tracepoint(nccl_ofi_plugin, Send, dev, size, comm, msg_seq_num, request, nccl_req); \
+	nvtx_push("Send"); \
 } while(0)
 
 #define NCCL_OFI_TRACE_SEND_CTRL_RECV(dev, rail_id, comm, msg_seq_num) do { \
 	lttng_ust_tracepoint(nccl_ofi_plugin, Send_ctrl_recv, dev, rail_id, comm, msg_seq_num); \
+	nvtx_push("Send_ctrl_recv"); \
 } while (0)
 
 #define NCCL_OFI_TRACE_SEND_WRITE_SEG_START(dev, rail_id, size, comm, msg_seq_num, request) do { \
 	lttng_ust_tracepoint(nccl_ofi_plugin, Send_write_segment_start, dev, rail_id, size, comm, msg_seq_num, request); \
+	nvtx_push("Send_write_segment_start"); \
 } while(0)
 
 #define NCCL_OFI_TRACE_SEND_WRITE_SEG_COMPLETE(dev, rail_id, comm, msg_seq_num, request) do { \
 	lttng_ust_tracepoint(nccl_ofi_plugin, Send_write_segment_complete, dev, rail_id, comm, msg_seq_num, request); \
+	nvtx_push("Send_write_segment_complete"); \
 } while(0)
 
 #define NCCL_OFI_TRACE_RECV(dev, tag, size, request, nccl_req) do { \
 	lttng_ust_tracepoint(nccl_ofi_plugin, Recv, dev, tag, size, request, nccl_req); \
+	nvtx_push("Recv"); \
 } while(0)
 
 #define NCCL_OFI_TRACE_RECV_CTRL_SEND_COMPLETE(request) do { \
 	lttng_ust_tracepoint(nccl_ofi_plugin, Recv_ctrl_send_complete, request); \
+	nvtx_push("Recv_ctrl_send_complete"); \
 } while(0)
 
 #define NCCL_OFI_TRACE_RECV_SEGMENT_COMPLETE(dev, rail_id, size, request) do { \
 	lttng_ust_tracepoint(nccl_ofi_plugin, Recv_segment_complete, dev, rail_id, size, request); \
+	nvtx_push("Recv_segment_complete"); \
 } while(0)
 
 #define NCCL_OFI_TRACE_EAGER_RECV(dev, rail_id, comm, msg_seq_num) do { \
 	lttng_ust_tracepoint(nccl_ofi_plugin, Eager_recv, dev, rail_id, comm, msg_seq_num); \
+	nvtx_push("Eager_recv"); \
 } while(0)
 
 #define NCCL_OFI_TRACE_COMPLETIONS(request,ctx) do { \
 	lttng_ust_tracepoint(nccl_ofi_plugin, ProcessCompletions, request,ctx); \
+	nvtx_push("ProcessCompletions"); \
 } while(0)
 
 #define NCCL_OFI_TRACE_FLUSH(request, nccl_req) do { \
 	lttng_ust_tracepoint(nccl_ofi_plugin, Flush, request, nccl_req); \
+	nvtx_push("Flush"); \
 } while(0)
 
 #define NCCL_OFI_TRACE_PENDING_INSERT(request) do { \
 	lttng_ust_tracepoint(nccl_ofi_plugin, Pending_queue_insert, request); \
+	nvtx_push("Pending_queue_insert"); \
 } while(0)
 
 #define NCCL_OFI_TRACE_PENDING_REMOVE(request) do { \
 	lttng_ust_tracepoint(nccl_ofi_plugin, Pending_queue_remove, request); \
+	nvtx_push("Pending_queue_remove"); \
 } while(0)
 
 #define NCCL_OFI_TRACE_POP(...) do { \
+	nvtx_pop(); \
 } while(0)
diff --git a/include/tracing_impl/nvtx.h b/include/tracing_impl/nvtx.h
new file mode 100644
index 000000000..5f8680bec
--- /dev/null
+++ b/include/tracing_impl/nvtx.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2022-2024 Amazon.com, Inc. or its affiliates. All rights reserved.
+ */
+
+#pragma once
+
+/*
+ * HAVE_NVTX_TRACING is defined by config.h. Include it here so that the
+ * choice between the real and the no-op implementation below does not
+ * depend on what the including file pulled in first.
+ */
+#include "config.h"
+
+#if HAVE_NVTX_TRACING
+#include "nvToolsExt.h"
+
+/*
+ * Open an NVTX range labelled `name` via nvtxRangePushEx().
+ * Each call is expected to be balanced by a later nvtx_pop().
+ */
+static inline void nvtx_push(const char *name)
+{
+	const nvtxEventAttributes_t eventAttrib = {
+		.version = NVTX_VERSION,
+		.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE,
+		.colorType = NVTX_COLOR_ARGB,
+		/* ARGB: the top byte is alpha, so set it to 0xff (opaque). */
+		.color = 0xffeb9234,
+		.messageType = NVTX_MESSAGE_TYPE_ASCII,
+		.message = { .ascii = name },
+	};
+
+	nvtxRangePushEx(&eventAttrib);
+}
+
+/* Close a range opened by nvtx_push() via nvtxRangePop(). */
+static inline void nvtx_pop(void)
+{
+	nvtxRangePop();
+}
+#else
+/* NVTX tracing is compiled out: both helpers are no-ops. */
+static inline void nvtx_push(const char *name)
+{
+	(void)name;
+}
+
+static inline void nvtx_pop(void)
+{
+}
+#endif