Skip to content

Commit

Permalink
tracing: Add separate SENDRECV trace functions
Browse files Browse the repository at this point in the history
For upcoming NVTX support, it will make sense to have separate tracing
functions for SENDRECV and RDMA protocols. We might consider placing
them in separate files in the future.

Signed-off-by: Eric Raut <eraut@amazon.com>
  • Loading branch information
rauteric committed Apr 4, 2024
1 parent 485e114 commit 9531ecc
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 4 deletions.
18 changes: 18 additions & 0 deletions include/nccl_ofi_tracepoint.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,24 @@
#include "config.h"
#include "tracing_impl/lttng.h"

/***** SENDRECV PROTOCOL *****/
/*
 * SENDRECV protocol: fire the `Send` tracepoint of the `nccl_ofi_plugin`
 * provider. All six macro arguments are forwarded, in order, to
 * lttng_ust_tracepoint().
 *
 * The do/while (0) wrapper makes the expansion a single statement, so the
 * macro can be used as the unbraced body of an if/else.
 */
#define NCCL_OFI_TRACE_SEND_SENDRECV(dev, size, comm, msg_seq_num, request, nccl_req) \
	do { \
		lttng_ust_tracepoint(nccl_ofi_plugin, Send, \
				     dev, size, comm, msg_seq_num, \
				     request, nccl_req); \
	} while (0)

/*
 * SENDRECV protocol: fire the `Recv` tracepoint of the `nccl_ofi_plugin`
 * provider. The five macro arguments (dev, tag, size, request, nccl_req)
 * are forwarded, in order, to lttng_ust_tracepoint().
 *
 * The do/while (0) wrapper makes the expansion a single statement.
 */
#define NCCL_OFI_TRACE_RECV_SENDRECV(dev, tag, size, request, nccl_req) \
	do { \
		lttng_ust_tracepoint(nccl_ofi_plugin, Recv, \
				     dev, tag, size, \
				     request, nccl_req); \
	} while (0)

/*
 * SENDRECV protocol: fire the `Flush` tracepoint of the `nccl_ofi_plugin`
 * provider, forwarding `request` and `nccl_req` to lttng_ust_tracepoint().
 *
 * The do/while (0) wrapper makes the expansion a single statement.
 */
#define NCCL_OFI_TRACE_FLUSH_SENDRECV(request, nccl_req) \
	do { \
		lttng_ust_tracepoint(nccl_ofi_plugin, Flush, \
				     request, nccl_req); \
	} while (0)

/*
 * SENDRECV protocol: fire the `ProcessCompletions` tracepoint of the
 * `nccl_ofi_plugin` provider, forwarding `request` and `ctx` to
 * lttng_ust_tracepoint().
 *
 * The do/while (0) wrapper makes the expansion a single statement.
 */
#define NCCL_OFI_TRACE_COMPLETIONS_SENDRECV(request, ctx) \
	do { \
		lttng_ust_tracepoint(nccl_ofi_plugin, ProcessCompletions, \
				     request, ctx); \
	} while (0)

/***** RDMA PROTOCOL *****/
/*
 * RDMA protocol: fire the `Send` tracepoint of the `nccl_ofi_plugin`
 * provider. All six macro arguments are forwarded, in order, to
 * lttng_ust_tracepoint().
 *
 * The do/while (0) wrapper makes the expansion a single statement, so the
 * macro can be used as the unbraced body of an if/else.
 */
#define NCCL_OFI_TRACE_SEND(dev, size, comm, msg_seq_num, request, nccl_req) \
	do { \
		lttng_ust_tracepoint(nccl_ofi_plugin, Send, \
				     dev, size, comm, msg_seq_num, \
				     request, nccl_req); \
	} while (0)
Expand Down
8 changes: 4 additions & 4 deletions src/nccl_ofi_sendrecv.c
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ static inline int process_completions(struct fi_cq_tagged_entry *cq_entry,
comp_flags = cq_entry[comp_idx].flags;
req = container_of(op_ctx, nccl_net_ofi_sendrecv_req_t, ctx);

NCCL_OFI_TRACE_COMPLETIONS(req, &req->ctx);
NCCL_OFI_TRACE_COMPLETIONS_SENDRECV(req, &req->ctx);

/* Determine if this is control message */
if (OFI_UNLIKELY(cq_entry[comp_idx].tag & control_bit_mask)) {
Expand Down Expand Up @@ -854,7 +854,7 @@ static int recv(nccl_net_ofi_recv_comm_t *recv_comm, int n, void **buffers,
desc = fi_mr_desc(mr_handles[recv_n]);
}

NCCL_OFI_TRACE_RECV(dev_id, r_comm->tag, sizes[recv_n], req, base_req);
NCCL_OFI_TRACE_RECV_SENDRECV(dev_id, r_comm->tag, sizes[recv_n], req, base_req);

/*
* TODO: Use NCCL provided tags when plugin supports grouped
Expand Down Expand Up @@ -1035,7 +1035,7 @@ static int flush(nccl_net_ofi_recv_comm_t *recv_comm, int n, void **buffers,
}
}

NCCL_OFI_TRACE_FLUSH(req, base_req);
NCCL_OFI_TRACE_FLUSH_SENDRECV(req, base_req);

/* Issue RDMA read */
do {
Expand Down Expand Up @@ -1634,7 +1634,7 @@ static int send(nccl_net_ofi_send_comm_t *send_comm, void *data, int size, int t
if (mr_handle != NULL)
desc = fi_mr_desc(mr_handle);

NCCL_OFI_TRACE_SEND(req->dev_id, size, s_comm, 0, req, base_req);
NCCL_OFI_TRACE_SEND_SENDRECV(req->dev_id, size, s_comm, 0, req, base_req);

/*
* Try sending data to remote EP; Return NULL request
Expand Down

0 comments on commit 9531ecc

Please sign in to comment.