Skip to content

Commit

Permalink
rdma: Remove scheduler for control messages
Browse files Browse the repository at this point in the history
The control messages are not going to be scheduled over multiple rails,
so remove the unnecessary scheduling logic around them.

Signed-off-by: Brian Barrett <bbarrett@amazon.com>
Signed-off-by: Raghu Raja <raghunch@amazon.com>
  • Loading branch information
bwbarrett authored and rajachan committed Aug 28, 2024
1 parent 3ec37f8 commit 3c1ae1f
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 33 deletions.
4 changes: 0 additions & 4 deletions include/nccl_ofi_rdma.h
Original file line number Diff line number Diff line change
Expand Up @@ -238,10 +238,6 @@ typedef struct {
typedef struct {
/* Pointer to the allocated control buffer from freelist */
nccl_net_ofi_rdma_ctrl_fl_item_t *ctrl_fl_item;
/* Schedule used to transfer the control buffer. We save the
* pointer to reference it when transferring the buffer over
* network. */
nccl_net_ofi_schedule_t *ctrl_schedule;
/* Pointer to recv parent request */
nccl_net_ofi_rdma_req_t *recv_req;
#if HAVE_NVTX_TRACING
Expand Down
32 changes: 3 additions & 29 deletions src/nccl_ofi_rdma.c
Original file line number Diff line number Diff line change
Expand Up @@ -1834,12 +1834,6 @@ static inline int free_send_ctrl_req(nccl_net_ofi_rdma_req_t *req,
(nccl_net_ofi_rdma_recv_comm_t *)req->comm;
rdma_req_send_ctrl_data_t *send_ctrl_data = get_send_ctrl_data(req);

if (send_ctrl_data->ctrl_schedule) {
nccl_net_ofi_rdma_device_t *device = (nccl_net_ofi_rdma_device_t *)req->comm->ep->device;
nccl_net_ofi_release_schedule(device->scheduler, send_ctrl_data->ctrl_schedule);
send_ctrl_data->ctrl_schedule = NULL;
}

if (send_ctrl_data->ctrl_fl_item) {
nccl_net_ofi_rdma_recv_comm_t *r_comm = (nccl_net_ofi_rdma_recv_comm_t *)req->comm;
nccl_ofi_freelist_entry_free(r_comm->ctrl_buff_fl, send_ctrl_data->ctrl_fl_item);
Expand Down Expand Up @@ -2759,7 +2753,6 @@ static inline int insert_send_ctrl_req(
nccl_net_ofi_rdma_mr_handle_t *buff_mr_handle,
nccl_net_ofi_rdma_req_t *recv_req)
{
nccl_net_ofi_scheduler_t *scheduler = device->scheduler;
nccl_net_ofi_rdma_ep_t *ep = (nccl_net_ofi_rdma_ep_t *)r_comm->base.base.ep;
nccl_net_ofi_rdma_req_t *send_ctrl_req = allocate_req(r_comm->nccl_ofi_reqs_fl);
if (OFI_UNLIKELY(send_ctrl_req == NULL)) {
Expand All @@ -2775,22 +2768,9 @@ static inline int insert_send_ctrl_req(
send_ctrl_req->msg_seq_num = msg_seq_num;

rdma_req_send_ctrl_data_t *send_ctrl_data = get_send_ctrl_data(send_ctrl_req);
size_t ctrl_msg_len = nccl_net_ofi_rdma_ctrl_msg_size(ep->num_rails, ep->use_long_rkeys);

send_ctrl_data->recv_req = recv_req;
send_ctrl_data->ctrl_fl_item = NULL;
send_ctrl_data->ctrl_schedule = scheduler->get_schedule(scheduler,
ctrl_msg_len,
device->num_rails);

if (OFI_UNLIKELY(!(send_ctrl_data->ctrl_schedule))) {
return -EINVAL;
} else if (OFI_UNLIKELY(send_ctrl_data->ctrl_schedule->num_xfer_infos != 1)) {
NCCL_OFI_WARN("Invalid schedule for outgoing control message (%zu bytes). Expected one rail, but got %zu",
size,
send_ctrl_data->ctrl_schedule->num_xfer_infos);
return -EINVAL;
}

/*
* Allocate RDMA control buffer which transfers the RDMA write buffer
Expand Down Expand Up @@ -4400,17 +4380,12 @@ static int post_rdma_ctrl(nccl_net_ofi_rdma_req_t *req)
assert(req->type == NCCL_OFI_RDMA_SEND_CTRL);
nccl_net_ofi_rdma_recv_comm_t *r_comm = (nccl_net_ofi_rdma_recv_comm_t *)req->comm;
rdma_req_send_ctrl_data_t *send_ctrl_data = get_send_ctrl_data(req);
nccl_net_ofi_schedule_t *schedule = send_ctrl_data->ctrl_schedule;
nccl_net_ofi_rdma_ep_t *ep = (nccl_net_ofi_rdma_ep_t *)r_comm->base.base.ep;

assert(schedule != NULL);

// Should be using a single rail for posting the control message
nccl_net_ofi_xfer_info_t *xfer_info = &schedule->rail_xfer_infos[0];
const int control_rail_id = 0;

// Get communicator rail information to xfer the req
nccl_net_ofi_rdma_recv_comm_rail_t *comm_rail;
comm_rail = get_recv_comm_rail(r_comm, xfer_info->rail_id);
comm_rail = get_recv_comm_rail(r_comm, control_rail_id);

nccl_net_ofi_rdma_ctrl_fl_item_t *ctrl_fl_item = send_ctrl_data->ctrl_fl_item;

Expand All @@ -4419,8 +4394,7 @@ static int post_rdma_ctrl(nccl_net_ofi_rdma_req_t *req)
(freelist_regmr_fn_handle_t *)ctrl_fl_item->fl_reginfo.mr_handle;
nccl_net_ofi_rdma_mr_handle_t *mr_handle = fl_handle->mr_handle;

assert(xfer_info->rail_id < mr_handle->num_rails);
void *desc = fi_mr_desc(mr_handle->mr[xfer_info->rail_id]);
void *desc = fi_mr_desc(mr_handle->mr[control_rail_id]);

NCCL_OFI_TRACE_SEND_CTRL_START(req->dev_id, xfer_info->rail_id, req->comm, req, req->msg_seq_num);

Expand Down

0 comments on commit 3c1ae1f

Please sign in to comment.