Skip to content

Commit

Permalink
rdma: reimplement ep_addr_list
Browse files Browse the repository at this point in the history
Move rdma.c to C++, and move some structs from the header into the
translation unit so that they can use C++ types. Implement ep_addr_list
with std::unordered_map/std::unordered_set instead of uthash idioms.
  • Loading branch information
Nicholas Sielicki committed Dec 3, 2024
1 parent 4796b12 commit 8e6fd61
Show file tree
Hide file tree
Showing 3 changed files with 164 additions and 135 deletions.
61 changes: 0 additions & 61 deletions include/nccl_ofi_rdma.h
Original file line number Diff line number Diff line change
Expand Up @@ -662,47 +662,6 @@ typedef struct nccl_net_ofi_rdma_listen_comm {
nccl_ofi_rdma_connection_info_t conn_msg;
} nccl_net_ofi_rdma_listen_comm_t;

/*
* @brief Endpoint rail
*
* Endpoint rail encapsulates data of an endpoint for a
* specific rail.
*
* Groups the libfabric handles (endpoint, address vector, completion
* queue, access domain), the local endpoint name, and the bounce-buffer
* accounting kept for that rail.
*/
struct nccl_net_ofi_ep_rail {
/* Identifier of this rail (presumably its index within the owning endpoint -- TODO confirm) */
int rail_id;

/* Local libfabric endpoint handle */
struct fid_ep *ofi_ep;

/* Name of local libfabric endpoint; storage is bounded by MAX_EP_ADDR bytes */
char local_ep_name[MAX_EP_ADDR];

/* Length of local_ep_name */
size_t local_ep_name_len;

/* Address vector handle */
struct fid_av *av;

/* Completion Queue handle */
struct fid_cq *cq;

/* Access domain handles */
struct fid_domain *domain;

/*
* Bounce buffer management
*/

/* Number of bounce buffers posted */
size_t num_bounce_posted;
/* Minimum posted bounce buffers (see RDMA_MIN_POSTED_BOUNCE_BUFFERS) */
size_t min_bounce_posted;
/* Maximum posted bounce buffers (see RDMA_MAX_POSTED_BOUNCE_BUFFERS) */
size_t max_bounce_posted;
/* Mutex for bounce buffer operations */
pthread_mutex_t bounce_mutex;
};

/*
* @brief RDMA Endpoint
*
Expand Down Expand Up @@ -822,26 +781,6 @@ typedef struct nccl_net_ofi_rdma_device {
#endif
} nccl_net_ofi_rdma_device_t;


/*
* @brief RDMA domain rail
*
* Pairs the libfabric access domain handle with a completion queue
* handle. NOTE(review): presumably one instance exists per rail of an
* RDMA domain -- confirm against the code that allocates domain_rails.
*/
typedef struct nccl_net_ofi_rdma_domain_rail {
/* Access domain handles */
struct fid_domain *domain;

/* Completion Queue handle */
struct fid_cq *cq;
} nccl_net_ofi_rdma_domain_rail_t;


/*
* @brief RDMA domain
*
* RDMA-specific domain structure: embeds the generic domain object and
* adds the per-rail domain resources plus the endpoint/address list.
*/
typedef struct nccl_net_ofi_rdma_domain {
/*
* Generic domain object, embedded as the first member.
* NOTE(review): presumably placed first so this struct can be accessed
* through a nccl_net_ofi_domain_t pointer -- confirm.
*/
nccl_net_ofi_domain_t base;

/* Number of rails (presumably the number of entries in domain_rails -- TODO confirm) */
int num_rails;
/* Per-rail domain resources (see nccl_net_ofi_rdma_domain_rail_t) */
nccl_net_ofi_rdma_domain_rail_t *domain_rails;

/* List of endpoints and set of addresses they have connections to */
nccl_ofi_ep_addr_list_t *ep_addr_list;
} nccl_net_ofi_rdma_domain_t;


struct nccl_net_ofi_rdma_plugin {
nccl_net_ofi_plugin_t base;

Expand Down
2 changes: 1 addition & 1 deletion src/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ sources = \
nccl_ofi_net.c \
nccl_ofi_sendrecv.c \
nccl_ofi_system.c \
nccl_ofi_rdma.c \
nccl_ofi_rdma.cc \
nccl_ofi_kvstore.cc \
nccl_ofi_scheduler.c \
nccl_ofi_topo.c \
Expand Down
Loading

0 comments on commit 8e6fd61

Please sign in to comment.