update

author: 智皓张 <[email protected]> 2023-08-04 15:26:31 +0800
committer: 智皓张 <[email protected]> 2023-08-04 15:26:31 +0800
commit: e0ded3bfc164f48b3f7a4083cffd6c1b2fff4439 (patch)
tree: 9a7eabc2bebe3f5e5a967b1170abcacd3356828f /src
parent: 1c82c0c7a27ea7778a5d2ca5104d822209afeb75 (diff)
4 files changed, 1385 insertions, 0 deletions
diff --git a/src/rdma_client.c b/src/rdma_client.c
new file mode 100644
index 0000000..a640be6
--- /dev/null
+++ b/src/rdma_client.c
@@ -0,0 +1,553 @@
+/*
+ * An example RDMA client side code. 
+ * Author: Animesh Trivedi 
+ *         [email protected]
+ */
+
+#include "rdma_common.h"
+
+/* File-scope RDMA state shared by the client functions in this file */
+/* These are RDMA connection related resources */
+static struct rdma_event_channel *cm_event_channel = NULL;
+static struct rdma_cm_id *cm_client_id = NULL;
+static struct ibv_pd *pd = NULL;
+static struct ibv_comp_channel *io_completion_channel = NULL;
+static struct ibv_cq *client_cq = NULL;
+static struct ibv_qp_init_attr qp_init_attr;
+static struct ibv_qp *client_qp;
+/* These are memory buffers related resources */
+static struct ibv_mr *client_metadata_mr = NULL, 
+		     *client_src_mr = NULL, 
+		     *client_dst_mr = NULL, 
+		     *server_metadata_mr = NULL;
+static struct rdma_buffer_attr client_metadata_attr, server_metadata_attr;
+static struct ibv_send_wr client_send_wr, *bad_client_send_wr = NULL;
+static struct ibv_recv_wr server_recv_wr, *bad_server_recv_wr = NULL;
+static struct ibv_sge client_send_sge, server_recv_sge;
+/* Source and destination buffers: src is RDMA-written to the server, dst receives the RDMA read */
+static char *src = NULL, *dst = NULL; 
+
+/* Test helper: compares the first strlen(src) bytes of src and dst; 0 means they match */
+static int check_src_dst() {
+	const size_t nbytes = strlen(src);
+	return memcmp(src, dst, nbytes);
+}
+
+/* Prepares the client side resources for an RDMA connection: CM event channel,
+ * CM id, address and route resolution, PD, completion channel, CQ and QP.
+ * @s_addr: IPv4 address and port of the server.
+ * Returns 0 on success and a non-zero error code otherwise. Resources created
+ * before a failing step are not released here. */
+static int client_prepare_connection(struct sockaddr_in *s_addr)
+{
+	struct rdma_cm_event *cm_event = NULL;
+	int ret = -1;
+	/*  Open a channel used to report asynchronous communication event */
+	cm_event_channel = rdma_create_event_channel();
+	if (!cm_event_channel) {
+		rdma_error("Creating cm event channel failed, errno: %d \n", -errno);
+		return -errno;
+	}
+	debug("RDMA CM event channel is created at : %p \n", cm_event_channel);
+	/* rdma_cm_id is the connection identifier (like socket) which is used 
+	 * to define an RDMA connection. 
+	 */
+	ret = rdma_create_id(cm_event_channel, &cm_client_id, NULL, RDMA_PS_TCP);
+	if (ret) {
+		rdma_error("Creating cm id failed with errno: %d \n", -errno); 
+		return -errno;
+	}
+	/* Resolve destination and optional source addresses from IP addresses  to
+	 * an RDMA address.  If successful, the specified rdma_cm_id will be bound
+	 * to a local device. */
+	ret = rdma_resolve_addr(cm_client_id, NULL, (struct sockaddr*) s_addr, 2000);
+	if (ret) {
+		rdma_error("Failed to resolve address, errno: %d \n", -errno);
+		return -errno;
+	}
+	debug("waiting for cm event: RDMA_CM_EVENT_ADDR_RESOLVED\n");
+	ret = process_rdma_cm_event(cm_event_channel, 
+			RDMA_CM_EVENT_ADDR_RESOLVED, &cm_event);
+	if (ret) {
+		rdma_error("Failed to receive a valid event, ret = %d \n", ret);
+		return ret;
+	}
+	/* we ack the event */
+	ret = rdma_ack_cm_event(cm_event);
+	if (ret) {
+		rdma_error("Failed to acknowledge the CM event, errno: %d\n", -errno);
+		return -errno;
+	}
+	debug("RDMA address is resolved \n");
+
+	/* Resolves an RDMA route to the destination address in order to 
+	 * establish a connection */
+	ret = rdma_resolve_route(cm_client_id, 2000);
+	if (ret) {
+		rdma_error("Failed to resolve route, erno: %d \n", -errno);
+		return -errno;
+	}
+	debug("waiting for cm event: RDMA_CM_EVENT_ROUTE_RESOLVED\n");
+	ret = process_rdma_cm_event(cm_event_channel, 
+			RDMA_CM_EVENT_ROUTE_RESOLVED, &cm_event);
+	if (ret) {
+		rdma_error("Failed to receive a valid event, ret = %d \n", ret);
+		return ret;
+	}
+	/* we ack the event */
+	ret = rdma_ack_cm_event(cm_event);
+	if (ret) {
+		rdma_error("Failed to acknowledge the CM event, errno: %d \n", -errno);
+		return -errno;
+	}
+	printf("Trying to connect to server at : %s port: %d \n", 
+			inet_ntoa(s_addr->sin_addr),
+			ntohs(s_addr->sin_port));
+	/* Protection Domain (PD) is similar to a "process abstraction" 
+	 * in the operating system. All resources are tied to a particular PD. 
+	 * And accessing resources across PD will result in a protection fault.
+	 */
+	pd = ibv_alloc_pd(cm_client_id->verbs);
+	if (!pd) {
+		rdma_error("Failed to alloc pd, errno: %d \n", -errno);
+		return -errno;
+	}
+	debug("pd allocated at %p \n", pd);
+	/* Now we need a completion channel, where the I/O completion 
+	 * notifications are sent. Remember, this is different from connection 
+	 * management (CM) event notifications. 
+	 * A completion channel is also tied to an RDMA device, hence we will 
+	 * use cm_client_id->verbs. 
+	 */
+	io_completion_channel = ibv_create_comp_channel(cm_client_id->verbs);
+	if (!io_completion_channel) {
+		rdma_error("Failed to create IO completion event channel, errno: %d\n", -errno);
+		return -errno;
+	}
+	debug("completion event channel created at : %p \n", io_completion_channel);
+	/* Now we create a completion queue (CQ) where actual I/O 
+	 * completion metadata is placed. The metadata is packed into a structure 
+	 * called struct ibv_wc (wc = work completion). ibv_wc has detailed 
+	 * information about the work completion. An I/O request in RDMA world 
+	 * is called "work" ;) 
+	 */
+	client_cq = ibv_create_cq(cm_client_id->verbs /* which device*/, 
+			CQ_CAPACITY /* maximum capacity*/, 
+			NULL /* user context, not used here */,
+			io_completion_channel /* which IO completion channel */, 
+			0 /* signaling vector, not used here*/);
+	if (!client_cq) {
+		rdma_error("Failed to create CQ, errno: %d \n", -errno);
+		return -errno;
+	}
+	debug("CQ created at %p with %d elements \n", client_cq, client_cq->cqe);
+	/* ibv_req_notify_cq() hands back the error code itself; errno may be stale or 0 */
+	ret = ibv_req_notify_cq(client_cq, 0);
+	if (ret) {
+		rdma_error("Failed to request notifications, errno: %d\n", -ret);
+		return -ret;
+	}
+	/* Now the last step, set up the queue pair (send, recv) queues and their capacity.
+	 * The capacity here is defined statically but this can be probed from the 
+	 * device. We just use a small number as defined in rdma_common.h */
+	bzero(&qp_init_attr, sizeof qp_init_attr);
+	qp_init_attr.cap.max_recv_sge = MAX_SGE; /* Maximum SGE per receive posting */
+	qp_init_attr.cap.max_recv_wr = MAX_WR; /* Maximum receive posting capacity */
+	qp_init_attr.cap.max_send_sge = MAX_SGE; /* Maximum SGE per send posting */
+	qp_init_attr.cap.max_send_wr = MAX_WR; /* Maximum send posting capacity */
+	qp_init_attr.qp_type = IBV_QPT_RC; /* QP type, RC = Reliable connection */
+	/* We use same completion queue, but one can use different queues */
+	qp_init_attr.recv_cq = client_cq; /* Where should I notify for receive completion operations */
+	qp_init_attr.send_cq = client_cq; /* Where should I notify for send completion operations */
+	/*Lets create a QP */
+	ret = rdma_create_qp(cm_client_id /* which connection id */,
+			pd /* which protection domain*/,
+			&qp_init_attr /* Initial attributes */);
+	if (ret) {
+		rdma_error("Failed to create QP, errno: %d \n", -errno);
+		return -errno;
+	}
+	client_qp = cm_client_id->qp;
+	debug("QP created at %p \n", client_qp);
+	return 0;
+}
+
+/* Registers server_metadata_attr as the landing area for the server's buffer
+ * metadata and posts a receive for it; meant to run before rdma_connect(). */
+static int client_pre_post_recv_buffer()
+{
+	int rc;
+	server_metadata_mr = rdma_buffer_register(pd, &server_metadata_attr,
+			sizeof(server_metadata_attr), IBV_ACCESS_LOCAL_WRITE);
+	if (server_metadata_mr == NULL) {
+		rdma_error("Failed to setup the server metadata mr , -ENOMEM\n");
+		return -ENOMEM;
+	}
+	/* one scatter/gather element covering the whole registered region */
+	server_recv_sge.addr = (uint64_t) server_metadata_mr->addr;
+	server_recv_sge.length = (uint32_t) server_metadata_mr->length;
+	server_recv_sge.lkey = (uint32_t) server_metadata_mr->lkey;
+	/* a zeroed receive work request carrying just that element */
+	memset(&server_recv_wr, 0, sizeof(server_recv_wr));
+	server_recv_wr.num_sge = 1;
+	server_recv_wr.sg_list = &server_recv_sge;
+	/* hand the request to the QP; the last argument receives the failing WR, if any */
+	rc = ibv_post_recv(client_qp, &server_recv_wr,
+			&bad_server_recv_wr);
+	if (rc != 0) {
+		rdma_error("Failed to pre-post the receive buffer, errno: %d \n", rc);
+		return rc;
+	}
+	debug("Receive buffer pre-posting is successful \n");
+	return 0;
+}
+
+/* Issues the connection request on cm_client_id and waits on the CM event
+ * channel until the connection is reported as established.
+ * Returns 0 on success, a non-zero error code otherwise. */
+static int client_connect_to_server() 
+{
+	struct rdma_cm_event *established = NULL;
+	struct rdma_conn_param params;
+	int rc;
+	memset(&params, 0, sizeof(params));
+	params.retry_count = 3; /* if fail, then how many times to retry */
+	params.responder_resources = 3;
+	params.initiator_depth = 3;
+	rc = rdma_connect(cm_client_id, &params);
+	if (rc != 0) {
+		rdma_error("Failed to connect to remote host , errno: %d\n", -errno);
+		return -errno;
+	}
+	debug("waiting for cm event: RDMA_CM_EVENT_ESTABLISHED\n");
+	rc = process_rdma_cm_event(cm_event_channel, RDMA_CM_EVENT_ESTABLISHED,
+			&established);
+	if (rc != 0) {
+		rdma_error("Failed to get cm event, ret = %d \n", rc);
+		return rc;
+	}
+	/* the event was handed to us, so it is ours to acknowledge */
+	if (rdma_ack_cm_event(established) != 0) {
+		rdma_error("Failed to acknowledge cm event, errno: %d\n", -errno);
+		return -errno;
+	}
+	printf("The client is connected successfully \n");
+	return 0;
+}
+
+/* Exchange buffer metadata with the server. The client sends its own, and then
+ * receives the server's into server_metadata_attr (the receive for it must be
+ * posted beforehand). The client-side metadata is _not_ used by the server
+ * because this program is client driven; it is sent for illustration purposes.
+ * Returns 0 on success, a non-zero error code otherwise.
+ */
+static int client_xchange_metadata_with_server()
+{
+	struct ibv_wc wc[2];
+	int ret = -1;
+	client_src_mr = rdma_buffer_register(pd,
+			src,
+			strlen(src),
+			(IBV_ACCESS_LOCAL_WRITE|
+			 IBV_ACCESS_REMOTE_READ|
+			 IBV_ACCESS_REMOTE_WRITE));
+	if(!client_src_mr){
+		/* ret still holds its initial -1, which doubles as the error code */
+		rdma_error("Failed to register the first buffer, ret = %d \n", ret);
+		return ret;
+	}
+	/* we prepare metadata for the first buffer */
+	client_metadata_attr.address = (uint64_t) client_src_mr->addr; 
+	client_metadata_attr.length = client_src_mr->length; 
+	client_metadata_attr.stag.local_stag = client_src_mr->lkey;
+	/* now we register the metadata memory */
+	client_metadata_mr = rdma_buffer_register(pd,
+			&client_metadata_attr,
+			sizeof(client_metadata_attr),
+			IBV_ACCESS_LOCAL_WRITE);
+	if(!client_metadata_mr) {
+		rdma_error("Failed to register the client metadata buffer, ret = %d \n", ret);
+		return ret;
+	}
+	/* now we fill up SGE */
+	client_send_sge.addr = (uint64_t) client_metadata_mr->addr;
+	client_send_sge.length = (uint32_t) client_metadata_mr->length;
+	client_send_sge.lkey = client_metadata_mr->lkey;
+	/* now we link to the send work request */
+	bzero(&client_send_wr, sizeof(client_send_wr));
+	client_send_wr.sg_list = &client_send_sge;
+	client_send_wr.num_sge = 1;
+	client_send_wr.opcode = IBV_WR_SEND;
+	client_send_wr.send_flags = IBV_SEND_SIGNALED;
+	/* Now we post it. ibv_post_send() hands back the error code as its return
+	 * value, so ret is reported rather than errno, which may be stale or 0. */
+	ret = ibv_post_send(client_qp, &client_send_wr, &bad_client_send_wr);
+	if (ret) {
+		rdma_error("Failed to send client metadata, errno: %d \n", -ret);
+		return -ret;
+	}
+	/* at this point we are expecting 2 work completion. One for our 
+	 * send and one for recv that we will get from the server for 
+	 * its buffer information */
+	ret = process_work_completion_events(io_completion_channel, 
+			wc, 2);
+	if(ret != 2) {
+		rdma_error("We failed to get 2 work completions , ret = %d \n",
+				ret);
+		return ret;
+	}
+	debug("Server sent us its buffer location and credentials, showing \n");
+	show_rdma_buffer_attr(&server_metadata_attr);
+	return 0;
+}
+
+/* This function does :
+ * 1) Register the destination buffer for RDMA operations 
+ * 2) RDMA write from src -> remote buffer 
+ * 3) RDMA read from remote buffer -> dst
+ * Each operation is posted and then waited for until its completion arrives.
+ * Returns 0 on success, a non-zero error code otherwise.
+ */ 
+static int client_remote_memory_ops() 
+{
+	struct ibv_wc wc;
+	int ret = -1;
+	/* dst is registered with the length of src, the amount read back into it */
+	client_dst_mr = rdma_buffer_register(pd,
+			dst,
+			strlen(src),
+			(IBV_ACCESS_LOCAL_WRITE | 
+			 IBV_ACCESS_REMOTE_WRITE | 
+			 IBV_ACCESS_REMOTE_READ));
+	if (!client_dst_mr) {
+		rdma_error("We failed to create the destination buffer, -ENOMEM\n");
+		return -ENOMEM;
+	}
+	/* Step 1: is to copy the local buffer into the remote buffer. We will 
+	 * reuse the previous variables. */
+	/* now we fill up SGE */
+	client_send_sge.addr = (uint64_t) client_src_mr->addr;
+	client_send_sge.length = (uint32_t) client_src_mr->length;
+	client_send_sge.lkey = client_src_mr->lkey;
+	/* now we link to the send work request */
+	bzero(&client_send_wr, sizeof(client_send_wr));
+	client_send_wr.sg_list = &client_send_sge;
+	client_send_wr.num_sge = 1;
+	client_send_wr.opcode = IBV_WR_RDMA_WRITE;
+	client_send_wr.send_flags = IBV_SEND_SIGNALED;
+	/* we have to tell server side info for RDMA */
+	client_send_wr.wr.rdma.rkey = server_metadata_attr.stag.remote_stag;
+	client_send_wr.wr.rdma.remote_addr = server_metadata_attr.address;
+	/* Now we post it. ibv_post_send() hands back the error code as its return
+	 * value, so ret (not errno) is what gets reported. */
+	ret = ibv_post_send(client_qp, &client_send_wr, &bad_client_send_wr);
+	if (ret) {
+		rdma_error("Failed to write client src buffer, errno: %d \n", -ret);
+		return -ret;
+	}
+	/* at this point we are expecting 1 work completion for the write */
+	ret = process_work_completion_events(io_completion_channel, 
+			&wc, 1);
+	if(ret != 1) {
+		rdma_error("We failed to get 1 work completions , ret = %d \n",
+				ret);
+		return ret;
+	}
+	debug("Client side WRITE is complete \n");
+	/* Step 2: now we prepare a READ using same variables but for destination,
+	 * fetching the remote buffer back into dst */
+	client_send_sge.addr = (uint64_t) client_dst_mr->addr;
+	client_send_sge.length = (uint32_t) client_dst_mr->length;
+	client_send_sge.lkey = client_dst_mr->lkey;
+	/* now we link to the send work request */
+	bzero(&client_send_wr, sizeof(client_send_wr));
+	client_send_wr.sg_list = &client_send_sge;
+	client_send_wr.num_sge = 1;
+	client_send_wr.opcode = IBV_WR_RDMA_READ;
+	client_send_wr.send_flags = IBV_SEND_SIGNALED;
+	/* we have to tell server side info for RDMA */
+	client_send_wr.wr.rdma.rkey = server_metadata_attr.stag.remote_stag;
+	client_send_wr.wr.rdma.remote_addr = server_metadata_attr.address;
+	/* Now we post it; as for the write, the return value of ibv_post_send()
+	 * is the error code */
+	ret = ibv_post_send(client_qp, &client_send_wr, &bad_client_send_wr);
+	if (ret) {
+		rdma_error("Failed to read client dst buffer from the master, errno: %d \n", -ret);
+		return -ret;
+	}
+	/* at this point we are expecting 1 work completion for the read */
+	ret = process_work_completion_events(io_completion_channel, 
+			&wc, 1);
+	if(ret != 1) {
+		rdma_error("We failed to get 1 work completions , ret = %d \n",
+				ret);
+		return ret;
+	}
+	debug("Client side READ is complete \n");
+	return 0;
+}
+
+/* This function disconnects the RDMA connection from the server and cleans up 
+ * all the resources. Teardown is best effort: failures are logged and the
+ * remaining resources are still released. Always returns 0.
+ */
+static int client_disconnect_and_clean()
+{
+	struct rdma_cm_event *cm_event = NULL;
+	int ret = -1;
+	/* active disconnect from the client side */
+	ret = rdma_disconnect(cm_client_id);
+	if (ret) {
+		rdma_error("Failed to disconnect, errno: %d \n", -errno);
+		//continuing anyways
+	}
+	ret = process_rdma_cm_event(cm_event_channel, 
+			RDMA_CM_EVENT_DISCONNECTED,
+			&cm_event);
+	if (ret) {
+		/* On failure the helper either never obtained an event or has
+		 * already acknowledged it, so there is nothing left to ack. */
+		rdma_error("Failed to get RDMA_CM_EVENT_DISCONNECTED event, ret = %d\n",
+				ret);
+	} else if (rdma_ack_cm_event(cm_event)) {
+		rdma_error("Failed to acknowledge cm event, errno: %d\n", -errno);
+		//continuing anyways
+	}
+	/* Destroy QP */
+	rdma_destroy_qp(cm_client_id);
+	/* Destroy client cm id */
+	ret = rdma_destroy_id(cm_client_id);
+	if (ret) {
+		rdma_error("Failed to destroy client id cleanly, %d \n", -errno);
+		// we continue anyways;
+	}
+	/* Destroy CQ; the ibv_* teardown calls hand back the error code as return value */
+	ret = ibv_destroy_cq(client_cq);
+	if (ret) {
+		rdma_error("Failed to destroy completion queue cleanly, %d \n", -ret);
+		// we continue anyways;
+	}
+	/* Destroy completion channel */
+	ret = ibv_destroy_comp_channel(io_completion_channel);
+	if (ret) {
+		rdma_error("Failed to destroy completion channel cleanly, %d \n", -ret);
+		// we continue anyways;
+	}
+	/* Destroy memory buffers */
+	rdma_buffer_deregister(server_metadata_mr);
+	rdma_buffer_deregister(client_metadata_mr);	
+	rdma_buffer_deregister(client_src_mr);	
+	rdma_buffer_deregister(client_dst_mr);	
+	/* We free the buffers and clear the pointers so they cannot be reused */
+	free(src);
+	free(dst);
+	src = dst = NULL;
+	/* Destroy protection domain */
+	ret = ibv_dealloc_pd(pd);
+	if (ret) {
+		rdma_error("Failed to destroy client protection domain cleanly, %d \n", -ret);
+		// we continue anyways;
+	}
+	rdma_destroy_event_channel(cm_event_channel);
+	printf("Client resource clean up is complete \n");
+	return 0;
+}
+
+/* Prints the command line help to stdout, then exits with status 1 */
+void usage() {
+	fputs("Usage:\nrdma_client: [-a <server_addr>] [-p <server_port>] -s string (required)\n", stdout);
+	printf("(default IP is 127.0.0.1 and port is %d)\n", DEFAULT_RDMA_PORT);
+	exit(1);
+}
+
+/* Client entry point: parse options, connect, exchange metadata, run and verify the RDMA round trip, clean up */
+int main(int argc, char **argv) {
+	struct sockaddr_in server_sockaddr;
+	int ret, option;
+	in_port_t port = 0; /* network byte order, 0 = not given */
+	size_t len;
+	bzero(&server_sockaddr, sizeof server_sockaddr);
+	server_sockaddr.sin_family = AF_INET;
+	server_sockaddr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+	src = dst = NULL; /* buffers are NULL */
+	/* Parse Command Line Arguments */
+	while ((option = getopt(argc, argv, "s:a:p:")) != -1) {
+		switch (option) {
+			case 's':
+				len = strlen(optarg);
+				printf("Passed string is : %s , with count %u \n", 
+						optarg, (unsigned int) len);
+				/* A repeated -s replaces the earlier buffers. The extra zeroed
+				 * byte keeps both NUL terminated: strlen(src) is used later. */
+				free(src);
+				free(dst);
+				src = calloc(len + 1, 1);
+				dst = calloc(len + 1, 1);
+				if (!src || !dst) {
+					rdma_error("Failed to allocate memory : -ENOMEM\n");
+					free(src);
+					free(dst);
+					return -ENOMEM;
+				}
+				memcpy(src, optarg, len);
+				break;
+			case 'a':
+				/* remember, this overwrites the whole address, port included */
+				ret = get_addr(optarg, (struct sockaddr*) &server_sockaddr);
+				if (ret) {
+					rdma_error("Invalid IP \n");
+					return ret;
+				}
+				break;
+			case 'p':
+				/* passed port to connect to */
+				port = htons(strtol(optarg, NULL, 0)); 
+				break;
+			default:
+				usage();
+				break;
+			}
+		}
+	/* set after parsing so that -a cannot wipe a port given by -p; 0 selects the default port */
+	server_sockaddr.sin_port = port ? port : htons(DEFAULT_RDMA_PORT);
+	if (src == NULL) {
+		printf("Please provide a string to copy \n");
+		usage();
+	}
+	ret = client_prepare_connection(&server_sockaddr);
+	if (ret) { 
+		rdma_error("Failed to setup client connection , ret = %d \n", ret);
+		return ret;
+	}
+	ret = client_pre_post_recv_buffer(); 
+	if (ret) { 
+		rdma_error("Failed to setup client connection , ret = %d \n", ret);
+		return ret;
+	}
+	ret = client_connect_to_server();
+	if (ret) { 
+		rdma_error("Failed to setup client connection , ret = %d \n", ret);
+		return ret;
+	}
+	ret = client_xchange_metadata_with_server();
+	if (ret) {
+		rdma_error("Failed to setup client connection , ret = %d \n", ret);
+		return ret;
+	}
+	ret = client_remote_memory_ops();
+	if (ret) {
+		rdma_error("Failed to finish remote memory ops, ret = %d \n", ret);
+		return ret;
+	}
+	if (check_src_dst()) {
+		rdma_error("src and dst buffers do not match \n");
+	} else {
+		printf("...\nSUCCESS, source and destination buffers match \n");
+	}
+	ret = client_disconnect_and_clean();
+	if (ret) {
+		rdma_error("Failed to cleanly disconnect and clean up resources \n");
+	}
+	return ret;
+}
+
diff --git a/src/rdma_common.c b/src/rdma_common.c
new file mode 100644
index 0000000..0478c29
--- /dev/null
+++ b/src/rdma_common.c
@@ -0,0 +1,210 @@
+/*
+ * Implementation of the common RDMA functions. 
+ *
+ * Authors: Animesh Trivedi
+ *          [email protected] 
+ */
+
+#include "rdma_common.h"
+
+/* Prints a CM id to stdout: its address, device context and name, event channel, QP and port details */
+void show_rdma_cmid(struct rdma_cm_id *id)
+{
+	if (id == NULL) {
+		rdma_error("Passed ptr is NULL\n");
+		return;
+	}
+	printf("RDMA cm id at %p \n", id);
+	if (id->verbs != NULL && id->verbs->device != NULL) {
+		printf("dev_ctx: %p (device name: %s) \n", id->verbs, id->verbs->device->name);
+	}
+	if (id->channel != NULL) {
+		printf("cm event channel %p\n", id->channel);
+	}
+	printf("QP: %p, port_space %x, port_num %u \n", id->qp, id->ps, id->port_num);
+}
+
+/* Prints the address, length and stag of a buffer attribute between two ruler lines */
+void show_rdma_buffer_attr(struct rdma_buffer_attr *attr){
+	static const char ruler[] = "---------------------------------------------------------\n";
+	if (attr == NULL) {
+		rdma_error("Passed attr is NULL\n");
+		return;
+	}
+	fputs(ruler, stdout);
+	printf("buffer attr, addr: %p , len: %u , stag : 0x%x \n", 
+			(void*) attr->address, (unsigned int) attr->length, attr->stag.local_stag);
+	fputs(ruler, stdout);
+}
+
+/* Allocates a zeroed buffer of 'size' bytes and registers it in 'pd'; returns NULL (buffer released) on failure */
+struct ibv_mr* rdma_buffer_alloc(struct ibv_pd *pd, uint32_t size,
+    enum ibv_access_flags permission) 
+{
+	if (pd == NULL) {
+		rdma_error("Protection domain is NULL \n");
+		return NULL;
+	}
+	void *mem = calloc(1, size);
+	if (mem == NULL) {
+		rdma_error("failed to allocate buffer, -ENOMEM\n");
+		return NULL;
+	}
+	debug("Buffer allocated: %p , len: %u \n", mem, size);
+	struct ibv_mr *region = rdma_buffer_register(pd, mem, size, permission);
+	if (region == NULL) {
+		free(mem);
+	}
+	return region;
+}
+
+/* Registers 'length' bytes starting at 'addr' as a memory region in 'pd' with
+ * the access flags in 'permission'. Returns the memory region, or NULL when
+ * 'pd' is NULL or the registration fails. */
+struct ibv_mr *rdma_buffer_register(struct ibv_pd *pd, 
+		void *addr, uint32_t length, 
+		enum ibv_access_flags permission)
+{
+	if (pd == NULL) {
+		rdma_error("Protection domain is NULL, ignoring \n");
+		return NULL;
+	}
+	struct ibv_mr *region = ibv_reg_mr(pd, addr, length, permission);
+	if (region == NULL) {
+		rdma_error("Failed to create mr on buffer, errno: %d \n", -errno);
+		return NULL;
+	}
+	debug("Registered: %p , len: %u , stag: 0x%x \n", 
+			region->addr, (unsigned int) region->length, region->lkey);
+	return region;
+}
+
+/* Deregisters 'mr', then frees the buffer it covered; the address is read before deregistering */
+void rdma_buffer_free(struct ibv_mr *mr) {
+	if (mr == NULL) {
+		rdma_error("Passed memory region is NULL, ignoring\n");
+		return;
+	}
+	void *backing = mr->addr;
+	rdma_buffer_deregister(mr);
+	debug("Buffer %p free'ed\n", backing);
+	free(backing);
+}
+
+/* Deregisters 'mr'. A NULL region is reported and otherwise ignored; the
+ * result of ibv_dereg_mr() is not inspected. */
+void rdma_buffer_deregister(struct ibv_mr *mr) 
+{
+	if (mr == NULL) {
+		rdma_error("Passed memory region is NULL, ignoring\n");
+		return;
+	}
+	debug("Deregistered: %p , len: %u , stag : 0x%x \n", 
+			mr->addr, (unsigned int) mr->length, mr->lkey);
+	ibv_dereg_mr(mr);
+}
+
+/* Blocks for the next CM event on 'echannel' and checks that it has status 0
+ * and is of type 'expected_event'. On success returns 0 and the caller must
+ * acknowledge *cm_event. On failure returns a non-zero code; the event, if
+ * any, has already been acknowledged and *cm_event is reset to NULL. */
+int process_rdma_cm_event(struct rdma_event_channel *echannel, 
+		enum rdma_cm_event_type expected_event, struct rdma_cm_event **cm_event)
+{
+	int ret = rdma_get_cm_event(echannel, cm_event);
+	if (ret) {
+		ret = errno ? -errno : -1; /* read before logging can disturb errno */
+		rdma_error("Failed to retrieve a cm event, errno: %d \n", ret);
+		*cm_event = NULL;
+		return ret;
+	}
+	if ((*cm_event)->status != 0) {
+		rdma_error("CM event has non zero status: %d\n", (*cm_event)->status);
+		ret = -((*cm_event)->status);
+	} else if ((*cm_event)->event != expected_event) {
+		rdma_error("Unexpected event received: %s [ expecting: %s ]\n", 
+				rdma_event_str((*cm_event)->event),
+				rdma_event_str(expected_event));
+		ret = -1; // unexpected event :(
+	}
+	if (ret) {
+		/* a bad event is acknowledged here; the pointer is cleared so it cannot be acknowledged twice */
+		rdma_ack_cm_event(*cm_event);
+		*cm_event = NULL;
+		return ret;
+	}
+	debug("A new %s type event is received \n", rdma_event_str((*cm_event)->event));
+	return ret; /* 0: the caller must acknowledge the event */
+}
+
+
+/* Waits for one notification on 'comp_channel', then polls the notifying CQ
+ * until 'max_wc' work completions have been reaped into 'wc'.
+ * Returns the number of completions (max_wc) when all of them succeeded, and
+ * a negative value on failure. */
+int process_work_completion_events (struct ibv_comp_channel *comp_channel, 
+		struct ibv_wc *wc, int max_wc)
+{
+	struct ibv_cq *cq_ptr = NULL;
+	void *context = NULL;
+	int ret = -1, i, total_wc = 0;
+	/* We wait for the notification on the CQ channel */
+	ret = ibv_get_cq_event(comp_channel, /* IO channel where we are expecting the notification */ 
+			&cq_ptr, /* which CQ has an activity. This should be the same as CQ we created before */ 
+			&context); /* Associated CQ user context */
+	if (ret) {
+		rdma_error("Failed to get next CQ event due to %d \n", -errno);
+		return -errno;
+	}
+	/* Similar to CM events, CQ events must be acknowledged. It is done right
+	 * away so that no exit path below leaves the event unacknowledged, which
+	 * would block ibv_destroy_cq(). 1 = one notification, not the WC count. */
+	ibv_ack_cq_events(cq_ptr, 1);
+	/* Request for more notifications. The error code is the return value. */
+	ret = ibv_req_notify_cq(cq_ptr, 0);
+	if (ret){
+		rdma_error("Failed to request further notifications %d \n", -ret);
+		return -ret;
+	}
+	/* We got notification. We reap the work completion (WC) element. It is 
+	 * unlikely but a good practice to write the CQ polling code so that it
+	 * can handle zero WCs. ibv_poll_cq can return zero. Same logic as 
+	 * MUTEX conditional variables in pthread programming.
+	 */
+	do {
+		ret = ibv_poll_cq(cq_ptr /* the CQ, we got notification for */, 
+				max_wc - total_wc /* number of remaining WC elements*/,
+				wc + total_wc/* where to store */);
+		if (ret < 0) {
+			rdma_error("Failed to poll cq for wc due to %d \n", ret);
+			return ret;
+		}
+		total_wc += ret;
+	} while (total_wc < max_wc); 
+	debug("%d WC are completed \n", total_wc);
+	/* Now we check validity and status of I/O work completions */
+	for( i = 0 ; i < total_wc ; i++) {
+		if (wc[i].status != IBV_WC_SUCCESS) {
+			rdma_error("Work completion (WC) has error status: %s at index %d\n", ibv_wc_status_str(wc[i].status), i);
+			return -(wc[i].status);
+		}
+	}
+	return total_wc; 
+}
+
+
+/* Resolves 'dst' to an IPv4 address written to 'addr' as a struct sockaddr_in; returns 0
+ * or the getaddrinfo() error. Code acknowledgment: rping.c from librdmacm/examples */
+int get_addr(char *dst, struct sockaddr *addr)
+{
+	struct addrinfo *res = NULL, hints = { .ai_family = AF_INET }; /* IPv4 only: a sockaddr_in is copied out below */
+	int ret = getaddrinfo(dst, NULL, &hints, &res);
+	if (ret) {
+		rdma_error("getaddrinfo failed - invalid hostname or IP address\n");
+		return ret;
+	}
+	memcpy(addr, res->ai_addr, sizeof(struct sockaddr_in));
+	freeaddrinfo(res);
+	return ret;
+}
+
diff --git a/src/rdma_common.h b/src/rdma_common.h
new file mode 100644
index 0000000..5a228c9
--- /dev/null
+++ b/src/rdma_common.h
@@ -0,0 +1,133 @@
+/*
+ * Header file for the common RDMA routines used in the server/client example 
+ * program. 
+ *
+ * Author: Animesh Trivedi 
+ *          [email protected] 
+ *
+ */
+
+#ifndef RDMA_COMMON_H
+#define RDMA_COMMON_H
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <errno.h>
+#include <getopt.h>
+
+#include <netdb.h>
+#include <netinet/in.h>	
+#include <arpa/inet.h>
+#include <sys/socket.h>
+
+#include <rdma/rdma_cma.h>
+#include <infiniband/verbs.h>
+
+/* Error macro: prefixes the message with file and line and writes it to stderr.
+ * No semicolon after while(0): the invocation supplies it, so the macro acts as
+ * a single statement (e.g. in an unbraced if/else). */
+#define rdma_error(msg, args...) do {\
+	fprintf(stderr, "%s : %d : ERROR : "msg, __FILE__, __LINE__, ## args);\
+}while(0)
+
+#ifdef ACN_RDMA_DEBUG 
+/* Debug macro: prints to stdout only when ACN_RDMA_DEBUG is defined */
+#define debug(msg, args...) do {\
+    printf("DEBUG: "msg, ## args);\
+}while(0)
+#else 
+/* expands to nothing when debugging is off */
+#define debug(msg, args...) 
+#endif /* ACN_RDMA_DEBUG */
+
+/* Capacity of the completion queue (CQ) */
+#define CQ_CAPACITY (16)
+/* MAX SGE capacity */
+#define MAX_SGE (2)
+/* MAX work requests */
+#define MAX_WR (8)
+/* Default port where the RDMA server is listening */
+#define DEFAULT_RDMA_PORT (20886)
+
+/* 
+ * Buffer descriptor (address, length, stag) exchanged between the server and the client.
+ * The packed attribute keeps the compiler from padding the structure. 
+ *
+ * For details see: http://gcc.gnu.org/onlinedocs/gcc/Type-Attributes.html
+ */
+struct __attribute((packed)) rdma_buffer_attr {
+  uint64_t address;
+  uint32_t length;
+  union stag {
+	  /* if we send the descriptor, we call it the local stag */
+	  uint32_t local_stag;
+	  /* if we receive the descriptor, we call it the remote stag */
+	  uint32_t remote_stag;
+  }stag;
+};
+/* resolves a given destination name to sin_addr */
+int get_addr(char *dst, struct sockaddr *addr);
+
+/* prints RDMA buffer info structure */
+void show_rdma_buffer_attr(struct rdma_buffer_attr *attr);
+
+/* 
+ * Processes an RDMA connection management (CM) event. 
+ * @echannel: CM event channel where the event is expected. 
+ * @expected_event: Expected event type 
+ * @cm_event: where the event will be stored 
+ */
+int process_rdma_cm_event(struct rdma_event_channel *echannel, 
+		enum rdma_cm_event_type expected_event,
+		struct rdma_cm_event **cm_event);
+
+/* Allocates an RDMA buffer of size 'length' with permission permission. This 
+ * function will also register the memory and returns a memory region (MR) 
+ * identifier or NULL on error. 
+ * @pd: Protection domain where the buffer should be allocated 
+ * @length: Length of the buffer 
+ * @permission: OR of IBV_ACCESS_* permissions as defined for the enum ibv_access_flags
+ */
+struct ibv_mr* rdma_buffer_alloc(struct ibv_pd *pd, 
+		uint32_t length, 
+		enum ibv_access_flags permission);
+
+/* Frees a previously allocated RDMA buffer. The buffer must be allocated by 
+ * calling rdma_buffer_alloc();
+ * @mr: RDMA memory region to free 
+ */
+void rdma_buffer_free(struct ibv_mr *mr);
+
+/* This function registers a previously allocated memory. Returns a memory region 
+ * (MR) identifier or NULL on error.
+ * @pd: protection domain where to register memory 
+ * @addr: Buffer address 
+ * @length: Length of the buffer 
+ * @permission: OR of IBV_ACCESS_* permissions as defined for the enum ibv_access_flags
+ */
+struct ibv_mr *rdma_buffer_register(struct ibv_pd *pd, 
+		void *addr, 
+		uint32_t length, 
+		enum ibv_access_flags permission);
+/* Deregisters a previously registered memory region 
+ * @mr: Memory region to deregister 
+ */
+void rdma_buffer_deregister(struct ibv_mr *mr);
+
+/* Processes a work completion (WC) notification. 
+ * @comp_channel: Completion channel where the notifications are expected to arrive 
+ * @wc: Array where to hold the work completion elements 
+ * @max_wc: Number of expected work completion (WC) elements. wc must be 
+ *          at least this size.
+ */
+int process_work_completion_events(struct ibv_comp_channel *comp_channel, 
+		struct ibv_wc *wc, 
+		int max_wc);
+
+/* prints some details from the cm id */
+void show_rdma_cmid(struct rdma_cm_id *id);
+
+#endif /* RDMA_COMMON_H */
diff --git a/src/rdma_server.c b/src/rdma_server.c
new file mode 100644
index 0000000..42f018f
--- /dev/null
+++ b/src/rdma_server.c
@@ -0,0 +1,489 @@
+/*
+ * This is a RDMA server side code. 
+ *
+ * Author: Animesh Trivedi 
+ *         [email protected] 
+ *
+ * TODO: Cleanup previously allocated resources in case of an error condition
+ */
+
+#include "rdma_common.h"
+
+/* File-scope RDMA resources the server needs to set up its RDMA connection */
+/* Event channel, where connection management (cm) related events are relayed */
+static struct rdma_event_channel *cm_event_channel = NULL;
+static struct rdma_cm_id *cm_server_id = NULL, *cm_client_id = NULL;
+static struct ibv_pd *pd = NULL;
+static struct ibv_comp_channel *io_completion_channel = NULL;
+static struct ibv_cq *cq = NULL;
+static struct ibv_qp_init_attr qp_init_attr;
+static struct ibv_qp *client_qp = NULL;
+/* RDMA memory resources: memory regions, exchanged metadata, work requests, SGEs */
+static struct ibv_mr *client_metadata_mr = NULL, *server_buffer_mr = NULL, *server_metadata_mr = NULL;
+static struct rdma_buffer_attr client_metadata_attr, server_metadata_attr;
+static struct ibv_recv_wr client_recv_wr, *bad_client_recv_wr = NULL;
+static struct ibv_send_wr server_send_wr, *bad_server_send_wr = NULL;
+static struct ibv_sge client_recv_sge, server_send_sge;
+
+/* Allocates the resources for the pending client connection: protection domain,
+ * I/O completion channel, completion queue and queue pair. cm_client_id must
+ * already be valid. The QP is created before accepting so that a receive buffer
+ * can be pre-posted. Returns 0 on success, a negative errno-style code on error.
+ */
+static int setup_client_resources()
+{
+	int ret = -1;
+	if (!cm_client_id) {
+		rdma_error("Client id is still NULL \n");
+		return -EINVAL;
+	}
+	/* We have a valid connection identifier, lets start to allocate
+	 * resources. We need:
+	 * 1. Protection Domains (PD)
+	 * 2. Memory Buffers
+	 * 3. Completion Queues (CQ)
+	 * 4. Queue Pair (QP)
+	 * Protection Domain (PD) is similar to a "process abstraction"
+	 * in the operating system. All resources are tied to a particular PD.
+	 * And accessing resources across PDs will result in a protection fault.
+	 */
+	pd = ibv_alloc_pd(cm_client_id->verbs
+			/* verbs defines a verb's provider,
+			 * i.e. the RDMA device on which the incoming
+			 * client connection arrived */);
+	if (!pd) {
+		rdma_error("Failed to allocate a protection domain errno: %d\n",
+				-errno);
+		return -errno;
+	}
+	debug("A new protection domain is allocated at %p \n", pd);
+	/* Now we need a completion channel, where the I/O completion
+	 * notifications are sent. Remember, this is different from connection
+	 * management (CM) event notifications.
+	 * A completion channel is also tied to an RDMA device, hence we will
+	 * use cm_client_id->verbs.
+	 */
+	io_completion_channel = ibv_create_comp_channel(cm_client_id->verbs);
+	if (!io_completion_channel) {
+		rdma_error("Failed to create an I/O completion event channel, %d\n",
+				-errno);
+		return -errno;
+	}
+	debug("An I/O completion event channel is created at %p \n",
+			io_completion_channel);
+	/* Now we create a completion queue (CQ) where actual I/O
+	 * completion metadata is placed. The metadata is packed into a structure
+	 * called struct ibv_wc (wc = work completion). ibv_wc has detailed
+	 * information about the work completion. An I/O request in RDMA world
+	 * is called "work" ;)
+	 */
+	cq = ibv_create_cq(cm_client_id->verbs /* which device*/,
+			CQ_CAPACITY /* maximum capacity*/,
+			NULL /* user context, not used here */,
+			io_completion_channel /* which IO completion channel */,
+			0 /* signaling vector, not used here*/);
+	if (!cq) {
+		rdma_error("Failed to create a completion queue (cq), errno: %d\n",
+				-errno);
+		return -errno;
+	}
+	debug("Completion queue (CQ) is created at %p with %d elements \n",
+			cq, cq->cqe);
+	/* Ask for the event for all activities in the completion queue */
+	ret = ibv_req_notify_cq(cq /* on which CQ */,
+			0 /* 0 = all event type, no filter*/);
+	if (ret) {
+		/* The verb returns the error code itself; errno may be stale or 0 */
+		rdma_error("Failed to request notifications on CQ errno: %d \n", -ret);
+		return -ret;
+	}
+	/* Now the last step, set up the queue pair (send, recv) queues and their capacity.
+	 * The capacity here is defined statically but this can be probed from the
+	 * device. We just use a small number as defined in rdma_common.h */
+	bzero(&qp_init_attr, sizeof qp_init_attr);
+	qp_init_attr.cap.max_recv_sge = MAX_SGE; /* Maximum SGE per receive posting */
+	qp_init_attr.cap.max_recv_wr = MAX_WR; /* Maximum receive posting capacity */
+	qp_init_attr.cap.max_send_sge = MAX_SGE; /* Maximum SGE per send posting */
+	qp_init_attr.cap.max_send_wr = MAX_WR; /* Maximum send posting capacity */
+	qp_init_attr.qp_type = IBV_QPT_RC; /* QP type, RC = Reliable connection */
+	/* We use the same completion queue, but one can use different queues */
+	qp_init_attr.recv_cq = cq; /* Where should I notify for receive completion operations */
+	qp_init_attr.send_cq = cq; /* Where should I notify for send completion operations */
+	/* Lets create a QP */
+	ret = rdma_create_qp(cm_client_id /* which connection id */,
+			pd /* which protection domain*/,
+			&qp_init_attr /* Initial attributes */);
+	if (ret) {
+		rdma_error("Failed to create QP due to errno: %d\n", -errno);
+		return -errno;
+	}
+	/* Save the reference for handy typing but is not required */
+	client_qp = cm_client_id->qp;
+	debug("Client QP created at %p\n", client_qp);
+	return ret;
+}
+
+/* Starts the RDMA server: creates the CM event channel and listening id, binds
+ * and listens on @server_addr, then blocks until one client asks to connect and
+ * stores its id in cm_client_id. Returns 0 on success, non-zero code on error. */
+static int start_rdma_server(struct sockaddr_in *server_addr)
+{
+	struct rdma_cm_event *cm_event = NULL;
+	int ret = -1;
+	/*  Open a channel used to report asynchronous communication event */
+	cm_event_channel = rdma_create_event_channel();
+	if (!cm_event_channel) {
+		rdma_error("Creating cm event channel failed with errno : (%d)\n", -errno);
+		return -errno;
+	}
+	debug("RDMA CM event channel is created successfully at %p \n",
+			cm_event_channel);
+	/* rdma_cm_id is the connection identifier (like a socket) which is
+	 * used to define an RDMA connection. */
+	ret = rdma_create_id(cm_event_channel, &cm_server_id, NULL, RDMA_PS_TCP);
+	if (ret) {
+		rdma_error("Creating server cm id failed with errno: %d \n", -errno);
+		return -errno;
+	}
+	debug("A RDMA connection id for the server is created \n");
+	/* Explicit binding of rdma cm id to the socket credentials */
+	ret = rdma_bind_addr(cm_server_id, (struct sockaddr*) server_addr);
+	if (ret) {
+		rdma_error("Failed to bind server address, errno: %d \n", -errno);
+		return -errno;
+	}
+	debug("Server RDMA CM id is successfully binded \n");
+	/* Now we start to listen on the passed IP and port. However unlike
+	 * normal TCP listen, this is a non-blocking call. When a new client is
+	 * connected, a new connection management (CM) event is generated on the
+	 * RDMA CM event channel from where the listening id was created. Here we
+	 * have only one channel, so it is easy. */
+	ret = rdma_listen(cm_server_id, 8); /* backlog = 8 clients, same as TCP, see man listen*/
+	if (ret) {
+		rdma_error("rdma_listen failed to listen on server address, errno: %d \n",
+				-errno);
+		return -errno;
+	}
+	printf("Server is listening successfully at: %s , port: %d \n",
+			inet_ntoa(server_addr->sin_addr),
+			ntohs(server_addr->sin_port));
+	/* now, we expect a client to connect and generate a RDMA_CM_EVENT_CONNECT_REQUEST
+	 * We wait (block) on the connection management event channel for
+	 * the connect event.
+	 */
+	ret = process_rdma_cm_event(cm_event_channel,
+			RDMA_CM_EVENT_CONNECT_REQUEST,
+			&cm_event);
+	if (ret) {
+		rdma_error("Failed to get cm event, ret = %d \n" , ret);
+		return ret;
+	}
+	/* Much like TCP connection, listening returns a new connection identifier
+	 * for newly connected client. In the case of RDMA, this is stored in id
+	 * field. For more details: man rdma_get_cm_event
+	 */
+	cm_client_id = cm_event->id;
+	/* Now we acknowledge the event. Acknowledging frees the resources
+	 * associated with the event structure, so anything needed from the event
+	 * (here the client id taken from its "id" field) must be saved before.
+	 */
+	ret = rdma_ack_cm_event(cm_event);
+	if (ret) {
+		rdma_error("Failed to acknowledge the cm event errno: %d \n", -errno);
+		return -errno;
+	}
+	debug("A new RDMA client connection id is stored at %p\n", cm_client_id);
+	return ret;
+}
+
+/* Pre-posts a receive buffer for the client's metadata and accepts the pending
+ * RDMA client connection, then waits until the connection is established.
+ * Requires cm_client_id and client_qp. Returns 0 on success, non-zero on error. */
+static int accept_client_connection()
+{
+	struct rdma_conn_param conn_param;
+	struct rdma_cm_event *cm_event = NULL;
+	struct sockaddr_in remote_sockaddr;
+	int ret = -1;
+	if (!cm_client_id || !client_qp) {
+		rdma_error("Client resources are not properly setup\n");
+		return -EINVAL;
+	}
+	/* we prepare the receive buffer in which we will receive the client metadata*/
+	client_metadata_mr = rdma_buffer_register(pd /* which protection domain */,
+			&client_metadata_attr /* what memory */,
+			sizeof(client_metadata_attr) /* what length */,
+			(IBV_ACCESS_LOCAL_WRITE) /* access permissions */);
+	if (!client_metadata_mr) {
+		rdma_error("Failed to register client attr buffer\n");
+		/* the helper gives no reason, so we assume ENOMEM */
+		return -ENOMEM;
+	}
+	/* We pre-post this receive buffer on the QP. SGE credentials is where we
+	 * receive the metadata from the client */
+	client_recv_sge.addr = (uint64_t) client_metadata_mr->addr; // same as &client_metadata_attr
+	client_recv_sge.length = client_metadata_mr->length;
+	client_recv_sge.lkey = client_metadata_mr->lkey;
+	/* Now we link this SGE to the work request (WR) */
+	bzero(&client_recv_wr, sizeof(client_recv_wr));
+	client_recv_wr.sg_list = &client_recv_sge;
+	client_recv_wr.num_sge = 1; // only one SGE
+	ret = ibv_post_recv(client_qp /* which QP */,
+			&client_recv_wr /* receive work request*/,
+			&bad_client_recv_wr /* error WRs */);
+	if (ret) {
+		rdma_error("Failed to pre-post the receive buffer, errno: %d \n", ret);
+		return ret;
+	}
+	debug("Receive buffer pre-posting is successful \n");
+	/* Resources are pre-allocated, so now the connection can be accepted */
+	memset(&conn_param, 0, sizeof(conn_param));
+	/* Max outstanding RDMA read/atomic operations we will issue to the peer */
+	conn_param.initiator_depth = 3; /* For this exercise, we put a small number here */
+	/* Max outstanding RDMA read/atomic operations we accept from the peer */
+	conn_param.responder_resources = 3; /* For this exercise, we put a small number */
+	ret = rdma_accept(cm_client_id, &conn_param);
+	if (ret) {
+		rdma_error("Failed to accept the connection, errno: %d \n", -errno);
+		return -errno;
+	}
+	/* We expect an RDMA_CM_EVENT_ESTABLISHED to indicate that the RDMA
+	 * connection has been established and everything is fine on both the
+	 * server and the client side. */
+	debug("Going to wait for : RDMA_CM_EVENT_ESTABLISHED event \n");
+	ret = process_rdma_cm_event(cm_event_channel,
+			RDMA_CM_EVENT_ESTABLISHED,
+			&cm_event);
+	if (ret) {
+		rdma_error("Failed to get the cm event, ret = %d \n", ret);
+		return ret; /* the helper's own code; errno may be 0 here */
+	}
+	/* We acknowledge the event */
+	ret = rdma_ack_cm_event(cm_event);
+	if (ret) {
+		rdma_error("Failed to acknowledge the cm event %d\n", -errno);
+		return -errno;
+	}
+	/* Just FYI: How to extract connection information */
+	memcpy(&remote_sockaddr /* where to save */,
+			rdma_get_peer_addr(cm_client_id) /* gives you remote sockaddr */,
+			sizeof(struct sockaddr_in) /* max size */);
+	printf("A new connection is accepted from %s \n",
+			inet_ntoa(remote_sockaddr.sin_addr));
+	return ret;
+}
+
+/* Waits for the client's metadata (completion of the pre-posted receive),
+ * allocates the buffer the client asked for and sends that buffer's address,
+ * length and remote key back to the client with a signaled SEND.
+ * Returns 0 on success; on failure the error code of the failing step. */
+static int send_server_metadata_to_client()
+{
+	struct ibv_wc wc;
+	int ret = -1;
+	/* Now, we first wait for the client to start the communication by
+	 * sending the server its metadata info. We will receive a work
+	 * completion notification for our pre-posted receive request.
+	 */
+	ret = process_work_completion_events(io_completion_channel, &wc, 1);
+	if (ret != 1) {
+		rdma_error("Failed to receive , ret = %d \n", ret);
+		return ret;
+	}
+	/* if all good, then we should have client's buffer information, lets see */
+	printf("Client side buffer information is received...\n");
+	show_rdma_buffer_attr(&client_metadata_attr);
+	printf("The client has requested buffer length of : %u bytes \n",
+			client_metadata_attr.length);
+	/* We need to setup requested memory buffer. This is where the client will
+	 * do RDMA READs and WRITEs. */
+	server_buffer_mr = rdma_buffer_alloc(pd /* which protection domain */,
+			client_metadata_attr.length /* what size to allocate */,
+			(IBV_ACCESS_LOCAL_WRITE|
+			 IBV_ACCESS_REMOTE_READ|
+			 IBV_ACCESS_REMOTE_WRITE) /* access permissions */);
+	if (!server_buffer_mr) {
+		rdma_error("Server failed to create a buffer \n");
+		/* we assume that it is due to out of memory error */
+		return -ENOMEM;
+	}
+	/* server_metadata_attr describes the buffer above to the client, hence
+	 * "metadata buffer". It is already allocated, so we only register it
+	 * and then prepare a send that tells the client where the buffer is.
+	 * The key we advertise is used by the remote side for its RDMA READs
+	 * and WRITEs, so it has to be the region's rkey, not the lkey. */
+	server_metadata_attr.address = (uint64_t) server_buffer_mr->addr;
+	server_metadata_attr.length = (uint32_t) server_buffer_mr->length;
+	server_metadata_attr.stag.local_stag = (uint32_t) server_buffer_mr->rkey;
+	server_metadata_mr = rdma_buffer_register(pd /* which protection domain*/,
+			&server_metadata_attr /* which memory to register */,
+			sizeof(server_metadata_attr) /* what is the size of memory */,
+			IBV_ACCESS_LOCAL_WRITE /* what access permission */);
+	if (!server_metadata_mr) {
+		rdma_error("Server failed to create to hold server metadata \n");
+		/* we assume that this is due to out of memory error */
+		return -ENOMEM;
+	}
+	/* We need to transmit this buffer. So we create a send request.
+	 * A send request consists of multiple SGE elements. In our case, we only
+	 * have one
+	 */
+	server_send_sge.addr = (uint64_t) &server_metadata_attr;
+	server_send_sge.length = sizeof(server_metadata_attr);
+	server_send_sge.lkey = server_metadata_mr->lkey;
+	/* now we link this sge to the send request */
+	bzero(&server_send_wr, sizeof(server_send_wr));
+	server_send_wr.sg_list = &server_send_sge;
+	server_send_wr.num_sge = 1; // only 1 SGE element in the array
+	server_send_wr.opcode = IBV_WR_SEND; // This is a send request
+	server_send_wr.send_flags = IBV_SEND_SIGNALED; // We want to get notification
+	/* Fast data path operation; the verb returns the error code itself */
+	ret = ibv_post_send(client_qp /* which QP */,
+			&server_send_wr /* Send request that we prepared before */,
+			&bad_server_send_wr /* In case of error, this will contain failed requests */);
+	if (ret) {
+		rdma_error("Posting of server metadata failed, errno: %d \n",
+				-ret);
+		return -ret;
+	}
+	/* We check for completion notification */
+	ret = process_work_completion_events(io_completion_channel, &wc, 1);
+	if (ret != 1) {
+		rdma_error("Failed to send server metadata, ret = %d \n", ret);
+		return ret;
+	}
+	debug("Local buffer metadata has been sent to the client \n");
+	return 0;
+}
+
+/* Server side shutdown logic: passively waits for the client to call
+ * rdma_disconnect(), then releases every resource created for the connection.
+ * Teardown errors are only logged. Returns 0, or non-zero if the wait/ack fails. */
+static int disconnect_and_cleanup()
+{
+	struct rdma_cm_event *cm_event = NULL;
+	int ret = -1;
+	/* Now we wait for the client to send us disconnect event */
+	debug("Waiting for cm event: RDMA_CM_EVENT_DISCONNECTED\n");
+	ret = process_rdma_cm_event(cm_event_channel,
+			RDMA_CM_EVENT_DISCONNECTED,
+			&cm_event);
+	if (ret) {
+		rdma_error("Failed to get disconnect event, ret = %d \n", ret);
+		return ret;
+	}
+	/* We acknowledge the event */
+	ret = rdma_ack_cm_event(cm_event);
+	if (ret) {
+		rdma_error("Failed to acknowledge the cm event %d\n", -errno);
+		return -errno;
+	}
+	printf("A disconnect event is received from the client...\n");
+	/* We free all the resources, starting with the QP */
+	rdma_destroy_qp(cm_client_id);
+	/* Destroy client cm id */
+	ret = rdma_destroy_id(cm_client_id);
+	if (ret) {
+		rdma_error("Failed to destroy client id cleanly, %d \n", -errno);
+		// we continue anyways;
+	}
+	/* Destroy CQ; the ibv_* teardown verbs return the error code directly */
+	ret = ibv_destroy_cq(cq);
+	if (ret) {
+		rdma_error("Failed to destroy completion queue cleanly, %d \n", -ret);
+		// we continue anyways;
+	}
+	/* Destroy completion channel */
+	ret = ibv_destroy_comp_channel(io_completion_channel);
+	if (ret) {
+		rdma_error("Failed to destroy completion channel cleanly, %d \n", -ret);
+		// we continue anyways;
+	}
+	/* Destroy memory buffers */
+	rdma_buffer_free(server_buffer_mr);
+	rdma_buffer_deregister(server_metadata_mr);
+	rdma_buffer_deregister(client_metadata_mr);
+	/* Destroy protection domain */
+	ret = ibv_dealloc_pd(pd);
+	if (ret) {
+		rdma_error("Failed to destroy client protection domain cleanly, %d \n", -ret);
+		// we continue anyways;
+	}
+	/* Destroy rdma server id */
+	ret = rdma_destroy_id(cm_server_id);
+	if (ret) {
+		rdma_error("Failed to destroy server id cleanly, %d \n", -errno);
+		// we continue anyways;
+	}
+	rdma_destroy_event_channel(cm_event_channel);
+	printf("Server shut-down is complete \n");
+	return 0;
+}
+
+
+/* Prints the command line synopsis, then terminates the process with status 1 */
+void usage()
+{
+	printf("Usage:\n" "rdma_server: [-a <server_addr>] [-p <server_port>]\n");
+	printf("(default port is %d)\n", DEFAULT_RDMA_PORT);
+	exit(1);
+}
+
+/* Server entry point: reads the optional -a <addr> and -p <port> arguments into
+ * an IPv4 socket address (wildcard address by default) and runs the server. */
+int main(int argc, char **argv)
+{
+	struct sockaddr_in server_sockaddr;
+	int opt, ret;
+	memset(&server_sockaddr, 0, sizeof(server_sockaddr));
+	server_sockaddr.sin_family = AF_INET; /* standard IP NET address */
+	server_sockaddr.sin_addr.s_addr = htonl(INADDR_ANY); /* until -a is given */
+	while ((opt = getopt(argc, argv, "a:p:")) != -1) {
+		if (opt == 'a') {
+			/* Remember, this will overwrite the port info */
+			ret = get_addr(optarg, (struct sockaddr*) &server_sockaddr);
+			if (ret != 0) {
+				rdma_error("Invalid IP \n");
+				return ret;
+			}
+		} else if (opt == 'p') {
+			/* Port to listen on, kept in network byte order */
+			server_sockaddr.sin_port = htons(strtol(optarg, NULL, 0));
+		} else {
+			/* Anything else: show the synopsis */
+			usage();
+		}
+	}
+	/* A port that is still zero means none was provided: use the default */
+	if (server_sockaddr.sin_port == 0) {
+		server_sockaddr.sin_port = htons(DEFAULT_RDMA_PORT);
+	}
+	/* Run the server stages in order; the first stage that reports a
+	 * failure ends the program with that stage's return code. */
+	ret = start_rdma_server(&server_sockaddr);
+	if (ret != 0) {
+		rdma_error("RDMA server failed to start cleanly, ret = %d \n", ret);
+		return ret;
+	}
+	ret = setup_client_resources();
+	if (ret != 0) {
+		rdma_error("Failed to setup client resources, ret = %d \n", ret);
+		return ret;
+	}
+	ret = accept_client_connection();
+	if (ret != 0) {
+		rdma_error("Failed to handle client cleanly, ret = %d \n", ret);
+		return ret;
+	}
+	ret = send_server_metadata_to_client();
+	if (ret != 0) {
+		rdma_error("Failed to send server metadata to the client, ret = %d \n", ret);
+		return ret;
+	}
+	ret = disconnect_and_cleanup();
+	if (ret != 0) {
+		rdma_error("Failed to clean up resources properly, ret = %d \n", ret);
+		return ret;
+	}
+	return 0;
+}
author	智皓张 <[email protected]>	2023-08-04 15:26:31 +0800
committer	智皓张 <[email protected]>	2023-08-04 15:26:31 +0800
commit	e0ded3bfc164f48b3f7a4083cffd6c1b2fff4439 (patch)
tree	9a7eabc2bebe3f5e5a967b1170abcacd3356828f /src
parent	1c82c0c7a27ea7778a5d2ca5104d822209afeb75 (diff)