update

author: 智皓张 <[email protected]> 2023-08-04 15:24:55 +0800
committer: 智皓张 <[email protected]> 2023-08-04 15:24:55 +0800
commit: 1c82c0c7a27ea7778a5d2ca5104d822209afeb75 (patch)
tree: a6911a2886f1fe4339e3d2b8dc0cded4f0c82618
parent: 8f0fe11da82349c15272b92115b2bc427a8e3a8e (diff)
7 files changed, 1671 insertions, 0 deletions
diff --git a/rdma-example-master/rdma-example-master/CMakeLists.txt b/rdma-example-master/rdma-example-master/CMakeLists.txt
new file mode 100644
index 0000000..2471b09
--- /dev/null
+++ b/rdma-example-master/rdma-example-master/CMakeLists.txt
@@ -0,0 +1,34 @@
+# Author : Animesh Trivedi
+#
+# Build script for the rdma_server and rdma_client example programs.
+
+# The version range keeps the file usable with current CMake releases, which
+# no longer accept compatibility with versions older than 3.5.
+cmake_minimum_required(VERSION 3.5...3.29)
+
+# Every source file is C, so only the C toolchain is enabled. Select the
+# compiler with -DCMAKE_C_COMPILER=... (or the CC environment variable); it
+# cannot be switched reliably once project() has run.
+project(rdma-example LANGUAGES C)
+
+# Default to an optimized build, but respect a build type chosen by the user
+# (e.g. -DCMAKE_BUILD_TYPE=Debug when gdb is needed).
+if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
+  set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type (Release, Debug, ...)" FORCE)
+endif()
+
+# Optional prefix of a locally installed ibverbs / rdmacm. The default keeps
+# the location that used to be hard-coded; override it with
+# -DRDMA_EXAMPLE_LOCAL_PREFIX=<prefix>.
+set(RDMA_EXAMPLE_LOCAL_PREFIX "/home/atr/local" CACHE PATH
+    "Extra prefix searched for the ibverbs and rdmacm headers and libraries")
+
+find_package(Threads REQUIRED)
+find_library(IBVERBS_LIBRARY ibverbs HINTS "${RDMA_EXAMPLE_LOCAL_PREFIX}/lib")
+find_library(RDMACM_LIBRARY rdmacm HINTS "${RDMA_EXAMPLE_LOCAL_PREFIX}/lib")
+
+# Fail at configure time with a readable message instead of at link time.
+if(NOT IBVERBS_LIBRARY OR NOT RDMACM_LIBRARY)
+  message(FATAL_ERROR
+    "libibverbs and librdmacm are required; install them or point "
+    "RDMA_EXAMPLE_LOCAL_PREFIX at their install prefix")
+endif()
+
+set(RDMA_EXAMPLE_SRC_DIR "${CMAKE_CURRENT_SOURCE_DIR}/src")
+
+# Both programs share rdma_common.c and the same usage requirements.
+foreach(rdma_example_app rdma_server rdma_client)
+  add_executable(${rdma_example_app}
+    "${RDMA_EXAMPLE_SRC_DIR}/rdma_common.c"
+    "${RDMA_EXAMPLE_SRC_DIR}/${rdma_example_app}.c")
+  target_include_directories(${rdma_example_app} PRIVATE "${RDMA_EXAMPLE_SRC_DIR}")
+  if(IS_DIRECTORY "${RDMA_EXAMPLE_LOCAL_PREFIX}/include")
+    target_include_directories(${rdma_example_app} PRIVATE
+      "${RDMA_EXAMPLE_LOCAL_PREFIX}/include")
+  endif()
+  target_link_libraries(${rdma_example_app} PRIVATE
+    Threads::Threads
+    "${IBVERBS_LIBRARY}"
+    "${RDMACM_LIBRARY}")
+  # The README runs ./bin/<program> after an in-source "cmake . && make",
+  # so the executables keep landing in <source>/bin.
+  set_target_properties(${rdma_example_app} PROPERTIES
+    RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/bin")
+endforeach()
+
diff --git a/rdma-example-master/rdma-example-master/LICENSE b/rdma-example-master/rdma-example-master/LICENSE
new file mode 100644
index 0000000..261eeb9
--- /dev/null
+++ b/rdma-example-master/rdma-example-master/LICENSE
@@ -0,0 +1,201 @@
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
diff --git a/rdma-example-master/rdma-example-master/README.md b/rdma-example-master/rdma-example-master/README.md
new file mode 100644
index 0000000..fc6af6e
--- /dev/null
+++ b/rdma-example-master/rdma-example-master/README.md
@@ -0,0 +1,51 @@
+# RDMA example
+
+A simple RDMA server client example. The code contains a lot of comments. Here is the workflow that happens in the example: 
+
+Client: 
+  1. setup RDMA resources   
+  2. connect to the server 
+  3. receive server side buffer information via send/recv exchange 
+  4. do an RDMA write to the server buffer from a (first) local buffer. The content of the buffer is the string passed with the `-s` argument. 
+  5. do an RDMA read to read the content of the server buffer into a second local buffer. 
+  6. compare the content of the first and second buffers, and match them. 
+  7. disconnect 
+
+Server: 
+  1. setup RDMA resources 
+  2. wait for a client to connect 
+  3. allocate and pin a server buffer
+  4. accept the incoming client connection 
+  5. send information about the local server buffer to the client 
+  6. wait for disconnect
+
+###### How to run      
+```text
+git clone https://github.com/animeshtrivedi/rdma-example.git
+cd ./rdma-example
+cmake .
+make
+``` 
+ 
+###### server
+```text
+./bin/rdma_server
+```
+###### client
+```text
+atr@atr:~/rdma-example$ ./bin/rdma_client -a 127.0.0.1 -s textstring 
+Passed string is : textstring , with count 10 
+Trying to connect to server at : 127.0.0.1 port: 20886 
+The client is connected successfully 
+---------------------------------------------------------
+buffer attr, addr: 0x5629832e22c0 , len: 10 , stag : 0x1617b400 
+---------------------------------------------------------
+...
+SUCCESS, source and destination buffers match 
+Client resource clean up is complete 
+atr@atr:~/rdma-example$ 
+
+```
+
+## Do not have an RDMA device?
+In case you do not have an RDMA device to test the code, you can set up the SoftiWARP software RDMA device on your Linux machine. Follow the instructions here: [https://github.com/animeshtrivedi/blog/blob/master/post/2019-06-26-siw.md](https://github.com/animeshtrivedi/blog/blob/master/post/2019-06-26-siw.md).
diff --git a/rdma-example-master/rdma-example-master/src/rdma_client.c b/rdma-example-master/rdma-example-master/src/rdma_client.c
new file mode 100644
index 0000000..a640be6
--- /dev/null
+++ b/rdma-example-master/rdma-example-master/src/rdma_client.c
@@ -0,0 +1,553 @@
+/*
+ * An example RDMA client side code. 
+ * Author: Animesh Trivedi 
+ *         [email protected]
+ */
+
+#include "rdma_common.h"
+
+/* These are basic RDMA resources */
+/* These are RDMA connection related resources */
+static struct rdma_event_channel *cm_event_channel = NULL;
+static struct rdma_cm_id *cm_client_id = NULL;
+static struct ibv_pd *pd = NULL;
+static struct ibv_comp_channel *io_completion_channel = NULL;
+static struct ibv_cq *client_cq = NULL;
+static struct ibv_qp_init_attr qp_init_attr;
+static struct ibv_qp *client_qp;
+/* These are memory buffers related resources */
+static struct ibv_mr *client_metadata_mr = NULL, 
+		     *client_src_mr = NULL, 
+		     *client_dst_mr = NULL, 
+		     *server_metadata_mr = NULL;
+static struct rdma_buffer_attr client_metadata_attr, server_metadata_attr;
+static struct ibv_send_wr client_send_wr, *bad_client_send_wr = NULL;
+static struct ibv_recv_wr server_recv_wr, *bad_server_recv_wr = NULL;
+static struct ibv_sge client_send_sge, server_recv_sge;
+/* Source and Destination buffers, where RDMA operations source and sink */
+static char *src = NULL, *dst = NULL; 
+
+/* Verification helper: compares the source buffer with the destination buffer */
+static int check_src_dst() 
+{
+	/* memcmp() yields 0 when the first strlen(src) bytes of both buffers are equal */
+	const size_t nbytes = strlen(src);
+
+	return memcmp(src, dst, nbytes);
+}
+
+/* Prepares the client side resources for an RDMA connection: CM event channel,
+ * CM id, address and route resolution, protection domain, completion channel,
+ * completion queue and queue pair. The results are stored in the file-level
+ * static variables.
+ *
+ * s_addr: IPv4 address and port of the server.
+ * Returns 0 on success; on failure the negated error code of the failing call
+ * or the status reported by process_rdma_cm_event().
+ *
+ * In every error path the error code is captured before rdma_error() runs, so
+ * the logging call cannot disturb the value that is returned.
+ */
+static int client_prepare_connection(struct sockaddr_in *s_addr)
+{
+	struct rdma_cm_event *cm_event = NULL;
+	int ret = -1;
+	/*  Open a channel used to report asynchronous communication event */
+	cm_event_channel = rdma_create_event_channel();
+	if (!cm_event_channel) {
+		ret = -errno;
+		rdma_error("Creating cm event channel failed, errno: %d \n", ret);
+		return ret;
+	}
+	debug("RDMA CM event channel is created at : %p \n", cm_event_channel);
+	/* rdma_cm_id is the connection identifier (like socket) which is used 
+	 * to define an RDMA connection. 
+	 */
+	ret = rdma_create_id(cm_event_channel, &cm_client_id, 
+			NULL,
+			RDMA_PS_TCP);
+	if (ret) {
+		ret = -errno;
+		rdma_error("Creating cm id failed with errno: %d \n", ret); 
+		return ret;
+	}
+	/* Resolve destination and optional source addresses from IP addresses  to
+	 * an RDMA address.  If successful, the specified rdma_cm_id will be bound
+	 * to a local device. */
+	ret = rdma_resolve_addr(cm_client_id, NULL, (struct sockaddr*) s_addr, 2000);
+	if (ret) {
+		ret = -errno;
+		rdma_error("Failed to resolve address, errno: %d \n", ret);
+		return ret;
+	}
+	debug("waiting for cm event: RDMA_CM_EVENT_ADDR_RESOLVED\n");
+	ret  = process_rdma_cm_event(cm_event_channel, 
+			RDMA_CM_EVENT_ADDR_RESOLVED,
+			&cm_event);
+	if (ret) {
+		rdma_error("Failed to receive a valid event, ret = %d \n", ret);
+		return ret;
+	}
+	/* we ack the event */
+	ret = rdma_ack_cm_event(cm_event);
+	if (ret) {
+		ret = -errno;
+		rdma_error("Failed to acknowledge the CM event, errno: %d\n", ret);
+		return ret;
+	}
+	debug("RDMA address is resolved \n");
+
+	/* Resolves an RDMA route to the destination address in order to 
+	 * establish a connection */
+	ret = rdma_resolve_route(cm_client_id, 2000);
+	if (ret) {
+		ret = -errno;
+		rdma_error("Failed to resolve route, erno: %d \n", ret);
+		return ret;
+	}
+	debug("waiting for cm event: RDMA_CM_EVENT_ROUTE_RESOLVED\n");
+	ret = process_rdma_cm_event(cm_event_channel, 
+			RDMA_CM_EVENT_ROUTE_RESOLVED,
+			&cm_event);
+	if (ret) {
+		rdma_error("Failed to receive a valid event, ret = %d \n", ret);
+		return ret;
+	}
+	/* we ack the event */
+	ret = rdma_ack_cm_event(cm_event);
+	if (ret) {
+		ret = -errno;
+		rdma_error("Failed to acknowledge the CM event, errno: %d \n", ret);
+		return ret;
+	}
+	printf("Trying to connect to server at : %s port: %d \n", 
+			inet_ntoa(s_addr->sin_addr),
+			ntohs(s_addr->sin_port));
+	/* Protection Domain (PD) is similar to a "process abstraction" 
+	 * in the operating system. All resources are tied to a particular PD. 
+	 * And accessing resources across PD will result in a protection fault.
+	 */
+	pd = ibv_alloc_pd(cm_client_id->verbs);
+	if (!pd) {
+		ret = -errno;
+		rdma_error("Failed to alloc pd, errno: %d \n", ret);
+		return ret;
+	}
+	debug("pd allocated at %p \n", pd);
+	/* Now we need a completion channel, where the I/O completion 
+	 * notifications are sent. Remember, this is different from connection 
+	 * management (CM) event notifications. 
+	 * A completion channel is also tied to an RDMA device, hence we will 
+	 * use cm_client_id->verbs. 
+	 */
+	io_completion_channel = ibv_create_comp_channel(cm_client_id->verbs);
+	if (!io_completion_channel) {
+		ret = -errno;
+		rdma_error("Failed to create IO completion event channel, errno: %d\n",
+			       ret);
+		return ret;
+	}
+	debug("completion event channel created at : %p \n", io_completion_channel);
+	/* Now we create a completion queue (CQ) where actual I/O 
+	 * completion metadata is placed. The metadata is packed into a structure 
+	 * called struct ibv_wc (wc = work completion). ibv_wc has detailed 
+	 * information about the work completion. An I/O request in RDMA world 
+	 * is called "work" ;) 
+	 */
+	client_cq = ibv_create_cq(cm_client_id->verbs /* which device*/, 
+			CQ_CAPACITY /* maximum capacity*/, 
+			NULL /* user context, not used here */,
+			io_completion_channel /* which IO completion channel */, 
+			0 /* signaling vector, not used here*/);
+	if (!client_cq) {
+		ret = -errno;
+		rdma_error("Failed to create CQ, errno: %d \n", ret);
+		return ret;
+	}
+	debug("CQ created at %p with %d elements \n", client_cq, client_cq->cqe);
+	ret = ibv_req_notify_cq(client_cq, 0);
+	if (ret) {
+		/* ibv_req_notify_cq() reports the failure through its return
+		 * value; errno may be stale (even 0), which would turn this
+		 * error path into a "success" return. */
+		if (ret > 0)
+			ret = -ret;
+		rdma_error("Failed to request notifications, errno: %d\n", ret);
+		return ret;
+	}
+	/* Now the last step, set up the queue pair (send, recv) queues and their capacity.
+	 * The capacity here is defined statically but this can be probed from the 
+	 * device. We just use a small number as defined in rdma_common.h */
+	bzero(&qp_init_attr, sizeof qp_init_attr);
+	qp_init_attr.cap.max_recv_sge = MAX_SGE; /* Maximum SGE per receive posting */
+	qp_init_attr.cap.max_recv_wr = MAX_WR; /* Maximum receive posting capacity */
+	qp_init_attr.cap.max_send_sge = MAX_SGE; /* Maximum SGE per send posting */
+	qp_init_attr.cap.max_send_wr = MAX_WR; /* Maximum send posting capacity */
+	qp_init_attr.qp_type = IBV_QPT_RC; /* QP type, RC = Reliable connection */
+	/* We use same completion queue, but one can use different queues */
+	qp_init_attr.recv_cq = client_cq; /* Where should I notify for receive completion operations */
+	qp_init_attr.send_cq = client_cq; /* Where should I notify for send completion operations */
+	/*Lets create a QP */
+	ret = rdma_create_qp(cm_client_id /* which connection id */,
+			pd /* which protection domain*/,
+			&qp_init_attr /* Initial attributes */);
+	if (ret) {
+		ret = -errno;
+		rdma_error("Failed to create QP, errno: %d \n", ret);
+		return ret;
+	}
+	client_qp = cm_client_id->qp;
+	debug("QP created at %p \n", client_qp);
+	return 0;
+}
+
+/* Registers server_metadata_attr as a receive buffer and posts it on the
+ * client QP, so that a receive is outstanding before rdma_connect () is called.
+ * Returns 0 on success, -ENOMEM when the registration fails, or the value
+ * returned by ibv_post_recv() when the posting fails.
+ */
+static int client_pre_post_recv_buffer()
+{
+	int rc;
+
+	server_metadata_mr = rdma_buffer_register(pd,
+			&server_metadata_attr,
+			sizeof(server_metadata_attr),
+			IBV_ACCESS_LOCAL_WRITE);
+	if (server_metadata_mr == NULL) {
+		rdma_error("Failed to setup the server metadata mr , -ENOMEM\n");
+		return -ENOMEM;
+	}
+
+	/* describe the registered memory with a single scatter/gather element */
+	server_recv_sge.addr = (uint64_t) server_metadata_mr->addr;
+	server_recv_sge.length = (uint32_t) server_metadata_mr->length;
+	server_recv_sge.lkey = (uint32_t) server_metadata_mr->lkey;
+
+	/* the receive work request carries exactly that one element */
+	bzero(&server_recv_wr, sizeof(server_recv_wr));
+	server_recv_wr.sg_list = &server_recv_sge;
+	server_recv_wr.num_sge = 1;
+
+	rc = ibv_post_recv(client_qp /* which QP */,
+			&server_recv_wr /* receive work request*/,
+			&bad_server_recv_wr /* error WRs */);
+	if (rc != 0) {
+		rdma_error("Failed to pre-post the receive buffer, errno: %d \n", rc);
+		return rc;
+	}
+
+	debug("Receive buffer pre-posting is successful \n");
+	return 0;
+}
+
+/* Connects to the RDMA server: issues rdma_connect() on the client CM id,
+ * waits for RDMA_CM_EVENT_ESTABLISHED and acknowledges that event.
+ * Returns 0 on success; on failure the negated errno of the failing call or
+ * the status reported by process_rdma_cm_event().
+ *
+ * The error code is captured before rdma_error() runs, so the logging call
+ * cannot disturb the value that is returned.
+ */
+static int client_connect_to_server() 
+{
+	struct rdma_conn_param conn_param;
+	struct rdma_cm_event *cm_event = NULL;
+	int ret = -1;
+	bzero(&conn_param, sizeof(conn_param));
+	conn_param.initiator_depth = 3;
+	conn_param.responder_resources = 3;
+	conn_param.retry_count = 3; // if fail, then how many times to retry
+	ret = rdma_connect(cm_client_id, &conn_param);
+	if (ret) {
+		ret = -errno;
+		rdma_error("Failed to connect to remote host , errno: %d\n", ret);
+		return ret;
+	}
+	debug("waiting for cm event: RDMA_CM_EVENT_ESTABLISHED\n");
+	ret = process_rdma_cm_event(cm_event_channel, 
+			RDMA_CM_EVENT_ESTABLISHED,
+			&cm_event);
+	if (ret) {
+		rdma_error("Failed to get cm event, ret = %d \n", ret);
+		return ret;
+	}
+	ret = rdma_ack_cm_event(cm_event);
+	if (ret) {
+		ret = -errno;
+		rdma_error("Failed to acknowledge cm event, errno: %d\n", 
+			       ret);
+		return ret;
+	}
+	printf("The client is connected successfully \n");
+	return 0;
+}
+
+/* Exchange buffer metadata with the server. The client sends its own, and then
+ * receives the server's. The client-side metadata on the server is _not_ used
+ * because this program is client driven, but it is shown here for
+ * illustration purposes.
+ *
+ * Registers the source buffer and the metadata describing it, posts a SEND
+ * with that metadata and waits for two work completions (the SEND and the
+ * pre-posted receive).
+ * Returns 0 on success and a non-zero value on failure.
+ */
+static int client_xchange_metadata_with_server()
+{
+	struct ibv_wc wc[2];
+	int ret = -ENOMEM; /* reported when a registration below fails */
+	client_src_mr = rdma_buffer_register(pd,
+			src,
+			strlen(src),
+			(IBV_ACCESS_LOCAL_WRITE|
+			 IBV_ACCESS_REMOTE_READ|
+			 IBV_ACCESS_REMOTE_WRITE));
+	if(!client_src_mr){
+		rdma_error("Failed to register the first buffer, ret = %d \n", ret);
+		return ret;
+	}
+	/* we prepare metadata for the first buffer */
+	client_metadata_attr.address = (uint64_t) client_src_mr->addr; 
+	client_metadata_attr.length = client_src_mr->length; 
+	client_metadata_attr.stag.local_stag = client_src_mr->lkey;
+	/* now we register the metadata memory */
+	client_metadata_mr = rdma_buffer_register(pd,
+			&client_metadata_attr,
+			sizeof(client_metadata_attr),
+			IBV_ACCESS_LOCAL_WRITE);
+	if(!client_metadata_mr) {
+		rdma_error("Failed to register the client metadata buffer, ret = %d \n", ret);
+		return ret;
+	}
+	/* now we fill up SGE */
+	client_send_sge.addr = (uint64_t) client_metadata_mr->addr;
+	client_send_sge.length = (uint32_t) client_metadata_mr->length;
+	client_send_sge.lkey = client_metadata_mr->lkey;
+	/* now we link to the send work request */
+	bzero(&client_send_wr, sizeof(client_send_wr));
+	client_send_wr.sg_list = &client_send_sge;
+	client_send_wr.num_sge = 1;
+	client_send_wr.opcode = IBV_WR_SEND;
+	client_send_wr.send_flags = IBV_SEND_SIGNALED;
+	/* Now we post it */
+	ret = ibv_post_send(client_qp, 
+			&client_send_wr,
+			&bad_client_send_wr);
+	if (ret) {
+		/* ibv_post_send() reports the failure through its return value;
+		 * errno may be stale (even 0), which would make this error
+		 * path return "success". */
+		if (ret > 0)
+			ret = -ret;
+		rdma_error("Failed to send client metadata, errno: %d \n", 
+				ret);
+		return ret;
+	}
+	/* at this point we are expecting 2 work completion. One for our 
+	 * send and one for recv that we will get from the server for 
+	 * its buffer information */
+	ret = process_work_completion_events(io_completion_channel, 
+			wc, 2);
+	if(ret != 2) {
+		rdma_error("We failed to get 2 work completions , ret = %d \n",
+				ret);
+		/* never hand 0 back from a failure path: the caller treats 0
+		 * as success */
+		return ret ? ret : -1;
+	}
+	debug("Server sent us its buffer location and credentials, showing \n");
+	show_rdma_buffer_attr(&server_metadata_attr);
+	return 0;
+}
+
+/* This function does :
+ * 1) Register the destination buffer for RDMA operations 
+ * 2) RDMA write from src -> remote buffer 
+ * 3) RDMA read from remote buffer -> dst
+ *
+ * The remote address and key come from server_metadata_attr. Each operation
+ * is posted signaled and its single work completion is awaited before the
+ * next step.
+ * Returns 0 on success and a non-zero value on failure.
+ */ 
+static int client_remote_memory_ops() 
+{
+	struct ibv_wc wc;
+	int ret = -1;
+	client_dst_mr = rdma_buffer_register(pd,
+			dst,
+			strlen(src),
+			(IBV_ACCESS_LOCAL_WRITE | 
+			 IBV_ACCESS_REMOTE_WRITE | 
+			 IBV_ACCESS_REMOTE_READ));
+	if (!client_dst_mr) {
+		rdma_error("We failed to create the destination buffer, -ENOMEM\n");
+		return -ENOMEM;
+	}
+	/* Step 1: is to copy the local buffer into the remote buffer. We will 
+	 * reuse the previous variables. */
+	/* now we fill up SGE */
+	client_send_sge.addr = (uint64_t) client_src_mr->addr;
+	client_send_sge.length = (uint32_t) client_src_mr->length;
+	client_send_sge.lkey = client_src_mr->lkey;
+	/* now we link to the send work request */
+	bzero(&client_send_wr, sizeof(client_send_wr));
+	client_send_wr.sg_list = &client_send_sge;
+	client_send_wr.num_sge = 1;
+	client_send_wr.opcode = IBV_WR_RDMA_WRITE;
+	client_send_wr.send_flags = IBV_SEND_SIGNALED;
+	/* we have to tell server side info for RDMA */
+	client_send_wr.wr.rdma.rkey = server_metadata_attr.stag.remote_stag;
+	client_send_wr.wr.rdma.remote_addr = server_metadata_attr.address;
+	/* Now we post it */
+	ret = ibv_post_send(client_qp, 
+			&client_send_wr,
+			&bad_client_send_wr);
+	if (ret) {
+		/* ibv_post_send() reports the failure through its return value;
+		 * errno may be stale (even 0), which would make this error
+		 * path return "success". */
+		if (ret > 0)
+			ret = -ret;
+		rdma_error("Failed to write client src buffer, errno: %d \n", 
+				ret);
+		return ret;
+	}
+	/* at this point we are expecting 1 work completion for the write */
+	ret = process_work_completion_events(io_completion_channel, 
+			&wc, 1);
+	if(ret != 1) {
+		rdma_error("We failed to get 1 work completions , ret = %d \n",
+				ret);
+		/* never hand 0 back from a failure path */
+		return ret ? ret : -1;
+	}
+	debug("Client side WRITE is complete \n");
+	/* Step 2: now we prepare a READ using same variables but for destination */
+	client_send_sge.addr = (uint64_t) client_dst_mr->addr;
+	client_send_sge.length = (uint32_t) client_dst_mr->length;
+	client_send_sge.lkey = client_dst_mr->lkey;
+	/* now we link to the send work request */
+	bzero(&client_send_wr, sizeof(client_send_wr));
+	client_send_wr.sg_list = &client_send_sge;
+	client_send_wr.num_sge = 1;
+	client_send_wr.opcode = IBV_WR_RDMA_READ;
+	client_send_wr.send_flags = IBV_SEND_SIGNALED;
+	/* we have to tell server side info for RDMA */
+	client_send_wr.wr.rdma.rkey = server_metadata_attr.stag.remote_stag;
+	client_send_wr.wr.rdma.remote_addr = server_metadata_attr.address;
+	/* Now we post it */
+	ret = ibv_post_send(client_qp, 
+			&client_send_wr,
+			&bad_client_send_wr);
+	if (ret) {
+		/* same return-value convention as for the write above */
+		if (ret > 0)
+			ret = -ret;
+		rdma_error("Failed to read client dst buffer from the master, errno: %d \n", 
+				ret);
+		return ret;
+	}
+	/* at this point we are expecting 1 work completion for the read */
+	ret = process_work_completion_events(io_completion_channel, 
+			&wc, 1);
+	if(ret != 1) {
+		rdma_error("We failed to get 1 work completions , ret = %d \n",
+				ret);
+		/* never hand 0 back from a failure path */
+		return ret ? ret : -1;
+	}
+	debug("Client side READ is complete \n");
+	return 0;
+}
+
+/* This function disconnects the RDMA connection from the server and cleans up 
+ * all the resources: QP, CM id, CQ, completion channel, memory registrations,
+ * the src/dst buffers, the protection domain and the CM event channel.
+ *
+ * Cleanup is best effort: every failure is logged and the remaining steps are
+ * still executed. Always returns 0.
+ */
+static int client_disconnect_and_clean()
+{
+	struct rdma_cm_event *cm_event = NULL;
+	int ret = -1;
+	/* active disconnect from the client side */
+	ret = rdma_disconnect(cm_client_id);
+	if (ret) {
+		rdma_error("Failed to disconnect, errno: %d \n", -errno);
+		//continuing anyways
+	}
+	ret = process_rdma_cm_event(cm_event_channel, 
+			RDMA_CM_EVENT_DISCONNECTED,
+			&cm_event);
+	if (ret) {
+		rdma_error("Failed to get RDMA_CM_EVENT_DISCONNECTED event, ret = %d\n",
+				ret);
+		//continuing anyways 
+	} else {
+		/* only acknowledge an event that was actually delivered; after
+		 * a failure above cm_event may still be NULL */
+		ret = rdma_ack_cm_event(cm_event);
+		if (ret) {
+			rdma_error("Failed to acknowledge cm event, errno: %d\n", 
+				       -errno);
+			//continuing anyways
+		}
+	}
+	/* Destroy QP */
+	rdma_destroy_qp(cm_client_id);
+	/* Destroy client cm id */
+	ret = rdma_destroy_id(cm_client_id);
+	if (ret) {
+		rdma_error("Failed to destroy client id cleanly, %d \n", -errno);
+		// we continue anyways;
+	}
+	/* The ibv_* teardown calls below report failures through their return
+	 * value, so that value (negated when positive) is logged rather than
+	 * a possibly stale errno. */
+	/* Destroy CQ */
+	ret = ibv_destroy_cq(client_cq);
+	if (ret) {
+		rdma_error("Failed to destroy completion queue cleanly, %d \n",
+				ret > 0 ? -ret : ret);
+		// we continue anyways;
+	}
+	/* Destroy completion channel */
+	ret = ibv_destroy_comp_channel(io_completion_channel);
+	if (ret) {
+		rdma_error("Failed to destroy completion channel cleanly, %d \n",
+				ret > 0 ? -ret : ret);
+		// we continue anyways;
+	}
+	/* Destroy memory buffers */
+	rdma_buffer_deregister(server_metadata_mr);
+	rdma_buffer_deregister(client_metadata_mr);	
+	rdma_buffer_deregister(client_src_mr);	
+	rdma_buffer_deregister(client_dst_mr);	
+	/* We free the buffers */
+	free(src);
+	free(dst);
+	/* Destroy protection domain */
+	ret = ibv_dealloc_pd(pd);
+	if (ret) {
+		rdma_error("Failed to destroy client protection domain cleanly, %d \n",
+				ret > 0 ? -ret : ret);
+		// we continue anyways;
+	}
+	rdma_destroy_event_channel(cm_event_channel);
+	printf("Client resource clean up is complete \n");
+	return 0;
+}
+
+/* Prints the command line help of rdma_client and terminates the process
+ * with exit status 1. */
+void usage() {
+	printf("Usage:\n"
+	       "rdma_client: [-a <server_addr>] [-p <server_port>] -s string (required)\n"
+	       "(default IP is 127.0.0.1 and port is %d)\n",
+	       DEFAULT_RDMA_PORT);
+	exit(1);
+}
+
+/* Entry point of the RDMA client.
+ *
+ * Options:
+ *   -s <string>  (required) text that is RDMA-written to the server and read back
+ *   -a <addr>    server address, default is the loopback address
+ *   -p <port>    server port, default is DEFAULT_RDMA_PORT
+ *
+ * Runs the complete client workflow: prepare the connection, pre-post the
+ * receive buffer, connect, exchange metadata, RDMA write + read, compare the
+ * buffers, then disconnect and clean up.
+ * Returns 0 on success and a non-zero value on any failure, including a
+ * mismatch between the source and destination buffers.
+ */
+int main(int argc, char **argv) {
+	struct sockaddr_in server_sockaddr;
+	int ret, option, mismatch;
+	long port = 0; /* 0 = not given on the command line */
+	char *end = NULL;
+	size_t len;
+	bzero(&server_sockaddr, sizeof server_sockaddr);
+	server_sockaddr.sin_family = AF_INET;
+	server_sockaddr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+	/* buffers are NULL */
+	src = dst = NULL; 
+	/* Parse Command Line Arguments */
+	while ((option = getopt(argc, argv, "s:a:p:")) != -1) {
+		switch (option) {
+			case 's':
+				len = strlen(optarg);
+				printf("Passed string is : %s , with count %u \n", 
+						optarg, 
+						(unsigned int) len);
+				/* a repeated -s replaces the earlier string */
+				free(src);
+				free(dst);
+				src = dst = NULL;
+				/* One extra zeroed byte keeps the buffers NUL
+				 * terminated: the rest of the client measures
+				 * them with strlen(src). */
+				src = calloc(len + 1, 1);
+				if (!src) {
+					rdma_error("Failed to allocate memory : -ENOMEM\n");
+					return -ENOMEM;
+				}
+				/* Copy the passed argument */
+				memcpy(src, optarg, len);
+				dst = calloc(len + 1, 1);
+				if (!dst) {
+					rdma_error("Failed to allocate destination memory, -ENOMEM\n");
+					free(src);
+					return -ENOMEM;
+				}
+				break;
+			case 'a':
+				/* remember, this overwrites the port info; the
+				 * port given with -p is therefore kept in a
+				 * local and applied after option parsing */
+				ret = get_addr(optarg, (struct sockaddr*) &server_sockaddr);
+				if (ret) {
+					rdma_error("Invalid IP \n");
+					return ret;
+				}
+				break;
+			case 'p':
+				/* server port to connect to */
+				port = strtol(optarg, &end, 0);
+				if (end == optarg || *end != '\0' ||
+						port < 0 || port > 65535) {
+					rdma_error("Invalid port \n");
+					usage();
+				}
+				break;
+			default:
+				usage();
+				break;
+			}
+		}
+	if (port) {
+		/* explicit -p wins, regardless of its position relative to -a */
+		server_sockaddr.sin_port = htons((uint16_t) port);
+	} else if (!server_sockaddr.sin_port) {
+		/* no port provided, use the default port */
+		server_sockaddr.sin_port = htons(DEFAULT_RDMA_PORT);
+	}
+	if (src == NULL) {
+		printf("Please provide a string to copy \n");
+		usage();
+	}
+	ret = client_prepare_connection(&server_sockaddr);
+	if (ret) { 
+		rdma_error("Failed to setup client connection , ret = %d \n", ret);
+		return ret;
+	}
+	ret = client_pre_post_recv_buffer(); 
+	if (ret) { 
+		rdma_error("Failed to pre-post the receive buffer , ret = %d \n", ret);
+		return ret;
+	}
+	ret = client_connect_to_server();
+	if (ret) { 
+		rdma_error("Failed to connect to the server , ret = %d \n", ret);
+		return ret;
+	}
+	ret = client_xchange_metadata_with_server();
+	if (ret) {
+		rdma_error("Failed to exchange metadata with the server , ret = %d \n", ret);
+		return ret;
+	}
+	ret = client_remote_memory_ops();
+	if (ret) {
+		rdma_error("Failed to finish remote memory ops, ret = %d \n", ret);
+		return ret;
+	}
+	mismatch = check_src_dst();
+	if (mismatch) {
+		rdma_error("src and dst buffers do not match \n");
+	} else {
+		printf("...\nSUCCESS, source and destination buffers match \n");
+	}
+	ret = client_disconnect_and_clean();
+	if (ret) {
+		rdma_error("Failed to cleanly disconnect and clean up resources \n");
+	}
+	/* a failed verification must not be reported as a successful run */
+	if (!ret && mismatch)
+		ret = -1;
+	return ret;
+}
+
diff --git a/rdma-example-master/rdma-example-master/src/rdma_common.c b/rdma-example-master/rdma-example-master/src/rdma_common.c
new file mode 100644
index 0000000..0478c29
--- /dev/null
+++ b/rdma-example-master/rdma-example-master/src/rdma_common.c
@@ -0,0 +1,210 @@
+/*
+ * Implementation of the common RDMA functions. 
+ *
+ * Authors: Animesh Trivedi
+ *          [email protected] 
+ */
+
+#include "rdma_common.h"
+
+/* Dumps the address, device, event channel, QP and port details of a cm id to stdout. */
+void show_rdma_cmid(struct rdma_cm_id *id)
+{
+	if (id == NULL) {
+		rdma_error("Passed ptr is NULL\n");
+		return;
+	}
+	printf("RDMA cm id at %p \n", id);
+	/* the device line is printed only when both verbs and its device pointer are set */
+	if (id->verbs != NULL && id->verbs->device != NULL)
+		printf("dev_ctx: %p (device name: %s) \n", id->verbs, id->verbs->device->name);
+	if (id->channel != NULL)
+		printf("cm event channel %p\n", id->channel);
+	printf("QP: %p, port_space %x, port_num %u \n",
+			id->qp, id->ps, id->port_num);
+}
+
+/* Prints the address, length and stag of a buffer attribute between two separator rules. */
+void show_rdma_buffer_attr(struct rdma_buffer_attr *attr){
+	static const char rule[] = "---------------------------------------------------------\n";
+	if (attr == NULL) {
+		rdma_error("Passed attr is NULL\n");
+		return;
+	}
+	printf("%s", rule);
+	printf("buffer attr, addr: %p , len: %u , stag : 0x%x \n",
+			(void*) attr->address, (unsigned int) attr->length, attr->stag.local_stag);
+	printf("%s", rule);
+}
+
+/* Allocates a zeroed buffer of `size` bytes and registers it in `pd`; returns the MR or NULL. */
+struct ibv_mr* rdma_buffer_alloc(struct ibv_pd *pd, uint32_t size, enum ibv_access_flags permission)
+{
+	struct ibv_mr *region;
+	void *memory;
+	if (pd == NULL) {
+		rdma_error("Protection domain is NULL \n");
+		return NULL;
+	}
+	memory = calloc(1, size);
+	if (memory == NULL) {
+		rdma_error("failed to allocate buffer, -ENOMEM\n");
+		return NULL;
+	}
+	debug("Buffer allocated: %p , len: %u \n", memory, size);
+	region = rdma_buffer_register(pd, memory, size, permission);
+	if (region == NULL)
+		free(memory); /* registration failed, so the buffer is released here */
+	return region;
+}
+
+/* Registers already-allocated memory (addr, length) in `pd`; returns the MR or NULL. */
+struct ibv_mr *rdma_buffer_register(struct ibv_pd *pd,
+		void *addr, uint32_t length,
+		enum ibv_access_flags permission)
+{
+	struct ibv_mr *region;
+	if (pd == NULL) {
+		rdma_error("Protection domain is NULL, ignoring \n");
+		return NULL;
+	}
+	region = ibv_reg_mr(pd, addr, length, permission);
+	if (region == NULL) {
+		rdma_error("Failed to create mr on buffer, errno: %d \n", -errno);
+		return NULL;
+	}
+	/* trace of the new registration (address, length, local key) */
+	debug("Registered: %p , len: %u , stag: 0x%x \n",
+			region->addr, (unsigned int) region->length, region->lkey);
+	return region;
+}
+
+void rdma_buffer_free(struct ibv_mr *mr) /* deregisters mr, then frees the buffer it covered */
+{
+	if (mr == NULL) {
+		rdma_error("Passed memory region is NULL, ignoring\n");
+		return;
+	}
+	void *backing = mr->addr; /* read up front: mr is not touched again after deregistration */
+	rdma_buffer_deregister(mr);
+	debug("Buffer %p free'ed\n", backing);
+	free(backing);
+}
+
+void rdma_buffer_deregister(struct ibv_mr *mr) /* unregisters mr; the memory itself is not freed */
+{
+	if (!mr) {
+		rdma_error("Passed memory region is NULL, ignoring\n");
+		return;
+	}
+	debug("Deregistered: %p , len: %u , stag : 0x%x \n",
+			mr->addr, (unsigned int) mr->length, mr->lkey);
+	int ret = ibv_dereg_mr(mr); /* 0 or an errno value; a failure used to go unnoticed */
+	if (ret)
+		rdma_error("Failed to deregister memory region, errno: %d \n", -ret);
+}
+
+/* Retrieves the next CM event from echannel and checks its status and type. Returns 0 with
+ * *cm_event left for the caller to acknowledge; on failure returns non-zero and any event
+ * that was retrieved has already been acknowledged here. */
+int process_rdma_cm_event(struct rdma_event_channel *echannel,
+		enum rdma_cm_event_type expected_event, struct rdma_cm_event **cm_event)
+{
+	int ret = rdma_get_cm_event(echannel, cm_event);
+	if (ret) {
+		ret = -errno; /* saved first: the fprintf() behind rdma_error may overwrite errno */
+		rdma_error("Failed to retrieve a cm event, errno: %d \n", ret);
+		return ret;
+	}
+	/* lets see, if it was a good event */
+	if (0 != (*cm_event)->status) {
+		rdma_error("CM event has non zero status: %d\n", (*cm_event)->status);
+		ret = -((*cm_event)->status);
+		/* important, we acknowledge the event */
+		rdma_ack_cm_event(*cm_event);
+		return ret;
+	}
+	/* if it was a good event, was it of the expected type */
+	if ((*cm_event)->event != expected_event) {
+		rdma_error("Unexpected event received: %s [ expecting: %s ]\n",
+				rdma_event_str((*cm_event)->event),
+				rdma_event_str(expected_event));
+		/* important, we acknowledge the event */
+		rdma_ack_cm_event(*cm_event);
+		return -1; // unexpected event :(
+	}
+	debug("A new %s type event is received \n", rdma_event_str((*cm_event)->event));
+	return ret; /* 0 here; the caller must acknowledge the event */
+}
+
+
+/* Waits for one CQ notification on comp_channel, re-arms the CQ and polls until max_wc
+ * work completions are stored in wc. Returns the WC count or a negative value on error. */
+int process_work_completion_events (struct ibv_comp_channel *comp_channel, 
+		struct ibv_wc *wc, int max_wc)
+{
+	struct ibv_cq *cq_ptr = NULL;
+	void *context = NULL;
+	int ret = -1, i, total_wc = 0;
+	/* We wait for the notification on the CQ channel */
+	ret = ibv_get_cq_event(comp_channel, /* IO channel where we are expecting the notification */
+			&cq_ptr, /* which CQ has an activity. This should be the same as CQ we created before */
+			&context); /* Associated CQ user context, which we did set */
+	if (ret) {
+		ret = -errno; /* saved first: the fprintf() behind rdma_error may overwrite errno */
+		rdma_error("Failed to get next CQ event due to %d \n", ret);
+		return ret;
+	}
+	/* Similar to connection management events, we need to acknowledge CQ events. We do
+	 * it right here so that none of the error returns below leaves the event
+	 * unacknowledged. We received one event notification (not the number of WCs). */
+	ibv_ack_cq_events(cq_ptr, 1);
+	/* Request for more notifications. */
+	ret = ibv_req_notify_cq(cq_ptr, 0);
+	if (ret) {
+		rdma_error("Failed to request further notifications %d \n", -ret);
+		return -ret; /* ibv_req_notify_cq() returns the errno value itself */
+	}
+	/* We got notification. We reap the work completion (WC) elements. It is unlikely
+	 * but a good practice to write the CQ polling code so that it can handle zero
+	 * WCs: ibv_poll_cq can return zero, in which case we simply poll again. */
+	do {
+		ret = ibv_poll_cq(cq_ptr /* the CQ, we got notification for */,
+				max_wc - total_wc /* number of remaining WC elements*/,
+				wc + total_wc/* where to store */);
+		if (ret < 0) {
+			rdma_error("Failed to poll cq for wc due to %d \n", ret);
+			/* ret is a negative error value here */
+			return ret;
+		}
+		total_wc += ret;
+	} while (total_wc < max_wc);
+	debug("%d WC are completed \n", total_wc);
+	/* Now we check validity and status of I/O work completions */
+	for (i = 0; i < total_wc; i++) {
+		if (wc[i].status != IBV_WC_SUCCESS) {
+			rdma_error("Work completion (WC) has error status: %s at index %d\n",
+					ibv_wc_status_str(wc[i].status), i);
+			/* return negative value */
+			return -(wc[i].status);
+		}
+	}
+	return total_wc;
+}
+
+
+/* Resolves dst to an IPv4 address stored in addr. After rping.c from librdmacm/examples */
+int get_addr(char *dst, struct sockaddr *addr)
+{
+	/* IPv4 only: the result is copied below as a sockaddr_in, so an IPv6 answer must not win */
+	struct addrinfo *res, hints = { .ai_family = AF_INET };
+	int ret = getaddrinfo(dst, NULL, &hints, &res);
+	if (ret) {
+		rdma_error("getaddrinfo failed - invalid hostname or IP address\n");
+		return ret;
+	}
+	memcpy(addr, res->ai_addr, sizeof(struct sockaddr_in));
+	freeaddrinfo(res);
+	return ret;
+}
+
diff --git a/rdma-example-master/rdma-example-master/src/rdma_common.h b/rdma-example-master/rdma-example-master/src/rdma_common.h
new file mode 100644
index 0000000..5a228c9
--- /dev/null
+++ b/rdma-example-master/rdma-example-master/src/rdma_common.h
@@ -0,0 +1,133 @@
+/*
+ * Header file for the common RDMA routines used in the server/client example 
+ * program. 
+ *
+ * Author: Animesh Trivedi 
+ *          [email protected] 
+ *
+ */
+
+#ifndef RDMA_COMMON_H
+#define RDMA_COMMON_H
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <errno.h>
+#include <getopt.h>
+
+#include <netdb.h>
+#include <netinet/in.h>	
+#include <arpa/inet.h>
+#include <sys/socket.h>
+
+#include <rdma/rdma_cma.h>
+#include <infiniband/verbs.h>
+
+/* Error Macro*/
+#define rdma_error(msg, args...) do {\
+	fprintf(stderr, "%s : %d : ERROR : "msg, __FILE__, __LINE__, ## args);\
+}while(0);
+
+#ifdef ACN_RDMA_DEBUG 
+/* Debug Macro */
+#define debug(msg, args...) do {\
+    printf("DEBUG: "msg, ## args);\
+}while(0);
+
+#else 
+
+#define debug(msg, args...) 
+
+#endif /* ACN_RDMA_DEBUG */
+
+/* Capacity of the completion queue (CQ) */
+#define CQ_CAPACITY (16)
+/* MAX SGE capacity */
+#define MAX_SGE (2)
+/* MAX work requests */
+#define MAX_WR (8)
+/* Default port where the RDMA server is listening */
+#define DEFAULT_RDMA_PORT (20886)
+
+/*
+ * Describes one RDMA buffer: its address, its length and the associated stag.
+ * We use this structure to exchange information between the server and the client,
+ * and the packed attribute so that the compiler does not pad the structure.
+ * For details see: http://gcc.gnu.org/onlinedocs/gcc/Type-Attributes.html
+ */
+struct __attribute((packed)) rdma_buffer_attr {
+  uint64_t address;
+  uint32_t length;
+  union stag {
+	  /* if we send, we call it local stag */
+	  uint32_t local_stag;
+	  /* if we receive, we call it remote stag (same storage as local_stag) */
+	  uint32_t remote_stag;
+  }stag;
+};
+/* resolves a given destination name to sin_addr */
+int get_addr(char *dst, struct sockaddr *addr);
+
+/* prints RDMA buffer info structure */
+void show_rdma_buffer_attr(struct rdma_buffer_attr *attr);
+
+/* 
+ * Processes an RDMA connection management (CM) event. 
+ * @echannel: CM event channel where the event is expected. 
+ * @expected_event: Expected event type 
+ * @cm_event: where the event will be stored 
+ */
+int process_rdma_cm_event(struct rdma_event_channel *echannel, 
+		enum rdma_cm_event_type expected_event,
+		struct rdma_cm_event **cm_event);
+
+/* Allocates an RDMA buffer of size 'length' with permission permission. This 
+ * function will also register the memory and returns a memory region (MR) 
+ * identifier or NULL on error. 
+ * @pd: Protection domain where the buffer should be allocated 
+ * @length: Length of the buffer 
+ * @permission: OR of IBV_ACCESS_* permissions as defined for the enum ibv_access_flags
+ */
+struct ibv_mr* rdma_buffer_alloc(struct ibv_pd *pd, 
+		uint32_t length, 
+		enum ibv_access_flags permission);
+
+/* Frees a previously allocated RDMA buffer. The buffer must be allocated by 
+ * calling rdma_buffer_alloc();
+ * @mr: RDMA memory region to free 
+ */
+void rdma_buffer_free(struct ibv_mr *mr);
+
+/* This function registers a previously allocated memory. Returns a memory region 
+ * (MR) identifier or NULL on error.
+ * @pd: protection domain where to register memory 
+ * @addr: Buffer address 
+ * @length: Length of the buffer 
+ * @permission: OR of IBV_ACCESS_* permissions as defined for the enum ibv_access_flags
+ */
+struct ibv_mr *rdma_buffer_register(struct ibv_pd *pd, 
+		void *addr, 
+		uint32_t length, 
+		enum ibv_access_flags permission);
+/* Deregisters a previously registered memory region
+ * @mr: Memory region to deregister 
+ */
+void rdma_buffer_deregister(struct ibv_mr *mr);
+
+/* Processes a work completion (WC) notification. 
+ * @comp_channel: Completion channel where the notifications are expected to arrive 
+ * @wc: Array where to hold the work completion elements 
+ * @max_wc: Maximum number of expected work completion (WC) elements. wc must be 
+ *          at least this size.
+ */
+int process_work_completion_events(struct ibv_comp_channel *comp_channel, 
+		struct ibv_wc *wc, 
+		int max_wc);
+
+/* prints some details from the cm id */
+void show_rdma_cmid(struct rdma_cm_id *id);
+
+#endif /* RDMA_COMMON_H */
diff --git a/rdma-example-master/rdma-example-master/src/rdma_server.c b/rdma-example-master/rdma-example-master/src/rdma_server.c
new file mode 100644
index 0000000..42f018f
--- /dev/null
+++ b/rdma-example-master/rdma-example-master/src/rdma_server.c
@@ -0,0 +1,489 @@
+/*
+ * This is a RDMA server side code. 
+ *
+ * Author: Animesh Trivedi 
+ *         [email protected] 
+ *
+ * TODO: Cleanup previously allocated resources in case of an error condition
+ */
+
+#include "rdma_common.h"
+
+/* These are the RDMA resources needed to set up an RDMA connection (file-scope state) */
+/* Event channel, where connection management (cm) related events are relayed */
+static struct rdma_event_channel *cm_event_channel = NULL;
+static struct rdma_cm_id *cm_server_id = NULL, *cm_client_id = NULL;
+static struct ibv_pd *pd = NULL;
+static struct ibv_comp_channel *io_completion_channel = NULL;
+static struct ibv_cq *cq = NULL;
+static struct ibv_qp_init_attr qp_init_attr;
+static struct ibv_qp *client_qp = NULL;
+/* RDMA memory resources: registered regions, exchanged metadata, work requests and SGEs */
+static struct ibv_mr *client_metadata_mr = NULL, *server_buffer_mr = NULL, *server_metadata_mr = NULL;
+static struct rdma_buffer_attr client_metadata_attr, server_metadata_attr;
+static struct ibv_recv_wr client_recv_wr, *bad_client_recv_wr = NULL;
+static struct ibv_send_wr server_send_wr, *bad_server_send_wr = NULL;
+static struct ibv_sge client_recv_sge, server_send_sge;
+
+/* When we call this function cm_client_id must be set to a valid identifier.
+ * This is where we prepare the client connection before we accept it: we
+ * allocate the protection domain, the completion channel, the completion
+ * queue and the queue pair. Returns 0 or a negative errno value.
+ */
+static int setup_client_resources()
+{
+	int ret = -1;
+	if(!cm_client_id){
+		rdma_error("Client id is still NULL \n");
+		return -EINVAL;
+	}
+	/* We have a valid connection identifier, lets start to allocate 
+	 * resources. We need: 
+	 * 1. Protection Domains (PD)
+	 * 2. Memory Buffers 
+	 * 3. Completion Queues (CQ)
+	 * 4. Queue Pair (QP)
+	 * Protection Domain (PD) is similar to a "process abstraction" 
+	 * in the operating system. All resources are tied to a particular PD. 
+	 * And accessing resources across PD will result in a protection fault.
+	 */
+	pd = ibv_alloc_pd(cm_client_id->verbs 
+			/* verbs defines a verb's provider, 
+			 * i.e an RDMA device where the incoming 
+			 * client connection came */);
+	if (!pd) {
+		ret = -errno; /* saved first: the fprintf() behind rdma_error may overwrite errno */
+		rdma_error("Failed to allocate a protection domain errno: %d\n", ret);
+		return ret;
+	}
+	debug("A new protection domain is allocated at %p \n", pd);
+	/* Now we need a completion channel, were the I/O completion 
+	 * notifications are sent. Remember, this is different from connection 
+	 * management (CM) event notifications. 
+	 * A completion channel is also tied to an RDMA device, hence we will 
+	 * use cm_client_id->verbs. 
+	 */
+	io_completion_channel = ibv_create_comp_channel(cm_client_id->verbs);
+	if (!io_completion_channel) {
+		ret = -errno;
+		rdma_error("Failed to create an I/O completion event channel, %d\n", ret);
+		return ret;
+	}
+	debug("An I/O completion event channel is created at %p \n", 
+			io_completion_channel);
+	/* Now we create a completion queue (CQ) where actual I/O 
+	 * completion metadata is placed. The metadata is packed into a structure 
+	 * called struct ibv_wc (wc = work completion). ibv_wc has detailed 
+	 * information about the work completion. An I/O request in RDMA world 
+	 * is called "work" ;) 
+	 */
+	cq = ibv_create_cq(cm_client_id->verbs /* which device*/, 
+			CQ_CAPACITY /* maximum capacity*/, 
+			NULL /* user context, not used here */,
+			io_completion_channel /* which IO completion channel */, 
+			0 /* signaling vector, not used here*/);
+	if (!cq) {
+		ret = -errno;
+		rdma_error("Failed to create a completion queue (cq), errno: %d\n", ret);
+		return ret;
+	}
+	debug("Completion queue (CQ) is created at %p with %d elements \n", 
+			cq, cq->cqe);
+	/* Ask for the event for all activities in the completion queue*/
+	ret = ibv_req_notify_cq(cq /* on which CQ */, 
+			0 /* 0 = all event type, no filter*/);
+	if (ret) {
+		/* ibv_req_notify_cq() hands back the errno value itself instead of
+		 * setting errno, so that value is what we report and return */
+		rdma_error("Failed to request notifications on CQ errno: %d \n", -ret);
+		return -ret;
+	}
+	/* Now the last step, set up the queue pair (send, recv) queues and their capacity.
+	 * The capacity here is define statically but this can be probed from the 
+	 * device. We just use a small number as defined in rdma_common.h */
+	bzero(&qp_init_attr, sizeof qp_init_attr);
+	qp_init_attr.cap.max_recv_sge = MAX_SGE; /* Maximum SGE per receive posting */
+	qp_init_attr.cap.max_recv_wr = MAX_WR; /* Maximum receive posting capacity */
+	qp_init_attr.cap.max_send_sge = MAX_SGE; /* Maximum SGE per send posting */
+	qp_init_attr.cap.max_send_wr = MAX_WR; /* Maximum send posting capacity */
+	qp_init_attr.qp_type = IBV_QPT_RC; /* QP type, RC = Reliable connection */
+	/* We use same completion queue, but one can use different queues */
+	qp_init_attr.recv_cq = cq; /* Where should I notify for receive completion operations */
+	qp_init_attr.send_cq = cq; /* Where should I notify for send completion operations */
+	/*Lets create a QP */
+	ret = rdma_create_qp(cm_client_id /* which connection id */,
+			pd /* which protection domain*/, &qp_init_attr /* Initial attributes */);
+	if (ret) {
+		ret = -errno;
+		rdma_error("Failed to create QP due to errno: %d\n", ret);
+		return ret;
+	}
+	/* Save the reference for handy typing but is not required */
+	client_qp = cm_client_id->qp;
+	debug("Client QP created at %p\n", client_qp);
+	return ret;
+}
+
+/* Starts the RDMA server: binds, listens and waits for the first connect request. */
+static int start_rdma_server(struct sockaddr_in *server_addr) 
+{
+	struct rdma_cm_event *cm_event = NULL;
+	int ret = -1;
+	/*  Open a channel used to report asynchronous communication event */
+	cm_event_channel = rdma_create_event_channel();
+	if (!cm_event_channel) {
+		ret = -errno; /* saved first: the fprintf() behind rdma_error may overwrite errno */
+		rdma_error("Creating cm event channel failed with errno : (%d)\n", ret);
+		return ret;
+	}
+	debug("RDMA CM event channel is created successfully at %p \n", cm_event_channel);
+	/* rdma_cm_id is the connection identifier (like socket) which is used
+	 * to define an RDMA connection. */
+	ret = rdma_create_id(cm_event_channel, &cm_server_id, NULL, RDMA_PS_TCP);
+	if (ret) {
+		ret = -errno;
+		rdma_error("Creating server cm id failed with errno: %d \n", ret);
+		return ret;
+	}
+	debug("A RDMA connection id for the server is created \n");
+	/* Explicit binding of rdma cm id to the socket credentials */
+	ret = rdma_bind_addr(cm_server_id, (struct sockaddr*) server_addr);
+	if (ret) {
+		ret = -errno;
+		rdma_error("Failed to bind server address, errno: %d \n", ret);
+		return ret;
+	}
+	debug("Server RDMA CM id is successfully binded \n");
+	/* Now we start to listen on the passed IP and port. However unlike
+	 * normal TCP listen, this is a non-blocking call. When a new client is 
+	 * connected, a new connection management (CM) event is generated on the 
+	 * RDMA CM event channel from where the listening id was created. Here we
+	 * have only one channel, so it is easy. */
+	ret = rdma_listen(cm_server_id, 8); /* backlog = 8 clients, same as TCP, see man listen*/
+	if (ret) {
+		ret = -errno;
+		rdma_error("rdma_listen failed to listen on server address, errno: %d \n", ret);
+		return ret;
+	}
+	printf("Server is listening successfully at: %s , port: %d \n",
+			inet_ntoa(server_addr->sin_addr),
+			ntohs(server_addr->sin_port));
+	/* now, we expect a client to connect and generate a RDMA_CM_EVENT_CONNECT_REQUEST 
+	 * We wait (block) on the connection management event channel for 
+	 * the connect event. 
+	 */
+	ret = process_rdma_cm_event(cm_event_channel,
+			RDMA_CM_EVENT_CONNECT_REQUEST, &cm_event);
+	if (ret) {
+		rdma_error("Failed to get cm event, ret = %d \n" , ret);
+		return ret;
+	}
+	/* Much like TCP connection, listening returns a new connection identifier 
+	 * for newly connected client. In the case of RDMA, this is stored in id 
+	 * field. For more details: man rdma_get_cm_event 
+	 */
+	cm_client_id = cm_event->id;
+	/* now we acknowledge the event. Acknowledging the event frees the resources
+	 * associated with the event structure. Hence any reference to the event
+	 * must be made before acknowledgment. Like, we have already saved the
+	 * client id from "id" field before acknowledging the event. */
+	ret = rdma_ack_cm_event(cm_event);
+	if (ret) {
+		ret = -errno;
+		rdma_error("Failed to acknowledge the cm event errno: %d \n", ret);
+		return ret;
+	}
+	debug("A new RDMA client connection id is stored at %p\n", cm_client_id);
+	return ret;
+}
+
+/* Pre-posts a receive buffer and accepts an RDMA client connection */
+static int accept_client_connection()
+{
+	struct rdma_conn_param conn_param;
+	struct rdma_cm_event *cm_event = NULL;
+	struct sockaddr_in remote_sockaddr; 
+	int ret = -1;
+	if(!cm_client_id || !client_qp) {
+		rdma_error("Client resources are not properly setup\n");
+		return -EINVAL;
+	}
+	/* we prepare the receive buffer in which we will receive the client metadata*/
+	client_metadata_mr = rdma_buffer_register(pd /* which protection domain */, 
+			&client_metadata_attr /* what memory */,
+			sizeof(client_metadata_attr) /* what length */, 
+			(IBV_ACCESS_LOCAL_WRITE) /* access permissions */);
+	if(!client_metadata_mr){
+		rdma_error("Failed to register client attr buffer\n");
+		//we assume ENOMEM
+		return -ENOMEM;
+	}
+	/* We pre-post this receive buffer on the QP. SGE credentials is where we 
+	 * receive the metadata from the client */
+	client_recv_sge.addr = (uint64_t) client_metadata_mr->addr; // same as &client_buffer_attr
+	client_recv_sge.length = client_metadata_mr->length;
+	client_recv_sge.lkey = client_metadata_mr->lkey;
+	/* Now we link this SGE to the work request (WR) */
+	bzero(&client_recv_wr, sizeof(client_recv_wr));
+	client_recv_wr.sg_list = &client_recv_sge;
+	client_recv_wr.num_sge = 1; // only one SGE
+	ret = ibv_post_recv(client_qp /* which QP */,
+			&client_recv_wr /* receive work request*/,
+			&bad_client_recv_wr /* error WRs */);
+	if (ret) {
+		rdma_error("Failed to pre-post the receive buffer, errno: %d \n", ret);
+		return ret;
+	}
+	debug("Receive buffer pre-posting is successful \n");
+	/* Now we accept the connection. Recall we have not accepted the connection 
+	 * yet because we have to do lots of resource pre-allocation */
+	memset(&conn_param, 0, sizeof(conn_param));
+	/* this tell how many outstanding requests can we handle */
+	conn_param.initiator_depth = 3; /* For this exercise, we put a small number here */
+	/* This tell how many outstanding requests we expect other side to handle */
+	conn_param.responder_resources = 3; /* For this exercise, we put a small number */
+	ret = rdma_accept(cm_client_id, &conn_param);
+	if (ret) {
+		ret = -errno; /* saved first: the fprintf() behind rdma_error may overwrite errno */
+		rdma_error("Failed to accept the connection, errno: %d \n", ret);
+		return ret;
+	}
+	/* We expect an RDMA_CM_EVENT_ESTABLISHED to indicate that the RDMA
+	 * connection has been established and everything is fine on both, server
+	 * as well as the client sides. */
+	debug("Going to wait for : RDMA_CM_EVENT_ESTABLISHED event \n");
+	ret = process_rdma_cm_event(cm_event_channel,
+			RDMA_CM_EVENT_ESTABLISHED, &cm_event);
+	if (ret) {
+		/* ret carries the failure; errno may be 0 here, which would read as success */
+		rdma_error("Failed to get the cm event, ret = %d \n", ret);
+		return ret;
+	}
+	/* We acknowledge the event */
+	ret = rdma_ack_cm_event(cm_event);
+	if (ret) {
+		ret = -errno;
+		rdma_error("Failed to acknowledge the cm event %d\n", ret);
+		return ret;
+	}
+	/* Just FYI: How to extract connection information */
+	memcpy(&remote_sockaddr /* where to save */, 
+			rdma_get_peer_addr(cm_client_id) /* gives you remote sockaddr */, 
+			sizeof(struct sockaddr_in) /* max size */);
+	printf("A new connection is accepted from %s \n", 
+			inet_ntoa(remote_sockaddr.sin_addr));
+	return ret;
+}
+
+/* This function sends server side buffer metadata to the connected client */
+static int send_server_metadata_to_client() 
+{
+	struct ibv_wc wc;
+	int ret = -1;
+	/* Now, we first wait for the client to start the communication by 
+	 * sending the server its metadata info. The server does not use it 
+	 * in our example. We will receive a work completion notification for 
+	 * our pre-posted receive request.
+	 */
+	ret = process_work_completion_events(io_completion_channel, &wc, 1);
+	if (ret != 1) {
+		rdma_error("Failed to receive , ret = %d \n", ret);
+		return ret;
+	}
+	/* if all good, then we should have client's buffer information, lets see */
+	printf("Client side buffer information is received...\n");
+	show_rdma_buffer_attr(&client_metadata_attr);
+	printf("The client has requested buffer length of : %u bytes \n", 
+			client_metadata_attr.length);
+	/* We need to setup requested memory buffer. This is where the client will 
+	* do RDMA READs and WRITEs. */
+	server_buffer_mr = rdma_buffer_alloc(pd /* which protection domain */, 
+			client_metadata_attr.length /* what size to allocate */, 
+			(IBV_ACCESS_LOCAL_WRITE|
+			IBV_ACCESS_REMOTE_READ|
+			IBV_ACCESS_REMOTE_WRITE) /* access permissions */);
+	if(!server_buffer_mr){
+		rdma_error("Server failed to create a buffer \n");
+		/* we assume that it is due to out of memory error */
+		return -ENOMEM;
+	}
+	/* This buffer is used to transmit information about the above 
+	 * buffer to the client. So this contains the metadata about the server 
+	 * buffer. Hence this is called metadata buffer. Since this is already 
+	 * allocated, we just register it. The key we advertise is the rkey:
+	 * the client accesses the buffer remotely, and remote access is checked
+	 * against the rkey of the memory region, not its lkey.
+	 */
+	server_metadata_attr.address = (uint64_t) server_buffer_mr->addr;
+	server_metadata_attr.length = (uint32_t) server_buffer_mr->length;
+	server_metadata_attr.stag.local_stag = (uint32_t) server_buffer_mr->rkey;
+	server_metadata_mr = rdma_buffer_register(pd /* which protection domain*/, 
+			&server_metadata_attr /* which memory to register */, 
+			sizeof(server_metadata_attr) /* what is the size of memory */,
+			IBV_ACCESS_LOCAL_WRITE /* what access permission */);
+	if(!server_metadata_mr){
+		rdma_error("Server failed to create to hold server metadata \n");
+		/* we assume that this is due to out of memory error */
+		return -ENOMEM;
+	}
+	/* We need to transmit this buffer. So we create a send request. 
+	 * A send request consists of multiple SGE elements. In our case, we only
+	 * have one 
+	 */
+	server_send_sge.addr = (uint64_t) &server_metadata_attr;
+	server_send_sge.length = sizeof(server_metadata_attr);
+	server_send_sge.lkey = server_metadata_mr->lkey;
+	/* now we link this sge to the send request */
+	bzero(&server_send_wr, sizeof(server_send_wr));
+	server_send_wr.sg_list = &server_send_sge;
+	server_send_wr.num_sge = 1; // only 1 SGE element in the array 
+	server_send_wr.opcode = IBV_WR_SEND; // This is a send request 
+	server_send_wr.send_flags = IBV_SEND_SIGNALED; // We want to get notification 
+	/* This is a fast data path operation. Posting an I/O request */
+	ret = ibv_post_send(client_qp /* which QP */, 
+			&server_send_wr /* Send request that we prepared before */, 
+			&bad_server_send_wr /* In case of error, this will contain failed requests */);
+	if (ret) {
+		/* ibv_post_send() returns the errno value itself; errno is not the source */
+		rdma_error("Posting of server metdata failed, errno: %d \n", -ret);
+		return -ret;
+	}
+	/* We check for completion notification */
+	ret = process_work_completion_events(io_completion_channel, &wc, 1);
+	if (ret != 1) {
+		rdma_error("Failed to send server metadata, ret = %d \n", ret);
+		return ret;
+	}
+	debug("Local buffer metadata has been sent to the client \n");
+	return 0;
+}
+
+/* This is server side logic. Server passively waits for the client to call 
+ * rdma_disconnect() and then it will clean up its resources */
+static int disconnect_and_cleanup()
+{
+	struct rdma_cm_event *cm_event = NULL;
+	int ret = -1;
+	/* Now we wait for the client to send us disconnect event */
+	debug("Waiting for cm event: RDMA_CM_EVENT_DISCONNECTED\n");
+	ret = process_rdma_cm_event(cm_event_channel, 
+			RDMA_CM_EVENT_DISCONNECTED, 
+			&cm_event);
+	if (ret) {
+		rdma_error("Failed to get disconnect event, ret = %d \n", ret);
+		return ret;
+	}
+	/* We acknowledge the event */
+	ret = rdma_ack_cm_event(cm_event);
+	if (ret) {
+		ret = -errno; /* saved first: the fprintf() behind rdma_error may overwrite errno */
+		rdma_error("Failed to acknowledge the cm event %d\n", ret);
+		return ret;
+	}
+	printf("A disconnect event is received from the client...\n");
+	/* We free all the resources, starting with the QP */
+	rdma_destroy_qp(cm_client_id);
+	/* Destroy client cm id */
+	ret = rdma_destroy_id(cm_client_id);
+	if (ret) {
+		rdma_error("Failed to destroy client id cleanly, %d \n", -errno);
+		// we continue anyways;
+	}
+	/* Destroy CQ; the ibv_* destroy calls return the errno value themselves */
+	ret = ibv_destroy_cq(cq);
+	if (ret) {
+		rdma_error("Failed to destroy completion queue cleanly, %d \n", -ret);
+		// we continue anyways;
+	}
+	/* Destroy completion channel */
+	ret = ibv_destroy_comp_channel(io_completion_channel);
+	if (ret) {
+		rdma_error("Failed to destroy completion channel cleanly, %d \n", -ret);
+		// we continue anyways;
+	}
+	/* Destroy memory buffers */
+	rdma_buffer_free(server_buffer_mr);
+	rdma_buffer_deregister(server_metadata_mr);	
+	rdma_buffer_deregister(client_metadata_mr);	
+	/* Destroy protection domain */
+	ret = ibv_dealloc_pd(pd);
+	if (ret) {
+		rdma_error("Failed to destroy client protection domain cleanly, %d \n", -ret);
+		// we continue anyways;
+	}
+	/* Destroy rdma server id */
+	ret = rdma_destroy_id(cm_server_id);
+	if (ret) {
+		rdma_error("Failed to destroy server id cleanly, %d \n", -errno);
+		// we continue anyways;
+	}
+	rdma_destroy_event_channel(cm_event_channel);
+	printf("Server shut-down is complete \n");
+	return 0;
+}
+
+
+void usage() /* prints the command line synopsis to stdout and exits with status 1 */
+{
+	printf("Usage:\n"
+	       "rdma_server: [-a <server_addr>] [-p <server_port>]\n"
+	       "(default port is %d)\n", DEFAULT_RDMA_PORT);
+	exit(1);
+}
+
+/* Parses -a/-p, then runs the server: start, set up, accept, send metadata, clean up. */
+int main(int argc, char **argv) 
+{
+	int ret, option;
+	uint16_t port = 0; /* network byte order; 0 means no port was given */
+	struct sockaddr_in server_sockaddr;
+	bzero(&server_sockaddr, sizeof server_sockaddr);
+	server_sockaddr.sin_family = AF_INET; /* standard IP NET address */
+	server_sockaddr.sin_addr.s_addr = htonl(INADDR_ANY); /* passed address */
+	/* Parse Command Line Arguments, not the most reliable code */
+	while ((option = getopt(argc, argv, "a:p:")) != -1) {
+		switch (option) {
+			case 'a':
+				/* Remember, this will overwrite the port info */
+				ret = get_addr(optarg, (struct sockaddr*) &server_sockaddr);
+				if (ret) {
+					rdma_error("Invalid IP \n");
+					return ret;
+				}
+				break;
+			case 'p':
+				/* passed port to listen on, applied once parsing is done */
+				port = htons(strtol(optarg, NULL, 0));
+				break;
+			default:
+				usage();
+				break;
+		}
+	}
+	/* Set the port only now: get_addr() rewrites the whole sockaddr, so a -p given
+	 * before -a used to be lost. With no port info provided we use the default port. */
+	server_sockaddr.sin_port = port ? port : htons(DEFAULT_RDMA_PORT);
+	ret = start_rdma_server(&server_sockaddr);
+	if (ret) {
+		rdma_error("RDMA server failed to start cleanly, ret = %d \n", ret);
+		return ret;
+	}
+	ret = setup_client_resources();
+	if (ret) { 
+		rdma_error("Failed to setup client resources, ret = %d \n", ret);
+		return ret;
+	}
+	ret = accept_client_connection();
+	if (ret) {
+		rdma_error("Failed to handle client cleanly, ret = %d \n", ret);
+		return ret;
+	}
+	ret = send_server_metadata_to_client();
+	if (ret) {
+		rdma_error("Failed to send server metadata to the client, ret = %d \n", ret);
+		return ret;
+	}
+	ret = disconnect_and_cleanup();
+	if (ret) {
+		rdma_error("Failed to clean up resources properly, ret = %d \n", ret);
+	}
+	return ret;
+}
author	智皓张 <[email protected]>	2023-08-04 15:24:55 +0800
committer	智皓张 <[email protected]>	2023-08-04 15:24:55 +0800
commit	1c82c0c7a27ea7778a5d2ca5104d822209afeb75 (patch)
tree	a6911a2886f1fe4339e3d2b8dc0cded4f0c82618
parent	8f0fe11da82349c15272b92115b2bc427a8e3a8e (diff)