summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author智皓 张 <[email protected]>2023-08-04 15:24:55 +0800
committer智皓 张 <[email protected]>2023-08-04 15:24:55 +0800
commit1c82c0c7a27ea7778a5d2ca5104d822209afeb75 (patch)
treea6911a2886f1fe4339e3d2b8dc0cded4f0c82618
parent8f0fe11da82349c15272b92115b2bc427a8e3a8e (diff)
update
-rw-r--r--rdma-example-master/rdma-example-master/CMakeLists.txt34
-rw-r--r--rdma-example-master/rdma-example-master/LICENSE201
-rw-r--r--rdma-example-master/rdma-example-master/README.md51
-rw-r--r--rdma-example-master/rdma-example-master/src/rdma_client.c553
-rw-r--r--rdma-example-master/rdma-example-master/src/rdma_common.c210
-rw-r--r--rdma-example-master/rdma-example-master/src/rdma_common.h133
-rw-r--r--rdma-example-master/rdma-example-master/src/rdma_server.c489
7 files changed, 1671 insertions, 0 deletions
diff --git a/rdma-example-master/rdma-example-master/CMakeLists.txt b/rdma-example-master/rdma-example-master/CMakeLists.txt
new file mode 100644
index 0000000..2471b09
--- /dev/null
+++ b/rdma-example-master/rdma-example-master/CMakeLists.txt
@@ -0,0 +1,34 @@
+# Author : Animesh Trivedi
+
+cmake_minimum_required (VERSION 2.6)
+
+project (rdma-example)
+
+set(PROJECT_SOURCE_DIR ${CMAKE_SOURCE_DIR}/src) # sources and headers are taken from src/
+set(CMAKE_BINARY_DIR ${CMAKE_SOURCE_DIR}/bin) # build products are placed in bin/
+set(EXECUTABLE_OUTPUT_PATH ${CMAKE_BINARY_DIR})
+set(LIBRARY_OUTPUT_PATH ${CMAKE_BINARY_DIR})
+
+set(CMAKE_CXX_STANDARD 11)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+set(CMAKE_CXX_EXTENSIONS OFF)
+
+set(CMAKE_BUILD_TYPE Release) # or debug if you need to use gdb on it.
+
+# Somehow g++ has better performance than clang++. Of course I don't know all flags for clang++. NOTE(review): both targets below are built from .c files, so the CXX-only settings in this file presumably do not reach them - confirm.
+set(CMAKE_CXX_COMPILER g++) # or clang++
+#set(CMAKE_CXX_COMPILER clang++) # or clang++
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2 -Ofast -ffast-math -funroll-loops -march=native") # get crazy here
+#add_compile_options("-std=c++17")
+#set(CMAKE_CXX_FLAGS "-O0 -ggdb")
+
+find_library(IBVERBS_LIBRARY ibverbs HINTS /home/atr/local/lib) # NOTE(review): HINTS is a developer-local path; harmless elsewhere but worth removing
+find_library(RDMACM_LIBRARY rdmacm HINTS /home/atr/local/lib)
+
+link_libraries(pthread ${IBVERBS_LIBRARY} ${RDMACM_LIBRARY}) # applies to every target declared after this line
+
+include_directories("${PROJECT_SOURCE_DIR}" "/home/atr/local/include/")
+
+add_executable(rdma_server ${PROJECT_SOURCE_DIR}/rdma_common.c ${PROJECT_SOURCE_DIR}/rdma_server.c)
+add_executable(rdma_client ${PROJECT_SOURCE_DIR}/rdma_common.c ${PROJECT_SOURCE_DIR}/rdma_client.c)
+
diff --git a/rdma-example-master/rdma-example-master/LICENSE b/rdma-example-master/rdma-example-master/LICENSE
new file mode 100644
index 0000000..261eeb9
--- /dev/null
+++ b/rdma-example-master/rdma-example-master/LICENSE
@@ -0,0 +1,201 @@
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/rdma-example-master/rdma-example-master/README.md b/rdma-example-master/rdma-example-master/README.md
new file mode 100644
index 0000000..fc6af6e
--- /dev/null
+++ b/rdma-example-master/rdma-example-master/README.md
@@ -0,0 +1,51 @@
+# RDMA example
+
+A simple RDMA server client example. The code contains a lot of comments. Here is the workflow that happens in the example:
+
+Client:
+ 1. setup RDMA resources
+ 2. connect to the server
+ 3. receive server side buffer information via send/recv exchange
+ 4. do an RDMA write to the server buffer from a (first) local buffer. The content of the buffer is the string passed with the `-s` argument.
+ 5. do an RDMA read to read the content of the server buffer into a second local buffer.
+ 6. compare the content of the first and second buffers, and match them.
+ 7. disconnect
+
+Server:
+ 1. setup RDMA resources
+ 2. wait for a client to connect
+ 3. allocate and pin a server buffer
+ 4. accept the incoming client connection
+ 5. send information about the local server buffer to the client
+ 6. wait for disconnect
+
+###### How to run
+```text
+git clone https://github.com/animeshtrivedi/rdma-example.git
+cd ./rdma-example
+cmake .
+make
+```
+
+###### server
+```text
+./bin/rdma_server
+```
+###### client
+```text
+atr@atr:~/rdma-example$ ./bin/rdma_client -a 127.0.0.1 -s textstring
+Passed string is : textstring , with count 10
+Trying to connect to server at : 127.0.0.1 port: 20886
+The client is connected successfully
+---------------------------------------------------------
+buffer attr, addr: 0x5629832e22c0 , len: 10 , stag : 0x1617b400
+---------------------------------------------------------
+...
+SUCCESS, source and destination buffers match
+Client resource clean up is complete
+atr@atr:~/rdma-example$
+
+```
+
+## Don't have an RDMA device?
+In case you do not have an RDMA device to test the code, you can set up a SoftiWARP software RDMA device on your Linux machine. Follow the instructions here: [https://github.com/animeshtrivedi/blog/blob/master/post/2019-06-26-siw.md](https://github.com/animeshtrivedi/blog/blob/master/post/2019-06-26-siw.md).
diff --git a/rdma-example-master/rdma-example-master/src/rdma_client.c b/rdma-example-master/rdma-example-master/src/rdma_client.c
new file mode 100644
index 0000000..a640be6
--- /dev/null
+++ b/rdma-example-master/rdma-example-master/src/rdma_client.c
@@ -0,0 +1,553 @@
+/*
+ * An example RDMA client side code.
+ * Author: Animesh Trivedi
+ */
+
+#include "rdma_common.h"
+
+/* File-scope state shared by the client_* helpers in this file. */
+/* Connection related resources; all of them are assigned in client_prepare_connection(). */
+static struct rdma_event_channel *cm_event_channel = NULL; /* channel on which CM events are delivered */
+static struct rdma_cm_id *cm_client_id = NULL; /* connection identifier for this client */
+static struct ibv_pd *pd = NULL; /* protection domain used for the QP and every registration */
+static struct ibv_comp_channel *io_completion_channel = NULL; /* I/O completion notification channel */
+static struct ibv_cq *client_cq = NULL; /* single CQ used for both send and receive completions */
+static struct ibv_qp_init_attr qp_init_attr; /* attributes handed to rdma_create_qp() */
+static struct ibv_qp *client_qp; /* copy of cm_client_id->qp, set after the QP is created */
+/* Memory regions: one per registered buffer (the two metadata structures
+ * below plus the src and dst payload buffers). */
+static struct ibv_mr *client_metadata_mr = NULL,
+ *client_src_mr = NULL,
+ *client_dst_mr = NULL,
+ *server_metadata_mr = NULL;
+static struct rdma_buffer_attr client_metadata_attr, server_metadata_attr; /* buffer descriptions: sent to / received from the server */
+static struct ibv_send_wr client_send_wr, *bad_client_send_wr = NULL; /* send work request, reused for SEND, RDMA WRITE and RDMA READ */
+static struct ibv_recv_wr server_recv_wr, *bad_server_recv_wr = NULL; /* receive work request pre-posted for the server metadata */
+static struct ibv_sge client_send_sge, server_recv_sge; /* scatter/gather entries referenced by the work requests above */
+/* Source and destination buffers: src is RDMA-written to the server,
+ * dst receives the RDMA read; both are allocated in main(). */
+static char *src = NULL, *dst = NULL;
+
+/* Verification step: compares the first strlen(src) bytes of the source
+ * and destination buffers. Returns 0 when they are identical, otherwise
+ * the non-zero result of memcmp().
+ */
+static int check_src_dst()
+{
+    const size_t nbytes = strlen(src);
+
+    return memcmp(src, dst, nbytes);
+}
+
+/* Prepares the client side resources for an RDMA connection to s_addr:
+ * CM event channel, CM id, address and route resolution, protection domain,
+ * completion channel, completion queue and finally the queue pair.
+ * The results are stored in the file-scope variables of this file.
+ *
+ * Returns 0 on success and a non-zero (negative) error code otherwise.
+ * Resources created before a failure are not released here.
+ */
+static int client_prepare_connection(struct sockaddr_in *s_addr)
+{
+    struct rdma_cm_event *cm_event = NULL;
+    int ret = -1;
+    /* Open a channel used to report asynchronous communication events */
+    cm_event_channel = rdma_create_event_channel();
+    if (!cm_event_channel) {
+        /* errno is captured once, before logging can disturb it */
+        ret = -errno;
+        rdma_error("Creating cm event channel failed, errno: %d \n", ret);
+        return ret;
+    }
+    debug("RDMA CM event channel is created at : %p \n", cm_event_channel);
+    /* rdma_cm_id is the connection identifier (like a socket) which is used
+     * to define an RDMA connection.
+     */
+    ret = rdma_create_id(cm_event_channel, &cm_client_id,
+            NULL,
+            RDMA_PS_TCP);
+    if (ret) {
+        ret = -errno;
+        rdma_error("Creating cm id failed with errno: %d \n", ret);
+        return ret;
+    }
+    /* Resolve destination and optional source addresses from IP addresses to
+     * an RDMA address. If successful, the specified rdma_cm_id will be bound
+     * to a local device. */
+    ret = rdma_resolve_addr(cm_client_id, NULL, (struct sockaddr*) s_addr, 2000);
+    if (ret) {
+        ret = -errno;
+        rdma_error("Failed to resolve address, errno: %d \n", ret);
+        return ret;
+    }
+    debug("waiting for cm event: RDMA_CM_EVENT_ADDR_RESOLVED\n");
+    ret = process_rdma_cm_event(cm_event_channel,
+            RDMA_CM_EVENT_ADDR_RESOLVED,
+            &cm_event);
+    if (ret) {
+        rdma_error("Failed to receive a valid event, ret = %d \n", ret);
+        return ret;
+    }
+    /* we ack the event */
+    ret = rdma_ack_cm_event(cm_event);
+    if (ret) {
+        ret = -errno;
+        rdma_error("Failed to acknowledge the CM event, errno: %d\n", ret);
+        return ret;
+    }
+    debug("RDMA address is resolved \n");
+
+    /* Resolves an RDMA route to the destination address in order to
+     * establish a connection */
+    ret = rdma_resolve_route(cm_client_id, 2000);
+    if (ret) {
+        ret = -errno;
+        rdma_error("Failed to resolve route, erno: %d \n", ret);
+        return ret;
+    }
+    debug("waiting for cm event: RDMA_CM_EVENT_ROUTE_RESOLVED\n");
+    ret = process_rdma_cm_event(cm_event_channel,
+            RDMA_CM_EVENT_ROUTE_RESOLVED,
+            &cm_event);
+    if (ret) {
+        rdma_error("Failed to receive a valid event, ret = %d \n", ret);
+        return ret;
+    }
+    /* we ack the event */
+    ret = rdma_ack_cm_event(cm_event);
+    if (ret) {
+        ret = -errno;
+        rdma_error("Failed to acknowledge the CM event, errno: %d \n", ret);
+        return ret;
+    }
+    printf("Trying to connect to server at : %s port: %d \n",
+            inet_ntoa(s_addr->sin_addr),
+            ntohs(s_addr->sin_port));
+    /* Protection Domain (PD) is similar to a "process abstraction"
+     * in the operating system. All resources are tied to a particular PD.
+     * And accessing resources across PDs will result in a protection fault.
+     */
+    pd = ibv_alloc_pd(cm_client_id->verbs);
+    if (!pd) {
+        ret = -errno;
+        rdma_error("Failed to alloc pd, errno: %d \n", ret);
+        return ret;
+    }
+    debug("pd allocated at %p \n", pd);
+    /* Now we need a completion channel, where the I/O completion
+     * notifications are sent. Remember, this is different from connection
+     * management (CM) event notifications.
+     * A completion channel is also tied to an RDMA device, hence we will
+     * use cm_client_id->verbs.
+     */
+    io_completion_channel = ibv_create_comp_channel(cm_client_id->verbs);
+    if (!io_completion_channel) {
+        ret = -errno;
+        rdma_error("Failed to create IO completion event channel, errno: %d\n",
+                ret);
+        return ret;
+    }
+    debug("completion event channel created at : %p \n", io_completion_channel);
+    /* Now we create a completion queue (CQ) where actual I/O
+     * completion metadata is placed. The metadata is packed into a structure
+     * called struct ibv_wc (wc = work completion). ibv_wc has detailed
+     * information about the work completion. An I/O request in RDMA world
+     * is called "work" ;)
+     */
+    client_cq = ibv_create_cq(cm_client_id->verbs /* which device*/,
+            CQ_CAPACITY /* maximum capacity*/,
+            NULL /* user context, not used here */,
+            io_completion_channel /* which IO completion channel */,
+            0 /* signaling vector, not used here*/);
+    if (!client_cq) {
+        ret = -errno;
+        rdma_error("Failed to create CQ, errno: %d \n", ret);
+        return ret;
+    }
+    debug("CQ created at %p with %d elements \n", client_cq, client_cq->cqe);
+    ret = ibv_req_notify_cq(client_cq, 0);
+    if (ret) {
+        /* ibv_req_notify_cq() hands back the error code as its return
+         * value, so errno is not consulted here. */
+        rdma_error("Failed to request notifications, errno: %d\n", -ret);
+        return -ret;
+    }
+    /* Now the last step, set up the queue pair (send, recv) queues and their capacity.
+     * The capacity here is defined statically but this can be probed from the
+     * device. We just use a small number as defined in rdma_common.h */
+    bzero(&qp_init_attr, sizeof qp_init_attr);
+    qp_init_attr.cap.max_recv_sge = MAX_SGE; /* Maximum SGE per receive posting */
+    qp_init_attr.cap.max_recv_wr = MAX_WR; /* Maximum receive posting capacity */
+    qp_init_attr.cap.max_send_sge = MAX_SGE; /* Maximum SGE per send posting */
+    qp_init_attr.cap.max_send_wr = MAX_WR; /* Maximum send posting capacity */
+    qp_init_attr.qp_type = IBV_QPT_RC; /* QP type, RC = Reliable connection */
+    /* We use same completion queue, but one can use different queues */
+    qp_init_attr.recv_cq = client_cq; /* Where should I notify for receive completion operations */
+    qp_init_attr.send_cq = client_cq; /* Where should I notify for send completion operations */
+    /* Let's create a QP */
+    ret = rdma_create_qp(cm_client_id /* which connection id */,
+            pd /* which protection domain*/,
+            &qp_init_attr /* Initial attributes */);
+    if (ret) {
+        ret = -errno;
+        rdma_error("Failed to create QP, errno: %d \n", ret);
+        return ret;
+    }
+    client_qp = cm_client_id->qp;
+    debug("QP created at %p \n", client_qp);
+    return 0;
+}
+
+/* Registers server_metadata_attr as a receive buffer and posts a single
+ * receive work request for it on the client QP. This has to happen before
+ * rdma_connect() is called. Returns 0 on success, -ENOMEM when the
+ * registration fails, or the value returned by ibv_post_recv().
+ */
+static int client_pre_post_recv_buffer()
+{
+    int rc;
+
+    server_metadata_mr = rdma_buffer_register(pd,
+            &server_metadata_attr,
+            sizeof(server_metadata_attr),
+            IBV_ACCESS_LOCAL_WRITE);
+    if (server_metadata_mr == NULL) {
+        rdma_error("Failed to setup the server metadata mr , -ENOMEM\n");
+        return -ENOMEM;
+    }
+
+    /* start from a zeroed work request that carries exactly one SGE */
+    bzero(&server_recv_wr, sizeof(server_recv_wr));
+    server_recv_wr.num_sge = 1;
+    server_recv_wr.sg_list = &server_recv_sge;
+
+    /* the SGE describes the freshly registered metadata region */
+    server_recv_sge.lkey = (uint32_t) server_metadata_mr->lkey;
+    server_recv_sge.length = (uint32_t) server_metadata_mr->length;
+    server_recv_sge.addr = (uint64_t) server_metadata_mr->addr;
+
+    rc = ibv_post_recv(client_qp, &server_recv_wr, &bad_server_recv_wr);
+    if (rc != 0) {
+        rdma_error("Failed to pre-post the receive buffer, errno: %d \n", rc);
+        return rc;
+    }
+
+    debug("Receive buffer pre-posting is successful \n");
+    return 0;
+}
+
+/* Issues rdma_connect() on the client CM id, waits for the
+ * RDMA_CM_EVENT_ESTABLISHED event and acknowledges it.
+ * Returns 0 once the connection is established, non-zero otherwise.
+ */
+static int client_connect_to_server()
+{
+    struct rdma_cm_event *event = NULL;
+    struct rdma_conn_param params;
+    int rc;
+
+    bzero(&params, sizeof(params));
+    params.retry_count = 3; // if fail, then how many times to retry
+    params.responder_resources = 3;
+    params.initiator_depth = 3;
+
+    rc = rdma_connect(cm_client_id, &params);
+    if (rc != 0) {
+        rdma_error("Failed to connect to remote host , errno: %d\n", -errno);
+        return -errno;
+    }
+
+    debug("waiting for cm event: RDMA_CM_EVENT_ESTABLISHED\n");
+    rc = process_rdma_cm_event(cm_event_channel, RDMA_CM_EVENT_ESTABLISHED, &event);
+    if (rc != 0) {
+        rdma_error("Failed to get cm event, ret = %d \n", rc);
+        return rc;
+    }
+
+    rc = rdma_ack_cm_event(event);
+    if (rc != 0) {
+        rdma_error("Failed to acknowledge cm event, errno: %d\n",
+                -errno);
+        return -errno;
+    }
+
+    printf("The client is connected successfully \n");
+    return 0;
+}
+
+/* Exchanges buffer metadata with the server. The client registers its source
+ * buffer, sends a description of it, and then receives the description of the
+ * server buffer into server_metadata_attr (the receive was pre-posted before
+ * connecting). The client-side metadata on the server is _not_ used because
+ * this program is client driven; it is sent for illustration purposes only.
+ *
+ * Returns 0 on success and a negative error code otherwise.
+ */
+static int client_xchange_metadata_with_server()
+{
+    struct ibv_wc wc[2];
+    int ret = -1;
+    client_src_mr = rdma_buffer_register(pd,
+            src,
+            strlen(src),
+            (IBV_ACCESS_LOCAL_WRITE|
+             IBV_ACCESS_REMOTE_READ|
+             IBV_ACCESS_REMOTE_WRITE));
+    if(!client_src_mr){
+        /* report a real error code instead of the -1 placeholder */
+        ret = -ENOMEM;
+        rdma_error("Failed to register the first buffer, ret = %d \n", ret);
+        return ret;
+    }
+    /* we prepare metadata for the first buffer */
+    client_metadata_attr.address = (uint64_t) client_src_mr->addr;
+    client_metadata_attr.length = client_src_mr->length;
+    client_metadata_attr.stag.local_stag = client_src_mr->lkey;
+    /* now we register the metadata memory */
+    client_metadata_mr = rdma_buffer_register(pd,
+            &client_metadata_attr,
+            sizeof(client_metadata_attr),
+            IBV_ACCESS_LOCAL_WRITE);
+    if(!client_metadata_mr) {
+        ret = -ENOMEM;
+        rdma_error("Failed to register the client metadata buffer, ret = %d \n", ret);
+        return ret;
+    }
+    /* now we fill up SGE */
+    client_send_sge.addr = (uint64_t) client_metadata_mr->addr;
+    client_send_sge.length = (uint32_t) client_metadata_mr->length;
+    client_send_sge.lkey = client_metadata_mr->lkey;
+    /* now we link to the send work request */
+    bzero(&client_send_wr, sizeof(client_send_wr));
+    client_send_wr.sg_list = &client_send_sge;
+    client_send_wr.num_sge = 1;
+    client_send_wr.opcode = IBV_WR_SEND;
+    client_send_wr.send_flags = IBV_SEND_SIGNALED;
+    /* Now we post it */
+    ret = ibv_post_send(client_qp,
+            &client_send_wr,
+            &bad_client_send_wr);
+    if (ret) {
+        /* ibv_post_send() returns the error code itself; errno may still
+         * be 0 here, which would have been reported as success. */
+        rdma_error("Failed to send client metadata, errno: %d \n",
+                -ret);
+        return -ret;
+    }
+    /* at this point we are expecting 2 work completions. One for our
+     * send and one for the recv that we will get from the server for
+     * its buffer information */
+    ret = process_work_completion_events(io_completion_channel,
+            wc, 2);
+    if(ret != 2) {
+        rdma_error("We failed to get 2 work completions , ret = %d \n",
+                ret);
+        /* never let a short, non-negative count look like success */
+        return (ret < 0) ? ret : -EIO;
+    }
+    debug("Server sent us its buffer location and credentials, showing \n");
+    show_rdma_buffer_attr(&server_metadata_attr);
+    return 0;
+}
+
+/* This function does :
+ * 1) Register the destination buffer for RDMA operations
+ * 2) RDMA write from src -> remote buffer
+ * 3) RDMA read from remote buffer -> dst
+ * The remote address and key come from server_metadata_attr, and the source
+ * region from client_src_mr, so the metadata exchange must have completed.
+ *
+ * Returns 0 on success and a negative error code otherwise.
+ */
+static int client_remote_memory_ops()
+{
+    struct ibv_wc wc;
+    int ret = -1;
+    client_dst_mr = rdma_buffer_register(pd,
+            dst,
+            strlen(src),
+            (IBV_ACCESS_LOCAL_WRITE |
+             IBV_ACCESS_REMOTE_WRITE |
+             IBV_ACCESS_REMOTE_READ));
+    if (!client_dst_mr) {
+        rdma_error("We failed to create the destination buffer, -ENOMEM\n");
+        return -ENOMEM;
+    }
+    /* Step 1: is to copy the local buffer into the remote buffer. We will
+     * reuse the previous variables. */
+    /* now we fill up SGE */
+    client_send_sge.addr = (uint64_t) client_src_mr->addr;
+    client_send_sge.length = (uint32_t) client_src_mr->length;
+    client_send_sge.lkey = client_src_mr->lkey;
+    /* now we link to the send work request */
+    bzero(&client_send_wr, sizeof(client_send_wr));
+    client_send_wr.sg_list = &client_send_sge;
+    client_send_wr.num_sge = 1;
+    client_send_wr.opcode = IBV_WR_RDMA_WRITE;
+    client_send_wr.send_flags = IBV_SEND_SIGNALED;
+    /* we have to tell server side info for RDMA */
+    client_send_wr.wr.rdma.rkey = server_metadata_attr.stag.remote_stag;
+    client_send_wr.wr.rdma.remote_addr = server_metadata_attr.address;
+    /* Now we post it */
+    ret = ibv_post_send(client_qp,
+            &client_send_wr,
+            &bad_client_send_wr);
+    if (ret) {
+        /* ibv_post_send() returns the error code itself; errno may still
+         * be 0 here, which would have been reported as success. */
+        rdma_error("Failed to write client src buffer, errno: %d \n",
+                -ret);
+        return -ret;
+    }
+    /* at this point we are expecting 1 work completion for the write */
+    ret = process_work_completion_events(io_completion_channel,
+            &wc, 1);
+    if(ret != 1) {
+        rdma_error("We failed to get 1 work completions , ret = %d \n",
+                ret);
+        /* never let a short, non-negative count look like success */
+        return (ret < 0) ? ret : -EIO;
+    }
+    debug("Client side WRITE is complete \n");
+    /* Step 2: prepare a READ using the same variables, but for the destination */
+    client_send_sge.addr = (uint64_t) client_dst_mr->addr;
+    client_send_sge.length = (uint32_t) client_dst_mr->length;
+    client_send_sge.lkey = client_dst_mr->lkey;
+    /* now we link to the send work request */
+    bzero(&client_send_wr, sizeof(client_send_wr));
+    client_send_wr.sg_list = &client_send_sge;
+    client_send_wr.num_sge = 1;
+    client_send_wr.opcode = IBV_WR_RDMA_READ;
+    client_send_wr.send_flags = IBV_SEND_SIGNALED;
+    /* we have to tell server side info for RDMA */
+    client_send_wr.wr.rdma.rkey = server_metadata_attr.stag.remote_stag;
+    client_send_wr.wr.rdma.remote_addr = server_metadata_attr.address;
+    /* Now we post it */
+    ret = ibv_post_send(client_qp,
+            &client_send_wr,
+            &bad_client_send_wr);
+    if (ret) {
+        rdma_error("Failed to read client dst buffer from the master, errno: %d \n",
+                -ret);
+        return -ret;
+    }
+    /* at this point we are expecting 1 work completion for the read */
+    ret = process_work_completion_events(io_completion_channel,
+            &wc, 1);
+    if(ret != 1) {
+        rdma_error("We failed to get 1 work completions , ret = %d \n",
+                ret);
+        return (ret < 0) ? ret : -EIO;
+    }
+    debug("Client side READ is complete \n");
+    return 0;
+}
+
+/* Disconnects the RDMA connection from the server and releases every
+ * resource held in the file-scope variables: QP, CM id, CQ, completion
+ * channel, memory regions, the src/dst buffers, the protection domain and
+ * the CM event channel.
+ *
+ * Clean-up is best effort: each failure is logged and the remaining steps
+ * still run. The function always returns 0.
+ */
+static int client_disconnect_and_clean()
+{
+    struct rdma_cm_event *cm_event = NULL;
+    int ret = -1;
+    /* active disconnect from the client side */
+    ret = rdma_disconnect(cm_client_id);
+    if (ret) {
+        rdma_error("Failed to disconnect, errno: %d \n", -errno);
+        //continuing anyways
+    }
+    ret = process_rdma_cm_event(cm_event_channel,
+            RDMA_CM_EVENT_DISCONNECTED,
+            &cm_event);
+    if (ret) {
+        rdma_error("Failed to get RDMA_CM_EVENT_DISCONNECTED event, ret = %d\n",
+                ret);
+        //continuing anyways
+    } else {
+        /* Only acknowledge an event that was actually delivered. After a
+         * failure cm_event is either still NULL or (presumably, the helper
+         * is defined elsewhere) already released, so acking it is wrong. */
+        ret = rdma_ack_cm_event(cm_event);
+        if (ret) {
+            rdma_error("Failed to acknowledge cm event, errno: %d\n",
+                    -errno);
+            //continuing anyways
+        }
+    }
+    /* Destroy QP */
+    rdma_destroy_qp(cm_client_id);
+    /* Destroy client cm id */
+    ret = rdma_destroy_id(cm_client_id);
+    if (ret) {
+        rdma_error("Failed to destroy client id cleanly, %d \n", -errno);
+        // we continue anyways;
+    }
+    /* Destroy CQ; the ibv_* calls below return the error code directly,
+     * so the return value is logged rather than errno. */
+    ret = ibv_destroy_cq(client_cq);
+    if (ret) {
+        rdma_error("Failed to destroy completion queue cleanly, %d \n", -ret);
+        // we continue anyways;
+    }
+    /* Destroy completion channel */
+    ret = ibv_destroy_comp_channel(io_completion_channel);
+    if (ret) {
+        rdma_error("Failed to destroy completion channel cleanly, %d \n", -ret);
+        // we continue anyways;
+    }
+    /* Destroy memory buffers */
+    rdma_buffer_deregister(server_metadata_mr);
+    rdma_buffer_deregister(client_metadata_mr);
+    rdma_buffer_deregister(client_src_mr);
+    rdma_buffer_deregister(client_dst_mr);
+    /* We free the buffers */
+    free(src);
+    free(dst);
+    /* Destroy protection domain */
+    ret = ibv_dealloc_pd(pd);
+    if (ret) {
+        rdma_error("Failed to destroy client protection domain cleanly, %d \n", -ret);
+        // we continue anyways;
+    }
+    rdma_destroy_event_channel(cm_event_channel);
+    printf("Client resource clean up is complete \n");
+    return 0;
+}
+
+/* Prints the command line synopsis to stdout and terminates the process
+ * with exit status 1. Does not return.
+ */
+void usage()
+{
+    printf("Usage:\n"
+           "rdma_client: [-a <server_addr>] [-p <server_port>] -s string (required)\n"
+           "(default IP is 127.0.0.1 and port is %d)\n",
+           DEFAULT_RDMA_PORT);
+    exit(1);
+}
+
+/* Client entry point.
+ *
+ * Options: -a <server_addr> (default 127.0.0.1), -p <server_port>
+ * (default DEFAULT_RDMA_PORT), -s <string> (required payload).
+ *
+ * Sets up the connection, exchanges metadata, RDMA-writes the string to the
+ * server, RDMA-reads it back, compares both buffers and cleans up.
+ * Returns 0 on success and a non-zero error code otherwise.
+ */
+int main(int argc, char **argv) {
+    struct sockaddr_in server_sockaddr;
+    int ret, option;
+    size_t len;
+    bzero(&server_sockaddr, sizeof server_sockaddr);
+    server_sockaddr.sin_family = AF_INET;
+    server_sockaddr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+    /* buffers are NULL */
+    src = dst = NULL;
+    /* Parse Command Line Arguments */
+    while ((option = getopt(argc, argv, "s:a:p:")) != -1) {
+        switch (option) {
+            case 's':
+                len = strlen(optarg);
+                printf("Passed string is : %s , with count %u \n",
+                        optarg,
+                        (unsigned int) len);
+                /* a repeated -s replaces the earlier string; drop the old buffers */
+                free(src);
+                free(dst);
+                src = dst = NULL;
+                /* One extra zeroed byte keeps src NUL-terminated: the rest of
+                 * the client calls strlen(src) to size its RDMA operations. */
+                src = calloc(len + 1, 1);
+                if (!src) {
+                    rdma_error("Failed to allocate memory : -ENOMEM\n");
+                    return -ENOMEM;
+                }
+                /* Copy the passed argument */
+                memcpy(src, optarg, len);
+                dst = calloc(len + 1, 1);
+                if (!dst) {
+                    rdma_error("Failed to allocate destination memory, -ENOMEM\n");
+                    free(src);
+                    return -ENOMEM;
+                }
+                break;
+            case 'a':
+                /* remember, this overwrites the port info */
+                ret = get_addr(optarg, (struct sockaddr*) &server_sockaddr);
+                if (ret) {
+                    rdma_error("Invalid IP \n");
+                    return ret;
+                }
+                break;
+            case 'p':
+                /* port of the server to connect to */
+                server_sockaddr.sin_port = htons(strtol(optarg, NULL, 0));
+                break;
+            default:
+                usage();
+                break;
+        }
+    }
+    if (!server_sockaddr.sin_port) {
+        /* no port provided, use the default port */
+        server_sockaddr.sin_port = htons(DEFAULT_RDMA_PORT);
+    }
+    if (src == NULL) {
+        printf("Please provide a string to copy \n");
+        usage();
+    }
+    ret = client_prepare_connection(&server_sockaddr);
+    if (ret) {
+        rdma_error("Failed to setup client connection , ret = %d \n", ret);
+        return ret;
+    }
+    ret = client_pre_post_recv_buffer();
+    if (ret) {
+        rdma_error("Failed to pre-post the receive buffer , ret = %d \n", ret);
+        return ret;
+    }
+    ret = client_connect_to_server();
+    if (ret) {
+        rdma_error("Failed to connect to the server , ret = %d \n", ret);
+        return ret;
+    }
+    ret = client_xchange_metadata_with_server();
+    if (ret) {
+        rdma_error("Failed to exchange metadata with the server , ret = %d \n", ret);
+        return ret;
+    }
+    ret = client_remote_memory_ops();
+    if (ret) {
+        rdma_error("Failed to finish remote memory ops, ret = %d \n", ret);
+        return ret;
+    }
+    if (check_src_dst()) {
+        rdma_error("src and dst buffers do not match \n");
+    } else {
+        printf("...\nSUCCESS, source and destination buffers match \n");
+    }
+    ret = client_disconnect_and_clean();
+    if (ret) {
+        rdma_error("Failed to cleanly disconnect and clean up resources \n");
+    }
+    return ret;
+}
+
diff --git a/rdma-example-master/rdma-example-master/src/rdma_common.c b/rdma-example-master/rdma-example-master/src/rdma_common.c
new file mode 100644
index 0000000..0478c29
--- /dev/null
+++ b/rdma-example-master/rdma-example-master/src/rdma_common.c
@@ -0,0 +1,210 @@
+/*
+ * Implementation of the common RDMA functions.
+ *
+ * Authors: Animesh Trivedi
+ */
+
+#include "rdma_common.h"
+
+/* Prints the fields of an RDMA CM id to stdout: the id pointer itself, the
+ * device context and device name (only when both are set), the CM event
+ * channel (only when set) and finally the QP pointer, port space and port
+ * number.
+ * @id: connection identifier to describe. A NULL id is reported through
+ *      rdma_error() and nothing else is printed.
+ */
+void show_rdma_cmid(struct rdma_cm_id *id)
+{
+    struct ibv_context *dev_ctx;
+
+    if (id == NULL) {
+        rdma_error("Passed ptr is NULL\n");
+        return;
+    }
+    printf("RDMA cm id at %p \n", id);
+    dev_ctx = id->verbs;
+    if (dev_ctx != NULL && dev_ctx->device != NULL) {
+        printf("dev_ctx: %p (device name: %s) \n",
+                dev_ctx, dev_ctx->device->name);
+    }
+    if (id->channel != NULL) {
+        printf("cm event channel %p\n", id->channel);
+    }
+    printf("QP: %p, port_space %x, port_num %u \n",
+            id->qp, id->ps, id->port_num);
+}
+
+/* Prints the address, length and stag of a buffer attribute record to
+ * stdout, framed by two separator lines.
+ * @attr: record to print. A NULL pointer is reported through rdma_error()
+ *        and nothing is printed.
+ */
+void show_rdma_buffer_attr(struct rdma_buffer_attr *attr){
+    void *buf_addr;
+    unsigned int buf_len;
+
+    if (attr == NULL) {
+        rdma_error("Passed attr is NULL\n");
+        return;
+    }
+    /* pull the values out first so the print statement stays readable */
+    buf_addr = (void*) attr->address;
+    buf_len = (unsigned int) attr->length;
+
+    printf("---------------------------------------------------------\n");
+    printf("buffer attr, addr: %p , len: %u , stag : 0x%x \n",
+            buf_addr, buf_len, attr->stag.local_stag);
+    printf("---------------------------------------------------------\n");
+}
+
+/* Allocates a zero-filled buffer of 'size' bytes and registers it as a
+ * memory region in the given protection domain.
+ * @pd: protection domain to register the buffer in; must not be NULL
+ * @size: number of bytes to allocate
+ * @permission: access flags handed to rdma_buffer_register()
+ * Returns the memory region, or NULL when pd is NULL, the allocation fails
+ * or the registration fails (the buffer is released again in that case).
+ */
+struct ibv_mr* rdma_buffer_alloc(struct ibv_pd *pd, uint32_t size,
+        enum ibv_access_flags permission)
+{
+    void *backing;
+    struct ibv_mr *region;
+
+    if (pd == NULL) {
+        rdma_error("Protection domain is NULL \n");
+        return NULL;
+    }
+    backing = calloc(1, size);
+    if (backing == NULL) {
+        rdma_error("failed to allocate buffer, -ENOMEM\n");
+        return NULL;
+    }
+    debug("Buffer allocated: %p , len: %u \n", backing, size);
+    region = rdma_buffer_register(pd, backing, size, permission);
+    if (region == NULL) {
+        /* registration failed, so nobody else owns the buffer */
+        free(backing);
+        return NULL;
+    }
+    return region;
+}
+
+/* Registers an already allocated buffer as a memory region.
+ * @pd: protection domain to register in; must not be NULL
+ * @addr: start of the buffer
+ * @length: length of the buffer in bytes
+ * @permission: access flags passed through to ibv_reg_mr()
+ * Returns the memory region, or NULL when pd is NULL or ibv_reg_mr() fails.
+ */
+struct ibv_mr *rdma_buffer_register(struct ibv_pd *pd,
+        void *addr, uint32_t length,
+        enum ibv_access_flags permission)
+{
+    struct ibv_mr *region;
+
+    if (pd == NULL) {
+        rdma_error("Protection domain is NULL, ignoring \n");
+        return NULL;
+    }
+    region = ibv_reg_mr(pd, addr, length, permission);
+    if (region != NULL) {
+        debug("Registered: %p , len: %u , stag: 0x%x \n",
+                region->addr,
+                (unsigned int) region->length,
+                region->lkey);
+        return region;
+    }
+    rdma_error("Failed to create mr on buffer, errno: %d \n", -errno);
+    return NULL;
+}
+
+/* Deregisters a memory region and frees the buffer behind it.
+ * @mr: memory region to release. A NULL pointer is reported through
+ *      rdma_error() and otherwise ignored.
+ */
+void rdma_buffer_free(struct ibv_mr *mr)
+{
+    void *backing;
+
+    if (mr == NULL) {
+        rdma_error("Passed memory region is NULL, ignoring\n");
+        return ;
+    }
+    /* remember the buffer address before the region is deregistered */
+    backing = mr->addr;
+    rdma_buffer_deregister(mr);
+    debug("Buffer %p free'ed\n", backing);
+    free(backing);
+}
+
+/* Deregisters a memory region. The buffer the region describes is not
+ * freed by this function.
+ * @mr: memory region to deregister. A NULL pointer is reported through
+ *      rdma_error() and otherwise ignored.
+ * A failing ibv_dereg_mr() is logged; it cannot be returned because the
+ * function has no return value.
+ */
+void rdma_buffer_deregister(struct ibv_mr *mr)
+{
+    int err;
+
+    if (!mr) {
+        rdma_error("Passed memory region is NULL, ignoring\n");
+        return;
+    }
+    debug("Deregistered: %p , len: %u , stag : 0x%x \n",
+            mr->addr,
+            (unsigned int) mr->length,
+            mr->lkey);
+    /* ibv_dereg_mr() hands back the error code as its return value */
+    err = ibv_dereg_mr(mr);
+    if (err) {
+        rdma_error("Failed to deregister memory region, errno: %d \n", -err);
+    }
+}
+
+/* Waits for the next connection management (CM) event on 'echannel' and
+ * checks that it is a successful event of the expected type.
+ * @echannel: CM event channel to read from
+ * @expected_event: event type the caller is waiting for
+ * @cm_event: receives the event
+ * Returns 0 when an event of the expected type with status 0 was received;
+ * the caller must then acknowledge *cm_event. On any failure a non-zero
+ * value is returned, the event (if one was retrieved) has already been
+ * acknowledged here, and *cm_event is reset to NULL so that the released
+ * event cannot be touched by accident.
+ */
+int process_rdma_cm_event(struct rdma_event_channel *echannel,
+        enum rdma_cm_event_type expected_event,
+        struct rdma_cm_event **cm_event)
+{
+    int ret = 1;
+
+    ret = rdma_get_cm_event(echannel, cm_event);
+    if (ret) {
+        /* capture errno before logging, fprintf() is allowed to change it */
+        ret = errno ? -errno : -1;
+        rdma_error("Failed to retrieve a cm event, errno: %d \n", ret);
+        return ret;
+    }
+    /* lets see, if it was a good event */
+    if (0 != (*cm_event)->status) {
+        rdma_error("CM event has non zero status: %d\n", (*cm_event)->status);
+        ret = -((*cm_event)->status);
+        /* important, we acknowledge the event */
+        rdma_ack_cm_event(*cm_event);
+        *cm_event = NULL;
+        return ret;
+    }
+    /* if it was a good event, was it of the expected type */
+    if ((*cm_event)->event != expected_event) {
+        rdma_error("Unexpected event received: %s [ expecting: %s ]",
+                rdma_event_str((*cm_event)->event),
+                rdma_event_str(expected_event));
+        /* important, we acknowledge the event */
+        rdma_ack_cm_event(*cm_event);
+        *cm_event = NULL;
+        return -1; // unexpected event :(
+    }
+    debug("A new %s type event is received \n", rdma_event_str((*cm_event)->event));
+    /* The caller must acknowledge the event */
+    return 0;
+}
+
+
+/* Waits for one notification on the completion channel and then polls the
+ * notifying CQ until 'max_wc' work completions have been collected.
+ * @comp_channel: completion channel to wait on
+ * @wc: array that receives the work completions, at least max_wc entries
+ * @max_wc: number of work completions to collect
+ * Returns the number of collected work completions, or a negative value
+ * when waiting, re-arming or polling fails or when a collected work
+ * completion carries an error status (then -status is returned).
+ * Once a CQ event has been received it is acknowledged on every exit path,
+ * because ibv_destroy_cq() waits for all received events to be acknowledged.
+ */
+int process_work_completion_events (struct ibv_comp_channel *comp_channel,
+        struct ibv_wc *wc, int max_wc)
+{
+    struct ibv_cq *cq_ptr = NULL;
+    void *context = NULL;
+    int ret = -1, i, total_wc = 0;
+
+    /* We wait for the notification on the CQ channel */
+    ret = ibv_get_cq_event(comp_channel, /* IO channel where we are expecting the notification */
+            &cq_ptr, /* which CQ has an activity */
+            &context); /* Associated CQ user context */
+    if (ret) {
+        /* capture errno before logging, fprintf() is allowed to change it */
+        ret = errno ? -errno : -1;
+        rdma_error("Failed to get next CQ event due to %d \n", ret);
+        return ret;
+    }
+    /* Request for more notifications. The error code is the return value,
+     * errno is only used as a fallback. */
+    ret = ibv_req_notify_cq(cq_ptr, 0);
+    if (ret) {
+        ret = (ret > 0) ? -ret : (errno ? -errno : -1);
+        rdma_error("Failed to request further notifications %d \n", ret);
+        goto ack_event;
+    }
+    /* We got notification. We reap the work completion (WC) elements.
+     * ibv_poll_cq can return zero, so we keep polling until all expected
+     * elements have arrived.
+     */
+    do {
+        ret = ibv_poll_cq(cq_ptr /* the CQ, we got notification for */,
+                max_wc - total_wc /* number of remaining WC elements*/,
+                wc + total_wc /* where to store */);
+        if (ret < 0) {
+            rdma_error("Failed to poll cq for wc due to %d \n", ret);
+            goto ack_event;
+        }
+        total_wc += ret;
+    } while (total_wc < max_wc);
+    debug("%d WC are completed \n", total_wc);
+    /* Now we check validity and status of I/O work completions */
+    ret = total_wc;
+    for (i = 0; i < total_wc; i++) {
+        if (wc[i].status != IBV_WC_SUCCESS) {
+            rdma_error("Work completion (WC) has error status: %s at index %d",
+                    ibv_wc_status_str(wc[i].status), i);
+            /* return negative value */
+            ret = -(wc[i].status);
+            break;
+        }
+    }
+ack_event:
+    /* Similar to connection management events, we need to acknowledge CQ
+     * events. We received one event notification; this is not the number
+     * of WC elements. */
+    ibv_ack_cq_events(cq_ptr, 1);
+    return ret;
+}
+
+
+/* Code acknowledgment: rping.c from librdmacm/examples */
+/* Resolves a host name or IP string to an IPv4 socket address.
+ * @dst: host name or numeric address to resolve
+ * @addr: destination, must have room for a struct sockaddr_in. The whole
+ *        structure is overwritten, including any port stored in it.
+ * Returns 0 on success, -EINVAL for NULL arguments, or the non-zero
+ * getaddrinfo() error code.
+ * The lookup is restricted to AF_INET because exactly
+ * sizeof(struct sockaddr_in) bytes are copied out; an IPv6 result would be
+ * truncated into a structure that is not a valid sockaddr_in.
+ */
+int get_addr(char *dst, struct sockaddr *addr)
+{
+    struct addrinfo hints;
+    struct addrinfo *res = NULL;
+    int ret = -1;
+
+    if (!dst || !addr) {
+        rdma_error("getaddrinfo failed - invalid hostname or IP address\n");
+        return -EINVAL;
+    }
+    memset(&hints, 0, sizeof(hints));
+    hints.ai_family = AF_INET;
+    ret = getaddrinfo(dst, NULL, &hints, &res);
+    if (ret) {
+        rdma_error("getaddrinfo failed - invalid hostname or IP address\n");
+        return ret;
+    }
+    memcpy(addr, res->ai_addr, sizeof(struct sockaddr_in));
+    freeaddrinfo(res);
+    return ret;
+}
+
diff --git a/rdma-example-master/rdma-example-master/src/rdma_common.h b/rdma-example-master/rdma-example-master/src/rdma_common.h
new file mode 100644
index 0000000..5a228c9
--- /dev/null
+++ b/rdma-example-master/rdma-example-master/src/rdma_common.h
@@ -0,0 +1,133 @@
+/*
+ * Header file for the common RDMA routines used in the server/client example
+ * program.
+ *
+ * Author: Animesh Trivedi
+ *
+ */
+
+#ifndef RDMA_COMMON_H
+#define RDMA_COMMON_H
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <errno.h>
+#include <getopt.h>
+
+#include <netdb.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <sys/socket.h>
+
+#include <rdma/rdma_cma.h>
+#include <infiniband/verbs.h>
+
+/* Error Macro*/
+#define rdma_error(msg, args...) do {\
+ fprintf(stderr, "%s : %d : ERROR : "msg, __FILE__, __LINE__, ## args);\
+}while(0);
+
+#ifdef ACN_RDMA_DEBUG
+/* Debug Macro */
+#define debug(msg, args...) do {\
+ printf("DEBUG: "msg, ## args);\
+}while(0);
+
+#else
+
+#define debug(msg, args...)
+
+#endif /* ACN_RDMA_DEBUG */
+
+/* Capacity of the completion queue (CQ) */
+#define CQ_CAPACITY (16)
+/* MAX SGE capacity */
+#define MAX_SGE (2)
+/* MAX work requests */
+#define MAX_WR (8)
+/* Default port where the RDMA server is listening */
+#define DEFAULT_RDMA_PORT (20886)
+
+/*
+ * We use attribute so that compiler does not step in and try to pad the structure.
+ * We use this structure to exchange information between the server and the client.
+ *
+ * The server fills one of these from a registered memory region (buffer
+ * address, buffer length and the region's key) and sends it to the client.
+ *
+ * For details see: http://gcc.gnu.org/onlinedocs/gcc/Type-Attributes.html
+ */
+struct __attribute((packed)) rdma_buffer_attr {
+ uint64_t address; /* start address of the described buffer */
+ uint32_t length; /* length of the described buffer */
+ union stag {
+ /* if we send, we call it local stags */
+ uint32_t local_stag;
+ /* if we receive, we call it remote stag */
+ uint32_t remote_stag;
+ }stag; /* both names alias the same 32-bit key */
+};
+/* resolves a given destination name to sin_addr */
+int get_addr(char *dst, struct sockaddr *addr);
+
+/* prints RDMA buffer info structure */
+void show_rdma_buffer_attr(struct rdma_buffer_attr *attr);
+
+/*
+ * Processes an RDMA connection management (CM) event.
+ * @echannel: CM event channel where the event is expected.
+ * @expected_event: Expected event type
+ * @cm_event: where the event will be stored
+ */
+int process_rdma_cm_event(struct rdma_event_channel *echannel,
+ enum rdma_cm_event_type expected_event,
+ struct rdma_cm_event **cm_event);
+
+/* Allocates a zero-initialized RDMA buffer of 'length' bytes and registers
+ * it with the given access permissions. Returns a memory region (MR)
+ * identifier or NULL on error.
+ * @pd: Protection domain where the buffer should be allocated
+ * @length: Length of the buffer
+ * @permission: OR of IBV_ACCESS_* permissions as defined for the enum ibv_access_flags
+ */
+struct ibv_mr* rdma_buffer_alloc(struct ibv_pd *pd,
+ uint32_t length,
+ enum ibv_access_flags permission);
+
+/* Frees a previously allocated RDMA buffer. The buffer must be allocated by
+ * calling rdma_buffer_alloc();
+ * @mr: RDMA memory region to free
+ */
+void rdma_buffer_free(struct ibv_mr *mr);
+
+/* This function registers a previously allocated memory. Returns a memory region
+ * (MR) identifier or NULL on error.
+ * @pd: protection domain where to register memory
+ * @addr: Buffer address
+ * @length: Length of the buffer
+ * @permission: OR of IBV_ACCESS_* permissions as defined for the enum ibv_access_flags
+ */
+struct ibv_mr *rdma_buffer_register(struct ibv_pd *pd,
+ void *addr,
+ uint32_t length,
+ enum ibv_access_flags permission);
+/* Deregisters a previously registered memory region. The memory itself is
+ * not freed.
+ * @mr: Memory region to deregister
+ */
+void rdma_buffer_deregister(struct ibv_mr *mr);
+
+/* Processes a work completion (WC) notification. Returns the number of work
+ * completions collected, or a negative value on error.
+ * @comp_channel: Completion channel where the notifications are expected to arrive
+ * @wc: Array where to hold the work completion elements
+ * @max_wc: Maximum number of expected work completion (WC) elements. wc must be
+ * at least this size.
+ */
+int process_work_completion_events(struct ibv_comp_channel *comp_channel,
+ struct ibv_wc *wc,
+ int max_wc);
+
+/* prints some details from the cm id */
+void show_rdma_cmid(struct rdma_cm_id *id);
+
+#endif /* RDMA_COMMON_H */
diff --git a/rdma-example-master/rdma-example-master/src/rdma_server.c b/rdma-example-master/rdma-example-master/src/rdma_server.c
new file mode 100644
index 0000000..42f018f
--- /dev/null
+++ b/rdma-example-master/rdma-example-master/src/rdma_server.c
@@ -0,0 +1,489 @@
+/*
+ * This is a RDMA server side code.
+ *
+ * Author: Animesh Trivedi
+ *
+ * TODO: Cleanup previously allocated resources in case of an error condition
+ */
+
+#include "rdma_common.h"
+
+/* These are the RDMA resources needed to setup an RDMA connection. The server
+ * handles a single client, so all of them are file-scope singletons. */
+/* Event channel, where connection management (cm) related events are relayed */
+static struct rdma_event_channel *cm_event_channel = NULL;
+static struct rdma_cm_id *cm_server_id = NULL, *cm_client_id = NULL; /* listening id, and the id taken from the connect request event */
+static struct ibv_pd *pd = NULL; /* protection domain, allocated in setup_client_resources() */
+static struct ibv_comp_channel *io_completion_channel = NULL; /* channel on which CQ notifications arrive */
+static struct ibv_cq *cq = NULL; /* one completion queue, used for both send and receive */
+static struct ibv_qp_init_attr qp_init_attr; /* attributes the client QP is created with */
+static struct ibv_qp *client_qp = NULL; /* shortcut for cm_client_id->qp */
+/* RDMA memory resources: memory regions, the metadata records they cover,
+ * and the work requests / scatter-gather entries used to post them */
+static struct ibv_mr *client_metadata_mr = NULL, *server_buffer_mr = NULL, *server_metadata_mr = NULL;
+static struct rdma_buffer_attr client_metadata_attr, server_metadata_attr; /* received from / sent to the client */
+static struct ibv_recv_wr client_recv_wr, *bad_client_recv_wr = NULL; /* pre-posted receive for the client metadata */
+static struct ibv_send_wr server_send_wr, *bad_server_send_wr = NULL; /* send carrying the server metadata */
+static struct ibv_sge client_recv_sge, server_send_sge;
+
+/* When we call this function cm_client_id must be set to a valid identifier.
+ * This is where we prepare the client connection before we accept it: we
+ * allocate the protection domain (PD), the I/O completion channel, the
+ * completion queue (CQ) and the queue pair (QP) for the client.
+ *
+ * Returns 0 on success or a negative error code. On failure everything
+ * that was allocated by this call is released again and the corresponding
+ * globals are reset to NULL.
+ */
+static int setup_client_resources()
+{
+    int ret = -1;
+
+    if (!cm_client_id) {
+        rdma_error("Client id is still NULL \n");
+        return -EINVAL;
+    }
+    /* Protection Domain (PD) is similar to a "process abstraction"
+     * in the operating system. All resources are tied to a particular PD.
+     * And accessing resources across PDs will result in a protection fault.
+     * verbs identifies the RDMA device on which the client connection
+     * came in.
+     */
+    pd = ibv_alloc_pd(cm_client_id->verbs);
+    if (!pd) {
+        /* capture errno before logging, fprintf() is allowed to change it */
+        ret = errno ? -errno : -1;
+        rdma_error("Failed to allocate a protection domain errno: %d\n", ret);
+        return ret;
+    }
+    debug("A new protection domain is allocated at %p \n", pd);
+    /* Now we need a completion channel, where the I/O completion
+     * notifications are sent. Remember, this is different from connection
+     * management (CM) event notifications. A completion channel is also
+     * tied to an RDMA device, hence we use cm_client_id->verbs.
+     */
+    io_completion_channel = ibv_create_comp_channel(cm_client_id->verbs);
+    if (!io_completion_channel) {
+        ret = errno ? -errno : -1;
+        rdma_error("Failed to create an I/O completion event channel, %d\n", ret);
+        goto free_pd;
+    }
+    debug("An I/O completion event channel is created at %p \n",
+            io_completion_channel);
+    /* Now we create a completion queue (CQ) where actual I/O completion
+     * metadata is placed. The metadata is packed into a structure called
+     * struct ibv_wc (wc = work completion).
+     */
+    cq = ibv_create_cq(cm_client_id->verbs /* which device*/,
+            CQ_CAPACITY /* maximum capacity*/,
+            NULL /* user context, not used here */,
+            io_completion_channel /* which IO completion channel */,
+            0 /* signaling vector, not used here*/);
+    if (!cq) {
+        ret = errno ? -errno : -1;
+        rdma_error("Failed to create a completion queue (cq), errno: %d\n", ret);
+        goto free_channel;
+    }
+    debug("Completion queue (CQ) is created at %p with %d elements \n",
+            cq, cq->cqe);
+    /* Ask for the event for all activities in the completion queue.
+     * The error code is the return value, errno is only a fallback. */
+    ret = ibv_req_notify_cq(cq /* on which CQ */,
+            0 /* 0 = all event type, no filter*/);
+    if (ret) {
+        ret = (ret > 0) ? -ret : (errno ? -errno : -1);
+        rdma_error("Failed to request notifications on CQ errno: %d \n", ret);
+        goto free_cq;
+    }
+    /* Now the last step, set up the queue pair (send, recv) queues and their
+     * capacity. The capacity here is defined statically but it could be
+     * probed from the device. We just use the small numbers from
+     * rdma_common.h */
+    bzero(&qp_init_attr, sizeof qp_init_attr);
+    qp_init_attr.cap.max_recv_sge = MAX_SGE; /* Maximum SGE per receive posting */
+    qp_init_attr.cap.max_recv_wr = MAX_WR; /* Maximum receive posting capacity */
+    qp_init_attr.cap.max_send_sge = MAX_SGE; /* Maximum SGE per send posting */
+    qp_init_attr.cap.max_send_wr = MAX_WR; /* Maximum send posting capacity */
+    qp_init_attr.qp_type = IBV_QPT_RC; /* QP type, RC = Reliable connection */
+    /* We use same completion queue, but one can use different queues */
+    qp_init_attr.recv_cq = cq; /* receive completions are reported here */
+    qp_init_attr.send_cq = cq; /* send completions are reported here */
+    /* Lets create a QP */
+    ret = rdma_create_qp(cm_client_id /* which connection id */,
+            pd /* which protection domain*/,
+            &qp_init_attr /* Initial attributes */);
+    if (ret) {
+        ret = errno ? -errno : -1;
+        rdma_error("Failed to create QP due to errno: %d\n", ret);
+        goto free_cq;
+    }
+    /* Save the reference for handy typing but is not required */
+    client_qp = cm_client_id->qp;
+    debug("Client QP created at %p\n", client_qp);
+    return 0;
+
+    /* Error unwinding, in reverse order of allocation */
+free_cq:
+    ibv_destroy_cq(cq);
+    cq = NULL;
+free_channel:
+    ibv_destroy_comp_channel(io_completion_channel);
+    io_completion_channel = NULL;
+free_pd:
+    ibv_dealloc_pd(pd);
+    pd = NULL;
+    return ret;
+}
+
+/* Starts an RDMA server by allocating basic connection resources: creates
+ * the CM event channel and the listening id, binds and listens on
+ * 'server_addr', then blocks until a connect request arrives and stores the
+ * new connection id in cm_client_id.
+ * @server_addr: IPv4 address and port to listen on
+ * Returns 0 on success or a non-zero error code. When a step fails before
+ * a client id has been obtained, the listening id and the event channel
+ * created here are destroyed again.
+ */
+static int start_rdma_server(struct sockaddr_in *server_addr)
+{
+    struct rdma_cm_event *cm_event = NULL;
+    int ret = -1;
+
+    /* Open a channel used to report asynchronous communication event */
+    cm_event_channel = rdma_create_event_channel();
+    if (!cm_event_channel) {
+        /* capture errno before logging, fprintf() is allowed to change it */
+        ret = errno ? -errno : -1;
+        rdma_error("Creating cm event channel failed with errno : (%d)", ret);
+        return ret;
+    }
+    debug("RDMA CM event channel is created successfully at %p \n",
+            cm_event_channel);
+    /* rdma_cm_id is the connection identifier (like socket) which is used
+     * to define an RDMA connection.
+     */
+    ret = rdma_create_id(cm_event_channel, &cm_server_id, NULL, RDMA_PS_TCP);
+    if (ret) {
+        ret = errno ? -errno : -1;
+        rdma_error("Creating server cm id failed with errno: %d ", ret);
+        goto destroy_channel;
+    }
+    debug("A RDMA connection id for the server is created \n");
+    /* Explicit binding of rdma cm id to the socket credentials */
+    ret = rdma_bind_addr(cm_server_id, (struct sockaddr*) server_addr);
+    if (ret) {
+        ret = errno ? -errno : -1;
+        rdma_error("Failed to bind server address, errno: %d \n", ret);
+        goto destroy_id;
+    }
+    debug("Server RDMA CM id is successfully binded \n");
+    /* Now we start to listen on the passed IP and port. However unlike
+     * normal TCP listen, this is a non-blocking call. When a new client is
+     * connected, a new connection management (CM) event is generated on the
+     * RDMA CM event channel from where the listening id was created. Here we
+     * have only one channel, so it is easy. */
+    ret = rdma_listen(cm_server_id, 8); /* backlog = 8 clients, same as TCP, see man listen*/
+    if (ret) {
+        ret = errno ? -errno : -1;
+        rdma_error("rdma_listen failed to listen on server address, errno: %d ",
+                ret);
+        goto destroy_id;
+    }
+    printf("Server is listening successfully at: %s , port: %d \n",
+            inet_ntoa(server_addr->sin_addr),
+            ntohs(server_addr->sin_port));
+    /* now, we expect a client to connect and generate a
+     * RDMA_CM_EVENT_CONNECT_REQUEST. We wait (block) on the connection
+     * management event channel for the connect event.
+     */
+    ret = process_rdma_cm_event(cm_event_channel,
+            RDMA_CM_EVENT_CONNECT_REQUEST,
+            &cm_event);
+    if (ret) {
+        rdma_error("Failed to get cm event, ret = %d \n" , ret);
+        goto destroy_id;
+    }
+    /* Much like TCP connection, listening returns a new connection identifier
+     * for newly connected client. In the case of RDMA, this is stored in id
+     * field. For more details: man rdma_get_cm_event
+     */
+    cm_client_id = cm_event->id;
+    /* now we acknowledge the event. Acknowledging the event frees the
+     * resources associated with the event structure. Hence any reference to
+     * the event must be made before acknowledgment. Like, we have already
+     * saved the client id from "id" field before acknowledging the event.
+     */
+    ret = rdma_ack_cm_event(cm_event);
+    if (ret) {
+        ret = errno ? -errno : -1;
+        rdma_error("Failed to acknowledge the cm event errno: %d \n", ret);
+        return ret;
+    }
+    debug("A new RDMA client connection id is stored at %p\n", cm_client_id);
+    return ret;
+
+    /* Error unwinding, in reverse order of creation */
+destroy_id:
+    rdma_destroy_id(cm_server_id);
+    cm_server_id = NULL;
+destroy_channel:
+    rdma_destroy_event_channel(cm_event_channel);
+    cm_event_channel = NULL;
+    return ret;
+}
+
+/* Pre-posts a receive buffer and accepts an RDMA client connection.
+ * Registers client_metadata_attr as the receive buffer for the client's
+ * metadata, posts the receive on the client QP, accepts the connection and
+ * waits for RDMA_CM_EVENT_ESTABLISHED.
+ * Requires cm_client_id and client_qp to be set up already.
+ * Returns 0 on success or a non-zero error code.
+ */
+static int accept_client_connection()
+{
+    struct rdma_conn_param conn_param;
+    struct rdma_cm_event *cm_event = NULL;
+    struct sockaddr_in remote_sockaddr;
+    int ret = -1;
+
+    if (!cm_client_id || !client_qp) {
+        rdma_error("Client resources are not properly setup\n");
+        return -EINVAL;
+    }
+    /* we prepare the receive buffer in which we will receive the client metadata*/
+    client_metadata_mr = rdma_buffer_register(pd /* which protection domain */,
+            &client_metadata_attr /* what memory */,
+            sizeof(client_metadata_attr) /* what length */,
+            (IBV_ACCESS_LOCAL_WRITE) /* access permissions */);
+    if (!client_metadata_mr) {
+        rdma_error("Failed to register client attr buffer\n");
+        //we assume ENOMEM
+        return -ENOMEM;
+    }
+    /* We pre-post this receive buffer on the QP. SGE credentials is where we
+     * receive the metadata from the client */
+    client_recv_sge.addr = (uint64_t) (uintptr_t) client_metadata_mr->addr; // same as &client_metadata_attr
+    client_recv_sge.length = client_metadata_mr->length;
+    client_recv_sge.lkey = client_metadata_mr->lkey;
+    /* Now we link this SGE to the work request (WR) */
+    bzero(&client_recv_wr, sizeof(client_recv_wr));
+    client_recv_wr.sg_list = &client_recv_sge;
+    client_recv_wr.num_sge = 1; // only one SGE
+    ret = ibv_post_recv(client_qp /* which QP */,
+            &client_recv_wr /* receive work request*/,
+            &bad_client_recv_wr /* error WRs */);
+    if (ret) {
+        rdma_error("Failed to pre-post the receive buffer, errno: %d \n", ret);
+        /* nothing was posted, so the region can be released right away */
+        rdma_buffer_deregister(client_metadata_mr);
+        client_metadata_mr = NULL;
+        return ret;
+    }
+    debug("Receive buffer pre-posting is successful \n");
+    /* Now we accept the connection. Recall we have not accepted the connection
+     * yet because we have to do lots of resource pre-allocation */
+    memset(&conn_param, 0, sizeof(conn_param));
+    /* this tell how many outstanding requests can we handle */
+    conn_param.initiator_depth = 3; /* For this exercise, we put a small number here */
+    /* This tell how many outstanding requests we expect other side to handle */
+    conn_param.responder_resources = 3; /* For this exercise, we put a small number */
+    ret = rdma_accept(cm_client_id, &conn_param);
+    if (ret) {
+        /* capture errno before logging, fprintf() is allowed to change it */
+        ret = errno ? -errno : -1;
+        rdma_error("Failed to accept the connection, errno: %d \n", ret);
+        return ret;
+    }
+    /* We expect an RDMA_CM_EVENT_ESTABLISHED to indicate that the RDMA
+     * connection has been established and everything is fine on both, server
+     * as well as the client sides.
+     */
+    debug("Going to wait for : RDMA_CM_EVENT_ESTABLISHED event \n");
+    ret = process_rdma_cm_event(cm_event_channel,
+            RDMA_CM_EVENT_ESTABLISHED,
+            &cm_event);
+    if (ret) {
+        /* The helper reports failures through its return value, and not
+         * all of them set errno. Returning -errno here could yield 0 and
+         * make the failure look like a success. */
+        rdma_error("Failed to get the cm event, errnp: %d \n", ret);
+        return ret;
+    }
+    /* We acknowledge the event */
+    ret = rdma_ack_cm_event(cm_event);
+    if (ret) {
+        ret = errno ? -errno : -1;
+        rdma_error("Failed to acknowledge the cm event %d\n", ret);
+        return ret;
+    }
+    /* Just FYI: How to extract connection information */
+    memcpy(&remote_sockaddr /* where to save */,
+            rdma_get_peer_addr(cm_client_id) /* gives you remote sockaddr */,
+            sizeof(struct sockaddr_in) /* max size */);
+    printf("A new connection is accepted from %s \n",
+            inet_ntoa(remote_sockaddr.sin_addr));
+    return ret;
+}
+
+/* This function sends server side buffer metadata to the connected client.
+ * It waits for the client's metadata to arrive in the pre-posted receive,
+ * allocates and registers a buffer of the length the client asked for,
+ * describes that buffer in server_metadata_attr and sends the description
+ * to the client.
+ * Returns 0 on success or a non-zero error code.
+ */
+static int send_server_metadata_to_client()
+{
+    struct ibv_wc wc;
+    int ret = -1;
+
+    /* Now, we first wait for the client to start the communication by
+     * sending the server its metadata info. The server does not use it
+     * in our example, apart from the requested length. We will receive a
+     * work completion notification for our pre-posted receive request.
+     */
+    ret = process_work_completion_events(io_completion_channel, &wc, 1);
+    if (ret != 1) {
+        rdma_error("Failed to receive , ret = %d \n", ret);
+        return ret;
+    }
+    /* if all good, then we should have client's buffer information, lets see */
+    printf("Client side buffer information is received...\n");
+    show_rdma_buffer_attr(&client_metadata_attr);
+    printf("The client has requested buffer length of : %u bytes \n",
+            client_metadata_attr.length);
+    /* We need to setup requested memory buffer. This is where the client will
+     * do RDMA READs and WRITEs. */
+    server_buffer_mr = rdma_buffer_alloc(pd /* which protection domain */,
+            client_metadata_attr.length /* what size to allocate */,
+            (IBV_ACCESS_LOCAL_WRITE|
+            IBV_ACCESS_REMOTE_READ|
+            IBV_ACCESS_REMOTE_WRITE) /* access permissions */);
+    if (!server_buffer_mr) {
+        rdma_error("Server failed to create a buffer \n");
+        /* we assume that it is due to out of memory error */
+        return -ENOMEM;
+    }
+    /* This record is used to transmit information about the above buffer to
+     * the client, hence it is called the metadata buffer. Since it is
+     * already allocated, we just register it.
+     * The peer accesses our buffer remotely, so the key it needs is the
+     * region's rkey; the lkey is only meaningful for local work requests.
+     */
+    server_metadata_attr.address = (uint64_t) (uintptr_t) server_buffer_mr->addr;
+    server_metadata_attr.length = (uint32_t) server_buffer_mr->length;
+    server_metadata_attr.stag.local_stag = (uint32_t) server_buffer_mr->rkey;
+    server_metadata_mr = rdma_buffer_register(pd /* which protection domain*/,
+            &server_metadata_attr /* which memory to register */,
+            sizeof(server_metadata_attr) /* what is the size of memory */,
+            IBV_ACCESS_LOCAL_WRITE /* what access permission */);
+    if (!server_metadata_mr) {
+        rdma_error("Server failed to create to hold server metadata \n");
+        /* the data buffer was never announced to the client, release it */
+        rdma_buffer_free(server_buffer_mr);
+        server_buffer_mr = NULL;
+        /* we assume that this is due to out of memory error */
+        return -ENOMEM;
+    }
+    /* We need to transmit this buffer. So we create a send request.
+     * A send request consists of multiple SGE elements. In our case, we only
+     * have one
+     */
+    server_send_sge.addr = (uint64_t) (uintptr_t) &server_metadata_attr;
+    server_send_sge.length = sizeof(server_metadata_attr);
+    server_send_sge.lkey = server_metadata_mr->lkey;
+    /* now we link this sge to the send request */
+    bzero(&server_send_wr, sizeof(server_send_wr));
+    server_send_wr.sg_list = &server_send_sge;
+    server_send_wr.num_sge = 1; // only 1 SGE element in the array
+    server_send_wr.opcode = IBV_WR_SEND; // This is a send request
+    server_send_wr.send_flags = IBV_SEND_SIGNALED; // We want to get notification
+    /* This is a fast data path operation. Posting an I/O request.
+     * The error code is the return value, errno is only a fallback. */
+    ret = ibv_post_send(client_qp /* which QP */,
+            &server_send_wr /* Send request that we prepared before */,
+            &bad_server_send_wr /* In case of error, this will contain failed requests */);
+    if (ret) {
+        ret = (ret > 0) ? -ret : (errno ? -errno : -1);
+        rdma_error("Posting of server metdata failed, errno: %d \n", ret);
+        return ret;
+    }
+    /* We check for completion notification */
+    ret = process_work_completion_events(io_completion_channel, &wc, 1);
+    if (ret != 1) {
+        rdma_error("Failed to send server metadata, ret = %d \n", ret);
+        return ret;
+    }
+    debug("Local buffer metadata has been sent to the client \n");
+    return 0;
+}
+
+/* This is server side logic. Server passively waits for the client to call
+ * rdma_disconnect() and then it will clean up its resources.
+ * Returns 0 once the teardown has run, or a non-zero error code when the
+ * disconnect event could not be received or acknowledged. Failures of the
+ * individual destroy steps are logged and the teardown continues. Every
+ * released handle is reset to NULL so that no stale pointer stays behind.
+ */
+static int disconnect_and_cleanup()
+{
+    struct rdma_cm_event *cm_event = NULL;
+    int ret = -1;
+
+    /* Now we wait for the client to send us disconnect event */
+    debug("Waiting for cm event: RDMA_CM_EVENT_DISCONNECTED\n");
+    ret = process_rdma_cm_event(cm_event_channel,
+            RDMA_CM_EVENT_DISCONNECTED,
+            &cm_event);
+    if (ret) {
+        rdma_error("Failed to get disconnect event, ret = %d \n", ret);
+        return ret;
+    }
+    /* We acknowledge the event */
+    ret = rdma_ack_cm_event(cm_event);
+    if (ret) {
+        /* capture errno before logging, fprintf() is allowed to change it */
+        ret = errno ? -errno : -1;
+        rdma_error("Failed to acknowledge the cm event %d\n", ret);
+        return ret;
+    }
+    printf("A disconnect event is received from the client...\n");
+    /* We free all the resources */
+    /* Destroy QP */
+    rdma_destroy_qp(cm_client_id);
+    client_qp = NULL;
+    /* Destroy client cm id */
+    ret = rdma_destroy_id(cm_client_id);
+    if (ret) {
+        rdma_error("Failed to destroy client id cleanly, %d \n", -errno);
+        // we continue anyways;
+    }
+    cm_client_id = NULL;
+    /* Destroy CQ. The ibv_* destroy calls hand back the error code as
+     * their return value, so that is what gets logged. */
+    ret = ibv_destroy_cq(cq);
+    if (ret) {
+        rdma_error("Failed to destroy completion queue cleanly, %d \n", -ret);
+        // we continue anyways;
+    }
+    cq = NULL;
+    /* Destroy completion channel */
+    ret = ibv_destroy_comp_channel(io_completion_channel);
+    if (ret) {
+        rdma_error("Failed to destroy completion channel cleanly, %d \n", -ret);
+        // we continue anyways;
+    }
+    io_completion_channel = NULL;
+    /* Destroy memory buffers */
+    rdma_buffer_free(server_buffer_mr);
+    rdma_buffer_deregister(server_metadata_mr);
+    rdma_buffer_deregister(client_metadata_mr);
+    server_buffer_mr = NULL;
+    server_metadata_mr = NULL;
+    client_metadata_mr = NULL;
+    /* Destroy protection domain */
+    ret = ibv_dealloc_pd(pd);
+    if (ret) {
+        rdma_error("Failed to destroy client protection domain cleanly, %d \n", -ret);
+        // we continue anyways;
+    }
+    pd = NULL;
+    /* Destroy rdma server id */
+    ret = rdma_destroy_id(cm_server_id);
+    if (ret) {
+        rdma_error("Failed to destroy server id cleanly, %d \n", -errno);
+        // we continue anyways;
+    }
+    cm_server_id = NULL;
+    rdma_destroy_event_channel(cm_event_channel);
+    cm_event_channel = NULL;
+    printf("Server shut-down is complete \n");
+    return 0;
+}
+
+
+/* Prints the command line synopsis and the default port to stdout, then
+ * terminates the process with exit status 1. Does not return. */
+void usage()
+{
+    fputs("Usage:\n"
+          "rdma_server: [-a <server_addr>] [-p <server_port>]\n",
+          stdout);
+    printf("(default port is %d)\n", DEFAULT_RDMA_PORT);
+    exit(1);
+}
+
+/* Server entry point.
+ * Options: -a <server_addr> address to listen on (default INADDR_ANY),
+ *          -p <server_port> port to listen on (default DEFAULT_RDMA_PORT).
+ * Runs the server steps in order: start listening and wait for a client,
+ * set up the client resources, accept the connection, exchange the buffer
+ * metadata, then wait for the disconnect and clean up.
+ * Returns 0 on success or the error code of the first failing step.
+ */
+int main(int argc, char **argv)
+{
+    int ret, option;
+    long port = 0; /* 0 = no port given on the command line */
+    char *end = NULL;
+    struct sockaddr_in server_sockaddr;
+
+    bzero(&server_sockaddr, sizeof server_sockaddr);
+    server_sockaddr.sin_family = AF_INET; /* standard IP NET address */
+    server_sockaddr.sin_addr.s_addr = htonl(INADDR_ANY); /* passed address */
+    /* Parse Command Line Arguments */
+    while ((option = getopt(argc, argv, "a:p:")) != -1) {
+        switch (option) {
+            case 'a':
+                /* Remember, this overwrites the whole sockaddr, which is
+                 * why the port is kept in a separate variable */
+                ret = get_addr(optarg, (struct sockaddr*) &server_sockaddr);
+                if (ret) {
+                    rdma_error("Invalid IP \n");
+                    return ret;
+                }
+                break;
+            case 'p':
+                /* passed port to listen on; reject anything that is not a
+                 * complete number in the 16-bit port range instead of
+                 * silently truncating it */
+                port = strtol(optarg, &end, 0);
+                if (end == optarg || *end != '\0' || port < 0 || port > 65535) {
+                    rdma_error("Invalid port: %s \n", optarg);
+                    usage();
+                }
+                break;
+            default:
+                usage();
+                break;
+        }
+    }
+    /* The port is applied after parsing, so -p is honoured regardless of
+     * whether it comes before or after -a. If it is still zero, no port
+     * info was provided and the default port is used. */
+    server_sockaddr.sin_port = htons((uint16_t) (port ? port : DEFAULT_RDMA_PORT));
+    ret = start_rdma_server(&server_sockaddr);
+    if (ret) {
+        rdma_error("RDMA server failed to start cleanly, ret = %d \n", ret);
+        return ret;
+    }
+    ret = setup_client_resources();
+    if (ret) {
+        rdma_error("Failed to setup client resources, ret = %d \n", ret);
+        return ret;
+    }
+    ret = accept_client_connection();
+    if (ret) {
+        rdma_error("Failed to handle client cleanly, ret = %d \n", ret);
+        return ret;
+    }
+    ret = send_server_metadata_to_client();
+    if (ret) {
+        rdma_error("Failed to send server metadata to the client, ret = %d \n", ret);
+        return ret;
+    }
+    ret = disconnect_and_cleanup();
+    if (ret) {
+        rdma_error("Failed to clean up resources properly, ret = %d \n", ret);
+        return ret;
+    }
+    return 0;
+}