summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.gitattributes1
-rw-r--r--CMakeLists.txt4
-rw-r--r--app/CMakeLists.txt2
-rw-r--r--app/include/bpfdump.h24
-rw-r--r--app/include/mrapp.h24
-rw-r--r--app/include/tap.h6
-rw-r--r--app/src/bpfdump.c271
-rw-r--r--app/src/marsio.c121
-rw-r--r--app/src/rawio.c43
-rw-r--r--app/src/tap.c398
-rw-r--r--include/internal/vdev_define.h11
-rw-r--r--service/include/sc_devmgr.h29
-rw-r--r--service/src/core.c2
-rw-r--r--service/src/devmgr.c108
-rw-r--r--service/src/node.c5
-rw-r--r--service/src/node_eth_ingress.c97
-rw-r--r--service/src/node_phydev.c12
-rw-r--r--tools/classifier_rule_test/CMakeLists.txt2
-rw-r--r--tools/lb_rule_test/CMakeLists.txt2
19 files changed, 694 insertions, 468 deletions
diff --git a/.gitattributes b/.gitattributes
index 04d8752..1131d94 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -1,3 +1,4 @@
*.a binary
*.so binary
*.o binary
+*.sh text eol=lf \ No newline at end of file
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 10ff012..668d9ab 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -15,8 +15,8 @@ option(ENABLE_SANITIZE_ADDRESS "Enable AddressSanitizer" FALSE)
option(ENABLE_SANITIZE_THREAD "Enable ThreadSanitizer" FALSE)
option(ENABLE_VNODE_CHECK_THREAD_SAFE "Enable concurrent write thread-safe check for VNODE." FALSE)
-set(CMAKE_C_FLAGS "-std=gnu99 -m64 -march=corei7-avx")
-set(CMAKE_CXX_FLAGS "-std=gnu++11 -m64 -march=corei7-avx" )
+set(CMAKE_C_FLAGS "-std=gnu99 -m64 -march=corei7-avx -Wno-format-truncation")
+set(CMAKE_CXX_FLAGS "-std=gnu++11 -m64 -march=corei7-avx -Wno-format-truncation" )
if(ENABLE_WARNING_ALL)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -fPIC")
diff --git a/app/CMakeLists.txt b/app/CMakeLists.txt
index 7ae1296..ac13d71 100644
--- a/app/CMakeLists.txt
+++ b/app/CMakeLists.txt
@@ -8,7 +8,7 @@ include_directories(${DPDK_INCLUDE_DIR})
include_directories(include)
add_definitions(${DPDK_C_PREDEFINED})
add_library(marsio SHARED src/marsio.c src/arp.c src/icmp.c src/neigh.c src/rawio.c src/mrb.c
- src/sendpath.c src/monit.c src/bpfdump.c)
+ src/sendpath.c src/monit.c src/tap.c)
set_target_properties(marsio PROPERTIES VERSION ${MARSIO_VERSION_MAJOR}.${MARSIO_VERSION_MINOR})
set_target_properties(marsio PROPERTIES SOVERSION ${MARSIO_VERSION_MAJOR})
diff --git a/app/include/bpfdump.h b/app/include/bpfdump.h
deleted file mode 100644
index 875030d..0000000
--- a/app/include/bpfdump.h
+++ /dev/null
@@ -1,24 +0,0 @@
-#pragma once
-
-#include <rte_mbuf.h>
-
-struct bpf_dumper;
-
-enum bpf_dumper_backend
-{
- BPF_DUMPER_TAP,
- BPF_DUMPER_PCAP,
- BPF_DUMPER_SYSLOG,
- BPF_DUMPER_COUNT,
- BPF_DUMPER_MAX
-};
-
-#define BPF_DUMPER_DEFAULT_BACKEND BPF_DUMPER_TAP
-
-int bpf_dumper_write(struct bpf_dumper * dumper, struct rte_mbuf * __mbufs[],
- unsigned int nr_mbufs);
-
-void bpf_dumper_destory(struct bpf_dumper * dumper);
-
-struct bpf_dumper * bpf_dumper_create(const char * appsym, enum bpf_dumper_backend backend,
- const char * str_devsym, const char * str_path, const char * str_bpf_expr); \ No newline at end of file
diff --git a/app/include/mrapp.h b/app/include/mrapp.h
index 56bf9fc..e60d2d9 100644
--- a/app/include/mrapp.h
+++ b/app/include/mrapp.h
@@ -2,12 +2,12 @@
#include <common.h>
#include <ctrlmsg.h>
-#include <vdev_define.h>
-#include <neigh.h>
-#include <marsio.h>
#include <ldbc.h>
+#include <marsio.h>
+#include <neigh.h>
#include <pcap/pcap.h>
-#include <bpfdump.h>
+#include <tap.h>
+#include <vdev_define.h>
struct mr_instance;
@@ -39,14 +39,12 @@ struct mrapp_stat
struct mr_vdev
{
char devsym[MR_SYMBOL_MAX];
- struct mr_instance * instance;
- struct vdev_instance * vdi;
unsigned int nr_rxstream;
unsigned int nr_txstream;
- unsigned int en_arp;
- unsigned int en_icmp;
- struct bpf_dumper * bpf_dumper;
+ struct mr_instance * instance;
+ struct vdev_instance * vdi;
+ struct tap_device * tap_representor;
};
struct mr_thread_info
@@ -120,6 +118,14 @@ struct mr_instance
unsigned int recv_all_state[MR_SID_MAX];
/* 基于ASAN的内存保护模式 */
unsigned int memory_protect_with_asan;
+
+ /* tap resp handles */
+ struct tap_device * tap_resp_devices[MR_DEVICE_MAX];
+ unsigned int nr_tap_resp_devices;
+
+ /* tap resp epoll fd */
+ int tap_resp_epfd;
+ pthread_t pid_tap_resp_poll;
};
/* EAL环境是否初始化 */
diff --git a/app/include/tap.h b/app/include/tap.h
new file mode 100644
index 0000000..249304a
--- /dev/null
+++ b/app/include/tap.h
@@ -0,0 +1,6 @@
+#pragma once
+#include <marsio.h>
+#include <mrapp.h>
+
+int tap_representor_init(struct mr_instance * mr_instance, struct mr_vdev * vdev);
+int tap_representor_entry(struct mr_vdev * vdev, unsigned int qid, marsio_buff_t * buffs[], unsigned int nr_buffs); \ No newline at end of file
diff --git a/app/src/bpfdump.c b/app/src/bpfdump.c
deleted file mode 100644
index 8452bdc..0000000
--- a/app/src/bpfdump.c
+++ /dev/null
@@ -1,271 +0,0 @@
-#include <common.h>
-#include <pcap/pcap.h>
-#include <rte_atomic.h>
-#include <rte_malloc.h>
-#include <rte_mbuf.h>
-
-#include <netinet/in.h>
-#include <linux/if.h>
-#include <linux/if_tun.h>
-#include <fcntl.h>
-#include <sys/ioctl.h>
-#include <unistd.h>
-#include <signal.h>
-#include <assert.h>
-#include <bpfdump.h>
-
-static int __tap_count = 0;
-
-static const char * str_bpf_dumper_backend[] =
-{
- [BPF_DUMPER_TAP] = "TAP",
- [BPF_DUMPER_PCAP] = "PCAP",
- [BPF_DUMPER_SYSLOG] = "SYSLOG",
- [BPF_DUMPER_COUNT] = "COUNT"
-};
-
-struct __backend_ops
-{
- int(*fn_create)(struct bpf_dumper * object, const char * appsym,
- const char * str_devsym, const char * str_path);
- void(*fn_destory)(struct bpf_dumper * object);
- int(*fn_write)(struct bpf_dumper * object, const char * pkt_ptr, unsigned int pkt_len);
-};
-
-struct bpf_dumper
-{
- char str_dumper_sym[MR_SYMBOL_MAX];
- char str_bpf_expr[MR_STRING_MAX];
- char str_dumpfile[MR_STRING_MAX];
-
- struct bpf_program bpf_program;
-
- unsigned int en_read;
- unsigned int en_write;
-
- enum bpf_dumper_backend backend;
- struct __backend_ops * backend_ops;
-
- struct __backend_tap
- {
- int tapdev_fd;
- } backend_tap;
-
- int bpf_offset;
-
- rte_atomic64_t stat_write_pkts;
- rte_atomic64_t stat_write_pktlen;
- rte_atomic64_t stat_write_drops;
-
- rte_atomic64_t stat_read_pkts;
- rte_atomic64_t stat_read_pktlen;
- rte_atomic64_t stat_read_drops;
-};
-
-static int tap_ioctl(int fd, unsigned long request, struct ifreq *ifr, int set)
-{
- short req_flags = ifr->ifr_flags;
- switch (request) {
- case SIOCSIFFLAGS:
- if (ioctl(fd, SIOCGIFFLAGS, ifr) < 0)
- goto error;
- if (set)
- ifr->ifr_flags |= req_flags;
- else
- ifr->ifr_flags &= ~req_flags;
- break;
-
- case SIOCGIFFLAGS:
- case SIOCGIFHWADDR:
- case SIOCSIFHWADDR:
- case SIOCSIFMTU:
- break;
- default:
- return -EINVAL;
- }
-
- if (ioctl(fd, request, ifr) < 0)
- goto error;
-
- return 0;
-
-error:
- MR_ERROR("%s: ioctl(%lu) failed with error: %s", ifr->ifr_name, request, strerror(errno));
- return -errno;
-}
-
-static int __tap_dumper_create(struct bpf_dumper * object, const char * appsym,
- const char * str_devsym, const char * str_path)
-{
- /* ����TUNTAP�豸ʹ�õ�FD */
- int fd = 0;
- int ioctl_sock = 0;
-
- fd = open("/dev/net/tun", O_RDWR);
- if (fd < 0)
- {
- MR_ERROR("Tap dumper %s open /dev/net/tun failed: %s",
- object->str_dumper_sym, strerror(errno)); goto __fail_out;
- }
-
- /* ����TUNTAP�豸ʱʹ�õ�FD */
- ioctl_sock = socket(AF_INET, SOCK_DGRAM, 0);
- if (ioctl_sock < 0)
- {
- MR_ERROR("Tap dumper %s unable to get a socket for management: %s",
- object->str_dumper_sym, strerror(errno)); goto __fail_out;
- }
-
- struct ifreq ifr;
- memset(&ifr, 0, sizeof(ifr));
-
- snprintf(object->str_dumpfile, IFNAMSIZ, "%s_t%d", appsym, __tap_count++);
-
- /* TAP device without packet information */
- ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
- snprintf(ifr.ifr_name, IFNAMSIZ, "%s", object->str_dumpfile);
-
- int ret = ioctl(fd, TUNSETIFF, (void *)&ifr);
- if (ret < 0)
- {
- MR_ERROR("Tap dumper %s tap fd(fd = %d) ioctl failed: %s",
- object->str_dumper_sym, fd, strerror(errno)); goto __fail_out;
- }
-
- struct ifreq link_up_ifr = { .ifr_flags = IFF_UP | IFF_RUNNING };
- snprintf(link_up_ifr.ifr_name, IFNAMSIZ, "%s", object->str_dumpfile);
-
- ret = tap_ioctl(ioctl_sock, SIOCSIFFLAGS, &link_up_ifr, 1);
- if(ret < 0)
- {
- MR_ERROR("Tap dumper %s tap device link up failed", object->str_dumper_sym);
- goto __fail_out;
- }
-
- MR_DEBUG("TAP dumper %s: tap fd = %d, ifr_name = %s",
- object->str_dumper_sym, fd, ifr.ifr_name);
-
- object->backend_tap.tapdev_fd = fd;
- close(ioctl_sock);
-
- return RT_SUCCESS;
-
-__fail_out:
- if(fd > 0) close(fd);
- if (ioctl_sock > 0) close(ioctl_sock);
- return RT_ERR;
-}
-
-static void __tap_dumper_destory(struct bpf_dumper * object)
-{
- close(object->backend_tap.tapdev_fd);
- return;
-}
-
-static int __tap_dumper_write(struct bpf_dumper * object, const char * pkt_ptr, unsigned int pkt_len)
-{
- return write(object->backend_tap.tapdev_fd, pkt_ptr, pkt_len);
-}
-
-static struct __backend_ops __backend_ops_jumper[] =
-{
- [BPF_DUMPER_TAP] =
- {
- .fn_create = __tap_dumper_create,
- .fn_destory = __tap_dumper_destory,
- .fn_write = __tap_dumper_write
- }
-};
-
-int bpf_dumper_write(struct bpf_dumper * dumper, struct rte_mbuf * __mbufs[],
- unsigned int nr_mbufs)
-{
- unsigned int nr_write_success = 0;
-
- for (unsigned int i = 0; i < nr_mbufs; i++)
- {
- struct rte_mbuf * __mbuf = __mbufs[i];
-
- const char * pkt_ptr = rte_pktmbuf_mtod_offset(__mbuf, const char *, dumper->bpf_offset);
- unsigned int pkt_len = rte_pktmbuf_data_len(__mbuf) > dumper->bpf_offset ?
- rte_pktmbuf_data_len(__mbuf) - dumper->bpf_offset : 0;
-
- if (pkt_ptr == NULL || pkt_len == 0) goto __dropme;
-
- if (bpf_filter(dumper->bpf_program.bf_insns,
- (const u_char *)pkt_ptr, pkt_len, pkt_len) == 0 || pkt_len > 65535) goto __dropme;
-
- assert(dumper->backend_ops->fn_write != NULL);
- int ret = dumper->backend_ops->fn_write(dumper, pkt_ptr, pkt_len);
- if (unlikely(ret < 0)) goto __dropme;
-
- /* д��TAP�豸�ɹ� */
- rte_atomic64_add(&dumper->stat_write_pkts, 1);
- rte_atomic64_add(&dumper->stat_write_pktlen, pkt_len);
- nr_write_success++;
- continue;
-
- __dropme:
- rte_atomic64_add(&dumper->stat_write_drops, 1);
- continue;
- }
-
- return nr_write_success;
-}
-
-void bpf_dumper_destory(struct bpf_dumper * dumper)
-{
- if (dumper != NULL) dumper->backend_ops->fn_destory(dumper);
- pcap_freecode(&dumper->bpf_program);
- rte_free(dumper);
- return;
-}
-
-struct bpf_dumper * bpf_dumper_create(const char * appsym, enum bpf_dumper_backend backend,
- const char * str_devsym, const char * str_path, const char * str_bpf_expr)
-{
- assert(backend >= 0 && backend < BPF_DUMPER_MAX);
-
- struct bpf_dumper * dumper = rte_zmalloc(NULL, sizeof(struct bpf_dumper), 0);
- MR_VERIFY_MALLOC(dumper);
-
- snprintf(dumper->str_dumper_sym, sizeof(dumper->str_dumper_sym), "%s_%s_%s",
- appsym, str_devsym, str_bpf_dumper_backend[backend]);
-
- dumper->backend = backend;
- dumper->backend_ops = &__backend_ops_jumper[backend];
-
- /* Backend Init */
- int ret = dumper->backend_ops->fn_create(dumper, appsym, str_devsym, str_path);
- if (ret < 0)
- {
- MR_ERROR("BPF dumper %s backend create failed. ", dumper->str_dumper_sym);
- goto __errout;
- }
-
- /* BPF Filter */
- strncpy(dumper->str_bpf_expr, str_bpf_expr, sizeof(dumper->str_bpf_expr));
-
- ret = pcap_compile_nopcap(65535, DLT_EN10MB, &dumper->bpf_program,
- dumper->str_bpf_expr, 1, PCAP_NETMASK_UNKNOWN);
-
- if (ret < 0)
- {
- MR_ERROR("BPF dumper %s bpf filter compile failed, bpf expr is '%s': %s",
- dumper->str_dumper_sym, dumper->str_bpf_expr, pcap_strerror(ret));
- goto __errout;
- }
-
- MR_INFO(" ");
- MR_INFO("Application %s, BPF Dumper %s:", appsym, dumper->str_dumper_sym);
- MR_INFO(" BPF expression : %s", dumper->str_bpf_expr);
- MR_INFO(" BPF offset : %d", dumper->bpf_offset);
- MR_INFO(" Backend : %s", str_bpf_dumper_backend[dumper->backend]);
- MR_INFO(" Dumpfile(or device) : %s", dumper->str_dumpfile);
-
- return dumper;
-
-__errout:
- if (dumper != NULL) bpf_dumper_destory(dumper);
- return NULL;
-} \ No newline at end of file
diff --git a/app/src/marsio.c b/app/src/marsio.c
index 5a10097..c84ff7e 100644
--- a/app/src/marsio.c
+++ b/app/src/marsio.c
@@ -13,6 +13,7 @@
#include <arpa/inet.h>
#include <ctrlmsg.h>
#include <ctrlmsg_define.h>
+#include <tap.h>
#include <MESA_prof_load.h>
#include <unistd.h>
#include <signal.h>
@@ -82,7 +83,7 @@ void __mrapp_mem_protect_unlock_mempool_cb(struct rte_mempool * mp, void *arg)
#if 0
/* 基于ASAN的内存保护模式
* 该保护模式开启后,共享大页内存将标记为不可达,对共享内存的访问将被ASAN探测并记录 */
-static void mrapp_mem_protect_with_asan_init(struct mr_instance * instance)
+static void mrapp_mem_protect_with_asan_init(struct ref_mr_instance * instance)
{
MESA_load_profile_uint_def(instance->app_cfgfile_path, "protect", "enable",
&instance->memory_protect_with_asan, 0);
@@ -278,52 +279,6 @@ static int mrapp_distributer_init(struct mr_instance * instance)
return RT_SUCCESS;
}
-static int mrapp_bpf_dumper_init(struct mr_instance * instance, struct mr_vdev * vdev)
-{
- const char * str_devsym = vdev->devsym;
- const char * str_cfgfile = instance->app_cfgfile_path;
-
- /* 从应用配置文件中读取Dumper的配置。Dumper系调试使用,由用户通过配置文件指定,
- 读不到相关选项,则不启用Dumper。 */
-
- char str_section[MR_SYMBOL_MAX] = { 0 };
- snprintf(str_section, sizeof(str_section), "bpfdump:%s", str_devsym);
-
- unsigned int __opt_enable = 0;
- MESA_load_profile_uint_def(str_cfgfile, str_section, "enable", &__opt_enable, 0);
-
- /* 是否启用 */
- if (!__opt_enable) return RT_SUCCESS;
-
- char __opt_str_bpf_expr[MR_STRING_MAX] = { 0 };
- char __opt_str_dumpfile[MR_STRING_MAX] = { 0 };
-
- unsigned int __opt_direction = 0;
- unsigned int __opt_backend = 0;
- unsigned int __opt_offset = 0;
-
- MESA_load_profile_string_def(str_cfgfile, str_section, "bpf_expr", __opt_str_bpf_expr,
- sizeof(__opt_str_bpf_expr), NULL);
- MESA_load_profile_string_def(str_cfgfile, str_section, "dumpfile", __opt_str_dumpfile,
- sizeof(__opt_str_dumpfile), NULL);
-
- MESA_load_profile_uint_def(str_cfgfile, str_section, "bpf_offset", &__opt_offset, 0);
- MESA_load_profile_uint_def(str_cfgfile, str_section, "direction", &__opt_direction, 0);
- MESA_load_profile_uint_def(str_cfgfile, str_section, "backend", &__opt_backend, 0);
-
- vdev->bpf_dumper = bpf_dumper_create(instance->appsym, __opt_backend,
- str_devsym, __opt_str_dumpfile, __opt_str_bpf_expr);
-
- if (unlikely(vdev->bpf_dumper == NULL))
- {
- MR_ERROR("In application %s, BPF dumper for device %s create failed.",
- instance->appsym, str_devsym); return RT_ERR;
- }
-
- return RT_SUCCESS;
-}
-
-
static unsigned __table_strip(char *str, unsigned len)
{
int newlen = len;
@@ -541,49 +496,6 @@ err:
return RT_ERR;
}
-/* 邻居子系统,延迟初始化
- 设备初始化时初始化该部分
-*/
-static int mrapp_neigh_device_init(struct mr_instance * instance, struct mr_vdev * vdev)
-{
- /* 读静态邻居表项 */
- struct mr_static_neigh_entry * __neigh_entry_iter;
- TAILQ_FOREACH(__neigh_entry_iter, &instance->static_neigh_list, next)
- {
- if (strncmp(vdev->devsym, __neigh_entry_iter->devsym, sizeof(vdev->devsym)) != 0)
- continue;
-
- int ret = neigh_create_or_update(instance->neigh, __neigh_entry_iter->in_addr,
- &__neigh_entry_iter->ether_addr, vdev->vdi, 1);
-
- if (ret < 0)
- {
- MR_WARNING("Static neighbour %s->%s on device %s failed, Ignore.",
- __neigh_entry_iter->str_in_addr,
- __neigh_entry_iter->str_ether_addr,
- __neigh_entry_iter->devsym);
-
- continue;
- }
-
- MR_INFO("Static neighbour: %s->%s, device: %s.", __neigh_entry_iter->str_in_addr,
- __neigh_entry_iter->str_ether_addr, __neigh_entry_iter->devsym);
- }
-
- /* 设备开启了发送队列后,且启用了ARP功能,发免费ARP报文 */
- if (vdev->nr_txstream == 0 || vdev->en_arp == 0)
- {
- return RT_SUCCESS;
- }
-
- for (int i = 0; i < instance->nr_gratuitous_arp_send; i++)
- {
- arp_request_send(vdev->vdi, 0, vdev->vdi->vdev->in_addr);
- }
-
- return RT_SUCCESS;
-}
-
/* 注册应用 */
static int mrapp_app_register(struct mr_instance * instance)
{
@@ -737,14 +649,6 @@ static int __open_device_response_handler(struct ctrlmsg_handler * ct_hand,
__open_device_unposion(mr_vdev->vdi);
}
- /* 启用了IP地址,处理ARP请求和ICMP请求 */
- struct vdev * vdev = mr_vdev->vdi->vdev;
- if (vdev->in_addr.s_addr != 0 && vdev->in_mask.s_addr != 0)
- {
- mr_vdev->en_arp = 1;
- mr_vdev->en_icmp = 1;
- }
-
instance->nr_vdevs++;
wake_up:
@@ -781,15 +685,14 @@ struct mr_vdev * marsio_open_device(struct mr_instance * instance,
{
/* 构造虚设备打开请求 */
struct ctrl_msg_vdev_open_request req_msg;
- ctrl_msg_header_construct(&req_msg.msg_header, sizeof(req_msg),
- CTRL_MSG_TYPE_REQUEST, CTRLMSG_TOPIC_VDEV_OPEN);
+ ctrl_msg_header_construct(&req_msg.msg_header, sizeof(req_msg), CTRL_MSG_TYPE_REQUEST, CTRLMSG_TOPIC_VDEV_OPEN);
snprintf((char *)req_msg.devsym, sizeof(req_msg.devsym), "%s", devsym);
req_msg.nr_rxstream = nr_rxstream;
req_msg.nr_txstream = nr_txstream;
/* 发送请求 */
- ctrlmsg_msg_send(instance->ctrlmsg_handler, NULL, (struct ctrl_msg_header*)&req_msg);
+ ctrlmsg_msg_send(instance->ctrlmsg_handler, NULL, (struct ctrl_msg_header *)&req_msg);
/* 等待应答 */
/* TODO: 抽象出单独的函数 */
@@ -804,19 +707,21 @@ struct mr_vdev * marsio_open_device(struct mr_instance * instance,
/* 查询打开结果 */
struct mr_vdev * vdev = marsio_device_lookup(instance, devsym);
- if (vdev == NULL) return NULL;
+ if (vdev == NULL)
+ {
+ return NULL;
+ }
MR_INFO(" ");
MR_INFO("Application %s, Device %s:", instance->appsym, vdev->devsym);
MR_INFO(" Rx Queue Count : %d", vdev->nr_rxstream);
MR_INFO(" Tx Queue Count : %d", vdev->nr_txstream);
- MR_INFO(" ARP protocol handler : %s", vdev->en_arp ? "Enable" : "Disable");
- MR_INFO(" ICMP protocol handler : %s", vdev->en_arp ? "Enable" : "Disable");
- /* 调试捕包 */
- mrapp_bpf_dumper_init(instance, vdev);
- /* 静态邻居表 */
- mrapp_neigh_device_init(instance, vdev);
+ if (vdev->vdi->vdev->representor_config.enable > 0)
+ {
+ tap_representor_init(instance, vdev);
+ }
+
return vdev;
}
diff --git a/app/src/rawio.c b/app/src/rawio.c
index 1f4c762..2b32174 100644
--- a/app/src/rawio.c
+++ b/app/src/rawio.c
@@ -39,7 +39,7 @@ int mrapp_packet_fast_send_burst(struct vdev_instance * vdi, queue_id_t qid,
PROTECT_rte_mbuf_poison_bulk(mbufs, nr_mbufs);
rte_spinlock_lock(&__f_fast_lock);
- int ret = vnode_mirror_enqueue_bulk(vdi->vnode_ltx_prod, qid, mbufs, mbufs_hash, nr_mbufs);
+ int ret = vnode_mirror_enqueue_bulk(vdi->vnode_ftx_prod, qid, mbufs, mbufs_hash, nr_mbufs);
rte_spinlock_unlock(&__f_fast_lock);
return ret;
}
@@ -47,23 +47,26 @@ int mrapp_packet_fast_send_burst(struct vdev_instance * vdi, queue_id_t qid,
int marsio_recv_burst(struct mr_vdev * vdev, queue_id_t qid, marsio_buff_t * mbufs[], int nr_mbufs)
{
assert(qid < vdev->nr_rxstream);
- if (unlikely(qid >= vdev->nr_rxstream)) return -EINVAL;
+ if (unlikely(qid >= vdev->nr_rxstream))
+ return -EINVAL;
struct vdev_instance * vdi = vdev->vdi;
struct rte_mbuf ** __mbufs = (struct rte_mbuf **)mbufs;
int ret = vnode_mirror_dequeue_burst(vdi->vnode_rx_cons, qid, __mbufs, nr_mbufs);
- if (unlikely(ret == 0)) goto out;
+ if (unlikely(ret == 0))
+ goto out;
PROTECT_rte_mbuf_unpoison_bulk(__mbufs, ret);
- /* ARP */
- if (vdev->en_arp) arp_entry(vdev->instance, vdi, qid, __mbufs, ret);
- /* ICMP */
- if (vdev->en_icmp) icmp_entry(vdev->instance, vdi, qid, __mbufs, ret);
+ if (vdev->tap_representor != NULL)
+ {
+ tap_representor_entry(vdev, qid, mbufs, ret);
+ }
/* 报文合法性检查,防止Double-Free */
- for (int i = 0; i < ret; i++) __rte_mbuf_sanity_check(__mbufs[i], i);
+ for (int i = 0; i < ret; i++)
+ __rte_mbuf_sanity_check(__mbufs[i], i);
/* 线程运行情况统计,收报情况,对于非注册线程不统计 */
if (thread_info.instance != NULL)
@@ -73,13 +76,6 @@ int marsio_recv_burst(struct mr_vdev * vdev, queue_id_t qid, marsio_buff_t * mbu
thread_info.instance->stat[tid].packet_recv_length = __packet_total_len(__mbufs, ret);
}
- /* BPF Dumper */
- if (unlikely(vdev->bpf_dumper != NULL))
- {
- bpf_dumper_write(vdev->bpf_dumper, __mbufs, ret);
-
- }
-
PROTECT_rte_mbuf_poison_bulk(__mbufs, ret);
out:
return ret;
@@ -143,10 +139,6 @@ int marsio_send_burst(struct mr_sendpath * sendpath, queue_id_t sid, marsio_buff
hash_t hash[MR_BURST_MAX];
for (int i = 0; i < nr_mbufs; i++) hash[i] = __mbufs[i]->hash.usr;
- /* BPF Dumper */
- if (unlikely(sendpath->vdev->bpf_dumper != NULL))
- bpf_dumper_write(sendpath->vdev->bpf_dumper, __mbufs, nr_mbufs);
-
/* 线程运行情况统计 */
if (thread_info.instance != NULL)
{
@@ -204,10 +196,6 @@ int marsio_send_burst_with_options(struct mr_sendpath * sendpath, queue_id_t sid
hash_t hash[MR_BURST_MAX];
for (int i = 0; i < nr_mbufs; i++) hash[i] = __mbufs[i]->hash.usr;
- /* BPF Dumper */
- if (unlikely(sendpath->vdev->bpf_dumper != NULL))
- bpf_dumper_write(sendpath->vdev->bpf_dumper, __mbufs, nr_mbufs);
-
/* 线程运行情况统计 */
if (thread_info.instance != NULL)
{
@@ -219,12 +207,7 @@ int marsio_send_burst_with_options(struct mr_sendpath * sendpath, queue_id_t sid
/* 提交到队列前,应用不再具有访问权限,对mbuf进行保护 */
PROTECT_rte_mbuf_poison_bulk(__mbufs, nr_mbufs);
- if (options & MARSIO_SEND_OPT_FAST)
- {
- vnode_mirror_enqueue_bulk(sendpath->target_vdi->vnode_ftx_prod, sid,
- __mbufs, hash, nr_mbufs);
- }
- else if(options & MARSIO_SEND_OPT_CTRL)
+ if(options & MARSIO_SEND_OPT_CTRL)
{
vnode_mirror_enqueue_bulk(sendpath->target_vdi->vnode_ltx_prod, sid,
__mbufs, hash, nr_mbufs);
@@ -248,7 +231,7 @@ err:
for (int i = 0; i < nr_mbufs; i++) rte_pktmbuf_free(mbufs[i]);
return RT_ERR;
-}
+ }
void marsio_send_burst_flush(struct mr_sendpath * sendpath, queue_id_t sid)
{
diff --git a/app/src/tap.c b/app/src/tap.c
new file mode 100644
index 0000000..bb0c131
--- /dev/null
+++ b/app/src/tap.c
@@ -0,0 +1,398 @@
+#include <common.h>
+#include <pcap/pcap.h>
+#include <rte_atomic.h>
+#include <rte_epoll.h>
+#include <rte_ether.h>
+#include <rte_malloc.h>
+#include <rte_mbuf.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/epoll.h>
+
+#include <assert.h>
+#include <fcntl.h>
+#include <linux/if.h>
+#include <linux/if_tun.h>
+#include <marsio.h>
+#include <mrapp.h>
+#include <mrb_define.h>
+#include <netinet/in.h>
+#include <sys/ioctl.h>
+#include <tap.h>
+#include <unistd.h>
+
+static int tap_ioctl(int fd, unsigned long request, struct ifreq * ifr, int set)
+{
+ short req_flags = ifr->ifr_flags;
+ switch (request)
+ {
+ case SIOCSIFFLAGS:
+ if (ioctl(fd, SIOCGIFFLAGS, ifr) < 0)
+ goto error;
+ if (set)
+ ifr->ifr_flags |= req_flags;
+ else
+ ifr->ifr_flags &= ~req_flags;
+ break;
+
+ case SIOCGIFFLAGS:
+ case SIOCGIFHWADDR:
+ case SIOCSIFHWADDR:
+ case SIOCSIFMTU:
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (ioctl(fd, request, ifr) < 0)
+ goto error;
+
+ return 0;
+
+error:
+ MR_ERROR("%s: ioctl(%lu) failed with error: %s", ifr->ifr_name, request, strerror(errno));
+ return -errno;
+}
+
+struct tap_device
+{
+ struct mr_instance * ref_mr_instance;
+ struct mr_vdev * ref_vdev;
+
+ int tap_fd;
+ struct rte_epoll_event epoll_event;
+
+ rte_atomic64_t stat_write_pkts;
+ rte_atomic64_t stat_write_pktlen;
+ rte_atomic64_t stat_write_drops;
+
+ rte_atomic64_t stat_read_pkts;
+ rte_atomic64_t stat_read_pktlen;
+ rte_atomic64_t stat_read_drops;
+};
+
+static int tap_resp_dev_filter(struct vdev * vdev_desc, struct rte_mbuf * mbuf)
+{
+ struct mrb_metadata * mrb_meta = (struct mrb_metadata *)mrbuf_cz_data(mbuf, MR_NODE_CTRLZONE_ID);
+ struct pkt_parser_result * parser_result = &mrb_meta->pkt_parser_result;
+ const struct rte_ether_hdr * ether_hdr = rte_pktmbuf_mtod(mbuf, const struct rte_ether_hdr *);
+
+ /* not local's mac addr or broadcast packet, ignore it */
+ if (rte_is_broadcast_ether_addr(&ether_hdr->dst_addr) == 0 &&
+ rte_is_same_ether_addr(&ether_hdr->dst_addr, &vdev_desc->ether_addr) == 0)
+ {
+ return 0;
+ }
+
+ /* for arp, rarp and lldp, only check the dest's mac address */
+ if (ether_hdr->ether_type == htons(RTE_ETHER_TYPE_ARP) && vdev_desc->representor_config.redirect_local_arp > 0)
+ {
+ return 1;
+ }
+
+ if (ether_hdr->ether_type == htons(RTE_ETHER_TYPE_RARP) && vdev_desc->representor_config.redirect_local_rarp > 0)
+ {
+ return 1;
+ }
+
+ if (ether_hdr->ether_type == htons(RTE_ETHER_TYPE_LLDP) && vdev_desc->representor_config.redirect_local_lldp > 0)
+ {
+ return 1;
+ }
+
+ /* allow layers are ETHER->IPv4->TCP, ETHER->IPv4->UDP, the ipv6 is not supported for now. */
+ static const uint16_t exp_ipv4_tcp[] = {
+ LAYER_TYPE_ID_ETHER,
+ LAYER_TYPE_ID_IPV4,
+ LAYER_TYPE_ID_TCP,
+ };
+
+ static const uint16_t exp_ipv4_udp[] = {
+ LAYER_TYPE_ID_ETHER,
+ LAYER_TYPE_ID_IPV4,
+ LAYER_TYPE_ID_UDP,
+ };
+ static const uint16_t exp_ipv4_others[] = {
+ LAYER_TYPE_ID_ETHER,
+ LAYER_TYPE_ID_IPV4,
+ };
+
+ if (complex_layer_type_expect(parser_result, exp_ipv4_tcp, RTE_DIM(exp_ipv4_tcp)) == 0 ||
+ complex_layer_type_expect(parser_result, exp_ipv4_udp, RTE_DIM(exp_ipv4_udp)) == 0 ||
+ complex_layer_type_expect(parser_result, exp_ipv4_others, RTE_DIM(exp_ipv4_others)) == 0)
+ {
+ return 1;
+ }
+ else
+ {
+ return 0;
+ }
+
+ assert(false);
+}
+
+static int tap_device_tx(struct tap_device * tap_dev, struct rte_mbuf * mbuf)
+{
+ const char * pkt_ptr = rte_pktmbuf_mtod(mbuf, const char *);
+ unsigned int pkt_len = rte_pktmbuf_data_len(mbuf);
+
+ if (pkt_ptr == NULL || pkt_len == 0)
+ {
+ rte_atomic64_add(&tap_dev->stat_write_drops, 1);
+ goto err;
+ }
+
+ ssize_t len = write(tap_dev->tap_fd, pkt_ptr, pkt_len);
+ if (unlikely(len < 0))
+ {
+ rte_atomic64_add(&tap_dev->stat_write_drops, 1);
+ goto err;
+ }
+
+ rte_atomic64_add(&tap_dev->stat_write_pkts, 1);
+ rte_atomic64_add(&tap_dev->stat_write_pktlen, pkt_len);
+
+ return 0;
+
+err:
+ return -1;
+}
+
+static int tap_device_rx(struct tap_device * tap_dev, unsigned int queue_id, marsio_buff_t * buffs[],
+ unsigned int nr_buffs)
+{
+ char buff[ETH_MAX_MTU];
+ ssize_t sz_buff = read(tap_dev->tap_fd, buff, sizeof(buff));
+ if (sz_buff == -1 && (errno == EWOULDBLOCK || errno == EAGAIN))
+ {
+ return 0;
+ }
+ else if (unlikely(sz_buff < 0))
+ {
+ return -1;
+ }
+
+ int ret = marsio_buff_malloc_global(tap_dev->ref_mr_instance, buffs, 1, MARSIO_SOCKET_ID_ANY, MARSIO_LCORE_ID_ANY);
+ if (unlikely(ret < 0))
+ {
+ return -2;
+ }
+
+ struct rte_mbuf * mbuf = (struct rte_mbuf *)buffs[0];
+ assert(mbuf != NULL);
+
+ char * mbuf_data_ptr = rte_pktmbuf_append(mbuf, sz_buff);
+ if (unlikely(mbuf_data_ptr == NULL))
+ {
+ marsio_buff_free(tap_dev->ref_mr_instance, buffs, 1, MARSIO_SOCKET_ID_ANY, MARSIO_LCORE_ID_ANY);
+ return -3;
+ }
+
+ rte_memcpy(mbuf_data_ptr, buff, sz_buff);
+ return 1;
+}
+
+static struct tap_device * tap_device_create(struct mr_instance * mr_instance, const char * name, struct rte_ether_addr * hwaddr)
+{
+ struct tap_device * tap_dev_inst = rte_zmalloc(NULL, sizeof(struct tap_device), 0);
+ MR_VERIFY_MALLOC(tap_dev_inst);
+
+ tap_dev_inst->ref_mr_instance = mr_instance;
+ int ioctl_sock = -1;
+ int tap_fd = -1;
+
+ tap_fd = open("/dev/net/tun", O_RDWR);
+ if (tap_fd < 0)
+ {
+ MR_ERROR("open /dev/net/tun failed: %s", strerror(errno));
+ goto errout;
+ }
+
+ ioctl_sock = socket(AF_INET, SOCK_DGRAM, 0);
+ if (ioctl_sock < 0)
+ {
+ MR_ERROR("unable to get a socket for management: %s", strerror(errno));
+ goto errout;
+ }
+
+ struct ifreq ifr = {.ifr_flags = IFF_TAP | IFF_NO_PI};
+ snprintf(ifr.ifr_name, IFNAMSIZ, "%s", name);
+
+ int ret = ioctl(tap_fd, TUNSETIFF, (void *)&ifr);
+ if (ret < 0)
+ {
+ MR_ERROR("tap %s tap_fd(tap_fd = %d) ioctl failed: %s", name, tap_fd, strerror(errno));
+ goto errout;
+ }
+
+ /* set the tap device in up and running status */
+ struct ifreq link_up_ifr = {.ifr_flags = IFF_UP | IFF_RUNNING};
+ snprintf(link_up_ifr.ifr_name, IFNAMSIZ, "%s", name);
+
+ ret = tap_ioctl(ioctl_sock, SIOCSIFFLAGS, &link_up_ifr, 1);
+ if (ret < 0)
+ {
+ MR_ERROR("tap device %s link up failed.", name);
+ goto errout;
+ }
+
+ /* clear the ifr, and use it as mac addr setup */
+ ret = tap_ioctl(ioctl_sock, SIOCGIFHWADDR, &ifr, 0);
+ if (ret < 0)
+ {
+ MR_ERROR("tap device %s mac address get failed.", name);
+ goto errout;
+ }
+
+ ifr.ifr_hwaddr.sa_family = AF_LOCAL;
+ rte_memcpy(ifr.ifr_hwaddr.sa_data, hwaddr, RTE_ETHER_ADDR_LEN);
+ ret = tap_ioctl(ioctl_sock, SIOCSIFHWADDR, &ifr, 1);
+ if (ret < 0)
+ {
+ MR_ERROR("tap device %s mac address set failed.", name);
+ goto errout;
+ }
+
+ MR_INFO("tap device %s for created.", name);
+ tap_dev_inst->tap_fd = tap_fd;
+
+ close(ioctl_sock);
+ ioctl_sock = -1;
+
+ /* first time called, need to create evfd */
+ if (mr_instance->tap_resp_epfd == 0)
+ {
+ mr_instance->tap_resp_epfd = epoll_create1(EPOLL_CLOEXEC);
+ }
+
+ /* prepare add the device handle to instance, and join to epoll fd */
+ struct rte_epoll_event epoll_event = {
+ .epdata.event = EPOLLIN,
+ .epdata.data = (void *)tap_dev_inst,
+ };
+
+ tap_dev_inst->epoll_event = epoll_event;
+ ret = rte_epoll_ctl(mr_instance->tap_resp_epfd, EPOLL_CTL_ADD, tap_fd, &tap_dev_inst->epoll_event);
+ if (ret < 0)
+ {
+ MR_ERROR("failed at add tap_fd %d to epoll_fd for tap representor %s.", tap_fd, name);
+ goto errout;
+ }
+
+ /* add to the tap resp sets */
+ mr_instance->tap_resp_devices[mr_instance->nr_tap_resp_devices] = tap_dev_inst;
+ mr_instance->nr_tap_resp_devices++;
+ return tap_dev_inst;
+
+errout:
+ if (tap_dev_inst != NULL)
+ {
+ rte_free(tap_dev_inst);
+ }
+
+ if (ioctl_sock >= 0)
+ {
+ close(ioctl_sock);
+ }
+
+ if (tap_fd >= 0)
+ {
+ close(tap_fd);
+ }
+
+ return NULL;
+}
+
+static void * tap_representor_poll_thread_entry(void * arg)
+{
+#define TAP_RESP_EVENTS_MAX 8
+ struct mr_instance * mr_instance = (struct mr_instance *)arg;
+ struct rte_epoll_event epoll_events[TAP_RESP_EVENTS_MAX] = {};
+ int ret = 0;
+
+ while (1)
+ {
+ int n = rte_epoll_wait(mr_instance->tap_resp_epfd, epoll_events, TAP_RESP_EVENTS_MAX, -1);
+ if (unlikely(n < 0))
+ {
+ MR_ERROR("rte_epoll_wait returned error %d, tap_resp poll thread terminated.", errno);
+ goto errout;
+ }
+
+ /* handle the read event, read the packet then redirect to shmdev queues */
+ for (int i = 0; i < n; i++)
+ {
+ struct tap_device * tap_dev = (struct tap_device *)(epoll_events[i].epdata.data);
+ struct mr_vdev * vdev = tap_dev->ref_vdev;
+
+ marsio_buff_t * buff[1];
+ ret = tap_device_rx(tap_dev, 0, buff, RTE_DIM(buff));
+
+ if (ret <= 0)
+ {
+ continue;
+ }
+
+ ret = mrapp_packet_fast_send_burst(vdev->vdi, 0, (struct rte_mbuf **)buff, RTE_DIM(buff));
+ if (unlikely(ret < 0))
+ {
+ marsio_buff_free(mr_instance, buff, RTE_DIM(buff), MARSIO_SOCKET_ID_ANY, MARSIO_LCORE_ID_ANY);
+ }
+ }
+ }
+
+errout:
+ return (void *)NULL;
+}
+
+int tap_representor_entry(struct mr_vdev * vdev, unsigned int qid, marsio_buff_t * buffs[], unsigned int nr_buffs)
+{
+ static rte_spinlock_t tap_dev_write_lock = {0};
+
+ for (unsigned int i = 0; i < nr_buffs; i++)
+ {
+ struct rte_mbuf * mbuf = (struct rte_mbuf *)buffs[i];
+ if (tap_resp_dev_filter(vdev->vdi->vdev, mbuf) <= 0)
+ {
+ continue;
+ }
+
+ /* spinlock at here */
+ rte_spinlock_lock(&tap_dev_write_lock);
+ tap_device_tx(vdev->tap_representor, mbuf);
+ rte_spinlock_unlock(&tap_dev_write_lock);
+ }
+
+ return 0;
+}
+
+int tap_representor_init(struct mr_instance * mr_instance, struct mr_vdev * vdev)
+{
+ struct vdev * _vdev = vdev->vdi->vdev;
+ struct tap_device * tap_dev = tap_device_create(mr_instance, vdev->devsym, &_vdev->ether_addr);
+ if (unlikely(tap_dev == NULL))
+ {
+ MR_ERROR("failed at create tap representor for %s, ignore it.", vdev->devsym);
+ return -1;
+ }
+
+ tap_dev->ref_mr_instance = mr_instance;
+ tap_dev->ref_vdev = vdev;
+ vdev->tap_representor = tap_dev;
+
+ /* create the tap representor poll thread at first time.
+ * this thread is use for recv the packet from the tap, and forward to the rings */
+ if (mr_instance->pid_tap_resp_poll == 0)
+ {
+ int ret = pthread_create(&mr_instance->pid_tap_resp_poll, NULL, tap_representor_poll_thread_entry,
+ (void *)mr_instance);
+ if (unlikely(ret < 0))
+ {
+ MR_ERROR("failed at creating thread for tap representor poll routine: %s", strerror(errno));
+ return -2;
+ }
+ }
+
+ MR_INFO("tap representor for %s created. ", vdev->devsym);
+ return 0;
+} \ No newline at end of file
diff --git a/include/internal/vdev_define.h b/include/internal/vdev_define.h
index e85d30e..6579ef6 100644
--- a/include/internal/vdev_define.h
+++ b/include/internal/vdev_define.h
@@ -80,6 +80,17 @@ struct vdev
/* 网卡状态 */
uint8_t link_status;
+
+ /* representor config */
+ struct
+ {
+ unsigned int enable;
+ unsigned int redirect_local_arp;
+ unsigned int redirect_local_rarp;
+ unsigned int redirect_local_lldp;
+ unsigned int redirect_local_ipv4;
+ unsigned int redirect_local_ipv6;
+ } representor_config;
};
/* 虚拟设备句柄,应用使用 */
diff --git a/service/include/sc_devmgr.h b/service/include/sc_devmgr.h
index 6f3ceca..f840234 100644
--- a/service/include/sc_devmgr.h
+++ b/service/include/sc_devmgr.h
@@ -41,7 +41,7 @@ enum mr_dev_role
MR_DEV_ROLE_TAP_INTERFACE,
MR_DEV_ROLE_ENDPOINT_INTERFACE,
MR_DEV_ROLE_NF_INTERFACE,
- MR_DEV_ROLE_SYSTEM_INTERFACE,
+ MR_DEV_ROLE_KERNEL_RESP_INTERFACE,
};
enum mr_dev_type
@@ -50,6 +50,24 @@ enum mr_dev_type
MR_DEV_TYPE_BOND,
};
+enum representor_ns
+{
+ REPRESENTOR_NS_SERVICE,
+ REPRESENTOR_NS_APP,
+};
+
+struct representor_config
+{
+ char str_representor_symbol[MR_SYMBOL_MAX];
+ enum representor_ns ns_type;
+
+ unsigned int redirect_local_arp;
+ unsigned int redirect_local_rarp;
+ unsigned int redirect_local_lldp;
+ unsigned int redirect_local_ipv4;
+ unsigned int redirect_local_ipv6;
+};
+
struct dpdk_dev_stats
{
struct
@@ -170,6 +188,15 @@ struct mr_dev_desc
unsigned int is_bond_slave;
struct mr_dev_desc * bond_master;
+ struct representor_config * representor_config;
+
+ /* only available at service namespace representor */
+ struct
+ {
+ struct mr_dev_desc * device_representor;
+ struct mr_dev_desc * represented_device;
+ };
+
/* for endpoint */
struct
{
diff --git a/service/src/core.c b/service/src/core.c
index f0af720..48f574d 100644
--- a/service/src/core.c
+++ b/service/src/core.c
@@ -289,6 +289,7 @@ int sc_dataplane_thread(void * arg)
return 0;
}
+#if 0
static const char * __str_disable_or_enable_ptr(void * ptr)
{
if (ptr == NULL)
@@ -296,6 +297,7 @@ static const char * __str_disable_or_enable_ptr(void * ptr)
else
return "Enable";
}
+#endif
static const char * __str_disable_or_enable_uint(unsigned int value)
{
diff --git a/service/src/devmgr.c b/service/src/devmgr.c
index 6ebc0bd..d2b3258 100644
--- a/service/src/devmgr.c
+++ b/service/src/devmgr.c
@@ -10,7 +10,6 @@
#include <rte_bus_pci.h>
#include <rte_config.h>
#include <rte_debug.h>
-#include <rte_eth_bond.h>
#include <rte_ethdev.h>
#include <rte_ether.h>
#include <rte_flow.h>
@@ -21,6 +20,7 @@
#include <rte_node_eth_api.h>
#include <rte_pci.h>
#include <rte_string_fns.h>
+#include <rte_eth_bond.h>
#include <MESA_prof_load.h>
#include <cJSON.h>
@@ -120,7 +120,7 @@ static int mr_dev_desc_status_print(struct mr_dev_desc * dev_desc)
[MR_DEV_ROLE_TAP_INTERFACE] = "tap",
[MR_DEV_ROLE_ENDPOINT_INTERFACE] = "endpoint",
[MR_DEV_ROLE_NF_INTERFACE] = "network function",
- [MR_DEV_ROLE_SYSTEM_INTERFACE] = "system",
+ [MR_DEV_ROLE_KERNEL_RESP_INTERFACE] = "kernel resp",
};
static const char * dev_type_as_str[] = {
@@ -171,6 +171,80 @@ struct mr_dev_desc * mr_dev_desc_create(struct devmgr_main * devmgr_main, const
return dev_desc;
}
+static int kernel_resp_crosslink(struct devmgr_main * devmgr_main)
+{
+ unsigned int dev_iterator = 0;
+ struct mr_dev_desc * dev_desc_iter = NULL;
+
+ while ((dev_desc_iter = mr_dev_desc_iterate(devmgr_main, &dev_iterator)) != NULL)
+ {
+ /* this device has no representor */
+ if (dev_desc_iter->representor_config == NULL)
+ {
+ continue;
+ }
+
+ struct representor_config * resp_config = dev_desc_iter->representor_config;
+
+ /* the device has a representor in app namespace,
+ * this representor will create by the app library */
+ if (resp_config->ns_type != REPRESENTOR_NS_SERVICE)
+ {
+ continue;
+ }
+
+ struct mr_dev_desc * representor = mr_dev_desc_lookup(devmgr_main, resp_config->str_representor_symbol);
+ if (representor == NULL)
+ {
+ MR_WARNING("The representor device %s for device %s is not existed.", resp_config->str_representor_symbol,
+ dev_desc_iter->symbol);
+ continue;
+ }
+
+ assert(representor->represented_device == NULL);
+ assert(dev_desc_iter->device_representor == NULL);
+
+ dev_desc_iter->device_representor = representor;
+ representor->represented_device = dev_desc_iter;
+ }
+
+ return 0;
+}
+
+static struct representor_config * kernel_resp_config_load(struct devmgr_main * devmgr_main, const char * devsym)
+{
+ struct sc_main * sc = devmgr_main->sc;
+ const char * cfg = sc->local_cfgfile;
+
+ char str_section[MR_SYMBOL_MAX];
+ snprintf(str_section, sizeof(str_section), "device:%s", devsym);
+
+ unsigned int en_representor = 0;
+ MESA_load_profile_uint_def(cfg, str_section, "en_representor", &en_representor, 0);
+
+ if (en_representor == 0)
+ {
+ return NULL;
+ }
+
+ struct representor_config * resp_cfg = ZMALLOC(sizeof(struct representor_config));
+ MR_VERIFY_MALLOC(resp_cfg);
+
+ /* for default, redirect all kinds of local packets except tunnels */
+ MESA_load_profile_uint_def(cfg, str_section, "representor_ns", &resp_cfg->ns_type, 0);
+ MESA_load_profile_string_def(cfg, str_section, "representor_dev", resp_cfg->str_representor_symbol,
+ sizeof(resp_cfg->str_representor_symbol), "");
+
+ MESA_load_profile_uint_def(cfg, str_section, "redirect_local_arp", &resp_cfg->redirect_local_arp, 1);
+ MESA_load_profile_uint_def(cfg, str_section, "redirect_local_rarp", &resp_cfg->redirect_local_ipv4, 1);
+ MESA_load_profile_uint_def(cfg, str_section, "redirect_local_lldp", &resp_cfg->redirect_local_lldp, 1);
+ MESA_load_profile_uint_def(cfg, str_section, "redirect_local_ipv4", &resp_cfg->redirect_local_ipv4, 1);
+ MESA_load_profile_uint_def(cfg, str_section, "redirect_local_ipv6", &resp_cfg->redirect_local_ipv6, 1);
+
+ return resp_cfg;
+}
+
+
int mr_dev_desc_config_load(struct devmgr_main * devmgr_main, struct mr_dev_desc * dev_desc)
{
const char * cfgfile = devmgr_main->sc->local_cfgfile;
@@ -180,6 +254,9 @@ int mr_dev_desc_config_load(struct devmgr_main * devmgr_main, struct mr_dev_desc
MESA_load_profile_uint_def(cfgfile, str_section, "driver", &dev_desc->drv_type, MR_DEV_DRV_TYPE_DPDK_PCI);
MESA_load_profile_uint_def(cfgfile, str_section, "role", &dev_desc->role_type, MR_DEV_ROLE_NONE);
+ /* for the kernel resp */
+ dev_desc->representor_config = kernel_resp_config_load(devmgr_main, dev_desc->symbol);
+
/* Get ether */
char str_ether[MR_STRING_MAX] = {};
if (MESA_load_profile_string_nodef(cfgfile, str_section, "ether", str_ether, sizeof(str_ether)) >= 0)
@@ -426,12 +503,30 @@ int shmdev_setup_one_device(struct devmgr_main * devmgr_main, const char * devsy
return RT_ERR;
}
- struct vdev * shmdev_handle = vdev_lookup(sc->vdev_main, devsym);
- MR_VERIFY_2(shmdev_handle != NULL, "vdev_lookup() returns NULL");
+ struct vdev * vdev_desc = vdev_lookup(sc->vdev_main, devsym);
+ MR_VERIFY_2(vdev_desc != NULL, "vdev_lookup() returns NULL");
dev_desc->port_id = devmgr_main->shmdev_port_id_counter++;
dev_desc->drv_type = MR_DEV_DRV_TYPE_SHMDEV;
- dev_desc->shm_dev_desc = shmdev_handle;
+ dev_desc->shm_dev_desc = vdev_desc;
+
+ /* inherit configuration from dev_desc */
+ struct representor_config * resp_config = dev_desc->representor_config;
+ if(resp_config != NULL && resp_config->ns_type == REPRESENTOR_NS_APP)
+ {
+ vdev_desc->representor_config.enable = 1;
+ vdev_desc->representor_config.redirect_local_arp = resp_config->redirect_local_arp;
+ vdev_desc->representor_config.redirect_local_lldp = resp_config->redirect_local_lldp;
+ vdev_desc->representor_config.redirect_local_rarp = resp_config->redirect_local_rarp;
+ vdev_desc->representor_config.redirect_local_ipv4 = resp_config->redirect_local_ipv4;
+ vdev_desc->representor_config.redirect_local_ipv6 = resp_config->redirect_local_ipv6;
+ }
+
+ /* copy the ether addr, inet addr to vdev desc */
+ vdev_desc->ether_addr = dev_desc->eth_addr;
+ vdev_desc->in_addr = dev_desc->in_addr;
+ vdev_desc->in_mask = dev_desc->in_mask;
+ vdev_desc->in_gateway = dev_desc->in_gateway;
/* add to index array */
devmgr_main->dev_descs[dev_desc->port_id] = dev_desc;
@@ -1823,6 +1918,9 @@ int devmgr_init(struct devmgr_main * devmgr_main)
return RT_ERR;
}
+ /* kernel resp cross link */
+ kernel_resp_crosslink(devmgr_main);
+
/* 启动物理设备状态更新线程 */
pthread_t _pid_link_update;
ret = pthread_create(&_pid_link_update, NULL, dpdk_dev_link_state_update_thread, (void *)devmgr_main);
diff --git a/service/src/node.c b/service/src/node.c
index b8f1ddc..dba70be 100644
--- a/service/src/node.c
+++ b/service/src/node.c
@@ -22,6 +22,7 @@
#include <sc_common.h>
#include <sc_node.h>
+#if 0
static int ethdev_rx_tx_nodes_setup(struct sc_main * sc, struct node_manager_main * ctx)
{
/* For each physical devices, the dpdk's library will create:
@@ -33,7 +34,7 @@ static int ethdev_rx_tx_nodes_setup(struct sc_main * sc, struct node_manager_mai
struct dpdk_dev * phydev_iter = NULL;
unsigned int nr_workers = sc->nr_io_thread;
-#if 0
+
while (phydev_iterate(sc->phydev_main, &phydev_iter) >= 0)
{
struct rte_node_ethdev_config * ethdev_config = &ctx->node_ethdev_config[ctx->nr_node_ethdev_config];
@@ -84,8 +85,8 @@ static int ethdev_rx_tx_nodes_setup(struct sc_main * sc, struct node_manager_mai
error:
return RT_ERR;
-#endif
}
+#endif
static int graph_init(struct sc_main * sc, struct node_manager_main * ctx)
{
diff --git a/service/src/node_eth_ingress.c b/service/src/node_eth_ingress.c
index c553ecf..05b222d 100644
--- a/service/src/node_eth_ingress.c
+++ b/service/src/node_eth_ingress.c
@@ -32,6 +32,7 @@ enum
ETH_INGRESS_NEXT_FORWARDER,
ETH_INGRESS_NEXT_HEALTH_CHECK,
ETH_INGRESS_NEXT_PKT_DROP,
+ ETH_INGRESS_NEXT_ETH_EGRESS,
ETH_INGRESS_NEXT_MAX,
};
@@ -65,9 +66,89 @@ int node_eth_ingress_init(struct node_manager_main * node_mgr_main)
return RT_SUCCESS;
}
-static int endpoint_pkt_filter(struct mr_dev_desc * dev_desc, unsigned int graph_id, struct rte_mbuf * mbuf,
- struct pkt_parser_result * parser_result)
+static int kernel_resp_dev_filter(struct mr_dev_desc * dev_desc, struct rte_mbuf * mbuf, struct pkt_parser_result * parser_result)
{
+ struct representor_config * resp_cfg = dev_desc->representor_config;
+ const struct rte_ether_hdr * ether_hdr = rte_pktmbuf_mtod(mbuf, const struct rte_ether_hdr *);
+
+ /* not local's mac addr or broadcast packet, ignore it */
+ if (rte_is_broadcast_ether_addr(&ether_hdr->dst_addr) == 0 &&
+ rte_is_same_ether_addr(&ether_hdr->dst_addr, &dev_desc->eth_addr) == 0)
+ {
+ return 0;
+ }
+
+ /* for arp, rarp and lldp, only check the dest's mac address */
+ if (ether_hdr->ether_type == htons(RTE_ETHER_TYPE_ARP) && resp_cfg->redirect_local_arp > 0)
+ {
+ return 1;
+ }
+
+ if (ether_hdr->ether_type == htons(RTE_ETHER_TYPE_RARP) && resp_cfg->redirect_local_rarp > 0)
+ {
+ return 1;
+ }
+
+ if (ether_hdr->ether_type == htons(RTE_ETHER_TYPE_LLDP) && resp_cfg->redirect_local_lldp > 0)
+ {
+ return 1;
+ }
+
+ /* allow layers are ETHER->IPv4->TCP, ETHER->IPv4->UDP, the ipv6 is not supported for now. */
+ static const uint16_t exp_ipv4_tcp[] = {
+ LAYER_TYPE_ID_ETHER,
+ LAYER_TYPE_ID_IPV4,
+ LAYER_TYPE_ID_TCP,
+ };
+
+ static const uint16_t exp_ipv4_udp[] = {
+ LAYER_TYPE_ID_ETHER,
+ LAYER_TYPE_ID_IPV4,
+ LAYER_TYPE_ID_UDP,
+ };
+ static const uint16_t exp_ipv4_others[] = {
+ LAYER_TYPE_ID_ETHER,
+ LAYER_TYPE_ID_IPV4,
+ };
+
+ if (complex_layer_type_expect(parser_result, exp_ipv4_tcp, RTE_DIM(exp_ipv4_tcp)) == 0 ||
+ complex_layer_type_expect(parser_result, exp_ipv4_udp, RTE_DIM(exp_ipv4_udp)) == 0 ||
+ complex_layer_type_expect(parser_result, exp_ipv4_others, RTE_DIM(exp_ipv4_others)) == 0)
+ {
+ return 1;
+ }
+ else
+ {
+ return 0;
+ }
+
+ assert(false);
+}
+
+static int endpoint_dev_packet_handler(struct mr_dev_desc * dev_desc, unsigned int graph_id, struct rte_mbuf * mbuf,
+ struct mrb_metadata * mrb_meta)
+{
+ struct pkt_parser_result * parser_result = &mrb_meta->pkt_parser_result;
+
+ /* this device has a kernel resp device, redirect all arp, icmp and bfd packets to resp device */
+ if (dev_desc->device_representor != NULL)
+ {
+ /* should go to kernel resp device */
+ if (kernel_resp_dev_filter(dev_desc->device_representor, mbuf, parser_result) > 0)
+ {
+ /* goto the kernel resp device directly */
+ mrb_meta->port_egress = dev_desc->device_representor->port_id;
+ return ETH_INGRESS_NEXT_ETH_EGRESS;
+ }
+ }
+
+ /* as a resp device, all packet from resp device should go to represented device directly */
+ else if (dev_desc->represented_device != NULL)
+ {
+ mrb_meta->port_egress = dev_desc->represented_device->port_id;
+ return ETH_INGRESS_NEXT_ETH_EGRESS;
+ }
+
/* check this packet is to local programs or not,
* packet should be like: Eth, IPv4, TCP/UDP, et al. */
if (parser_result->nr_layers < 3)
@@ -140,12 +221,10 @@ static __rte_always_inline uint16_t eth_ingress_node_process(struct rte_graph *
pkts += 1;
n_left_from -= 1;
- struct mrb_metadata * private_ctrlzone = (struct mrb_metadata *)mrbuf_cz_data(mbuf, MR_NODE_CTRLZONE_ID);
- struct pkt_parser_result * pkt_parser_result = &private_ctrlzone->pkt_parser_result;
-
+ struct mrb_metadata * mrb_meta = (struct mrb_metadata *)mrbuf_cz_data(mbuf, MR_NODE_CTRLZONE_ID);
ETH_INGRESS_STAT_ADD(eth_ingress_main, graph_id, total_pkts, 1);
- port_id_t port_ingress = private_ctrlzone->port_ingress;
+ port_id_t port_ingress = mrb_meta->port_ingress;
struct mr_dev_desc * dev_desc = mr_dev_desc_lookup_by_port_id(sc->devmgr_main, port_ingress);
assert(dev_desc != NULL);
@@ -164,9 +243,10 @@ static __rte_always_inline uint16_t eth_ingress_node_process(struct rte_graph *
next_node_index = ETH_INGRESS_NEXT_VWIRE_INGRESS;
goto node_enqueue;
}
- else if (dev_desc->role_type == MR_DEV_ROLE_ENDPOINT_INTERFACE)
+ else if (dev_desc->role_type == MR_DEV_ROLE_ENDPOINT_INTERFACE ||
+ dev_desc->role_type == MR_DEV_ROLE_KERNEL_RESP_INTERFACE)
{
- next_node_index = endpoint_pkt_filter(dev_desc, graph_id, mbuf, pkt_parser_result);
+ next_node_index = endpoint_dev_packet_handler(dev_desc, graph_id, mbuf, mrb_meta);
goto node_enqueue;
}
else if (dev_desc->role_type == MR_DEV_ROLE_NF_INTERFACE)
@@ -226,6 +306,7 @@ static struct rte_node_register eth_ingress_node_base = {
[ETH_INGRESS_NEXT_BRIDGE] = "bridge",
[ETH_INGRESS_NEXT_BFD] = "bfd",
[ETH_INGRESS_NEXT_VWIRE_INGRESS] = "vwire_ingress",
+ [ETH_INGRESS_NEXT_ETH_EGRESS] = "eth_egress",
[ETH_INGRESS_NEXT_FORWARDER] = "forwarder",
[ETH_INGRESS_NEXT_HEALTH_CHECK] = "health_check_deal_answer",
[ETH_INGRESS_NEXT_PKT_DROP] = "pkt_drop",
diff --git a/service/src/node_phydev.c b/service/src/node_phydev.c
index ea825e5..fd5c1a3 100644
--- a/service/src/node_phydev.c
+++ b/service/src/node_phydev.c
@@ -28,6 +28,8 @@ static struct dev_node_main * p_dev_node_main = &st_dev_node_main;
static_assert(sizeof(struct dev_node_ctx) <= RTE_NODE_CTX_SZ, "dev_node_ctx size must smaller than RTE_NODE_CTX_SZ");
+
+
static __rte_always_inline uint16_t dpdk_dev_rx_node_process(struct rte_graph * graph, struct rte_node * node,
void ** objs, uint16_t cnt)
{
@@ -54,11 +56,11 @@ static __rte_always_inline uint16_t dpdk_dev_rx_node_process(struct rte_graph *
for (unsigned int i = 0; i < node->idx; i++)
{
struct rte_mbuf * mbuf = (struct rte_mbuf *)node->objs[i];
- struct mrb_metadata * private_ctrlzone = mrbuf_cz_data(mbuf, MR_NODE_CTRLZONE_ID);
- memset(private_ctrlzone, 0, sizeof(struct mrb_metadata));
+ struct mrb_metadata * mrb_meta = mrbuf_cz_data(mbuf, MR_NODE_CTRLZONE_ID);
+ memset(mrb_meta, 0, sizeof(struct mrb_metadata));
- private_ctrlzone->port_ingress = ctx->dev_desc->port_id;
- private_ctrlzone->port_egress = UINT16_MAX;
+ mrb_meta->port_ingress = ctx->dev_desc->port_id;
+ mrb_meta->port_egress = UINT16_MAX;
/* Parser Pkt */
struct pkt_parser pkt_parser;
@@ -66,7 +68,7 @@ static __rte_always_inline uint16_t dpdk_dev_rx_node_process(struct rte_graph *
complex_parser_ether(&pkt_parser, rte_pktmbuf_mtod(mbuf, const char *));
/* copy the result to private zone */
- private_ctrlzone->pkt_parser_result = pkt_parser.result;
+ mrb_meta->pkt_parser_result = pkt_parser.result;
}
/* move to next node */
diff --git a/tools/classifier_rule_test/CMakeLists.txt b/tools/classifier_rule_test/CMakeLists.txt
index 4f41670..5485a81 100644
--- a/tools/classifier_rule_test/CMakeLists.txt
+++ b/tools/classifier_rule_test/CMakeLists.txt
@@ -6,4 +6,4 @@ include_directories(${DPDK_INCLUDE_DIR})
add_executable(classifier_rule_test classifier_rule_test.c)
target_link_libraries(classifier_rule_test MESA_prof_load_static infra m MESA_htable_static ${SYSTEMD_LIBRARIES})
-install(PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/classifier_rule_test DESTINATION bin COMPONENT Program) \ No newline at end of file
+#install(PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/classifier_rule_test DESTINATION bin COMPONENT Program) \ No newline at end of file
diff --git a/tools/lb_rule_test/CMakeLists.txt b/tools/lb_rule_test/CMakeLists.txt
index f24431f..fa24fe5 100644
--- a/tools/lb_rule_test/CMakeLists.txt
+++ b/tools/lb_rule_test/CMakeLists.txt
@@ -6,4 +6,4 @@ include_directories(${DPDK_INCLUDE_DIR})
add_executable(lb_rule_test lb_rule_test.c)
target_link_libraries(lb_rule_test MESA_prof_load_static infra m MESA_htable_static ${SYSTEMD_LIBRARIES})
-install(PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/lb_rule_test DESTINATION bin COMPONENT Program) \ No newline at end of file
+#install(PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/lb_rule_test DESTINATION bin COMPONENT Program) \ No newline at end of file