diff options
| -rw-r--r-- | .gitattributes | 1 | ||||
| -rw-r--r-- | CMakeLists.txt | 4 | ||||
| -rw-r--r-- | app/CMakeLists.txt | 2 | ||||
| -rw-r--r-- | app/include/bpfdump.h | 24 | ||||
| -rw-r--r-- | app/include/mrapp.h | 24 | ||||
| -rw-r--r-- | app/include/tap.h | 6 | ||||
| -rw-r--r-- | app/src/bpfdump.c | 271 | ||||
| -rw-r--r-- | app/src/marsio.c | 121 | ||||
| -rw-r--r-- | app/src/rawio.c | 43 | ||||
| -rw-r--r-- | app/src/tap.c | 398 | ||||
| -rw-r--r-- | include/internal/vdev_define.h | 11 | ||||
| -rw-r--r-- | service/include/sc_devmgr.h | 29 | ||||
| -rw-r--r-- | service/src/core.c | 2 | ||||
| -rw-r--r-- | service/src/devmgr.c | 108 | ||||
| -rw-r--r-- | service/src/node.c | 5 | ||||
| -rw-r--r-- | service/src/node_eth_ingress.c | 97 | ||||
| -rw-r--r-- | service/src/node_phydev.c | 12 | ||||
| -rw-r--r-- | tools/classifier_rule_test/CMakeLists.txt | 2 | ||||
| -rw-r--r-- | tools/lb_rule_test/CMakeLists.txt | 2 |
19 files changed, 694 insertions, 468 deletions
diff --git a/.gitattributes b/.gitattributes index 04d8752..1131d94 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,3 +1,4 @@ *.a binary *.so binary *.o binary +*.sh text eol=lf
\ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index 10ff012..668d9ab 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -15,8 +15,8 @@ option(ENABLE_SANITIZE_ADDRESS "Enable AddressSanitizer" FALSE) option(ENABLE_SANITIZE_THREAD "Enable ThreadSanitizer" FALSE) option(ENABLE_VNODE_CHECK_THREAD_SAFE "Enable concurrent write thread-safe check for VNODE." FALSE) -set(CMAKE_C_FLAGS "-std=gnu99 -m64 -march=corei7-avx") -set(CMAKE_CXX_FLAGS "-std=gnu++11 -m64 -march=corei7-avx" ) +set(CMAKE_C_FLAGS "-std=gnu99 -m64 -march=corei7-avx -Wno-format-truncation") +set(CMAKE_CXX_FLAGS "-std=gnu++11 -m64 -march=corei7-avx -Wno-format-truncation" ) if(ENABLE_WARNING_ALL) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -fPIC") diff --git a/app/CMakeLists.txt b/app/CMakeLists.txt index 7ae1296..ac13d71 100644 --- a/app/CMakeLists.txt +++ b/app/CMakeLists.txt @@ -8,7 +8,7 @@ include_directories(${DPDK_INCLUDE_DIR}) include_directories(include) add_definitions(${DPDK_C_PREDEFINED}) add_library(marsio SHARED src/marsio.c src/arp.c src/icmp.c src/neigh.c src/rawio.c src/mrb.c - src/sendpath.c src/monit.c src/bpfdump.c) + src/sendpath.c src/monit.c src/tap.c) set_target_properties(marsio PROPERTIES VERSION ${MARSIO_VERSION_MAJOR}.${MARSIO_VERSION_MINOR}) set_target_properties(marsio PROPERTIES SOVERSION ${MARSIO_VERSION_MAJOR}) diff --git a/app/include/bpfdump.h b/app/include/bpfdump.h deleted file mode 100644 index 875030d..0000000 --- a/app/include/bpfdump.h +++ /dev/null @@ -1,24 +0,0 @@ -#pragma once - -#include <rte_mbuf.h> - -struct bpf_dumper; - -enum bpf_dumper_backend -{ - BPF_DUMPER_TAP, - BPF_DUMPER_PCAP, - BPF_DUMPER_SYSLOG, - BPF_DUMPER_COUNT, - BPF_DUMPER_MAX -}; - -#define BPF_DUMPER_DEFAULT_BACKEND BPF_DUMPER_TAP - -int bpf_dumper_write(struct bpf_dumper * dumper, struct rte_mbuf * __mbufs[], - unsigned int nr_mbufs); - -void bpf_dumper_destory(struct bpf_dumper * dumper); - -struct bpf_dumper * bpf_dumper_create(const char * appsym, enum bpf_dumper_backend backend, - const char * str_devsym, const char * str_path, const char * str_bpf_expr);
\ No newline at end of file diff --git a/app/include/mrapp.h b/app/include/mrapp.h index 56bf9fc..e60d2d9 100644 --- a/app/include/mrapp.h +++ b/app/include/mrapp.h @@ -2,12 +2,12 @@ #include <common.h> #include <ctrlmsg.h> -#include <vdev_define.h> -#include <neigh.h> -#include <marsio.h> #include <ldbc.h> +#include <marsio.h> +#include <neigh.h> #include <pcap/pcap.h> -#include <bpfdump.h> +#include <tap.h> +#include <vdev_define.h> struct mr_instance; @@ -39,14 +39,12 @@ struct mrapp_stat struct mr_vdev { char devsym[MR_SYMBOL_MAX]; - struct mr_instance * instance; - struct vdev_instance * vdi; unsigned int nr_rxstream; unsigned int nr_txstream; - unsigned int en_arp; - unsigned int en_icmp; - struct bpf_dumper * bpf_dumper; + struct mr_instance * instance; + struct vdev_instance * vdi; + struct tap_device * tap_representor; }; struct mr_thread_info @@ -120,6 +118,14 @@ struct mr_instance unsigned int recv_all_state[MR_SID_MAX]; /* 基于ASAN的内存保护模式 */ unsigned int memory_protect_with_asan; + + /* tap resp handles */ + struct tap_device * tap_resp_devices[MR_DEVICE_MAX]; + unsigned int nr_tap_resp_devices; + + /* tap resp epoll fd */ + int tap_resp_epfd; + pthread_t pid_tap_resp_poll; }; /* EAL环境是否初始化 */ diff --git a/app/include/tap.h b/app/include/tap.h new file mode 100644 index 0000000..249304a --- /dev/null +++ b/app/include/tap.h @@ -0,0 +1,6 @@ +#pragma once +#include <marsio.h> +#include <mrapp.h> + +int tap_representor_init(struct mr_instance * mr_instance, struct mr_vdev * vdev); +int tap_representor_entry(struct mr_vdev * vdev, unsigned int qid, marsio_buff_t * buffs[], unsigned int nr_buffs);
\ No newline at end of file diff --git a/app/src/bpfdump.c b/app/src/bpfdump.c deleted file mode 100644 index 8452bdc..0000000 --- a/app/src/bpfdump.c +++ /dev/null @@ -1,271 +0,0 @@ -#include <common.h> -#include <pcap/pcap.h> -#include <rte_atomic.h> -#include <rte_malloc.h> -#include <rte_mbuf.h> - -#include <netinet/in.h> -#include <linux/if.h> -#include <linux/if_tun.h> -#include <fcntl.h> -#include <sys/ioctl.h> -#include <unistd.h> -#include <signal.h> -#include <assert.h> -#include <bpfdump.h> - -static int __tap_count = 0; - -static const char * str_bpf_dumper_backend[] = -{ - [BPF_DUMPER_TAP] = "TAP", - [BPF_DUMPER_PCAP] = "PCAP", - [BPF_DUMPER_SYSLOG] = "SYSLOG", - [BPF_DUMPER_COUNT] = "COUNT" -}; - -struct __backend_ops -{ - int(*fn_create)(struct bpf_dumper * object, const char * appsym, - const char * str_devsym, const char * str_path); - void(*fn_destory)(struct bpf_dumper * object); - int(*fn_write)(struct bpf_dumper * object, const char * pkt_ptr, unsigned int pkt_len); -}; - -struct bpf_dumper -{ - char str_dumper_sym[MR_SYMBOL_MAX]; - char str_bpf_expr[MR_STRING_MAX]; - char str_dumpfile[MR_STRING_MAX]; - - struct bpf_program bpf_program; - - unsigned int en_read; - unsigned int en_write; - - enum bpf_dumper_backend backend; - struct __backend_ops * backend_ops; - - struct __backend_tap - { - int tapdev_fd; - } backend_tap; - - int bpf_offset; - - rte_atomic64_t stat_write_pkts; - rte_atomic64_t stat_write_pktlen; - rte_atomic64_t stat_write_drops; - - rte_atomic64_t stat_read_pkts; - rte_atomic64_t stat_read_pktlen; - rte_atomic64_t stat_read_drops; -}; - -static int tap_ioctl(int fd, unsigned long request, struct ifreq *ifr, int set)
-{
- short req_flags = ifr->ifr_flags;
- switch (request) {
- case SIOCSIFFLAGS:
- if (ioctl(fd, SIOCGIFFLAGS, ifr) < 0)
- goto error;
- if (set)
- ifr->ifr_flags |= req_flags;
- else
- ifr->ifr_flags &= ~req_flags;
- break;
-
- case SIOCGIFFLAGS:
- case SIOCGIFHWADDR:
- case SIOCSIFHWADDR:
- case SIOCSIFMTU:
- break;
- default:
- return -EINVAL;
- }
-
- if (ioctl(fd, request, ifr) < 0)
- goto error;
-
- return 0;
-
-error:
- MR_ERROR("%s: ioctl(%lu) failed with error: %s", ifr->ifr_name, request, strerror(errno));
- return -errno;
-} - -static int __tap_dumper_create(struct bpf_dumper * object, const char * appsym, - const char * str_devsym, const char * str_path) -{ - /* ����TUNTAP�豸ʹ�õ�FD */ - int fd = 0; - int ioctl_sock = 0; - - fd = open("/dev/net/tun", O_RDWR); - if (fd < 0) - { - MR_ERROR("Tap dumper %s open /dev/net/tun failed: %s", - object->str_dumper_sym, strerror(errno)); goto __fail_out; - } - - /* ����TUNTAP�豸ʱʹ�õ�FD */ - ioctl_sock = socket(AF_INET, SOCK_DGRAM, 0); - if (ioctl_sock < 0) - { - MR_ERROR("Tap dumper %s unable to get a socket for management: %s", - object->str_dumper_sym, strerror(errno)); goto __fail_out; - } - - struct ifreq ifr; - memset(&ifr, 0, sizeof(ifr)); - - snprintf(object->str_dumpfile, IFNAMSIZ, "%s_t%d", appsym, __tap_count++); - - /* TAP device without packet information */ - ifr.ifr_flags = IFF_TAP | IFF_NO_PI; - snprintf(ifr.ifr_name, IFNAMSIZ, "%s", object->str_dumpfile); - - int ret = ioctl(fd, TUNSETIFF, (void *)&ifr); - if (ret < 0) - { - MR_ERROR("Tap dumper %s tap fd(fd = %d) ioctl failed: %s", - object->str_dumper_sym, fd, strerror(errno)); goto __fail_out; - } - - struct ifreq link_up_ifr = { .ifr_flags = IFF_UP | IFF_RUNNING }; - snprintf(link_up_ifr.ifr_name, IFNAMSIZ, "%s", object->str_dumpfile); - - ret = tap_ioctl(ioctl_sock, SIOCSIFFLAGS, &link_up_ifr, 1); - if(ret < 0) - { - MR_ERROR("Tap dumper %s tap device link up failed", object->str_dumper_sym); - goto __fail_out; - } - - MR_DEBUG("TAP dumper %s: tap fd = %d, ifr_name = %s", - object->str_dumper_sym, fd, ifr.ifr_name); - - object->backend_tap.tapdev_fd = fd; - close(ioctl_sock); - - return RT_SUCCESS; - -__fail_out: - if(fd > 0) close(fd); - if (ioctl_sock > 0) close(ioctl_sock); - return RT_ERR; -} - -static void __tap_dumper_destory(struct bpf_dumper * object) -{ - close(object->backend_tap.tapdev_fd); - return; -} - -static int __tap_dumper_write(struct bpf_dumper * object, const char * pkt_ptr, unsigned int pkt_len) -{ - return write(object->backend_tap.tapdev_fd, pkt_ptr, pkt_len); -} - -static struct __backend_ops __backend_ops_jumper[] = -{ - [BPF_DUMPER_TAP] = - { - .fn_create = __tap_dumper_create, - .fn_destory = __tap_dumper_destory, - .fn_write = __tap_dumper_write - } -}; - -int bpf_dumper_write(struct bpf_dumper * dumper, struct rte_mbuf * __mbufs[], - unsigned int nr_mbufs) -{ - unsigned int nr_write_success = 0; - - for (unsigned int i = 0; i < nr_mbufs; i++) - { - struct rte_mbuf * __mbuf = __mbufs[i]; - - const char * pkt_ptr = rte_pktmbuf_mtod_offset(__mbuf, const char *, dumper->bpf_offset); - unsigned int pkt_len = rte_pktmbuf_data_len(__mbuf) > dumper->bpf_offset ? - rte_pktmbuf_data_len(__mbuf) - dumper->bpf_offset : 0; - - if (pkt_ptr == NULL || pkt_len == 0) goto __dropme; - - if (bpf_filter(dumper->bpf_program.bf_insns, - (const u_char *)pkt_ptr, pkt_len, pkt_len) == 0 || pkt_len > 65535) goto __dropme; - - assert(dumper->backend_ops->fn_write != NULL); - int ret = dumper->backend_ops->fn_write(dumper, pkt_ptr, pkt_len); - if (unlikely(ret < 0)) goto __dropme; - - /* д��TAP�豸�ɹ� */ - rte_atomic64_add(&dumper->stat_write_pkts, 1); - rte_atomic64_add(&dumper->stat_write_pktlen, pkt_len); - nr_write_success++; - continue; - - __dropme: - rte_atomic64_add(&dumper->stat_write_drops, 1); - continue; - } - - return nr_write_success; -} - -void bpf_dumper_destory(struct bpf_dumper * dumper) -{ - if (dumper != NULL) dumper->backend_ops->fn_destory(dumper); - pcap_freecode(&dumper->bpf_program); - rte_free(dumper); - return; -} - -struct bpf_dumper * bpf_dumper_create(const char * appsym, enum bpf_dumper_backend backend, - const char * str_devsym, const char * str_path, const char * str_bpf_expr) -{ - assert(backend >= 0 && backend < BPF_DUMPER_MAX); - - struct bpf_dumper * dumper = rte_zmalloc(NULL, sizeof(struct bpf_dumper), 0); - MR_VERIFY_MALLOC(dumper); - - snprintf(dumper->str_dumper_sym, sizeof(dumper->str_dumper_sym), "%s_%s_%s", - appsym, str_devsym, str_bpf_dumper_backend[backend]); - - dumper->backend = backend; - dumper->backend_ops = &__backend_ops_jumper[backend]; - - /* Backend Init */ - int ret = dumper->backend_ops->fn_create(dumper, appsym, str_devsym, str_path); - if (ret < 0) - { - MR_ERROR("BPF dumper %s backend create failed. ", dumper->str_dumper_sym); - goto __errout; - } - - /* BPF Filter */ - strncpy(dumper->str_bpf_expr, str_bpf_expr, sizeof(dumper->str_bpf_expr)); - - ret = pcap_compile_nopcap(65535, DLT_EN10MB, &dumper->bpf_program, - dumper->str_bpf_expr, 1, PCAP_NETMASK_UNKNOWN); - - if (ret < 0) - { - MR_ERROR("BPF dumper %s bpf filter compile failed, bpf expr is '%s': %s", - dumper->str_dumper_sym, dumper->str_bpf_expr, pcap_strerror(ret)); - goto __errout; - } - - MR_INFO(" "); - MR_INFO("Application %s, BPF Dumper %s:", appsym, dumper->str_dumper_sym); - MR_INFO(" BPF expression : %s", dumper->str_bpf_expr); - MR_INFO(" BPF offset : %d", dumper->bpf_offset); - MR_INFO(" Backend : %s", str_bpf_dumper_backend[dumper->backend]); - MR_INFO(" Dumpfile(or device) : %s", dumper->str_dumpfile); - - return dumper; - -__errout: - if (dumper != NULL) bpf_dumper_destory(dumper); - return NULL; -}
\ No newline at end of file diff --git a/app/src/marsio.c b/app/src/marsio.c index 5a10097..c84ff7e 100644 --- a/app/src/marsio.c +++ b/app/src/marsio.c @@ -13,6 +13,7 @@ #include <arpa/inet.h> #include <ctrlmsg.h> #include <ctrlmsg_define.h> +#include <tap.h> #include <MESA_prof_load.h> #include <unistd.h> #include <signal.h> @@ -82,7 +83,7 @@ void __mrapp_mem_protect_unlock_mempool_cb(struct rte_mempool * mp, void *arg) #if 0 /* 基于ASAN的内存保护模式 * 该保护模式开启后,共享大页内存将标记为不可达,对共享内存的访问将被ASAN探测并记录 */ -static void mrapp_mem_protect_with_asan_init(struct mr_instance * instance) +static void mrapp_mem_protect_with_asan_init(struct ref_mr_instance * instance) { MESA_load_profile_uint_def(instance->app_cfgfile_path, "protect", "enable", &instance->memory_protect_with_asan, 0); @@ -278,52 +279,6 @@ static int mrapp_distributer_init(struct mr_instance * instance) return RT_SUCCESS; } -static int mrapp_bpf_dumper_init(struct mr_instance * instance, struct mr_vdev * vdev) -{ - const char * str_devsym = vdev->devsym; - const char * str_cfgfile = instance->app_cfgfile_path; - - /* 从应用配置文件中读取Dumper的配置。Dumper系调试使用,由用户通过配置文件指定, - 读不到相关选项,则不启用Dumper。 */ - - char str_section[MR_SYMBOL_MAX] = { 0 }; - snprintf(str_section, sizeof(str_section), "bpfdump:%s", str_devsym); - - unsigned int __opt_enable = 0; - MESA_load_profile_uint_def(str_cfgfile, str_section, "enable", &__opt_enable, 0); - - /* 是否启用 */ - if (!__opt_enable) return RT_SUCCESS; - - char __opt_str_bpf_expr[MR_STRING_MAX] = { 0 }; - char __opt_str_dumpfile[MR_STRING_MAX] = { 0 }; - - unsigned int __opt_direction = 0; - unsigned int __opt_backend = 0; - unsigned int __opt_offset = 0; - - MESA_load_profile_string_def(str_cfgfile, str_section, "bpf_expr", __opt_str_bpf_expr, - sizeof(__opt_str_bpf_expr), NULL); - MESA_load_profile_string_def(str_cfgfile, str_section, "dumpfile", __opt_str_dumpfile, - sizeof(__opt_str_dumpfile), NULL); - - MESA_load_profile_uint_def(str_cfgfile, str_section, "bpf_offset", &__opt_offset, 0); - MESA_load_profile_uint_def(str_cfgfile, str_section, "direction", &__opt_direction, 0); - MESA_load_profile_uint_def(str_cfgfile, str_section, "backend", &__opt_backend, 0); - - vdev->bpf_dumper = bpf_dumper_create(instance->appsym, __opt_backend, - str_devsym, __opt_str_dumpfile, __opt_str_bpf_expr); - - if (unlikely(vdev->bpf_dumper == NULL)) - { - MR_ERROR("In application %s, BPF dumper for device %s create failed.", - instance->appsym, str_devsym); return RT_ERR; - } - - return RT_SUCCESS; -} - - static unsigned __table_strip(char *str, unsigned len) { int newlen = len; @@ -541,49 +496,6 @@ err: return RT_ERR; } -/* 邻居子系统,延迟初始化 - 设备初始化时初始化该部分 -*/ -static int mrapp_neigh_device_init(struct mr_instance * instance, struct mr_vdev * vdev) -{ - /* 读静态邻居表项 */ - struct mr_static_neigh_entry * __neigh_entry_iter; - TAILQ_FOREACH(__neigh_entry_iter, &instance->static_neigh_list, next) - { - if (strncmp(vdev->devsym, __neigh_entry_iter->devsym, sizeof(vdev->devsym)) != 0) - continue; - - int ret = neigh_create_or_update(instance->neigh, __neigh_entry_iter->in_addr, - &__neigh_entry_iter->ether_addr, vdev->vdi, 1); - - if (ret < 0) - { - MR_WARNING("Static neighbour %s->%s on device %s failed, Ignore.", - __neigh_entry_iter->str_in_addr, - __neigh_entry_iter->str_ether_addr, - __neigh_entry_iter->devsym); - - continue; - } - - MR_INFO("Static neighbour: %s->%s, device: %s.", __neigh_entry_iter->str_in_addr, - __neigh_entry_iter->str_ether_addr, __neigh_entry_iter->devsym); - } - - /* 设备开启了发送队列后,且启用了ARP功能,发免费ARP报文 */ - if (vdev->nr_txstream == 0 || vdev->en_arp == 0) - { - return RT_SUCCESS; - } - - for (int i = 0; i < instance->nr_gratuitous_arp_send; i++) - { - arp_request_send(vdev->vdi, 0, vdev->vdi->vdev->in_addr); - } - - return RT_SUCCESS; -} - /* 注册应用 */ static int mrapp_app_register(struct mr_instance * instance) { @@ -737,14 +649,6 @@ static int __open_device_response_handler(struct ctrlmsg_handler * ct_hand, __open_device_unposion(mr_vdev->vdi); } - /* 启用了IP地址,处理ARP请求和ICMP请求 */ - struct vdev * vdev = mr_vdev->vdi->vdev; - if (vdev->in_addr.s_addr != 0 && vdev->in_mask.s_addr != 0) - { - mr_vdev->en_arp = 1; - mr_vdev->en_icmp = 1; - } - instance->nr_vdevs++; wake_up: @@ -781,15 +685,14 @@ struct mr_vdev * marsio_open_device(struct mr_instance * instance, { /* 构造虚设备打开请求 */ struct ctrl_msg_vdev_open_request req_msg; - ctrl_msg_header_construct(&req_msg.msg_header, sizeof(req_msg), - CTRL_MSG_TYPE_REQUEST, CTRLMSG_TOPIC_VDEV_OPEN); + ctrl_msg_header_construct(&req_msg.msg_header, sizeof(req_msg), CTRL_MSG_TYPE_REQUEST, CTRLMSG_TOPIC_VDEV_OPEN); snprintf((char *)req_msg.devsym, sizeof(req_msg.devsym), "%s", devsym); req_msg.nr_rxstream = nr_rxstream; req_msg.nr_txstream = nr_txstream; /* 发送请求 */ - ctrlmsg_msg_send(instance->ctrlmsg_handler, NULL, (struct ctrl_msg_header*)&req_msg); + ctrlmsg_msg_send(instance->ctrlmsg_handler, NULL, (struct ctrl_msg_header *)&req_msg); /* 等待应答 */ /* TODO: 抽象出单独的函数 */ @@ -804,19 +707,21 @@ struct mr_vdev * marsio_open_device(struct mr_instance * instance, /* 查询打开结果 */ struct mr_vdev * vdev = marsio_device_lookup(instance, devsym); - if (vdev == NULL) return NULL; + if (vdev == NULL) + { + return NULL; + } MR_INFO(" "); MR_INFO("Application %s, Device %s:", instance->appsym, vdev->devsym); MR_INFO(" Rx Queue Count : %d", vdev->nr_rxstream); MR_INFO(" Tx Queue Count : %d", vdev->nr_txstream); - MR_INFO(" ARP protocol handler : %s", vdev->en_arp ? "Enable" : "Disable"); - MR_INFO(" ICMP protocol handler : %s", vdev->en_arp ? "Enable" : "Disable"); - /* 调试捕包 */ - mrapp_bpf_dumper_init(instance, vdev); - /* 静态邻居表 */ - mrapp_neigh_device_init(instance, vdev); + if (vdev->vdi->vdev->representor_config.enable > 0) + { + tap_representor_init(instance, vdev); + } + return vdev; } diff --git a/app/src/rawio.c b/app/src/rawio.c index 1f4c762..2b32174 100644 --- a/app/src/rawio.c +++ b/app/src/rawio.c @@ -39,7 +39,7 @@ int mrapp_packet_fast_send_burst(struct vdev_instance * vdi, queue_id_t qid, PROTECT_rte_mbuf_poison_bulk(mbufs, nr_mbufs); rte_spinlock_lock(&__f_fast_lock); - int ret = vnode_mirror_enqueue_bulk(vdi->vnode_ltx_prod, qid, mbufs, mbufs_hash, nr_mbufs); + int ret = vnode_mirror_enqueue_bulk(vdi->vnode_ftx_prod, qid, mbufs, mbufs_hash, nr_mbufs); rte_spinlock_unlock(&__f_fast_lock); return ret; } @@ -47,23 +47,26 @@ int mrapp_packet_fast_send_burst(struct vdev_instance * vdi, queue_id_t qid, int marsio_recv_burst(struct mr_vdev * vdev, queue_id_t qid, marsio_buff_t * mbufs[], int nr_mbufs) { assert(qid < vdev->nr_rxstream); - if (unlikely(qid >= vdev->nr_rxstream)) return -EINVAL; + if (unlikely(qid >= vdev->nr_rxstream)) + return -EINVAL; struct vdev_instance * vdi = vdev->vdi; struct rte_mbuf ** __mbufs = (struct rte_mbuf **)mbufs; int ret = vnode_mirror_dequeue_burst(vdi->vnode_rx_cons, qid, __mbufs, nr_mbufs); - if (unlikely(ret == 0)) goto out; + if (unlikely(ret == 0)) + goto out; PROTECT_rte_mbuf_unpoison_bulk(__mbufs, ret); - /* ARP */ - if (vdev->en_arp) arp_entry(vdev->instance, vdi, qid, __mbufs, ret); - /* ICMP */ - if (vdev->en_icmp) icmp_entry(vdev->instance, vdi, qid, __mbufs, ret); + if (vdev->tap_representor != NULL) + { + tap_representor_entry(vdev, qid, mbufs, ret); + } /* 报文合法性检查,防止Double-Free */ - for (int i = 0; i < ret; i++) __rte_mbuf_sanity_check(__mbufs[i], i); + for (int i = 0; i < ret; i++) + __rte_mbuf_sanity_check(__mbufs[i], i); /* 线程运行情况统计,收报情况,对于非注册线程不统计 */ if (thread_info.instance != NULL) @@ -73,13 +76,6 @@ int marsio_recv_burst(struct mr_vdev * vdev, queue_id_t qid, marsio_buff_t * mbu thread_info.instance->stat[tid].packet_recv_length = __packet_total_len(__mbufs, ret); } - /* BPF Dumper */ - if (unlikely(vdev->bpf_dumper != NULL)) - { - bpf_dumper_write(vdev->bpf_dumper, __mbufs, ret); - - } - PROTECT_rte_mbuf_poison_bulk(__mbufs, ret); out: return ret; @@ -143,10 +139,6 @@ int marsio_send_burst(struct mr_sendpath * sendpath, queue_id_t sid, marsio_buff hash_t hash[MR_BURST_MAX]; for (int i = 0; i < nr_mbufs; i++) hash[i] = __mbufs[i]->hash.usr; - /* BPF Dumper */ - if (unlikely(sendpath->vdev->bpf_dumper != NULL)) - bpf_dumper_write(sendpath->vdev->bpf_dumper, __mbufs, nr_mbufs); - /* 线程运行情况统计 */ if (thread_info.instance != NULL) { @@ -204,10 +196,6 @@ int marsio_send_burst_with_options(struct mr_sendpath * sendpath, queue_id_t sid hash_t hash[MR_BURST_MAX]; for (int i = 0; i < nr_mbufs; i++) hash[i] = __mbufs[i]->hash.usr; - /* BPF Dumper */ - if (unlikely(sendpath->vdev->bpf_dumper != NULL)) - bpf_dumper_write(sendpath->vdev->bpf_dumper, __mbufs, nr_mbufs); - /* 线程运行情况统计 */ if (thread_info.instance != NULL) { @@ -219,12 +207,7 @@ int marsio_send_burst_with_options(struct mr_sendpath * sendpath, queue_id_t sid /* 提交到队列前,应用不再具有访问权限,对mbuf进行保护 */ PROTECT_rte_mbuf_poison_bulk(__mbufs, nr_mbufs); - if (options & MARSIO_SEND_OPT_FAST) - { - vnode_mirror_enqueue_bulk(sendpath->target_vdi->vnode_ftx_prod, sid, - __mbufs, hash, nr_mbufs); - } - else if(options & MARSIO_SEND_OPT_CTRL) + if(options & MARSIO_SEND_OPT_CTRL) { vnode_mirror_enqueue_bulk(sendpath->target_vdi->vnode_ltx_prod, sid, __mbufs, hash, nr_mbufs); @@ -248,7 +231,7 @@ err: for (int i = 0; i < nr_mbufs; i++) rte_pktmbuf_free(mbufs[i]); return RT_ERR; -} + } void marsio_send_burst_flush(struct mr_sendpath * sendpath, queue_id_t sid) { diff --git a/app/src/tap.c b/app/src/tap.c new file mode 100644 index 0000000..bb0c131 --- /dev/null +++ b/app/src/tap.c @@ -0,0 +1,398 @@ +#include <common.h> +#include <pcap/pcap.h> +#include <rte_atomic.h> +#include <rte_epoll.h> +#include <rte_ether.h> +#include <rte_malloc.h> +#include <rte_mbuf.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <sys/epoll.h> + +#include <assert.h> +#include <fcntl.h> +#include <linux/if.h> +#include <linux/if_tun.h> +#include <marsio.h> +#include <mrapp.h> +#include <mrb_define.h> +#include <netinet/in.h> +#include <sys/ioctl.h> +#include <tap.h> +#include <unistd.h> + +static int tap_ioctl(int fd, unsigned long request, struct ifreq * ifr, int set) +{ + short req_flags = ifr->ifr_flags; + switch (request) + { + case SIOCSIFFLAGS: + if (ioctl(fd, SIOCGIFFLAGS, ifr) < 0) + goto error; + if (set) + ifr->ifr_flags |= req_flags; + else + ifr->ifr_flags &= ~req_flags; + break; + + case SIOCGIFFLAGS: + case SIOCGIFHWADDR: + case SIOCSIFHWADDR: + case SIOCSIFMTU: + break; + default: + return -EINVAL; + } + + if (ioctl(fd, request, ifr) < 0) + goto error; + + return 0; + +error: + MR_ERROR("%s: ioctl(%lu) failed with error: %s", ifr->ifr_name, request, strerror(errno)); + return -errno; +} + +struct tap_device +{ + struct mr_instance * ref_mr_instance; + struct mr_vdev * ref_vdev; + + int tap_fd; + struct rte_epoll_event epoll_event; + + rte_atomic64_t stat_write_pkts; + rte_atomic64_t stat_write_pktlen; + rte_atomic64_t stat_write_drops; + + rte_atomic64_t stat_read_pkts; + rte_atomic64_t stat_read_pktlen; + rte_atomic64_t stat_read_drops; +}; + +static int tap_resp_dev_filter(struct vdev * vdev_desc, struct rte_mbuf * mbuf) +{ + struct mrb_metadata * mrb_meta = (struct mrb_metadata *)mrbuf_cz_data(mbuf, MR_NODE_CTRLZONE_ID); + struct pkt_parser_result * parser_result = &mrb_meta->pkt_parser_result; + const struct rte_ether_hdr * ether_hdr = rte_pktmbuf_mtod(mbuf, const struct rte_ether_hdr *); + + /* not local's mac addr or broadcast packet, ignore it */ + if (rte_is_broadcast_ether_addr(ðer_hdr->dst_addr) == 0 && + rte_is_same_ether_addr(ðer_hdr->dst_addr, &vdev_desc->ether_addr) == 0) + { + return 0; + } + + /* for arp, rarp and lldp, only check the dest's mac address */ + if (ether_hdr->ether_type == htons(RTE_ETHER_TYPE_ARP) && vdev_desc->representor_config.redirect_local_arp > 0) + { + return 1; + } + + if (ether_hdr->ether_type == htons(RTE_ETHER_TYPE_RARP) && vdev_desc->representor_config.redirect_local_rarp > 0) + { + return 1; + } + + if (ether_hdr->ether_type == htons(RTE_ETHER_TYPE_LLDP) && vdev_desc->representor_config.redirect_local_lldp > 0) + { + return 1; + } + + /* allow layers are ETHER->IPv4->TCP, ETHER->IPv4->UDP, the ipv6 is not supported for now. */ + static const uint16_t exp_ipv4_tcp[] = { + LAYER_TYPE_ID_ETHER, + LAYER_TYPE_ID_IPV4, + LAYER_TYPE_ID_TCP, + }; + + static const uint16_t exp_ipv4_udp[] = { + LAYER_TYPE_ID_ETHER, + LAYER_TYPE_ID_IPV4, + LAYER_TYPE_ID_UDP, + }; + static const uint16_t exp_ipv4_others[] = { + LAYER_TYPE_ID_ETHER, + LAYER_TYPE_ID_IPV4, + }; + + if (complex_layer_type_expect(parser_result, exp_ipv4_tcp, RTE_DIM(exp_ipv4_tcp)) == 0 || + complex_layer_type_expect(parser_result, exp_ipv4_udp, RTE_DIM(exp_ipv4_udp)) == 0 || + complex_layer_type_expect(parser_result, exp_ipv4_others, RTE_DIM(exp_ipv4_others)) == 0) + { + return 1; + } + else + { + return 0; + } + + assert(false); +} + +static int tap_device_tx(struct tap_device * tap_dev, struct rte_mbuf * mbuf) +{ + const char * pkt_ptr = rte_pktmbuf_mtod(mbuf, const char *); + unsigned int pkt_len = rte_pktmbuf_data_len(mbuf); + + if (pkt_ptr == NULL || pkt_len == 0) + { + rte_atomic64_add(&tap_dev->stat_write_drops, 1); + goto err; + } + + ssize_t len = write(tap_dev->tap_fd, pkt_ptr, pkt_len); + if (unlikely(len < 0)) + { + rte_atomic64_add(&tap_dev->stat_write_drops, 1); + goto err; + } + + rte_atomic64_add(&tap_dev->stat_write_pkts, 1); + rte_atomic64_add(&tap_dev->stat_write_pktlen, pkt_len); + + return 0; + +err: + return -1; +} + +static int tap_device_rx(struct tap_device * tap_dev, unsigned int queue_id, marsio_buff_t * buffs[], + unsigned int nr_buffs) +{ + char buff[ETH_MAX_MTU]; + ssize_t sz_buff = read(tap_dev->tap_fd, buff, sizeof(buff)); + if (sz_buff == -1 && (errno == EWOULDBLOCK || errno == EAGAIN)) + { + return 0; + } + else if (unlikely(sz_buff < 0)) + { + return -1; + } + + int ret = marsio_buff_malloc_global(tap_dev->ref_mr_instance, buffs, 1, MARSIO_SOCKET_ID_ANY, MARSIO_LCORE_ID_ANY); + if (unlikely(ret < 0)) + { + return -2; + } + + struct rte_mbuf * mbuf = (struct rte_mbuf *)buffs[0]; + assert(mbuf != NULL); + + char * mbuf_data_ptr = rte_pktmbuf_append(mbuf, sz_buff); + if (unlikely(mbuf_data_ptr == NULL)) + { + marsio_buff_free(tap_dev->ref_mr_instance, buffs, 1, MARSIO_SOCKET_ID_ANY, MARSIO_LCORE_ID_ANY); + return -3; + } + + rte_memcpy(mbuf_data_ptr, buff, sz_buff); + return 1; +} + +static struct tap_device * tap_device_create(struct mr_instance * mr_instance, const char * name, struct rte_ether_addr * hwaddr) +{ + struct tap_device * tap_dev_inst = rte_zmalloc(NULL, sizeof(struct tap_device), 0); + MR_VERIFY_MALLOC(tap_dev_inst); + + tap_dev_inst->ref_mr_instance = mr_instance; + int ioctl_sock = -1; + int tap_fd = -1; + + tap_fd = open("/dev/net/tun", O_RDWR); + if (tap_fd < 0) + { + MR_ERROR("open /dev/net/tun failed: %s", strerror(errno)); + goto errout; + } + + ioctl_sock = socket(AF_INET, SOCK_DGRAM, 0); + if (ioctl_sock < 0) + { + MR_ERROR("unable to get a socket for management: %s", strerror(errno)); + goto errout; + } + + struct ifreq ifr = {.ifr_flags = IFF_TAP | IFF_NO_PI}; + snprintf(ifr.ifr_name, IFNAMSIZ, "%s", name); + + int ret = ioctl(tap_fd, TUNSETIFF, (void *)&ifr); + if (ret < 0) + { + MR_ERROR("tap %s tap_fd(tap_fd = %d) ioctl failed: %s", name, tap_fd, strerror(errno)); + goto errout; + } + + /* set the tap device in up and running status */ + struct ifreq link_up_ifr = {.ifr_flags = IFF_UP | IFF_RUNNING}; + snprintf(link_up_ifr.ifr_name, IFNAMSIZ, "%s", name); + + ret = tap_ioctl(ioctl_sock, SIOCSIFFLAGS, &link_up_ifr, 1); + if (ret < 0) + { + MR_ERROR("tap device %s link up failed.", name); + goto errout; + } + + /* clear the ifr, and use it as mac addr setup */ + ret = tap_ioctl(ioctl_sock, SIOCGIFHWADDR, &ifr, 0); + if (ret < 0) + { + MR_ERROR("tap device %s mac address get failed.", name); + goto errout; + } + + ifr.ifr_hwaddr.sa_family = AF_LOCAL; + rte_memcpy(ifr.ifr_hwaddr.sa_data, hwaddr, RTE_ETHER_ADDR_LEN); + ret = tap_ioctl(ioctl_sock, SIOCSIFHWADDR, &ifr, 1); + if (ret < 0) + { + MR_ERROR("tap device %s mac address set failed.", name); + goto errout; + } + + MR_INFO("tap device %s for created.", name); + tap_dev_inst->tap_fd = tap_fd; + + close(ioctl_sock); + ioctl_sock = -1; + + /* first time called, need to create evfd */ + if (mr_instance->tap_resp_epfd == 0) + { + mr_instance->tap_resp_epfd = epoll_create1(EPOLL_CLOEXEC); + } + + /* prepare add the device handle to instance, and join to epoll fd */ + struct rte_epoll_event epoll_event = { + .epdata.event = EPOLLIN, + .epdata.data = (void *)tap_dev_inst, + }; + + tap_dev_inst->epoll_event = epoll_event; + ret = rte_epoll_ctl(mr_instance->tap_resp_epfd, EPOLL_CTL_ADD, tap_fd, &tap_dev_inst->epoll_event); + if (ret < 0) + { + MR_ERROR("failed at add tap_fd %d to epoll_fd for tap representor %s.", tap_fd, name); + goto errout; + } + + /* add to the tap resp sets */ + mr_instance->tap_resp_devices[mr_instance->nr_tap_resp_devices] = tap_dev_inst; + mr_instance->nr_tap_resp_devices++; + return tap_dev_inst; + +errout: + if (tap_dev_inst != NULL) + { + rte_free(tap_dev_inst); + } + + if (ioctl_sock >= 0) + { + close(ioctl_sock); + } + + if (tap_fd >= 0) + { + close(tap_fd); + } + + return NULL; +} + +static void * tap_representor_poll_thread_entry(void * arg) +{ +#define TAP_RESP_EVENTS_MAX 8 + struct mr_instance * mr_instance = (struct mr_instance *)arg; + struct rte_epoll_event epoll_events[TAP_RESP_EVENTS_MAX] = {}; + int ret = 0; + + while (1) + { + int n = rte_epoll_wait(mr_instance->tap_resp_epfd, epoll_events, TAP_RESP_EVENTS_MAX, -1); + if (unlikely(n < 0)) + { + MR_ERROR("rte_epoll_wait returned error %d, tap_resp poll thread terminated.", errno); + goto errout; + } + + /* handle the read event, read the packet then redirect to shmdev queues */ + for (int i = 0; i < n; i++) + { + struct tap_device * tap_dev = (struct tap_device *)(epoll_events[i].epdata.data); + struct mr_vdev * vdev = tap_dev->ref_vdev; + + marsio_buff_t * buff[1]; + ret = tap_device_rx(tap_dev, 0, buff, RTE_DIM(buff)); + + if (ret <= 0) + { + continue; + } + + ret = mrapp_packet_fast_send_burst(vdev->vdi, 0, (struct rte_mbuf **)buff, RTE_DIM(buff)); + if (unlikely(ret < 0)) + { + marsio_buff_free(mr_instance, buff, RTE_DIM(buff), MARSIO_SOCKET_ID_ANY, MARSIO_LCORE_ID_ANY); + } + } + } + +errout: + return (void *)NULL; +} + +int tap_representor_entry(struct mr_vdev * vdev, unsigned int qid, marsio_buff_t * buffs[], unsigned int nr_buffs) +{ + static rte_spinlock_t tap_dev_write_lock = {0}; + + for (unsigned int i = 0; i < nr_buffs; i++) + { + struct rte_mbuf * mbuf = (struct rte_mbuf *)buffs[i]; + if (tap_resp_dev_filter(vdev->vdi->vdev, mbuf) <= 0) + { + continue; + } + + /* spinlock at here */ + rte_spinlock_lock(&tap_dev_write_lock); + tap_device_tx(vdev->tap_representor, mbuf); + rte_spinlock_unlock(&tap_dev_write_lock); + } + + return 0; +} + +int tap_representor_init(struct mr_instance * mr_instance, struct mr_vdev * vdev) +{ + struct vdev * _vdev = vdev->vdi->vdev; + struct tap_device * tap_dev = tap_device_create(mr_instance, vdev->devsym, &_vdev->ether_addr); + if (unlikely(tap_dev == NULL)) + { + MR_ERROR("failed at create tap representor for %s, ignore it.", vdev->devsym); + return -1; + } + + tap_dev->ref_mr_instance = mr_instance; + tap_dev->ref_vdev = vdev; + vdev->tap_representor = tap_dev; + + /* create the tap representor poll thread at first time. + * this thread is use for recv the packet from the tap, and forward to the rings */ + if (mr_instance->pid_tap_resp_poll == 0) + { + int ret = pthread_create(&mr_instance->pid_tap_resp_poll, NULL, tap_representor_poll_thread_entry, + (void *)mr_instance); + if (unlikely(ret < 0)) + { + MR_ERROR("failed at creating thread for tap representor poll routine: %s", strerror(errno)); + return -2; + } + } + + MR_INFO("tap representor for %s created. ", vdev->devsym); + return 0; +}
\ No newline at end of file diff --git a/include/internal/vdev_define.h b/include/internal/vdev_define.h index e85d30e..6579ef6 100644 --- a/include/internal/vdev_define.h +++ b/include/internal/vdev_define.h @@ -80,6 +80,17 @@ struct vdev /* 网卡状态 */ uint8_t link_status; + + /* representor config */ + struct + { + unsigned int enable; + unsigned int redirect_local_arp; + unsigned int redirect_local_rarp; + unsigned int redirect_local_lldp; + unsigned int redirect_local_ipv4; + unsigned int redirect_local_ipv6; + } representor_config; }; /* 虚拟设备句柄,应用使用 */ diff --git a/service/include/sc_devmgr.h b/service/include/sc_devmgr.h index 6f3ceca..f840234 100644 --- a/service/include/sc_devmgr.h +++ b/service/include/sc_devmgr.h @@ -41,7 +41,7 @@ enum mr_dev_role MR_DEV_ROLE_TAP_INTERFACE, MR_DEV_ROLE_ENDPOINT_INTERFACE, MR_DEV_ROLE_NF_INTERFACE, - MR_DEV_ROLE_SYSTEM_INTERFACE, + MR_DEV_ROLE_KERNEL_RESP_INTERFACE, }; enum mr_dev_type @@ -50,6 +50,24 @@ enum mr_dev_type MR_DEV_TYPE_BOND, }; +enum representor_ns +{ + REPRESENTOR_NS_SERVICE, + REPRESENTOR_NS_APP, +}; + +struct representor_config +{ + char str_representor_symbol[MR_SYMBOL_MAX]; + enum representor_ns ns_type; + + unsigned int redirect_local_arp; + unsigned int redirect_local_rarp; + unsigned int redirect_local_lldp; + unsigned int redirect_local_ipv4; + unsigned int redirect_local_ipv6; +}; + struct dpdk_dev_stats { struct @@ -170,6 +188,15 @@ struct mr_dev_desc unsigned int is_bond_slave; struct mr_dev_desc * bond_master; + struct representor_config * representor_config; + + /* only available at service namespace representor */ + struct + { + struct mr_dev_desc * device_representor; + struct mr_dev_desc * represented_device; + }; + /* for endpoint */ struct { diff --git a/service/src/core.c b/service/src/core.c index f0af720..48f574d 100644 --- a/service/src/core.c +++ b/service/src/core.c @@ -289,6 +289,7 @@ int sc_dataplane_thread(void * arg) return 0; } +#if 0 static const char * __str_disable_or_enable_ptr(void * ptr) { if (ptr == NULL) @@ -296,6 +297,7 @@ static const char * __str_disable_or_enable_ptr(void * ptr) else return "Enable"; } +#endif static const char * __str_disable_or_enable_uint(unsigned int value) { diff --git a/service/src/devmgr.c b/service/src/devmgr.c index 6ebc0bd..d2b3258 100644 --- a/service/src/devmgr.c +++ b/service/src/devmgr.c @@ -10,7 +10,6 @@ #include <rte_bus_pci.h> #include <rte_config.h> #include <rte_debug.h> -#include <rte_eth_bond.h> #include <rte_ethdev.h> #include <rte_ether.h> #include <rte_flow.h> @@ -21,6 +20,7 @@ #include <rte_node_eth_api.h> #include <rte_pci.h> #include <rte_string_fns.h> +#include <rte_eth_bond.h> #include <MESA_prof_load.h> #include <cJSON.h> @@ -120,7 +120,7 @@ static int mr_dev_desc_status_print(struct mr_dev_desc * dev_desc) [MR_DEV_ROLE_TAP_INTERFACE] = "tap", [MR_DEV_ROLE_ENDPOINT_INTERFACE] = "endpoint", [MR_DEV_ROLE_NF_INTERFACE] = "network function", - [MR_DEV_ROLE_SYSTEM_INTERFACE] = "system", + [MR_DEV_ROLE_KERNEL_RESP_INTERFACE] = "kernel resp", }; static const char * dev_type_as_str[] = { @@ -171,6 +171,80 @@ struct mr_dev_desc * mr_dev_desc_create(struct devmgr_main * devmgr_main, const return dev_desc; } +static int kernel_resp_crosslink(struct devmgr_main * devmgr_main) +{ + unsigned int dev_iterator = 0; + struct mr_dev_desc * dev_desc_iter = NULL; + + while ((dev_desc_iter = mr_dev_desc_iterate(devmgr_main, &dev_iterator)) != NULL) + { + /* this device has no representor */ + if (dev_desc_iter->representor_config == NULL) + { + continue; + } + + struct representor_config * resp_config = dev_desc_iter->representor_config; + + /* the device has a representor in app namespace, + * this representor will create by the app library */ + if (resp_config->ns_type != REPRESENTOR_NS_SERVICE) + { + continue; + } + + struct mr_dev_desc * representor = mr_dev_desc_lookup(devmgr_main, resp_config->str_representor_symbol); + if (representor == NULL) + { + MR_WARNING("The representor device %s for device %s is not existed.", resp_config->str_representor_symbol, + dev_desc_iter->symbol); + continue; + } + + assert(representor->represented_device == NULL); + assert(dev_desc_iter->device_representor == NULL); + + dev_desc_iter->device_representor = representor; + representor->represented_device = dev_desc_iter; + } + + return 0; +} + +static struct representor_config * kernel_resp_config_load(struct devmgr_main * devmgr_main, const char * devsym) +{ + struct sc_main * sc = devmgr_main->sc; + const char * cfg = sc->local_cfgfile; + + char str_section[MR_SYMBOL_MAX]; + snprintf(str_section, sizeof(str_section), "device:%s", devsym); + + unsigned int en_representor = 0; + MESA_load_profile_uint_def(cfg, str_section, "en_representor", &en_representor, 0); + + if (en_representor == 0) + { + return NULL; + } + + struct representor_config * resp_cfg = ZMALLOC(sizeof(struct representor_config)); + MR_VERIFY_MALLOC(resp_cfg); + + /* for default, redirect all kinds of local packets except tunnels */ + MESA_load_profile_uint_def(cfg, str_section, "representor_ns", &resp_cfg->ns_type, 0); + MESA_load_profile_string_def(cfg, str_section, "representor_dev", resp_cfg->str_representor_symbol, + sizeof(resp_cfg->str_representor_symbol), ""); + + MESA_load_profile_uint_def(cfg, str_section, "redirect_local_arp", &resp_cfg->redirect_local_arp, 1); + MESA_load_profile_uint_def(cfg, str_section, "redirect_local_rarp", &resp_cfg->redirect_local_ipv4, 1); + MESA_load_profile_uint_def(cfg, str_section, "redirect_local_lldp", &resp_cfg->redirect_local_lldp, 1); + MESA_load_profile_uint_def(cfg, str_section, "redirect_local_ipv4", &resp_cfg->redirect_local_ipv4, 1); + MESA_load_profile_uint_def(cfg, str_section, "redirect_local_ipv6", &resp_cfg->redirect_local_ipv6, 1); + + return resp_cfg; +} + + int mr_dev_desc_config_load(struct devmgr_main * devmgr_main, struct mr_dev_desc * dev_desc) { const char * cfgfile = devmgr_main->sc->local_cfgfile; @@ -180,6 +254,9 @@ int mr_dev_desc_config_load(struct devmgr_main * devmgr_main, struct mr_dev_desc MESA_load_profile_uint_def(cfgfile, str_section, "driver", &dev_desc->drv_type, MR_DEV_DRV_TYPE_DPDK_PCI); MESA_load_profile_uint_def(cfgfile, str_section, "role", &dev_desc->role_type, MR_DEV_ROLE_NONE); + /* for the kernel resp */ + dev_desc->representor_config = kernel_resp_config_load(devmgr_main, dev_desc->symbol); + /* Get ether */ char str_ether[MR_STRING_MAX] = {}; if (MESA_load_profile_string_nodef(cfgfile, str_section, "ether", str_ether, sizeof(str_ether)) >= 0) @@ -426,12 +503,30 @@ int shmdev_setup_one_device(struct devmgr_main * devmgr_main, const char * devsy return RT_ERR; } - struct vdev * shmdev_handle = vdev_lookup(sc->vdev_main, devsym); - MR_VERIFY_2(shmdev_handle != NULL, "vdev_lookup() returns NULL"); + struct vdev * vdev_desc = vdev_lookup(sc->vdev_main, devsym); + MR_VERIFY_2(vdev_desc != NULL, "vdev_lookup() returns NULL"); dev_desc->port_id = devmgr_main->shmdev_port_id_counter++; dev_desc->drv_type = MR_DEV_DRV_TYPE_SHMDEV; - dev_desc->shm_dev_desc = shmdev_handle; + dev_desc->shm_dev_desc = vdev_desc; + + /* inherit configuration from dev_desc */ + struct representor_config * resp_config = dev_desc->representor_config; + if(resp_config != NULL && resp_config->ns_type == REPRESENTOR_NS_APP) + { + vdev_desc->representor_config.enable = 1; + vdev_desc->representor_config.redirect_local_arp = resp_config->redirect_local_arp; + vdev_desc->representor_config.redirect_local_lldp = resp_config->redirect_local_lldp; + vdev_desc->representor_config.redirect_local_rarp = resp_config->redirect_local_rarp; + vdev_desc->representor_config.redirect_local_ipv4 = resp_config->redirect_local_ipv4; + vdev_desc->representor_config.redirect_local_ipv6 = resp_config->redirect_local_ipv6; + } + + /* copy the ether addr, inet addr to vdev desc */ + vdev_desc->ether_addr = dev_desc->eth_addr; + vdev_desc->in_addr = dev_desc->in_addr; + vdev_desc->in_mask = dev_desc->in_mask; + vdev_desc->in_gateway = dev_desc->in_gateway; /* add to index array */ devmgr_main->dev_descs[dev_desc->port_id] = dev_desc; @@ -1823,6 +1918,9 @@ int devmgr_init(struct devmgr_main * devmgr_main) return RT_ERR; } + /* kernel resp cross link */ + kernel_resp_crosslink(devmgr_main); + /* 启动物理设备状态更新线程 */ pthread_t _pid_link_update; ret = pthread_create(&_pid_link_update, NULL, dpdk_dev_link_state_update_thread, (void *)devmgr_main); diff --git a/service/src/node.c b/service/src/node.c index b8f1ddc..dba70be 100644 --- a/service/src/node.c +++ b/service/src/node.c @@ -22,6 +22,7 @@ #include <sc_common.h> #include <sc_node.h> +#if 0 static int ethdev_rx_tx_nodes_setup(struct sc_main * sc, struct node_manager_main * ctx) { /* For each physical devices, the dpdk's library will create: @@ -33,7 +34,7 @@ static int ethdev_rx_tx_nodes_setup(struct sc_main * sc, struct node_manager_mai struct dpdk_dev * phydev_iter = NULL; unsigned int nr_workers = sc->nr_io_thread; -#if 0 + while (phydev_iterate(sc->phydev_main, &phydev_iter) >= 0) { struct rte_node_ethdev_config * ethdev_config = &ctx->node_ethdev_config[ctx->nr_node_ethdev_config]; @@ -84,8 +85,8 @@ static int ethdev_rx_tx_nodes_setup(struct sc_main * sc, struct node_manager_mai error: return RT_ERR; -#endif } +#endif static int graph_init(struct sc_main * sc, struct node_manager_main * ctx) { diff --git a/service/src/node_eth_ingress.c b/service/src/node_eth_ingress.c index c553ecf..05b222d 100644 --- a/service/src/node_eth_ingress.c +++ b/service/src/node_eth_ingress.c @@ -32,6 +32,7 @@ enum ETH_INGRESS_NEXT_FORWARDER, ETH_INGRESS_NEXT_HEALTH_CHECK, ETH_INGRESS_NEXT_PKT_DROP, + ETH_INGRESS_NEXT_ETH_EGRESS, ETH_INGRESS_NEXT_MAX, }; @@ -65,9 +66,89 @@ int node_eth_ingress_init(struct node_manager_main * node_mgr_main) return RT_SUCCESS; } -static int endpoint_pkt_filter(struct mr_dev_desc * dev_desc, unsigned int graph_id, struct rte_mbuf * mbuf, - struct pkt_parser_result * parser_result) +static int kernel_resp_dev_filter(struct mr_dev_desc * dev_desc, struct rte_mbuf * mbuf, struct pkt_parser_result * parser_result) { + struct representor_config * resp_cfg = dev_desc->representor_config; + const struct rte_ether_hdr * ether_hdr = rte_pktmbuf_mtod(mbuf, const struct rte_ether_hdr *); + + /* not local's mac addr or broadcast packet, ignore it */ + if (rte_is_broadcast_ether_addr(ðer_hdr->dst_addr) == 0 && + rte_is_same_ether_addr(ðer_hdr->dst_addr, &dev_desc->eth_addr) == 0) + { + return 0; + } + + /* for arp, rarp and lldp, only check the dest's mac address */ + if (ether_hdr->ether_type == htons(RTE_ETHER_TYPE_ARP) && resp_cfg->redirect_local_arp > 0) + { + return 1; + } + + if (ether_hdr->ether_type == htons(RTE_ETHER_TYPE_RARP) && resp_cfg->redirect_local_rarp > 0) + { + return 1; + } + + if (ether_hdr->ether_type == htons(RTE_ETHER_TYPE_LLDP) && resp_cfg->redirect_local_lldp > 0) + { + return 1; + } + + /* allow layers are ETHER->IPv4->TCP, ETHER->IPv4->UDP, the ipv6 is not supported for now. */ + static const uint16_t exp_ipv4_tcp[] = { + LAYER_TYPE_ID_ETHER, + LAYER_TYPE_ID_IPV4, + LAYER_TYPE_ID_TCP, + }; + + static const uint16_t exp_ipv4_udp[] = { + LAYER_TYPE_ID_ETHER, + LAYER_TYPE_ID_IPV4, + LAYER_TYPE_ID_UDP, + }; + static const uint16_t exp_ipv4_others[] = { + LAYER_TYPE_ID_ETHER, + LAYER_TYPE_ID_IPV4, + }; + + if (complex_layer_type_expect(parser_result, exp_ipv4_tcp, RTE_DIM(exp_ipv4_tcp)) == 0 || + complex_layer_type_expect(parser_result, exp_ipv4_udp, RTE_DIM(exp_ipv4_udp)) == 0 || + complex_layer_type_expect(parser_result, exp_ipv4_others, RTE_DIM(exp_ipv4_others)) == 0) + { + return 1; + } + else + { + return 0; + } + + assert(false); +} + +static int endpoint_dev_packet_handler(struct mr_dev_desc * dev_desc, unsigned int graph_id, struct rte_mbuf * mbuf, + struct mrb_metadata * mrb_meta) +{ + struct pkt_parser_result * parser_result = &mrb_meta->pkt_parser_result; + + /* this device has a kernel resp device, redirect all arp, icmp and bfd packets to resp device */ + if (dev_desc->device_representor != NULL) + { + /* should go to kernel resp device */ + if (kernel_resp_dev_filter(dev_desc->device_representor, mbuf, parser_result) > 0) + { + /* goto the kernel resp device directly */ + mrb_meta->port_egress = dev_desc->device_representor->port_id; + return ETH_INGRESS_NEXT_ETH_EGRESS; + } + } + + /* as a resp device, all packet from resp device should go to represented device directly */ + else if (dev_desc->represented_device != NULL) + { + mrb_meta->port_egress = dev_desc->represented_device->port_id; + return ETH_INGRESS_NEXT_ETH_EGRESS; + } + /* check this packet is to local programs or not, * packet should be like: Eth, IPv4, TCP/UDP, et al. */ if (parser_result->nr_layers < 3) @@ -140,12 +221,10 @@ static __rte_always_inline uint16_t eth_ingress_node_process(struct rte_graph * pkts += 1; n_left_from -= 1; - struct mrb_metadata * private_ctrlzone = (struct mrb_metadata *)mrbuf_cz_data(mbuf, MR_NODE_CTRLZONE_ID); - struct pkt_parser_result * pkt_parser_result = &private_ctrlzone->pkt_parser_result; - + struct mrb_metadata * mrb_meta = (struct mrb_metadata *)mrbuf_cz_data(mbuf, MR_NODE_CTRLZONE_ID); ETH_INGRESS_STAT_ADD(eth_ingress_main, graph_id, total_pkts, 1); - port_id_t port_ingress = private_ctrlzone->port_ingress; + port_id_t port_ingress = mrb_meta->port_ingress; struct mr_dev_desc * dev_desc = mr_dev_desc_lookup_by_port_id(sc->devmgr_main, port_ingress); assert(dev_desc != NULL); @@ -164,9 +243,10 @@ static __rte_always_inline uint16_t eth_ingress_node_process(struct rte_graph * next_node_index = ETH_INGRESS_NEXT_VWIRE_INGRESS; goto node_enqueue; } - else if (dev_desc->role_type == MR_DEV_ROLE_ENDPOINT_INTERFACE) + else if (dev_desc->role_type == MR_DEV_ROLE_ENDPOINT_INTERFACE || + dev_desc->role_type == MR_DEV_ROLE_KERNEL_RESP_INTERFACE) { - next_node_index = endpoint_pkt_filter(dev_desc, graph_id, mbuf, pkt_parser_result); + next_node_index = endpoint_dev_packet_handler(dev_desc, graph_id, mbuf, mrb_meta); goto node_enqueue; } else if (dev_desc->role_type == MR_DEV_ROLE_NF_INTERFACE) @@ -226,6 +306,7 @@ static struct rte_node_register eth_ingress_node_base = { [ETH_INGRESS_NEXT_BRIDGE] = "bridge", [ETH_INGRESS_NEXT_BFD] = "bfd", [ETH_INGRESS_NEXT_VWIRE_INGRESS] = "vwire_ingress", + [ETH_INGRESS_NEXT_ETH_EGRESS] = "eth_egress", [ETH_INGRESS_NEXT_FORWARDER] = "forwarder", [ETH_INGRESS_NEXT_HEALTH_CHECK] = "health_check_deal_answer", [ETH_INGRESS_NEXT_PKT_DROP] = "pkt_drop", diff --git a/service/src/node_phydev.c b/service/src/node_phydev.c index ea825e5..fd5c1a3 100644 --- a/service/src/node_phydev.c +++ b/service/src/node_phydev.c @@ -28,6 +28,8 @@ static struct dev_node_main * p_dev_node_main = &st_dev_node_main; static_assert(sizeof(struct dev_node_ctx) <= RTE_NODE_CTX_SZ, "dev_node_ctx size must smaller than RTE_NODE_CTX_SZ"); + + static __rte_always_inline uint16_t dpdk_dev_rx_node_process(struct rte_graph * graph, struct rte_node * node, void ** objs, uint16_t cnt) { @@ -54,11 +56,11 @@ static __rte_always_inline uint16_t dpdk_dev_rx_node_process(struct rte_graph * for (unsigned int i = 0; i < node->idx; i++) { struct rte_mbuf * mbuf = (struct rte_mbuf *)node->objs[i]; - struct mrb_metadata * private_ctrlzone = mrbuf_cz_data(mbuf, MR_NODE_CTRLZONE_ID); - memset(private_ctrlzone, 0, sizeof(struct mrb_metadata)); + struct mrb_metadata * mrb_meta = mrbuf_cz_data(mbuf, MR_NODE_CTRLZONE_ID); + memset(mrb_meta, 0, sizeof(struct mrb_metadata)); - private_ctrlzone->port_ingress = ctx->dev_desc->port_id; - private_ctrlzone->port_egress = UINT16_MAX; + mrb_meta->port_ingress = ctx->dev_desc->port_id; + mrb_meta->port_egress = UINT16_MAX; /* Parser Pkt */ struct pkt_parser pkt_parser; @@ -66,7 +68,7 @@ static __rte_always_inline uint16_t dpdk_dev_rx_node_process(struct rte_graph * complex_parser_ether(&pkt_parser, rte_pktmbuf_mtod(mbuf, const char *)); /* copy the result to private zone */ - private_ctrlzone->pkt_parser_result = pkt_parser.result; + mrb_meta->pkt_parser_result = pkt_parser.result; } /* move to next node */ diff --git a/tools/classifier_rule_test/CMakeLists.txt b/tools/classifier_rule_test/CMakeLists.txt index 4f41670..5485a81 100644 --- a/tools/classifier_rule_test/CMakeLists.txt +++ b/tools/classifier_rule_test/CMakeLists.txt @@ -6,4 +6,4 @@ include_directories(${DPDK_INCLUDE_DIR}) add_executable(classifier_rule_test classifier_rule_test.c) target_link_libraries(classifier_rule_test MESA_prof_load_static infra m MESA_htable_static ${SYSTEMD_LIBRARIES}) -install(PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/classifier_rule_test DESTINATION bin COMPONENT Program)
\ No newline at end of file +#install(PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/classifier_rule_test DESTINATION bin COMPONENT Program)
\ No newline at end of file diff --git a/tools/lb_rule_test/CMakeLists.txt b/tools/lb_rule_test/CMakeLists.txt index f24431f..fa24fe5 100644 --- a/tools/lb_rule_test/CMakeLists.txt +++ b/tools/lb_rule_test/CMakeLists.txt @@ -6,4 +6,4 @@ include_directories(${DPDK_INCLUDE_DIR}) add_executable(lb_rule_test lb_rule_test.c) target_link_libraries(lb_rule_test MESA_prof_load_static infra m MESA_htable_static ${SYSTEMD_LIBRARIES}) -install(PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/lb_rule_test DESTINATION bin COMPONENT Program)
\ No newline at end of file +#install(PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/lb_rule_test DESTINATION bin COMPONENT Program)
\ No newline at end of file |
