diff options
| author | root <[email protected]> | 2014-12-30 10:54:23 +0800 |
|---|---|---|
| committer | root <[email protected]> | 2014-12-30 10:54:23 +0800 |
| commit | 3727452c19fdc87d534af5c4eb4efce26d935593 (patch) | |
| tree | 722f62ed9b02d70021049f787268c305218fe560 /driver | |
G
多进程串联转发驱动程序,第一次提交:
完成了下面的功能:
(1)基于DPDK的多进程串联转发
(2)支持PPPOE/VLAN
(3)光保护支持
(4)自启动脚本支持
Diffstat (limited to 'driver')
| -rw-r--r-- | driver/.gitignore | 8 | ||||
| -rw-r--r-- | driver/Makefile | 44 | ||||
| -rw-r--r-- | driver/apt.c | 284 | ||||
| -rw-r--r-- | driver/apt.h | 11 | ||||
| -rw-r--r-- | driver/config.c | 752 | ||||
| -rw-r--r-- | driver/init.c | 598 | ||||
| -rw-r--r-- | driver/layer.c | 267 | ||||
| -rw-r--r-- | driver/layer.h | 39 | ||||
| -rw-r--r-- | driver/main.c | 228 | ||||
| -rw-r--r-- | driver/nl2fwd.h | 19 | ||||
| -rw-r--r-- | driver/nstat.c | 270 | ||||
| -rw-r--r-- | driver/nstat.h | 72 | ||||
| -rw-r--r-- | driver/runtime.c | 531 | ||||
| -rw-r--r-- | driver/version.c | 5 | ||||
| -rw-r--r-- | driver/version.h | 9 |
15 files changed, 3137 insertions, 0 deletions
diff --git a/driver/.gitignore b/driver/.gitignore new file mode 100644 index 0000000..75fb413 --- /dev/null +++ b/driver/.gitignore @@ -0,0 +1,8 @@ +*.o +*.ko +*.swp +*.tmp +*.log +tags +.tags + diff --git a/driver/Makefile b/driver/Makefile new file mode 100644 index 0000000..2c80601 --- /dev/null +++ b/driver/Makefile @@ -0,0 +1,44 @@ +
+ifeq ($(MODULES_STATS),0)
+ MODULES_CFAGS += -DAPP_STAT=0
+endif
+
+TARGET = driver
+MAJOR_VERSION = 1
+SUB_VERSION = 1
+
+DPDK_INCLUDE = $(DPDK_ROOT)/$(DPDK_TARGET)/include
+DPDK_LIB_DIR = $(DPDK_ROOT)/$(DPDK_TARGET)/lib
+DPDK_LIB = $(wildcard ${DPDK_LIB_DIR}/*.a)
+DPDK_CONFIG = $(DPDK_INCLUDE)/rte_config.h
+
+DIR_INC = -I$(DPDK_INCLUDE) -I$(APP_ROOT)/include/MESA -I$(APP_ROOT)/include/serial
+DIR_SRC = ./
+DIR_OBJ = ./
+DIR_BIN = ./
+DIR_LIB = $(APP_ROOT)/lib/
+
+MODULES = -Wl,--start-group $(DPDK_LIB) -Wl,--end-group
+MODULES += $(APP_ROOT)/lib/libMESA_prof_load.a
+
+CC = gcc
+SRC = $(wildcard ${DIR_SRC}/*.c)
+OBJ = $(patsubst %.c,${DIR_OBJ}/%.o,$(notdir ${SRC}))
+LDFLAG += -lrt -lpthread
+CFLAGS += -g -fPIC ${OPTFLAGS} ${DIR_INC} -L${DIR_LIB} -std=gnu99 -include ${DPDK_CONFIG} $(MODULES_CFAGS)
+
+${TARGET}:${OBJ}
+ ${CC} ${LDFLAG} -o $@ ${OBJ} ${MODULES}
+${DIR_OBJ}/%.o:${DIR_SRC}/%.c
+ ${CC} ${CFLAGS} -c $< -o $@
+
+.PHONY:install clean
+
+all: $(TARGET)
+
+clean:
+ rm -f *.o
+install:
+ cp -f ${TARGET} ${INSTALL}
+distclean: clean
+ rm -f ${TARGET}
diff --git a/driver/apt.c b/driver/apt.c new file mode 100644 index 0000000..55aa284 --- /dev/null +++ b/driver/apt.c @@ -0,0 +1,284 @@ + +/////////////////////////////////////////////////////////////////// +/// Copyright(C) Institude of Information Engineering,CAS 2014 +/// +/// @brief 自动运行参数模块 +/// +/// 用于自动获取运行参数的模块,尽量减少用户需要配置的参数。 +/// +/// @author 陆秋文<[email protected]> +/// @date 2014-07-03 +/// +/// +/////////////////////////////////////////////////////////////////// + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdint.h> +#include <assert.h> + +#include <unistd.h> +#include <dirent.h> + +#include "apt.h" +#include <MESA_prof_load.h> + +#ifndef STRING_MAX +#define STRING_MAX 2048 +#endif + +#ifndef APP_MAX_STRING +#define APP_MAX_STRING 4096 +#endif + +#define APT_PCAPDEVICE_TYPE_NONE 0 +#define APT_PCAPDEVICE_TYPE_FILE 1 +#define APT_PCAPDEVICE_TYPE_ETH 2 +#define APT_PCAPDEVICE_TYPE_FILE_RXTX_SPLIT 3 +#define APT_PCAPDEVICE_TYPE_ETH_RXTX_SPLIT 4 + +extern const char * conf_path[]; +extern const int n_conf_path; + +static const char * apt_find_conffile() +{ + for(int i = 0; i < n_conf_path; i++) + if(access(conf_path[i],R_OK) == 0) + return conf_path[i]; + return NULL; +} + +/* 基于PCAP的虚拟设备参数配置 */ +static int apt_paras_eal_veth(int argc_max,__out__ int * argc,__out__ char * argv[]) +{ + const char * conffile = apt_find_conffile(); + if(conffile == NULL) + return -2; + + int n_eth = 0; + MESA_load_profile_int_def(conffile,"pcap_compatibility","n_device",&n_eth,0); + + if(n_eth <= 0) + return 0; + + for(int i = 0; i < n_eth; i++) + { + char eth_section_name[APP_MAX_STRING]; + char command_line[APP_MAX_STRING]; + + memset(command_line,0,APP_MAX_STRING); + memset(eth_section_name,0,APP_MAX_STRING); + + sprintf(eth_section_name,"pcap_device_%d",i); + sprintf(command_line,"eth_pcap%d,",i); + + char rx_pcap[APP_MAX_STRING]; + char tx_pcap[APP_MAX_STRING]; + char rx_iface[APP_MAX_STRING]; + char tx_iface[APP_MAX_STRING]; + char 
iface[APP_MAX_STRING]; + + MESA_load_profile_string_def(conffile,eth_section_name,"rx_pcap",rx_pcap,APP_MAX_STRING,""); + MESA_load_profile_string_def(conffile,eth_section_name,"tx_pcap",tx_pcap,APP_MAX_STRING,""); + MESA_load_profile_string_def(conffile,eth_section_name,"rx_iface",rx_iface,APP_MAX_STRING,""); + MESA_load_profile_string_def(conffile,eth_section_name,"tx_iface",tx_iface,APP_MAX_STRING,""); + MESA_load_profile_string_def(conffile,eth_section_name,"iface",iface,APP_MAX_STRING,""); + + if(strnlen(rx_pcap,APP_MAX_STRING) != 0) + { + strncat(command_line,"rx_pcap=",APP_MAX_STRING); + strncat(command_line,rx_pcap,APP_MAX_STRING); + strncat(command_line,",",APP_MAX_STRING); + } + + if(strnlen(tx_pcap,APP_MAX_STRING) != 0) + { + strncat(command_line,"tx_pcap=",APP_MAX_STRING); + strncat(command_line,tx_pcap,APP_MAX_STRING); + strncat(command_line,",",APP_MAX_STRING); + } + + if(strnlen(tx_pcap,APP_MAX_STRING) != 0) + { + strncat(command_line,"rx_iface=",APP_MAX_STRING); + strncat(command_line,rx_iface,APP_MAX_STRING); + strncat(command_line,",",APP_MAX_STRING); + } + + if(strnlen(tx_pcap,APP_MAX_STRING) != 0) + { + strncat(command_line,"tx_iface=",APP_MAX_STRING); + strncat(command_line,tx_iface,APP_MAX_STRING); + strncat(command_line,",",APP_MAX_STRING); + } + + if(strnlen(tx_pcap,APP_MAX_STRING) != 0) + { + strncat(command_line,"iface",APP_MAX_STRING); + strncat(command_line,iface,APP_MAX_STRING); + } + + if(*argc > (argc_max - 1)) + return -2; + strncpy(argv[(*argc)++],"--vdev",STRING_MAX); + + if(*argc > (argc_max - 1)) + return -2; + strncpy(argv[(*argc)++],command_line,STRING_MAX); + } + + return 0; +} + +static int apt_paras_eal_ccpu(int argc_max,__out__ int * argc,__out__ char * argv[]) +{ + const char * conffile = apt_find_conffile(); + if(conffile == NULL) + return -2; + + int n_arg_rx = 0; + uint64_t core_ret = 0; + + MESA_load_profile_int_def(conffile,"rx_common","rx_count",&n_arg_rx,0); + + if(n_arg_rx <= 0) + return -2; + + for(int i = 0; i < 
n_arg_rx; i++) + { + + char rx_section_name[STRING_MAX]; + sprintf(rx_section_name,"rx_%d",i); + + int lcore; + MESA_load_profile_int_def(conffile,rx_section_name,"lcore",&lcore,0); + core_ret |= 1 << lcore; + } + + if(*argc > (argc_max - 1)) + return -2; + strncpy(argv[(*argc)++],"-c",STRING_MAX); + + char core_hex[STRING_MAX]; + sprintf(core_hex,"0x%x",(unsigned int)core_ret); + + if(*argc > (argc_max - 1)) + return -2; + + strncpy(argv[(*argc)++],core_hex,STRING_MAX); + return 0; +} + +static int apt_paras_eal_bpci(int argc_max,__out__ int * argc,__out__ char * argv[]) +{ + struct dirent * device_dir_p; + DIR * device_dir; + + if((device_dir = opendir("/sys/class/net/")) == NULL) + return -1; + + + while((device_dir_p = readdir(device_dir)) != NULL) + { + if(strcmp(device_dir_p->d_name,".") == 0 || + strcmp(device_dir_p->d_name,"..") == 0 || + strcmp(device_dir_p->d_name,"lo") == 0) + continue; + + char devsym_dir[STRING_MAX]; + strncpy(devsym_dir,"/sys/class/net/",STRING_MAX); + strncat(devsym_dir,device_dir_p->d_name,STRING_MAX); + strncat(devsym_dir,"/device",STRING_MAX); + + char devsym_info[STRING_MAX]; + readlink(devsym_dir,devsym_info,STRING_MAX); + + char devsym_pci[STRING_MAX]; + strncpy(devsym_pci,devsym_info + 11 ,STRING_MAX); + + if(strcmp(devsym_pci,"") == 0) + continue; + + if(*argc > (argc_max - 1)) + return -2; + strncpy(argv[(*argc)++],"-b",STRING_MAX); + + if(*argc > (argc_max - 1)) + return -2; + strncpy(argv[(*argc)++],devsym_pci,STRING_MAX); + } + + closedir(device_dir); + + return 0; +} + +/* get the -n paras from configure file. 
*/ + +int apt_paras_eal_memchannals(int argc_max,__out__ int * argc,__out__ char * argv[]) +{ + int isenable_m = 0; + int n_memchal = 0; + + //Read it from configure file + for(int i = 0; i < n_conf_path; i++) + { + if(access(conf_path[i],R_OK) == 0) + { + MESA_load_profile_int_def(conf_path[i],"eal","enable_spec_memc",&(isenable_m),0); + MESA_load_profile_int_def(conf_path[i],"eal","n_mem_channel",&(n_memchal),0); + break; + } + } + + if(isenable_m <= 0) + return 0; + + char n_memchal_string[STRING_MAX]; + sprintf(n_memchal_string,"%d",n_memchal); + + if(*argc > (argc_max - 1)) + return -2; + strncpy(argv[(*argc)++],"-n",STRING_MAX); + + if(*argc > (argc_max - 1)) + return -2; + strncpy(argv[(*argc)++],n_memchal_string,STRING_MAX); + + return 0; +} + +int apt_paras_eal_virtaddr(int argc_max, __out__ int * argc, __out__ char * argv[]) +{ + int enable = 0; + char virtaddr[APP_MAX_STRING]; + + const char * fcfg_path = apt_find_conffile(); + MESA_load_profile_int_def(fcfg_path,"virtaddr","enable",&(enable),0); + MESA_load_profile_string_def(fcfg_path,"virtaddr","virtaddr",virtaddr,APP_MAX_STRING,""); + + if(enable > 0) + { + strncpy(argv[(*argc)++],"--base-virtaddr",STRING_MAX); + strncpy(argv[(*argc)++],virtaddr,STRING_MAX); + } + + return 0; +} + +int apt_paras_eal(int argc_max,__out__ int * argc,__out__ char * argv[]) +{ + if(apt_paras_eal_bpci(argc_max,argc,argv) < 0) + return -1; + if(apt_paras_eal_ccpu(argc_max,argc,argv) < 0) + return -1; + if(apt_paras_eal_memchannals(argc_max,argc,argv) < 0) + return -1; + if(apt_paras_eal_veth(argc_max,argc,argv) < 0) + return -1; + if(apt_paras_eal_virtaddr(argc_max,argc,argv) < 0) + return -1; + + return 0; +} diff --git a/driver/apt.h b/driver/apt.h new file mode 100644 index 0000000..bdfaf86 --- /dev/null +++ b/driver/apt.h @@ -0,0 +1,11 @@ + +#ifndef __APT_INCLUDE_H__ +#define __APT_INCLUDE_H__ + +#ifndef __out__ +#define __out__ +#endif + +int apt_paras_eal(int argc_max,__out__ int * argc,__out__ char * argv[]); + 
+#endif diff --git a/driver/config.c b/driver/config.c new file mode 100644 index 0000000..4b60997 --- /dev/null +++ b/driver/config.c @@ -0,0 +1,752 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2013 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <inttypes.h> +#include <sys/types.h> +#include <string.h> +#include <sys/queue.h> +#include <stdarg.h> +#include <errno.h> +#include <unistd.h> + + +#include <rte_common.h> +#include <rte_byteorder.h> +#include <rte_log.h> +#include <rte_memory.h> +#include <rte_memcpy.h> +#include <rte_memzone.h> +#include <rte_tailq.h> +#include <rte_eal.h> +#include <rte_per_lcore.h> +#include <rte_launch.h> +#include <rte_atomic.h> +#include <rte_cycles.h> +#include <rte_prefetch.h> +#include <rte_lcore.h> +#include <rte_per_lcore.h> +#include <rte_branch_prediction.h> +#include <rte_interrupts.h> +#include <rte_pci.h> +#include <rte_random.h> +#include <rte_debug.h> +#include <rte_ether.h> +#include <rte_ethdev.h> +#include <rte_ring.h> +#include <rte_mempool.h> +#include <rte_mbuf.h> +#include <rte_ip.h> +#include <rte_tcp.h> +#include <rte_lpm.h> +#include <rte_string_fns.h> + +#include <assert.h> + +#include "main.h" +#include "MESA_prof_load.h" + +struct app_params app; +const char *conf_path[] = { + "conf/nl2fwd.conf", + ".nl2fwd.conf", + "~/.nl2fwd.conf", + "/etc/nl2fwd.conf", + "/usr/local/etc/nl2fwd.conf"}; + +const int n_conf_path = 5; + +#ifndef APP_MAX_STRING +#define APP_MAX_STRING 4096 +#endif + +void app_print_usage(void) +{ + printf("usages\n"); +} + +static int app_parse_conffile_rx(char * fcfg_path) +{ + int n_arg_rx = 0; + MESA_load_profile_int_def(fcfg_path,"rx_common","rx_count",&n_arg_rx,0); + + if(n_arg_rx <= 0) + return -1; + + for(int i = 0; i < n_arg_rx; i++) + { + struct app_lcore_params *lp; + + char rx_section_name[APP_MAX_STRING]; + sprintf(rx_section_name,"rx_%d",i); + + int port,queue,lcore; + MESA_load_profile_int_def(fcfg_path,rx_section_name,"port",&port,0); + MESA_load_profile_int_def(fcfg_path,rx_section_name,"queue",&queue,0); + MESA_load_profile_int_def(fcfg_path,rx_section_name,"lcore",&lcore,0); + + /* Enable port and queue for later initialization */ + if 
((port >= APP_MAX_NIC_PORTS) || (queue >= APP_MAX_RX_QUEUES_PER_NIC_PORT)) { + return -3; + } + if (app.nic_rx_queue_mask[port][queue] != 0) { + return -4; + } + app.enabled_port_mask |= 1 << port; + app.nic_rx_queue_mask[port][queue] = 1; + + /* Check and assign (port, queue) to I/O lcore */ + if (rte_lcore_is_enabled(lcore) == 0) { + return -5; + } + + if (lcore >= APP_MAX_LCORES) { + return -6; + } + lp = &app.lcore_params[lcore]; + if (lp->type == e_APP_LCORE_WORKER) { + return -7; + } + lp->type = e_APP_LCORE_IO; + for (int j = 0; j < lp->io.rx.n_nic_queues; j++) { + if ((lp->io.rx.nic_queues[j].port == port) && + (lp->io.rx.nic_queues[j].queue == queue)) { + return -8; + } + } + if (lp->io.rx.n_nic_queues >= APP_MAX_NIC_RX_QUEUES_PER_IO_LCORE) { + return -9; + } + lp->io.rx.nic_queues[lp->io.rx.n_nic_queues].port = (uint8_t) port; + lp->io.rx.nic_queues[lp->io.rx.n_nic_queues].queue = (uint8_t) queue; + lp->io.rx.n_nic_queues ++; + +#if 0 + int tx_port_existed = 0; + for(int j = 0; j < lp->io.tx.n_nic_ports; j++) { + if (lp->io.tx.nic_ports[j] == (uint8_t) port) { + tx_port_existed = 1; + break; + } + } + + if(!tx_port_existed) { + lp->io.tx.nic_ports[lp->io.tx.n_nic_ports] = port; + lp->io.tx.n_nic_ports ++; + app.enabled_port_mask |= 1 << (uint8_t)port; + app.nic_tx_port_mask[(uint8_t)port] = 1; + } +#endif + } + + return 0; +} + +#if 0 +static int app_parse_conffile_tx(char *fcfg_path) +{ + + char tx_strbuf[APP_MAX_STRING]; + char * parse_arg = tx_strbuf; + int tx_enable; + + MESA_load_profile_int_def(fcfg_path,"tx","tx_enable",&tx_enable,0); + MESA_load_profile_string_def(fcfg_path,"tx","tx_port",parse_arg, + sizeof(char) * APP_MAX_STRING,""); + if(!tx_enable) + return 0; + + for(parse_arg = strtok(parse_arg,","); + parse_arg != NULL; parse_arg = strtok(NULL,",")) + { + int port = 0; + int ret = sscanf(parse_arg,"%d",&port); + if(ret <= 0) + return -1; //Parse Error. + if(port > APP_MAX_NIC_PORTS|| port < 0) + return -2; // Wrong Port number. 
+ + //Set TX Mask and Port Mask. + app.enabled_port_mask |= 1 << (uint8_t)port; + app.nic_tx_port_mask[(uint8_t)port] = 1; + } + + return 0; +} +#endif + +static int app_parse_conffile_tx(char * fcfg_path) +{ + int n_arg_tx = 0; + MESA_load_profile_int_def(fcfg_path,"tx_common","tx_count",&n_arg_tx,0); + + if(n_arg_tx <= 0) + return -1; + + for(int i = 0; i < n_arg_tx; i++) + { + struct app_lcore_params *lp; + char tx_section_name[APP_MAX_STRING]; + sprintf(tx_section_name,"tx_%d",i); + + int port,lcore,nb_queues; + MESA_load_profile_int_def(fcfg_path,tx_section_name,"port",&port,0); + MESA_load_profile_int_def(fcfg_path,tx_section_name,"lcore",&lcore,0); + MESA_load_profile_int_def(fcfg_path,tx_section_name,"nqueue",&nb_queues,0); + + /* Enable port and queue for later initialization */ + if (port >= APP_MAX_NIC_PORTS) { + return -3; + } + + if (nb_queues <= 0 || port >= APP_MAX_TX_QUEUES_PER_NIC_PORT) + return -4; + + if (app.nic_tx_port_mask[port] != 0) { + return -5; + } + + app.nic_tx_port_mask[port] = nb_queues; + + /* Check and assign (port, queue) to I/O lcore */ + if (rte_lcore_is_enabled(lcore) == 0) { + return -6; + } + + if (lcore >= APP_MAX_LCORES) { + return -7; + } + lp = &app.lcore_params[lcore]; + if (lp->type == e_APP_LCORE_WORKER) { + return -8; + } + lp->type = e_APP_LCORE_IO; + for (int j = 0; j < lp->io.tx.n_nic_ports; j++) { + if (lp->io.tx.nic_ports[j] == port) { + return -9; + } + } + if (lp->io.tx.n_nic_ports >= APP_MAX_NIC_TX_PORTS_PER_IO_LCORE) { + return -10; + } + lp->io.tx.nic_ports[lp->io.tx.n_nic_ports] = (uint8_t) port; + lp->io.tx.nic_n_queues[lp->io.tx.n_nic_ports] = (uint8_t) nb_queues; + lp->io.tx.n_nic_ports ++; + } + + return 0; +} + +static int app_parse_conffile_w(char *fcfg_path) +{ + + char worker_buf[APP_MAX_STRING]; + MESA_load_profile_string_def(fcfg_path,"worker","lcore",worker_buf, + sizeof(char) * APP_MAX_STRING,""); + + char *p = worker_buf; + + while (*p != 0) { + struct app_lcore_params *lp; + uint32_t lcore; + + 
+ errno = 0; + lcore = strtoul(p, NULL, 0); + if ((errno != 0)) { + return -2; + } + + /* Check and enable worker lcore */ +#if 0 + if (rte_lcore_is_enabled(lcore) == 0) { + return -3; + } +#endif + + if (lcore >= APP_MAX_LCORES) { + return -4; + } + lp = &app.lcore_params[lcore]; + if (lp->type == e_APP_LCORE_IO) { + return -5; + } + lp->type = e_APP_LCORE_WORKER; + + p = strchr(p, ','); + if (p == NULL) { + break; + } + p++; + } + + return 0; +} + + + +static int app_parse_conffile_rxtx_paras(char * fcfg_path) +{ + MESA_load_profile_int_def(fcfg_path,"common","nic_rx_ring_size", + &app.nic_rx_ring_size,APP_DEFAULT_NIC_RX_RING_SIZE); + MESA_load_profile_int_def(fcfg_path,"common","nic_tx_ring_size", + &app.nic_tx_ring_size,APP_DEFAULT_NIC_TX_RING_SIZE); + MESA_load_profile_int_def(fcfg_path,"common","ring_rx_size", + &app.ring_rx_size, APP_DEFAULT_RING_RX_SIZE); + MESA_load_profile_int_def(fcfg_path,"common","ring_tx_size", + &app.ring_tx_size, APP_DEFAULT_RING_TX_SIZE); + + + MESA_load_profile_int_def(fcfg_path,"common","burst_size_io_rx_read", + &app.burst_size_io_rx_read,APP_DEFAULT_BURST_SIZE_IO_RX_READ); + MESA_load_profile_int_def(fcfg_path,"common","burst_size_io_rx_write", + &app.burst_size_io_rx_write,APP_DEFAULT_BURST_SIZE_IO_RX_WRITE); + MESA_load_profile_int_def(fcfg_path,"common","burst_size_io_tx_read", + &app.burst_size_io_tx_read,APP_DEFAULT_BURST_SIZE_IO_TX_READ); + MESA_load_profile_int_def(fcfg_path,"common","burst_size_io_tx_write", + &app.burst_size_io_tx_write,APP_DEFAULT_BURST_SIZE_IO_TX_WRITE); + MESA_load_profile_int_def(fcfg_path,"common","burst_size_worker_read", + &app.burst_size_worker_read,APP_DEFAULT_BURST_SIZE_WORKER_READ); + MESA_load_profile_int_def(fcfg_path,"common","burst_size_worker_write", + &app.burst_size_worker_write,APP_DEFAULT_BURST_SIZE_WORKER_WRITE); + + + return 0; +} + + + +static int app_parse_conffile_map(char * fcfg_path) +{ + int rx_port,tx_port; + int n_record = 0; + + 
MESA_load_profile_int_def(fcfg_path,"map","n_map",&n_record,0); + + if(n_record <= 0) + return 0; + + char map_strbuf[APP_MAX_STRING]; + + for(int i = 0; i < n_record; i++) + { + char map_key[APP_MAX_STRING]; + + char * map_strbuf_p = map_strbuf; + char * port_str = NULL; + + sprintf(map_key,"map_%d",i); + + int ret = MESA_load_profile_string_def(fcfg_path,"map",map_key,map_strbuf, + sizeof(char) * APP_MAX_STRING,""); + if(ret < 0) + return -1; + + port_str = strtok(map_strbuf_p, ","); + ret = sscanf(port_str,"%d",&rx_port); + + if(ret <= 0) + return -1; + + // Add Map Record for RX Port. + app.rxtx_port_map[i].rx_port = rx_port; + + int j = 1; + for(j = 1,port_str = strtok(NULL, ","); + port_str != NULL; + port_str = strtok(NULL, ",")) + { + if(sscanf(port_str,"%d",&tx_port) < 0) + return -1; + if(tx_port > APP_MAX_NIC_PORTS || tx_port < 0) + return -1; + if(app.nic_tx_port_mask[tx_port] == 0) + return -1; + + // Add Map Record for TX Report + app.rxtx_port_map[i].tx_port[j-1] = tx_port; + + } + app.rxtx_port_map[i].n_tx_port = (uint32_t)j; + (app.n_rxtx_port_map)++; + + app.rxtx_stream_record[(uint8_t)app.n_rxtx_stream].rx_port = rx_port; + app.rxtx_stream_record[(uint8_t)app.n_rxtx_stream].tx_port = tx_port; + (app.n_rxtx_stream)++; + + app.map_type = e_APP_MAP_TYPE_PORTMAP; + + } + + assert(app.n_rxtx_port_map == n_record); + + return n_record; +} + +static int app_parse_conffile_stat(char *fcfg_path) +{ + MESA_load_profile_int_def(fcfg_path,"stat","enable",&(app.statistics.enable),1); + MESA_load_profile_int_def(fcfg_path,"stat","print", &(app.statistics.is_printmsg),1); + MESA_load_profile_int_def(fcfg_path,"stat","sample_time", &(app.statistics.sample_time),1); + + if(app.watchdog_paras.enable) + app.statistics.enable = 1; + + return 0; +} + + +static int app_parse_conffile_mempool(char *fcfg_path) +{ + + MESA_load_profile_int_def(fcfg_path,"mempool","mempool_buffers", + &(app.mempool.mempool_buffers),APP_DEFAULT_MEMPOOL_BUFFERS); + 
MESA_load_profile_int_def(fcfg_path,"mempool","mempool_mbuf_size", + &(app.mempool.mempool_mbuf_size),APP_DEFAULT_MBUF_SIZE); + MESA_load_profile_int_def(fcfg_path,"mempool","mempool_cache_size", + &(app.mempool.mempool_cache_size),APP_DEFAULT_MEMPOOL_CACHE_SIZE); + + return 0; +} + + +int app_parse_args(int argc, char **argv) +{ + + + app.enabled_port_mask = 0; + app.key_type = KEY_TYPE_IS_IP; + + for(int i = 0; i < n_conf_path ; i++) + { + if(access(conf_path[i],R_OK) == 0) { + + char * path = (char *)conf_path[i]; + + if(app_parse_conffile_rx(path) < 0) + return -1; + if(app_parse_conffile_tx(path) < 0) + return -2; + if(app_parse_conffile_w(path) < 0) + return -3; + if(app_parse_conffile_rxtx_paras(path) < 0) + return -4; + if(app_parse_conffile_stat(path) < 0) + return -5; + if(app_parse_conffile_map(path) < 0) + return -6; + if(app_parse_conffile_mempool(path) < 0) + return -7; + return 0; + } + } + return -9; +} + +int +app_get_nic_rx_queues_per_port(uint8_t port) +{ + uint32_t i, count; + + if (port >= APP_MAX_NIC_PORTS) { + return -1; + } + + count = 0; + for (i = 0; i < APP_MAX_RX_QUEUES_PER_NIC_PORT; i ++) { + if (app.nic_rx_queue_mask[port][i] == 1) { + count ++; + } + } + + return count; +} + +int +app_get_lcore_for_nic_rx(uint8_t port, uint8_t queue, uint32_t *lcore_out) +{ + uint32_t lcore; + + for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) { + struct app_lcore_params_io *lp = &app.lcore_params[lcore].io; + uint32_t i; + + if (app.lcore_params[lcore].type != e_APP_LCORE_IO) { + continue; + } + + for (i = 0; i < lp->rx.n_nic_queues; i ++) { + if ((lp->rx.nic_queues[i].port == port) && + (lp->rx.nic_queues[i].queue == queue)) { + *lcore_out = lcore; + return 0; + } + } + } + + return -1; +} + +int +app_get_lcore_for_nic_tx(uint8_t port, uint32_t *lcore_out) +{ + uint32_t lcore; + + for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) { + struct app_lcore_params_io *lp = &app.lcore_params[lcore].io; + uint32_t i; + + if (app.lcore_params[lcore].type != 
e_APP_LCORE_IO) { + continue; + } + + for (i = 0; i < lp->tx.n_nic_ports; i ++) { + if (lp->tx.nic_ports[i] == port) { + *lcore_out = lcore; + return 0; + } + } + } + + return -1; +} + +int +app_is_socket_used(uint32_t socket) +{ + uint32_t lcore; + + for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) { + if (app.lcore_params[lcore].type == e_APP_LCORE_DISABLED) { + continue; + } + + if (socket == rte_lcore_to_socket_id(lcore)) { + return 1; + } + } + + return 0; +} + +uint32_t +app_get_lcores_io_rx(void) +{ + uint32_t lcore, count; + + count = 0; + for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) { + struct app_lcore_params_io *lp_io = &app.lcore_params[lcore].io; + + if ((app.lcore_params[lcore].type != e_APP_LCORE_IO) || + (lp_io->rx.n_nic_queues == 0)) { + continue; + } + count ++; + } + return count; +} + +uint32_t +app_get_lcores_worker(void) +{ + uint32_t lcore, count; + + count = 0; + for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) { + if (app.lcore_params[lcore].type != e_APP_LCORE_WORKER) { + continue; + } + + count ++; + } + + if (count > APP_MAX_WORKER_LCORES) { + rte_panic("Algorithmic error (too many worker lcores)\n"); + return 0; + } + + return count; +} + +void +app_print_params(void) +{ + unsigned port, queue, lcore, i, j; + + /* Print NIC RX configuration */ + printf("NIC RX ports: "); + for (port = 0; port < APP_MAX_NIC_PORTS; port ++) { + uint32_t n_rx_queues = app_get_nic_rx_queues_per_port((uint8_t) port); + + if (n_rx_queues == 0) { + continue; + } + + printf("%u (", port); + for (queue = 0; queue < APP_MAX_RX_QUEUES_PER_NIC_PORT; queue ++) { + if (app.nic_rx_queue_mask[port][queue] == 1) { + printf("%u ", queue); + } + } + printf(") "); + } + printf(";\n"); + + /* Print I/O lcore RX params */ + for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) { + struct app_lcore_params_io *lp = &app.lcore_params[lcore].io; + + if ((app.lcore_params[lcore].type != e_APP_LCORE_IO) || + (lp->rx.n_nic_queues == 0)) { + continue; + } + + printf("I/O lcore %u 
(socket %u): ", lcore, rte_lcore_to_socket_id(lcore)); + + printf("RX ports "); + for (i = 0; i < lp->rx.n_nic_queues; i ++) { + printf("(%u, %u) ", + (unsigned) lp->rx.nic_queues[i].port, + (unsigned) lp->rx.nic_queues[i].queue); + } + printf("; "); + + printf("Output rings "); + for (i = 0; i < lp->rx.n_rings; i ++) { + printf("%p ", lp->rx.rings[i]); + } + printf(";\n"); + } + + /* Print worker lcore RX params */ + for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) { + struct app_lcore_params_worker *lp = &app.lcore_params[lcore].worker; + + if (app.lcore_params[lcore].type != e_APP_LCORE_WORKER) { + continue; + } + + printf("Worker lcore %u (socket %u) ID %u: ", + lcore, + rte_lcore_to_socket_id(lcore), + (unsigned)lp->worker_id); + + printf("Input rings "); + for (i = 0; i < lp->n_rings_in; i ++) { + printf("%p ", lp->rings_in[i]); + } + + printf(";\n"); + } + + printf("\n"); + + /* Print NIC TX configuration */ + printf("NIC TX ports: "); + for (port = 0; port < APP_MAX_NIC_PORTS; port ++) { + if (app.nic_tx_port_mask[port] == 1) { + printf("%u ", port); + } + } + printf(";\n"); + + /* Print I/O TX lcore params */ + for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) { + struct app_lcore_params_io *lp = &app.lcore_params[lcore].io; + uint32_t n_workers = app_get_lcores_worker(); + + if ((app.lcore_params[lcore].type != e_APP_LCORE_IO) || + (lp->tx.n_nic_ports == 0)) { + continue; + } + + printf("I/O lcore %u (socket %u): ", lcore, rte_lcore_to_socket_id(lcore)); + + printf("Input rings per TX port "); + for (i = 0; i < lp->tx.n_nic_ports; i ++) { + port = lp->tx.nic_ports[i]; + + printf("%u (", port); + for (j = 0; j < n_workers; j ++) { + printf("%p ", lp->tx.rings[port][j]); + } + printf(") "); + + } + + printf(";\n"); + } + + /* Print worker lcore TX params */ + for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) { + struct app_lcore_params_worker *lp = &app.lcore_params[lcore].worker; + + if (app.lcore_params[lcore].type != e_APP_LCORE_WORKER) { + continue; + 
} + + printf("Worker lcore %u (socket %u) ID %u: \n", + lcore, + rte_lcore_to_socket_id(lcore), + (unsigned)lp->worker_id); + + printf("Output rings per TX port "); + for (port = 0; port < APP_MAX_NIC_PORTS; port ++) { + if (lp->rings_out[port] != NULL) { + printf("%u (%p) ", port, lp->rings_out[port]); + } + } + + printf(";\n"); + } + + /* Rings */ + printf("Ring sizes: NIC RX = %u; Worker in = %u; Worker out = %u; NIC TX = %u;\n", + (unsigned) app.nic_rx_ring_size, + (unsigned) app.ring_rx_size, + (unsigned) app.ring_tx_size, + (unsigned) app.nic_tx_ring_size); + + /* Bursts */ + printf("Burst sizes: I/O RX (rd = %u, wr = %u); Worker (rd = %u, wr = %u); I/O TX (rd = %u, wr = %u)\n", + (unsigned) app.burst_size_io_rx_read, + (unsigned) app.burst_size_io_rx_write, + (unsigned) app.burst_size_worker_read, + (unsigned) app.burst_size_worker_write, + (unsigned) app.burst_size_io_tx_read, + (unsigned) app.burst_size_io_tx_write); +} diff --git a/driver/init.c b/driver/init.c new file mode 100644 index 0000000..51df8d2 --- /dev/null +++ b/driver/init.c @@ -0,0 +1,598 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2013 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <inttypes.h> +#include <sys/types.h> +#include <string.h> +#include <sys/queue.h> +#include <stdarg.h> +#include <errno.h> +#include <getopt.h> + +#include <rte_common.h> +#include <rte_byteorder.h> +#include <rte_log.h> +#include <rte_memory.h> +#include <rte_memcpy.h> +#include <rte_memzone.h> +#include <rte_tailq.h> +#include <rte_eal.h> +#include <rte_per_lcore.h> +#include <rte_launch.h> +#include <rte_atomic.h> +#include <rte_cycles.h> +#include <rte_prefetch.h> +#include <rte_lcore.h> +#include <rte_per_lcore.h> +#include <rte_branch_prediction.h> +#include <rte_interrupts.h> +#include <rte_pci.h> +#include <rte_random.h> +#include <rte_debug.h> +#include <rte_ether.h> +#include <rte_ethdev.h> +#include <rte_ring.h> +#include <rte_mempool.h> +#include <rte_mbuf.h> +#include <rte_string_fns.h> +#include <rte_ip.h> +#include <rte_tcp.h> +#include <rte_lpm.h> +#include <rte_version.h> + +#include "main.h" +#include "nstat.h" + +static struct rte_eth_conf port_conf = { + .rxmode = { + .split_hdr_size = 0, + .header_split = 0, /**< Header Split disabled */ + .hw_ip_checksum = 0, /**< IP 
checksum offload enabled */ + .hw_vlan_filter = 0, /**< VLAN filtering disabled */ + .hw_vlan_strip = 0, + .jumbo_frame = 0, /**< Jumbo Frame Support disabled */ + .hw_strip_crc = 0, /**< CRC stripped by hardware */ + }, + .rx_adv_conf = { + .rss_conf = { + .rss_key = NULL, + .rss_hf = ETH_RSS_IPV4 | ETH_RSS_IPV6, + }, + }, + .txmode = { + //.mq_mode = ETH_MQ_TX_NONE, + .mq_mode = ETH_DCB_NONE, + }, +}; + +static struct rte_eth_rxconf rx_conf = { + .rx_thresh = { + .pthresh = APP_DEFAULT_NIC_RX_PTHRESH, + .hthresh = APP_DEFAULT_NIC_RX_HTHRESH, + .wthresh = APP_DEFAULT_NIC_RX_WTHRESH, + }, + .rx_free_thresh = APP_DEFAULT_NIC_RX_FREE_THRESH, + .rx_drop_en = APP_DEFAULT_NIC_RX_DROP_EN, +}; + +static struct rte_eth_txconf tx_conf = { + .tx_thresh = { + .pthresh = APP_DEFAULT_NIC_TX_PTHRESH, + .hthresh = APP_DEFAULT_NIC_TX_HTHRESH, + .wthresh = APP_DEFAULT_NIC_TX_WTHRESH, + }, + .tx_free_thresh = APP_DEFAULT_NIC_TX_FREE_THRESH, + .tx_rs_thresh = APP_DEFAULT_NIC_TX_RS_THRESH, +}; + +static int +app_get_worker_core_number(void) +{ + unsigned lcore; + int count = 0; + + for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) { + if (app.lcore_params[lcore].type != e_APP_LCORE_WORKER) { + continue; + } + count++; + } + return count; +} + +static void +app_assign_worker_ids(void) +{ + uint32_t lcore, worker_id; + + /* Assign ID for each worker */ + worker_id = 0; + for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) { + struct app_lcore_params_worker *lp_worker = &app.lcore_params[lcore].worker; + + if (app.lcore_params[lcore].type != e_APP_LCORE_WORKER) { + continue; + } + + lp_worker->worker_id = worker_id; + worker_id ++; + } +} + +static void +app_init_mbuf_pools(void) +{ + unsigned socket, lcore; + + /* Init the buffer pools */ + for (socket = 0; socket < APP_MAX_SOCKETS; socket ++) { + char name[32]; + if (app_is_socket_used(socket) == 0) { + continue; + } + + snprintf(name, sizeof(name), "mbuf_pool_%u", socket); + printf("Creating the mbuf pool for socket %u ...\n", socket); 
+ +#if 0 + app.pools[socket] = rte_mempool_create( + name, + APP_DEFAULT_MEMPOOL_BUFFERS, + APP_DEFAULT_MBUF_SIZE, + APP_DEFAULT_MEMPOOL_CACHE_SIZE, + sizeof(struct rte_pktmbuf_pool_private), + rte_pktmbuf_pool_init, NULL, + rte_pktmbuf_init, NULL, + socket, + 0); +#endif + app.pools[socket] = rte_mempool_create( + name, + app.mempool.mempool_buffers, + app.mempool.mempool_mbuf_size, + app.mempool.mempool_cache_size, + sizeof(struct rte_pktmbuf_pool_private), + rte_pktmbuf_pool_init, NULL, + rte_pktmbuf_init, NULL, + socket, + 0); + if (app.pools[socket] == NULL) { + rte_panic("Cannot create mbuf pool on socket %u\n", socket); + } + } + + for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) { + if (app.lcore_params[lcore].type == e_APP_LCORE_DISABLED) { + continue; + } + + socket = rte_lcore_to_socket_id(lcore); + app.lcore_params[lcore].pool = app.pools[socket]; + } +} + + +static void +app_init_rings_rx(void) +{ + unsigned lcore; + + /* Initialize the rings for the RX side */ + for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) { + struct app_lcore_params_io *lp_io = &app.lcore_params[lcore].io; + unsigned socket_io, lcore_worker; + + if ((app.lcore_params[lcore].type != e_APP_LCORE_IO) || + (lp_io->rx.n_nic_queues == 0)) { + continue; + } + + socket_io = rte_lcore_to_socket_id(lcore); + + for (lcore_worker = 0; lcore_worker < APP_MAX_LCORES; lcore_worker ++) { + char name[32]; + struct app_lcore_params_worker *lp_worker = &app.lcore_params[lcore_worker].worker; + struct rte_ring *ring = NULL; + + if (app.lcore_params[lcore_worker].type != e_APP_LCORE_WORKER) { + continue; + } + + printf("Creating ring to connect I/O lcore %u (socket %u) with worker lcore %u ...\n", + lcore, + socket_io, + lcore_worker); + snprintf(name, sizeof(name), "app_ring_rx_io%u_w%u", + lcore, + lcore_worker); + ring = rte_ring_create( + name, + app.ring_rx_size, + socket_io, + RING_F_SP_ENQ | RING_F_SC_DEQ); + if (ring == NULL) { + rte_panic("Cannot create ring to connect I/O core %u with 
worker core %u\n", + lcore, + lcore_worker); + } + + lp_io->rx.rings[lp_io->rx.n_rings] = ring; + lp_io->rx.n_rings ++; + + lp_worker->rings_in[lp_worker->n_rings_in] = ring; + lp_worker->n_rings_in ++; + } + } + + +#if 0 + /* Init the rings for the TX side */ + /* Add by Lu Qiuwen <[email protected]> at 2014-12-04 */ + for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) { + struct app_lcore_params_worker *lp_worker = &app.lcore_params[lcore].worker; + unsigned socket_io, port; + + if (app.lcore_params[lcore].type != e_APP_LCORE_WORKER) { + continue; + } + + if (lp_worker->n_rings_in != app_get_lcores_io_rx()) { + rte_panic("Algorithmic error (worker input rings)\n"); + } + + socket_io = rte_lcore_to_socket_id(lcore); + for (port = 0; port < APP_MAX_NIC_PORTS; port++) { + char name[32]; + struct rte_ring * ring = NULL; + + if(app.nic_tx_port_mask[port] != 1) + continue; + + printf("Creating ring to connect NIC %u (socket %u) in worker lcore %u ... \n", + port, + socket_io, + lcore); + rte_snprintf(name, sizeof(name), "app_ring_tx_s%u_p%u_w_%u", + socket_io, + port, + lcore); + ring = rte_ring_create( + name, + app.ring_tx_size, + socket_io, + RING_F_SP_ENQ | RING_F_SC_DEQ); + if (ring == NULL) { + rte_panic("Cannot create ring NIC %u in worker lcore %u\n", + port, + lcore); + } + + lp_worker->rings_out[port] = ring; + } + } + + /* Connect I/O TX to working tx rings, by Lu Qiuwen at 2014-12-04 */ + for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) { + struct app_lcore_params_io *lp_io = &app.lcore_params[lcore].io; + unsigned lcore_worker, port_id; + + if ((app.lcore_params[lcore].type != e_APP_LCORE_IO) || + (lp_io->rx.n_nic_queues == 0)) { + continue; + } + + for(lcore_worker = 0; lcore < APP_MAX_LCORES; lcore++) { + struct app_lcore_params_worker * lp_worker = &app.lcore_params[lcore].worker; + struct rte_ring * ring = NULL; + + if (app.lcore_params[lcore].type != e_APP_LCORE_WORKER) { + continue; + } + + for(int i = 0; i < lp_io->tx.n_nic_ports; i++) { + port_id = 
lp_io->tx.nic_ports[i]; + ring = lp_worker->rings_out[port_id]; + + lp_io->tx.rings[port_id][lcore_worker] = ring; + + printf("Link rings between worker %u(port %u) with I/O core %u\n", + lcore_worker, + port_id, + lcore); + } + } + } +#endif +} + +static void +app_init_rings_tx(void) +{ + unsigned lcore; + + /* Initialize the rings for the TX side */ + for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) { + struct app_lcore_params_worker *lp_worker = &app.lcore_params[lcore].worker; + unsigned port; + + if (app.lcore_params[lcore].type != e_APP_LCORE_WORKER) { + continue; + } + + for (port = 0; port < APP_MAX_NIC_PORTS; port ++) { + char name[32]; + struct app_lcore_params_io *lp_io = NULL; + struct rte_ring *ring; + uint32_t socket_io, lcore_io; + + if (app.nic_tx_port_mask[port] == 0) { + continue; + } + + if (app_get_lcore_for_nic_tx((uint8_t) port, &lcore_io) < 0) { + rte_panic("Algorithmic error (no I/O core to handle TX of port %u)\n", + port); + } + + lp_io = &app.lcore_params[lcore_io].io; + socket_io = rte_lcore_to_socket_id(lcore_io); + + printf("Creating ring to connect worker lcore %u with TX port %u (through I/O lcore %u) (socket %u) ...\n", + lcore, port, (unsigned)lcore_io, (unsigned)socket_io); + snprintf(name, sizeof(name), "app_ring_tx_w%u_p%u", lcore, port); + printf("%s\n",name); + + ring = rte_ring_create( + name, + app.ring_tx_size, + socket_io, + RING_F_SP_ENQ | RING_F_SC_DEQ); + if (ring == NULL) { + rte_panic("Cannot create ring to connect worker core %u with TX port %u\n", + lcore, + port); + } + + lp_worker->rings_out[port] = ring; + lp_io->tx.rings[port][lp_worker->worker_id] = ring; + } + } + + for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) { + struct app_lcore_params_io *lp_io = &app.lcore_params[lcore].io; + unsigned i; + + if ((app.lcore_params[lcore].type != e_APP_LCORE_IO) || + (lp_io->tx.n_nic_ports == 0)) { + continue; + } + + for (i = 0; i < lp_io->tx.n_nic_ports; i ++){ + unsigned port, j; + + port = lp_io->tx.nic_ports[i]; + 
for (j = 0; j < app_get_lcores_worker(); j ++) { + if (lp_io->tx.rings[port][j] == NULL) { + rte_panic("Algorithmic error (I/O TX rings)\n"); + } + } + } + } +} + + + + +/* Check the link status of all ports in up to 9s, and print them finally */ +static void +check_all_ports_link_status(uint8_t port_num, uint32_t port_mask) +{ +#define CHECK_INTERVAL 100 /* 100ms */ +#define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */ + uint8_t portid, count, all_ports_up, print_flag = 0; + struct rte_eth_link link; + uint32_t n_rx_queues, n_tx_queues; + + printf("\nChecking link status"); + fflush(stdout); + for (count = 0; count <= MAX_CHECK_TIME; count++) { + all_ports_up = 1; + for (portid = 0; portid < port_num; portid++) { + if ((port_mask & (1 << portid)) == 0) + continue; + n_rx_queues = app_get_nic_rx_queues_per_port(portid); + n_tx_queues = app.nic_tx_port_mask[portid]; + if ((n_rx_queues == 0) && (n_tx_queues == 0)) + continue; + memset(&link, 0, sizeof(link)); + rte_eth_link_get_nowait(portid, &link); + /* print link status if flag set */ + if (print_flag == 1) { + if (link.link_status) + printf("Port %d Link Up - speed %u " + "Mbps - %s\n", (uint8_t)portid, + (unsigned)link.link_speed, + (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? 
+ ("full-duplex") : ("half-duplex\n")); + else + printf("Port %d Link Down\n", + (uint8_t)portid); + continue; + } + /* clear all_ports_up flag if any link down */ + if (link.link_status == 0) { + all_ports_up = 0; + break; + } + } + /* after finally printing all link status, get out */ + if (print_flag == 1) + break; + + if (all_ports_up == 0) { + printf("."); + fflush(stdout); + rte_delay_ms(CHECK_INTERVAL); + } + + /* set the print_flag if all ports up or timeout */ + if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) { + print_flag = 1; + printf("done\n"); + } + } +} + +static void +app_init_nics(void) +{ + unsigned socket; + uint32_t lcore; + uint8_t port, queue; + int ret; + uint32_t n_rx_queues, n_tx_queues; + + /* Init driver */ + printf("Initializing the PMD driver ...\n"); + +#if RTE_VER_MAJOR >=1 && RTE_VER_MINOR <= 6 + if (rte_pmd_init_all() < 0) { + rte_panic("Cannot init PMD\n"); + } +#endif + + if (rte_eal_pci_probe() < 0) { + rte_panic("Cannot probe PCI\n"); + } + + app.worker_core_num = app_get_worker_core_number(); + + /* Init NIC ports and queues, then start the ports */ + for (port = 0; port < APP_MAX_NIC_PORTS; port ++) { + struct rte_mempool *pool; + + n_rx_queues = app_get_nic_rx_queues_per_port(port); + n_tx_queues = app.nic_tx_port_mask[port]; + + if ((n_rx_queues == 0) && (n_tx_queues == 0)) { + continue; + } + + /* Init port */ + printf("Initializing NIC port %u ...\n", (unsigned) port); + ret = rte_eth_dev_configure( + port, + (uint8_t) n_rx_queues, + (uint8_t) n_tx_queues, + &port_conf); + if (ret < 0) { + rte_panic("Cannot init NIC port %u (%d)\n", (unsigned) port, ret); + } + rte_eth_promiscuous_enable(port); + + /* Init RX queues */ + for (queue = 0; queue < APP_MAX_RX_QUEUES_PER_NIC_PORT; queue ++) { + if (app.nic_rx_queue_mask[port][queue] == 0) { + continue; + } + + app_get_lcore_for_nic_rx(port, queue, &lcore); + socket = rte_lcore_to_socket_id(lcore); + pool = app.lcore_params[lcore].pool; + + printf("Initializing NIC port 
%u RX queue %u ...\n", + (unsigned) port, + (unsigned) queue); + ret = rte_eth_rx_queue_setup( + port, + queue, + (uint16_t) app.nic_rx_ring_size, + socket, + &rx_conf, + pool); + if (ret < 0) { + rte_panic("Cannot init RX queue %u for port %u (%d)\n", + (unsigned) queue, + (unsigned) port, + ret); + } + } + + /* Init TX queues */ + //RSYS if (app.nic_tx_port_mask[port] == 1) { + for (queue = 0; queue < n_tx_queues; queue ++) { + app_get_lcore_for_nic_tx(port, &lcore); + socket = rte_lcore_to_socket_id(lcore); + printf("Initializing NIC port %u TX queue %u ...\n", + (unsigned) port, (unsigned) queue); + ret = rte_eth_tx_queue_setup( + port, + queue, //0, + (uint16_t) app.nic_tx_ring_size, + socket, + &tx_conf); + if (ret < 0) { + rte_panic("Cannot init TX queue %u for port %d (%d)\n", + queue, + port, + ret); + } + } + + /* Start port */ + ret = rte_eth_dev_start(port); + if (ret < 0) { + rte_panic("Cannot start port %d (%d)\n", port, ret); + } + } + + check_all_ports_link_status(APP_MAX_NIC_PORTS, (~0x0)); +} + + +void +app_init(void) +{ + app_assign_worker_ids(); + app_init_mbuf_pools(); + app_init_rings_rx(); + app_init_rings_tx(); + app_init_nics(); + + nstat_init(); + printf("Initialization completed.\n"); +} diff --git a/driver/layer.c b/driver/layer.c new file mode 100644 index 0000000..0e3a219 --- /dev/null +++ b/driver/layer.c @@ -0,0 +1,267 @@ +/* Network Layer Jumper Functions + * + * Author : Lu Qiuwen <[email protected]> + * Date : 2014-12-19 + * Last : 2014-12-19 + * + * The code is ported from SAPP Platform <dealpkt/net_common.c, sapp_20141219> + * writted by Li Jia. + * + * Changelog. 
+ * + * + */ + +#include <stdio.h> +#include <stdlib.h> +#include <assert.h> +#include "layer.h" + +#include <linux/if_ether.h> +#include <net/if_arp.h> +#include <netinet/ip6.h> +#include <netinet/ip.h> +#include <netinet/in.h> + +#define PPPOE_TYPE_IPV4 0x0021 +#define PPPOE_TYPE_IPV6 0x0057 + +static int eth_jump_to_layer(int skip_len,const char *raw_data, int raw_layer_type, int expect_layer_type); +static int vlan8021q_jump_to_layer(int skip_len,const char *raw_data, int raw_layer_type, int expect_layer_type); +static int pppoe_jump_to_layer(int skip_len,const char *raw_data, int raw_layer_type, int expect_layer_type); +static int __common_eth_type_dispatch(int skip_len, uint16_t eth_type, const char *next_layer_data, int raw_layer_type, int expect_layer_type); +static int __common_pppoe_type_dispatch(int skip_len, uint16_t eth_type, const char * next_layer_data, int raw_layer_type, int expect_layer_type); + +static inline int check_layer_type(int layer_type) +{ + switch(layer_type){ + case ADDR_TYPE_IPV4: + case ADDR_TYPE_IPV6: + case ADDR_TYPE_VLAN: + case ADDR_TYPE_MAC: + case ADDR_TYPE_ARP: + case ADDR_TYPE_GRE: + case ADDR_TYPE_MPLS: + case ADDR_TYPE_PPPOE_SES: + case ADDR_TYPE_TCP: + case ADDR_TYPE_UDP: + case ADDR_TYPE_L2TP: + break; + + default: + return -1; + } + + return 0; +} + +static int arp_jump_to_layer(int skip_len,const char * raw_data, int raw_layer_type, int except_layer_type) +{ + /* arp协议不承载任何上层其他协议 */ + return -1; +} + + +static int ipv4_jump_to_layer(int skip_len,const char *raw_data, int raw_layer_type, int expect_layer_type) +{ + struct ip *p_ip_hdr = (struct ip *)raw_data; + int ip_hdr_len = p_ip_hdr->ip_hl * 4; + //const char *next_layer_data = raw_data + ip_hdr_len; + + switch(p_ip_hdr->ip_p){ + case IPPROTO_TCP: + if(ADDR_TYPE_TCP == expect_layer_type){ + skip_len += ip_hdr_len; + }else{ + skip_len = -1; /* tcp 层之上不承载其他协议 */ + } + break; + + case IPPROTO_UDP: + if(ADDR_TYPE_UDP == expect_layer_type){ + skip_len += ip_hdr_len; + 
}else{ + skip_len = -1; /* tcp 层之上不承载其他协议, teredo隧道模式不支持此类跳转 */ + } + break; + + default: + skip_len = -1; + break; + } + + if(skip_len < 0){ + return -1; + } + + return skip_len; +} + +static int ipv6_jump_to_layer(int skip_len,const char *raw_data, int raw_layer_type, int expect_layer_type) +{ + // to do !!! + return -1; +} + +static int pppoe_ses_jump_to_layer(int skip_len,const char *raw_data, int raw_layer_type, int expect_layer_type) +{ + uint16_t * eth_type = (uint16_t *) raw_data + 3; + skip_len += 8; + + return __common_pppoe_type_dispatch(skip_len,ntohs(*eth_type), raw_data + 8, raw_layer_type, expect_layer_type); +} + +/* PPPoE层协议跳转 */ +static int __common_pppoe_type_dispatch(int skip_len, uint16_t eth_type, const char * next_layer_data, int raw_layer_type, int expect_layer_type) +{ + switch(eth_type) + { + case PPPOE_TYPE_IPV4: + if(ADDR_TYPE_IPV4 != expect_layer_type){ + skip_len = ipv4_jump_to_layer(skip_len,next_layer_data, ADDR_TYPE_IPV4, expect_layer_type); + } + break; + + case PPPOE_TYPE_IPV6: + if(ADDR_TYPE_IPV6 != expect_layer_type){ + skip_len = ipv6_jump_to_layer(skip_len,next_layer_data, ADDR_TYPE_IPV6, expect_layer_type); + } + break; + + default: + skip_len = -1; + break; + } + + return skip_len; +} + +/* 以太网、VLAN层跳转 */ +static int __common_eth_type_dispatch(int skip_len, uint16_t eth_type, const char *next_layer_data, int raw_layer_type, int expect_layer_type) +{ + switch(eth_type){ + case ETH_P_ARP: + if(ADDR_TYPE_ARP != expect_layer_type){ + skip_len = arp_jump_to_layer(skip_len,next_layer_data, ADDR_TYPE_ARP, expect_layer_type); + } + break; + + case ETH_P_8021Q: + if(ADDR_TYPE_VLAN != expect_layer_type){ + skip_len = vlan8021q_jump_to_layer(skip_len,next_layer_data, ADDR_TYPE_VLAN, expect_layer_type); + } + break; + + case ETH_P_IP: + if(ADDR_TYPE_IPV4 != expect_layer_type){ + skip_len = ipv4_jump_to_layer(skip_len,next_layer_data, ADDR_TYPE_IPV4, expect_layer_type); + } + break; + + case ETH_P_IPV6: + if(ADDR_TYPE_IPV6 != 
expect_layer_type){ + skip_len = ipv6_jump_to_layer(skip_len,next_layer_data, ADDR_TYPE_IPV6, expect_layer_type); + } + break; + + case ETH_P_PPP_SES: + if(ADDR_TYPE_PPPOE_SES != expect_layer_type){ + skip_len = pppoe_ses_jump_to_layer(skip_len,next_layer_data, ADDR_TYPE_PPPOE_SES, expect_layer_type); + } + break; + + default: + skip_len = -1; + break; + } + + return skip_len; +} + +static int vlan8021q_jump_to_layer(int skip_len, const char *raw_data, int raw_layer_type, int expect_layer_type) +{ + uint16_t * eth_type = (uint16_t *) raw_data + 1; + skip_len += 4; + + return __common_eth_type_dispatch(skip_len,ntohs(*eth_type), raw_data + 4, raw_layer_type, expect_layer_type); +} + +static int eth_jump_to_layer(int skip_len, const char *raw_data, int raw_layer_type, int expect_layer_type) +{ + struct ethhdr *p_eth_hdr = (struct ethhdr *)raw_data; + unsigned short eth_type = ntohs(p_eth_hdr->h_proto); + skip_len += sizeof(struct ethhdr); + const char *next_layer_data = raw_data + sizeof(struct ethhdr); + + return __common_eth_type_dispatch(skip_len, eth_type, next_layer_data, raw_layer_type, expect_layer_type); +} + +const void * network_data_jump_to_layer(const char *raw_data, int raw_layer_type, int expect_layer_type) +{ + int ret = 0; + + if(check_layer_type(raw_layer_type) < 0){ + return NULL; + } + + if(check_layer_type(expect_layer_type) < 0){ + return NULL; + } + + switch(raw_layer_type){ + case ADDR_TYPE_MAC: + ret = eth_jump_to_layer(ret,raw_data, raw_layer_type, expect_layer_type); + break; + + case ADDR_TYPE_ARP: + ret = arp_jump_to_layer(ret,raw_data, raw_layer_type, expect_layer_type); + break; + case ADDR_TYPE_VLAN: + ret = vlan8021q_jump_to_layer(ret,raw_data, raw_layer_type, expect_layer_type); + break; + case ADDR_TYPE_PPPOE_SES: + case ADDR_TYPE_MPLS: + case ADDR_TYPE_IPV4: + ret = ipv4_jump_to_layer(ret,raw_data, raw_layer_type, expect_layer_type); + break; + case ADDR_TYPE_IPV6: + case ADDR_TYPE_GRE: + /* to do */ + break; + + default: + return 
NULL; + } + + if(ret < 0) + return NULL; + + return (raw_data + ret); +} + +const void * network_data_jump_to_layer_ip(const char * data, uint16_t * layer_type) +{ + void * dataret_ipv4 = NULL; + void * dataret_ipv6 = NULL; + + dataret_ipv4 = network_data_jump_to_layer(data,ADDR_TYPE_MAC,ADDR_TYPE_IPV4); + + if(dataret_ipv4 != NULL) + { + * layer_type = ADDR_TYPE_IPV4; + return dataret_ipv4; + } + + dataret_ipv6 = network_data_jump_to_layer(data,ADDR_TYPE_MAC,ADDR_TYPE_IPV6); + + if(dataret_ipv6 != NULL) + { + * layer_type = ADDR_TYPE_IPV6; + return dataret_ipv6; + } + + return NULL; +} + + + diff --git a/driver/layer.h b/driver/layer.h new file mode 100644 index 0000000..67f96ab --- /dev/null +++ b/driver/layer.h @@ -0,0 +1,39 @@ +/* Network Layer Jumper Functions + * + * Author : Lu Qiuwen <[email protected]> + * Date : 2014-12-19 + * Last : 2014-12-19 + * + * Changelog. + * + */ + +#ifndef __SERIAL_MULTIPROCESS_LAYER_JMP_INCLUDE_H__ +#define __SERIAL_MULTIPROCESS_LAYER_JMP_INCLUDE_H__ + +#include <stdint.h> + + +//地址类型定义 +enum addr_type_t{ + __ADDR_TYPE_INIT = 0, + ADDR_TYPE_IPV4, /* 1 */ + ADDR_TYPE_IPV6, /* 2 */ + ADDR_TYPE_VLAN, /* 3 */ + ADDR_TYPE_MAC, /* 4 */ + ADDR_TYPE_ARP, /* 5 */ + ADDR_TYPE_GRE, /* 6 */ + ADDR_TYPE_MPLS, /* 7 */ + ADDR_TYPE_PPPOE_SES, /* 8 */ + ADDR_TYPE_TCP, /* 9 */ + ADDR_TYPE_UDP, /* 10 */ + ADDR_TYPE_L2TP, /* 11 */ + ADDR_TYPE_STREAM_TUPLE4_V4, /* 12, 混合地址类型, 基于IPv4地址的四元组信息 */ + ADDR_TYPE_STREAM_TUPLE4_V6, /* 13, 混合地址类型, 基于IPv6地址的四元组信息 */ + __ADDR_TYPE_MAX, /* 14 */ +}; + +const void * network_data_jump_to_layer(const char *raw_data, int raw_layer_type, int expect_layer_type); +const void * network_data_jump_to_layer_ip(const char * data, uint16_t * layer_type); + +#endif diff --git a/driver/main.c b/driver/main.c new file mode 100644 index 0000000..34bffb4 --- /dev/null +++ b/driver/main.c @@ -0,0 +1,228 @@ + +#include <stdio.h> +#include <signal.h> +#include <unistd.h> + + +#include <rte_common.h> +#include <rte_byteorder.h> 
+#include <rte_log.h> +#include <rte_memory.h> +#include <rte_memcpy.h> +#include <rte_memzone.h> +#include <rte_tailq.h> +#include <rte_eal.h> +#include <rte_per_lcore.h> +#include <rte_launch.h> +#include <rte_atomic.h> +#include <rte_cycles.h> +#include <rte_prefetch.h> +#include <rte_lcore.h> +#include <rte_per_lcore.h> +#include <rte_branch_prediction.h> +#include <rte_interrupts.h> +#include <rte_pci.h> +#include <rte_random.h> +#include <rte_debug.h> +#include <rte_ether.h> +#include <rte_ethdev.h> +#include <rte_ring.h> +#include <rte_mempool.h> +#include <rte_mbuf.h> +#include <rte_ip.h> +#include <rte_tcp.h> +#include <rte_lpm.h> +#include <rte_memcpy.h> + +#include "nl2fwd.h" +#include "main.h" +#include "apt.h" +#include "nstat.h" + + +FILE *warn_output_stream = NULL; +FILE *err_output_stream = NULL; + +#if 0 +static inline struct rte_mbuf * rte_rxmbuf_alloc(struct rte_mempool *mp) +{ + struct rte_mbuf *m; + m = __rte_mbuf_raw_alloc(mp); + __rte_mbuf_sanity_check_raw(m,RTE_MBUF_PKT,0); + + return (m); +} + +int dpdk_register_callfunc(int (*func)(const unsigned char *data,int datalen,int port_in,int lcore)) +{ + app.pkt_callback_f.rx_pkt_process = func; + app.pkt_callback_f.rx_pkt_process_dst = NULL; + return 0; +} + +int dpdk_register_callfunc_dst(int (*func)(const unsigned char *data,int datalen,int dst,int lcore)) +{ + app.pkt_callback_f.rx_pkt_process_dst = func; + app.pkt_callback_f.rx_pkt_process = NULL; + return 0; +} + +int dpdk_send_packet_dst(unsigned char *data, int datalen, int dst) +{ + if(dst <0 || dst >= (int)app.n_rxtx_stream) + return -1; + uint32_t port = app.rxtx_stream_record[dst].tx_port; + + return dpdk_send_packet_port(data,datalen,port); +} + +int app_lcore_worker_tx_buffer_to_send (struct rte_mbuf *pkt, uint8_t port); +#endif +#if 0 +int dpdk_send_packet_port(const unsigned char *data, int datalen, int port) +{ + struct rte_mbuf * pkt_burst = NULL; + + int lcore_id = rte_lcore_id(); + + if(data == NULL) { + 
fprintf(warn_output_stream,"WARNING(core_id=%d,datalen=%d): Invalid TX packet info, ignore the pkt.\n", + lcore_id,datalen); + return -1; + } + + pkt_burst = rte_rxmbuf_alloc(app.lcore_params[lcore_id].pool); + + if(pkt_burst == NULL) { + fprintf(warn_output_stream,"WARNING(core_id=%d),Alloc mbuf failed,ignore the pkt\n", + lcore_id); + return -1; + } + + pkt_burst->pkt.nb_segs = 1; + pkt_burst->pkt.next = NULL; + pkt_burst->pkt.in_port = port; + pkt_burst->pkt.pkt_len = datalen; + pkt_burst->pkt.data_len = datalen; + + rte_memcpy(pkt_burst->pkt.data, data, datalen); + pkt_burst->ol_flags |= PKT_TX_IP_CKSUM; + + app_lcore_worker_tx_buffer_to_send(pkt_burst,port); + + return 0; +} +#endif + +void sigint_handler(int signo) +{ + uint8_t portid; + uint8_t nb_ports = rte_eth_dev_count(); + + signo = 0; + + for (portid = 0; portid < nb_ports; portid++) { + rte_eth_dev_stop((uint8_t) portid); + printf(" stop ethernet device %d \n", portid); + } + + exit(0); +} + + +#ifndef NL2FWD_APT_MAXPARAS +#define NL2FWD_APT_MAXPARAS 2048 +#endif + +#ifndef NL2FWD_APT_STRING_MAX +#define NL2FWD_APT_STRING_MAX 2048 +#endif + +#ifndef NL2FWD_APT_ENABLE +#define NL2FWD_APT_ENABLE 1 +#endif + +int dpdk_init_apt(); +int dpdk_init_t(int argc, char **argv); + +int main(int argc,char **argv) +{ +#if NL2FWD_APT_ENABLE + dpdk_init_apt(); +#else + dpdk_init_t(argc,argv); +#endif +} + +int dpdk_init_apt() +{ + char * argv_t[NL2FWD_APT_MAXPARAS]; + int argc_t = 0; + + for(int i = 0; i < NL2FWD_APT_MAXPARAS; i++) + { + argv_t[i] = (char *)malloc(sizeof(char) * NL2FWD_APT_STRING_MAX); + } + strncpy(argv_t[argc_t++],"./nl2fwd",NL2FWD_APT_STRING_MAX); + + apt_paras_eal(NL2FWD_APT_MAXPARAS,&argc_t,argv_t); + dpdk_init_t(argc_t,argv_t); + + return 0; +} + +int dpdk_init_t(int argc, char **argv) +{ + int ret; + + warn_output_stream = stderr; + err_output_stream = stderr; + + /* Init EAL */ + ret = rte_eal_init(argc,argv); + if (ret < 0) + return -1; + + /* Parse application arguments (after the EAL ones) 
*/ + ret = app_parse_args(argc, argv); + if (ret < 0) { + app_print_usage(); + return -1; + } + + /* Init */ + app_init(); + app_print_params(); + + /* Install the signal handlers */ + signal(SIGHUP, sigint_handler); + signal(SIGINT, sigint_handler); + signal(SIGQUIT, sigint_handler); + signal(SIGABRT, sigint_handler); + signal(SIGTERM, sigint_handler); + + + dpdk_run(); + + return 0; +} + + +int dpdk_run() +{ + uint32_t lcore; +#if APP_STATS + pthread_t ntid; + pthread_create(&ntid, NULL,nstat_thread_entry,NULL); +#endif + + /* Launch per-lcore init on every lcore */ + rte_eal_mp_remote_launch(app_lcore_main_loop, NULL, CALL_MASTER); + RTE_LCORE_FOREACH_SLAVE(lcore) { + if (rte_eal_wait_lcore(lcore) < 0) + { + return -1; + } + } + return 0; +} diff --git a/driver/nl2fwd.h b/driver/nl2fwd.h new file mode 100644 index 0000000..5277de0 --- /dev/null +++ b/driver/nl2fwd.h @@ -0,0 +1,19 @@ + +/* Header file of new_l2fwd_lib in Load Balance Model. + * Autuor : Lu Qiuwen + * Date : 2014-03-04 + */ + + +#ifndef __L2FWD_LIB_INCLUDE__ +#define __L2FWD_LIB_INCLUDE__ + +int dpdk_send_packet_dst(unsigned char *data, int datalen, int dst); +int dpdk_send_packet_port(const unsigned char *data, int datalen, int port); +int dpdk_init(int argc, char **argv); +int dpdk_run(); +int dpdk_register_callfunc(int (*func)(const unsigned char *data,int datalen,int port_in,int lcore)); +int dpdk_register_callfunc_dst(int (*func)(const unsigned char *data,int datalen,int dst,int lcore)); + + +#endif diff --git a/driver/nstat.c b/driver/nstat.c new file mode 100644 index 0000000..14e01ab --- /dev/null +++ b/driver/nstat.c @@ -0,0 +1,270 @@ +
+
+/* Stat Module in Serial-Multiprocess
+ Author : Lu Qiuwen <[email protected]>
+ Date : 2014-12-07
+
+*/
+
+
+#include <sys/mman.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <string.h>
+
+#include <rte_memory.h>
+#include <rte_debug.h>
+#include <rte_memcpy.h>
+#include <rte_cycles.h>
+#include <rte_common.h>
+#include <rte_atomic.h>
+
+#include "nstat.h"
+#include <main.h>
+
#define SHM_FILEMODE 00777  /* permission bits for the exported shm objects */

/* Live counters, backed by POSIX shared memory so external tools can read them. */
struct nstat_handle * nstat_common_handle = NULL;
/* Private heap snapshot of the previous sample; used to compute per-interval deltas. */
struct nstat_handle * nstat_handle_last = NULL;
/* Aggregated drop summary, also exported through shared memory; guarded by its rwlock. */
struct stat_summerize_t * nstat_summerize = NULL;

void nstat_summerize_loop(struct nstat_handle * handle_now, struct nstat_handle * handle_past);
void nstat_print_loop(struct nstat_handle * handle_now, struct nstat_handle * handle_past, FILE * fstream);
+
#define TBPS (1ull * 1000 * 1000 * 1000 * 1000)
#define GBPS (1ull * 1000 * 1000 * 1000)
#define MBPS (1ull * 1000 * 1000)
#define KBPS (1ull * 1000)

/* Scale a raw counter into a human-readable value plus an SI suffix
 * ('T', 'G', 'M', 'K', or ' ' for no scaling).  A threshold must be
 * strictly exceeded before its suffix is chosen. */
static void unit_translate(uint64_t number, float * f_number, char * unit)
{
    static const struct {
        uint64_t threshold;
        char     suffix;
    } scale_table[] = {
        { TBPS, 'T' },
        { GBPS, 'G' },
        { MBPS, 'M' },
        { KBPS, 'K' },
    };

    for (size_t i = 0; i < sizeof(scale_table) / sizeof(scale_table[0]); i++)
    {
        if (number > scale_table[i].threshold)
        {
            *f_number = number * 1.0 / scale_table[i].threshold;
            *unit = scale_table[i].suffix;
            return;
        }
    }

    /* below 1K: report the raw value with a blank suffix */
    *f_number = number * 1.0;
    *unit = ' ';
}
+
+int nstat_init()
+{
+ int shm_fd = shm_open(NSTAT_SHAREDMEMORY_SYMBOL, O_RDWR|O_CREAT, SHM_FILEMODE);
+ ftruncate(shm_fd, sizeof(struct nstat_handle));
+
+ void * shm_ptr = mmap(NULL, sizeof(struct nstat_handle), PROT_READ | PROT_WRITE,
+ MAP_SHARED, shm_fd, 0);
+ if(shm_ptr == NULL)
+ rte_panic("cannot create nstat shared memory");
+
+ int shm_fd_sum = shm_open(NSTAT_SHAREMEMORY_SUMMERIZE_SYMBOL, O_RDWR|O_CREAT, SHM_FILEMODE);
+ ftruncate(shm_fd_sum, sizeof(struct stat_summerize_t));
+
+ void * shm_ptr_sum = mmap(NULL, sizeof(struct stat_summerize_t), PROT_READ | PROT_WRITE,
+ MAP_SHARED, shm_fd_sum, 0);
+ if(shm_ptr_sum == NULL)
+ rte_panic("cannot create nstat shared memory");
+
+ nstat_common_handle = (struct nstat_handle *) shm_ptr;
+ nstat_handle_last = (struct nstat_handle *)malloc(sizeof(struct nstat_handle));
+ nstat_summerize = (struct stat_summerize_t *) shm_ptr_sum;
+
+ memset(nstat_common_handle,0,sizeof(struct nstat_handle));
+ memset(nstat_handle_last,0,sizeof(struct nstat_handle));
+ memset(nstat_summerize,0,sizeof(struct stat_summerize_t));
+
+ nstat_common_handle->stat_interval = app.statistics.sample_time;
+ nstat_handle_last->stat_interval = app.statistics.sample_time;
+
+ rte_rwlock_init(&nstat_summerize->rwlock);
+
+ return 0;
+}
+
+int nstat_destroy()
+{
+ free(nstat_handle_last);
+ free(nstat_summerize);
+ return shm_unlink(NSTAT_SHAREDMEMORY_SYMBOL);
+}
+
+void * nstat_thread_entry()
+{
+ if(!app.statistics.enable)
+ return (void *)0;
+
+ while(1)
+ {
+ sleep(app.statistics.sample_time);
+ nstat_summerize_loop(nstat_common_handle,nstat_handle_last);
+
+ if(app.statistics.is_printmsg)
+ nstat_print_loop(nstat_common_handle,nstat_handle_last,stdout);
+
+ rte_memcpy(nstat_handle_last,nstat_common_handle,sizeof(struct nstat_handle));
+ }
+
+}
+
+void nstat_summerize_loop(struct nstat_handle * handle_now, struct nstat_handle * handle_past)
+{
+ rte_rwlock_write_lock(&nstat_summerize->rwlock);
+
+ nstat_summerize->dropped = 0;
+
+ for(int port_id = 0; port_id < APP_MAX_NIC_PORTS; port_id ++)
+ {
+ if(app.enabled_port_mask & (1 << port_id) == 0)
+ continue;
+ nstat_summerize->dropped += handle_now->port[port_id].rx_drop_packets;
+ nstat_summerize->dropped += handle_now->port[port_id].tx_drop_packets;
+ }
+
+ for(int lcore_id = 0; lcore_id < APP_MAX_LCORES; lcore_id++)
+ {
+ if(app.lcore_params[lcore_id].type == e_APP_LCORE_WORKER)
+ {
+ nstat_summerize->dropped += handle_now->worker_stat[lcore_id].dropped;
+ }
+ if(app.lcore_params[lcore_id].type == e_APP_LCORE_IO)
+ {
+ nstat_summerize->dropped += handle_now->io_rx[lcore_id].dropped;
+ nstat_summerize->dropped += handle_now->io_tx[lcore_id].dropped;
+ }
+ }
+
+ rte_rwlock_write_unlock(&nstat_summerize->rwlock);
+
+ return;
+}
+
+void nstat_print_loop(struct nstat_handle * handle_now, struct nstat_handle * handle_past, FILE * fstream)
+{
+ uint64_t calibration;
+ static uint64_t hpet_last = 0, hpet_cur = 0;
+
+ hpet_cur = rte_get_timer_cycles();
+ if(hpet_last != 0)
+ calibration = (hpet_cur - hpet_last) * 1.0 / rte_get_timer_hz();
+ else
+ calibration = handle_now->stat_interval;
+
+ hpet_last = hpet_cur;
+
+ const char clr[] = {27,'[','2','J','\0'};
+ const char topleft[] = {27,'[','1',';','1','H','\0'};
+
+ fprintf(fstream,"%s%s",clr,topleft);
+
+ fprintf(fstream,"--------------------------------------------------------------------------------------------------------------------------------------\n");
+ fprintf(fstream," Network Ports Statistics \n");
+ fprintf(fstream,"--------------------------------------------------------------------------------------------------------------------------------------\n");
+
+
+ for(int port_id = 0; port_id < APP_MAX_NIC_PORTS; port_id ++)
+ {
+ if((app.enabled_port_mask & (1 << port_id)) == 0)
+ continue;
+
+ uint64_t nic_rx_burst = (handle_now->port[port_id].rx_packets - handle_past->port[port_id].rx_packets) / calibration;
+ uint64_t nic_tx_burst = (handle_now->port[port_id].tx_packets - handle_past->port[port_id].tx_packets) / calibration;
+ uint64_t nic_rx_bytes = (handle_now->port[port_id].rx_bytes - handle_past->port[port_id].rx_bytes) / calibration;
+ uint64_t nic_tx_bytes = (handle_now->port[port_id].tx_bytes - handle_past->port[port_id].tx_bytes) / calibration;
+
+ float fps_rx,bps_rx,fps_tx,bps_tx;
+ char fps_rx_unit,bps_rx_unit,fps_tx_unit,bps_tx_unit;
+
+ unit_translate(nic_rx_burst, &fps_rx, &fps_rx_unit);
+ unit_translate(nic_tx_burst, &fps_tx, &fps_tx_unit);
+ unit_translate(nic_rx_bytes * 8, &bps_rx, &bps_rx_unit);
+ unit_translate(nic_tx_bytes * 8, &bps_tx, &bps_tx_unit);
+
+ fprintf(fstream,"Port %d\n",port_id);
+ fprintf(fstream,"RX: packets%20lu | bytes:%20lu | dropped %10lu | fps:%7.2f%c | bps: %7.2f%c\n",
+ handle_now->port[port_id].rx_packets, handle_now->port[port_id].rx_bytes,
+ handle_now->port[port_id].rx_drop_packets, fps_rx,fps_rx_unit,bps_rx,bps_rx_unit);
+ fprintf(fstream,"TX: packets%20lu | bytes:%20lu | dropped %10lu | fps:%7.2f%c | bps: %7.2f%c\n",
+ handle_now->port[port_id].tx_packets, handle_now->port[port_id].tx_bytes,
+ handle_now->port[port_id].tx_drop_packets, fps_tx, fps_tx_unit, bps_tx, bps_tx_unit);
+ }
+
+ fprintf(fstream,"--------------------------------------------------------------------------------------------------------------------------------------\n");
+ fprintf(fstream," Input/Output(I/O) Statistics \n");
+ fprintf(fstream,"--------------------------------------------------------------------------------------------------------------------------------------\n");
+
+ for(int lcore_id = 0; lcore_id < APP_MAX_LCORES; lcore_id++)
+ {
+ if(app.lcore_params[lcore_id].type == e_APP_LCORE_IO)
+ {
+ uint64_t io_rx_burst = (handle_now->io_rx[lcore_id].packets - handle_past->io_rx[lcore_id].packets) / calibration;
+ uint64_t io_tx_burst = (handle_now->io_tx[lcore_id].packets - handle_past->io_tx[lcore_id].packets) / calibration;
+ uint64_t io_rx_bytes = (handle_now->io_rx[lcore_id].bytes - handle_past->io_rx[lcore_id].bytes) /calibration;
+ uint64_t io_tx_bytes = (handle_now->io_tx[lcore_id].bytes - handle_past->io_tx[lcore_id].bytes) /calibration;
+
+ float fps_rx,bps_rx,fps_tx,bps_tx;
+ char fps_rx_unit,bps_rx_unit,fps_tx_unit,bps_tx_unit;
+
+ unit_translate(io_rx_burst, &fps_rx, &fps_rx_unit);
+ unit_translate(io_tx_burst, &fps_tx, &fps_tx_unit);
+ unit_translate(io_rx_bytes * 8, &bps_rx, &bps_rx_unit);
+ unit_translate(io_tx_bytes * 8, &bps_tx, &bps_tx_unit);
+
+ fprintf(fstream,"Core %d\n",lcore_id);
+ fprintf(fstream,"RX: packets%20lu | bytes:%20lu | dropped %10lu | fps:%7.2f%c | bps: %7.2f%c\n",
+ handle_now->io_rx[lcore_id].packets , handle_now->io_rx[lcore_id].bytes,
+ handle_now->io_rx[lcore_id].dropped, fps_rx,fps_rx_unit,bps_rx,bps_rx_unit);
+ fprintf(fstream,"TX: packets%20lu | bytes:%20lu | dropped %10lu | fps:%7.2f%c | bps: %7.2f%c\n",
+ handle_now->io_tx[lcore_id].packets , handle_now->io_tx[lcore_id].bytes,
+ handle_now->io_tx[lcore_id].dropped, fps_tx,fps_tx_unit,bps_tx,bps_tx_unit);
+ }
+ }
+
+ fprintf(fstream,"--------------------------------------------------------------------------------------------------------------------------------------\n");
+ fprintf(fstream," Worker(Client Process) Statistics \n");
+ fprintf(fstream,"--------------------------------------------------------------------------------------------------------------------------------------\n");
+
+ for(int lcore_id = 0; lcore_id < APP_MAX_LCORES; lcore_id++)
+ {
+ if(app.lcore_params[lcore_id].type == e_APP_LCORE_WORKER)
+ {
+ uint64_t runtime_delta = handle_now->worker_stat[lcore_id].app_runtime - handle_past->worker_stat[lcore_id].app_runtime;
+ uint64_t cycle_delta = handle_now->worker_stat[lcore_id].app_cycles - handle_past->worker_stat[lcore_id].app_cycles;
+ float runtime_per_cycle = 0;
+
+ if(likely(cycle_delta != 0))
+ runtime_per_cycle = (float)runtime_delta / cycle_delta;
+ else
+ runtime_per_cycle = 0;
+
+ fprintf(fstream,"WORKER %2d | packets %20lu | bytes: %20lu | dropped %10lu | runtime %7.2f\n",
+ lcore_id,
+ handle_now->worker_stat[lcore_id].packets,
+ handle_now->worker_stat[lcore_id].bytes,
+ handle_now->worker_stat[lcore_id].dropped,
+ runtime_per_cycle);
+ }
+ }
+
+ fprintf(fstream,"--------------------------------------------------------------------------------------------------------------------------------------\n");
+ return;
+}
diff --git a/driver/nstat.h b/driver/nstat.h new file mode 100644 index 0000000..6611495 --- /dev/null +++ b/driver/nstat.h @@ -0,0 +1,72 @@ +
+#ifndef __SERIAL_MULTIPROCESS_NSTAT_SERVER_INCLUDE_H__
+#define __SERIAL_MULTIPROCESS_NSTAT_SERVER_INCLUDE_H__
+
+#include <rte_mbuf.h>
+#include <nstat_common.h>
+
+// Get the Pkts total length.
+static inline uint64_t nstat_pktslen(struct rte_mbuf ** mbufs, unsigned nb_mbufs)
+{
+ uint64_t datalen = 0;
+ for(int i = 0; i < nb_mbufs; i++)
+ {
+ datalen += mbufs[i]->pkt.pkt_len + APP_PKT_OVERHEAD;
+ }
+
+ return datalen;
+}
+
+static inline void nstat_port_count_rx(struct nstat_handle * handle, struct rte_mbuf ** mbufs, unsigned nb_mbufs, uint8_t port_id)
+{
+ handle->port[port_id].rx_packets += nb_mbufs;
+ handle->port[port_id].rx_bytes += nstat_pktslen(mbufs, nb_mbufs);
+ return;
+}
+
+static inline void nstat_port_count_tx(struct nstat_handle * handle, struct rte_mbuf ** mbufs, unsigned nb_mbufs, uint8_t port_id)
+{
+ handle->port[port_id].tx_packets += nb_mbufs;
+ handle->port[port_id].tx_bytes += nstat_pktslen(mbufs, nb_mbufs);
+ return;
+}
+
+static inline void nstat_port_count_remove_tx(struct nstat_handle * handle, struct rte_mbuf ** mbufs, unsigned nb_mbufs, uint8_t port_id)
+{
+ handle->port[port_id].tx_packets -= nb_mbufs;
+ handle->port[port_id].tx_bytes -= nstat_pktslen(mbufs, nb_mbufs);
+ handle->port[port_id].tx_drop_packets += nb_mbufs;
+ return;
+}
+
+static inline void nstat_io_count_rx(struct nstat_handle * handle, struct rte_mbuf ** mbufs, unsigned nb_mbufs, uint8_t lcore_id)
+{
+ handle->io_rx[lcore_id].packets += nb_mbufs;
+ handle->io_rx[lcore_id].bytes += nstat_pktslen(mbufs,nb_mbufs);
+ return;
+}
+
+static inline void nstat_io_count_tx(struct nstat_handle * handle, struct rte_mbuf ** mbufs, unsigned nb_mbufs, uint8_t lcore_id)
+{
+ handle->io_tx[lcore_id].packets += nb_mbufs;
+ handle->io_tx[lcore_id].bytes += nstat_pktslen(mbufs,nb_mbufs);
+ return;
+}
+
+static inline void nstat_io_drop_rx(struct nstat_handle * handle, struct rte_mbuf ** mbufs, unsigned nb_mbufs, uint8_t lcore_id)
+{
+ handle->io_rx[lcore_id].dropped += nb_mbufs;
+ return;
+}
+
+static inline void nstat_io_drop_tx(struct nstat_handle * handle, struct rte_mbuf ** mbufs, unsigned nb_mbufs, uint8_t lcore_id)
+{
+ handle->io_tx[lcore_id].dropped += nb_mbufs;
+ return;
+}
+
/* Stats subsystem lifecycle — definitions live in nstat.c (not visible here).
 * NOTE(review): in C, an empty parameter list "()" declares *unspecified*
 * arguments, not "no arguments". If the definitions take no parameters,
 * these should be "(void)" — confirm against nstat.c before changing. */
int nstat_init();
int nstat_destroy();
/* Entry point of the statistics thread — presumably handed to
 * pthread_create, in which case the definition takes a "void *" argument;
 * TODO confirm the definition's signature before tightening this decl. */
void * nstat_thread_entry();
+
+#endif
diff --git a/driver/runtime.c b/driver/runtime.c new file mode 100644 index 0000000..5f47736 --- /dev/null +++ b/driver/runtime.c @@ -0,0 +1,531 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2013 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <inttypes.h> +#include <sys/types.h> +#include <string.h> +#include <sys/queue.h> +#include <stdarg.h> +#include <errno.h> +#include <getopt.h> +#include <unistd.h> + +#include <sys/time.h> + + +#include <rte_common.h> +#include <rte_byteorder.h> +#include <rte_log.h> +#include <rte_memory.h> +#include <rte_memcpy.h> +#include <rte_memzone.h> +#include <rte_tailq.h> +#include <rte_eal.h> +#include <rte_per_lcore.h> +#include <rte_launch.h> +#include <rte_atomic.h> +#include <rte_cycles.h> +#include <rte_prefetch.h> +#include <rte_lcore.h> +#include <rte_per_lcore.h> +#include <rte_branch_prediction.h> +#include <rte_interrupts.h> +#include <rte_pci.h> +#include <rte_random.h> +#include <rte_debug.h> +#include <rte_ether.h> +#include <rte_ethdev.h> +#include <rte_ring.h> +#include <rte_mempool.h> +#include <rte_mbuf.h> +#include <rte_ip.h> +#include <rte_tcp.h> +#include <rte_lpm.h> +#include <rte_spinlock.h> +#include <rte_ether.h> +#include <rte_ip.h> +#include <rte_tcp.h> +#include <rte_udp.h> + +#include "main.h" +#include "nstat.h" +#include "layer.h" + +union hashkey_un +{ + uint64_t key[2]; + struct hashkey + { + uint32_t src_ip; + uint32_t dst_ip; + + uint16_t src_port; + uint16_t dst_port; + uint8_t ip_prot; + uint8_t rsv0; + uint16_t rsv1; + }hashkey_st; +}; + +static inline int get_hash_result(uint8_t *data, uint32_t *hash_value) +{ + uint16_t protrol_type = 0; + data = (uint8_t *) network_data_jump_to_layer_ip(data,&protrol_type); + + switch(protrol_type) + { + case ADDR_TYPE_IPV4: + { + struct ipv4_hdr * ipv4hdr = (struct ipv4_hdr *) data; + uint32_t key_ipv4 = 0; + + uint8_t *src_addr = (uint8_t *)&(ipv4hdr->src_addr); + uint8_t *dst_addr = (uint8_t *)&(ipv4hdr->dst_addr); + + key_ipv4 += src_addr[0] + dst_addr[0]; + key_ipv4 += src_addr[1] + dst_addr[1]; + key_ipv4 += src_addr[2] + dst_addr[2]; + key_ipv4 += src_addr[3] + dst_addr[3]; + + * hash_value = key_ipv4; + 
break; + } + case ADDR_TYPE_IPV6: + { + struct ipv6_hdr * ipv6hdr = (struct ipv6_hdr *)data; + uint32_t key_ipv6; + int i = 0; + for(i = 0; i < 16; i++) + key_ipv6 += (ipv6hdr->src_addr[i])+(ipv6hdr->dst_addr[i]); + + *hash_value = key_ipv6; + break; + } + + default: + { + * hash_value = rte_rand(); + } + } + + return 0; +} + +static inline void +app_lcore_io_rx_buffer_to_send ( + struct app_lcore_params_io *lp, + uint32_t worker, + struct rte_mbuf *mbuf, + uint32_t bsz) +{ + uint32_t pos; + int ret; + unsigned lcore_id = rte_lcore_id(); + + pos = lp->rx.mbuf_out[worker].n_mbufs; + lp->rx.mbuf_out[worker].array[pos ++] = mbuf; + + if (likely(pos < bsz)) { + lp->rx.mbuf_out[worker].n_mbufs = pos; + return; + } + + ret = rte_ring_enqueue_bulk( + lp->rx.rings[worker], + (void **) lp->rx.mbuf_out[worker].array, + bsz); + + if (unlikely(ret == -ENOBUFS)) { + uint32_t k; + nstat_io_drop_rx(nstat_common_handle,lp->rx.mbuf_out[worker].array,bsz,lcore_id); + + for (k = 0; k < bsz; k ++) { + struct rte_mbuf *m = lp->rx.mbuf_out[worker].array[k]; + rte_pktmbuf_free(m); + } + } + + lp->rx.mbuf_out[worker].n_mbufs = 0; + lp->rx.mbuf_out_flush[worker] = 0; +} + +static inline void +app_lcore_io_rx( + struct app_lcore_params_io *lp, + uint32_t n_workers, + uint32_t bsz_rd, + uint32_t bsz_wr) +{ + struct rte_mbuf *mbuf_1_0, *mbuf_1_1, *mbuf_2_0, *mbuf_2_1; + uint8_t *data_1_0, *data_1_1 = NULL; + uint32_t i; + uint32_t hash_value; + unsigned lcore_id = rte_lcore_id(); + + for (i = 0; i < lp->rx.n_nic_queues; i ++) { + uint8_t port = lp->rx.nic_queues[i].port; + uint8_t queue = lp->rx.nic_queues[i].queue; + uint32_t n_mbufs, j; + + n_mbufs = rte_eth_rx_burst(port, queue, lp->rx.mbuf_in.array, (uint16_t) bsz_rd); + + if(unlikely(n_mbufs == 0)) + continue; + + nstat_port_count_rx(nstat_common_handle, lp->rx.mbuf_in.array, n_mbufs, port); + nstat_io_count_rx(nstat_common_handle, lp->rx.mbuf_in.array, n_mbufs, lcore_id); + +#if APP_IO_RX_DROP_ALL_PACKETS + for (j = 0; j < n_mbufs; j 
++) { + struct rte_mbuf *pkt = lp->rx.mbuf_in.array[j]; + rte_pktmbuf_free(pkt); + } + + continue; +#endif + uint32_t lcore = rte_lcore_id(); + + mbuf_1_0 = lp->rx.mbuf_in.array[0]; + mbuf_1_1 = lp->rx.mbuf_in.array[1]; + data_1_0 = rte_pktmbuf_mtod(mbuf_1_0, uint8_t *); + if (likely(n_mbufs > 1)) { + data_1_1 = rte_pktmbuf_mtod(mbuf_1_1, uint8_t *); + } + + mbuf_2_0 = lp->rx.mbuf_in.array[2]; + mbuf_2_1 = lp->rx.mbuf_in.array[3]; + APP_IO_RX_PREFETCH0(mbuf_2_0); + APP_IO_RX_PREFETCH0(mbuf_2_1); + + for (j = 0; j + 3 < n_mbufs; j += 2) { + struct rte_mbuf *mbuf_0_0, *mbuf_0_1; + uint8_t *data_0_0, *data_0_1; + uint32_t worker_0, worker_1; + + mbuf_0_0 = mbuf_1_0; + mbuf_0_1 = mbuf_1_1; + data_0_0 = data_1_0; + data_0_1 = data_1_1; + + mbuf_1_0 = mbuf_2_0; + mbuf_1_1 = mbuf_2_1; + data_1_0 = rte_pktmbuf_mtod(mbuf_2_0, uint8_t *); + data_1_1 = rte_pktmbuf_mtod(mbuf_2_1, uint8_t *); + APP_IO_RX_PREFETCH0(data_1_0); + APP_IO_RX_PREFETCH0(data_1_1); + + mbuf_2_0 = lp->rx.mbuf_in.array[j+4]; + mbuf_2_1 = lp->rx.mbuf_in.array[j+5]; + APP_IO_RX_PREFETCH0(mbuf_2_0); + APP_IO_RX_PREFETCH0(mbuf_2_1); + + if(get_hash_result(data_0_0, &hash_value)) + { + rte_pktmbuf_free(mbuf_0_0); + } + else + { + worker_0 = hash_value % n_workers; + app_lcore_io_rx_buffer_to_send(lp, worker_0, mbuf_0_0, bsz_wr); + } + + if(get_hash_result(data_0_1, &hash_value)) + { + rte_pktmbuf_free(mbuf_0_1); + } + else + { + worker_1 = hash_value % n_workers; + app_lcore_io_rx_buffer_to_send(lp, worker_1, mbuf_0_1, bsz_wr); + } + } + + /* Handle the last 1, 2 (when n_mbufs is even) or 3 (when n_mbufs is odd) packets */ + for ( ; j < n_mbufs; j += 1) { + struct rte_mbuf *mbuf; + uint8_t *data; + uint32_t worker; + + mbuf = mbuf_1_0; + mbuf_1_0 = mbuf_1_1; + mbuf_1_1 = mbuf_2_0; + mbuf_2_0 = mbuf_2_1; + + data = rte_pktmbuf_mtod(mbuf, uint8_t *); + + APP_IO_RX_PREFETCH0(mbuf_1_0); + + if(get_hash_result(data, &hash_value)) + { + rte_pktmbuf_free(mbuf); + } + else + { + worker = hash_value % n_workers; + 
app_lcore_io_rx_buffer_to_send(lp, worker, mbuf, bsz_wr); + } + } + } +} + +static inline void +app_lcore_io_rx_flush(struct app_lcore_params_io *lp, uint32_t n_workers) +{ + uint32_t worker; + unsigned lcore_id = rte_lcore_id(); + + for (worker = 0; worker < n_workers; worker ++) { + int ret; + + if (likely((lp->rx.mbuf_out_flush[worker] == 0) || + (lp->rx.mbuf_out[worker].n_mbufs == 0))) { + lp->rx.mbuf_out_flush[worker] = 1; + continue; + } + + ret = rte_ring_sp_enqueue_bulk(lp->rx.rings[worker],(void **) lp->rx.mbuf_out[worker].array, + lp->rx.mbuf_out[worker].n_mbufs); + + //TODO: STAT Handle + + if (unlikely(ret < 0)) { + uint32_t k; + nstat_io_drop_rx(nstat_common_handle,lp->rx.mbuf_out[worker].array,lp->rx.mbuf_out[worker].n_mbufs,lcore_id); + + for (k = 0; k < lp->rx.mbuf_out[worker].n_mbufs; k ++) { + struct rte_mbuf *pkt_to_free = lp->rx.mbuf_out[worker].array[k]; + rte_pktmbuf_free(pkt_to_free); + } + } + + lp->rx.mbuf_out[worker].n_mbufs = 0; + lp->rx.mbuf_out_flush[worker] = 1; + } +} + + +/* Lu Qiuwen<[email protected]>, at 2014-12-04 */ +/* Modified from DPDK 1.7.1 examples/load_balancer/runtime.c */ + +static inline void +app_lcore_io_tx( + struct app_lcore_params_io *lp, + uint32_t n_workers, + uint32_t bsz_rd, + uint32_t bsz_wr) +{ + uint32_t worker; + unsigned lcore_id = rte_lcore_id(); + + for (worker = 0; worker < n_workers; worker ++) { + uint32_t i; + + for (i = 0; i < lp->tx.n_nic_ports; i ++) { + uint8_t port = lp->tx.nic_ports[i]; + uint8_t nqueue = lp->tx.nic_n_queues[i]; + struct rte_ring *ring = lp->tx.rings[port][worker]; + uint32_t n_mbufs, n_pkts; + int ret; + + n_mbufs = lp->tx.mbuf_out[port].n_mbufs; + ret = rte_ring_sc_dequeue_bulk( + ring, + (void **) &lp->tx.mbuf_out[port].array[n_mbufs], + bsz_rd); + + if (unlikely(ret == -ENOENT)) { + continue; + } + + nstat_io_count_tx(nstat_common_handle, &lp->tx.mbuf_out[port].array[n_mbufs],bsz_rd, lcore_id); + n_mbufs += bsz_rd; + +#if APP_IO_TX_DROP_ALL_PACKETS + { + uint32_t j; + 
APP_IO_TX_PREFETCH0(lp->tx.mbuf_out[port].array[0]); + APP_IO_TX_PREFETCH0(lp->tx.mbuf_out[port].array[1]); + + for (j = 0; j < n_mbufs; j ++) { + if (likely(j < n_mbufs - 2)) { + APP_IO_TX_PREFETCH0(lp->tx.mbuf_out[port].array[j + 2]); + } + + rte_pktmbuf_free(lp->tx.mbuf_out[port].array[j]); + } + + lp->tx.mbuf_out[port].n_mbufs = 0; + continue; + } +#endif + + if (unlikely(n_mbufs < bsz_wr)) { + lp->tx.mbuf_out[port].n_mbufs = n_mbufs; + continue; + } + + n_pkts = rte_eth_tx_burst( + port, + worker % nqueue, + lp->tx.mbuf_out[port].array, + (uint16_t) n_mbufs); + + // NSTAT Handle + nstat_port_count_tx(nstat_common_handle, lp->tx.mbuf_out[port].array, + n_mbufs, port); + + if (unlikely(n_pkts < n_mbufs)) { + uint32_t k; + + nstat_port_count_remove_tx(nstat_common_handle, &(lp->tx.mbuf_out[port].array[n_pkts]), + n_mbufs - n_pkts, port); + nstat_io_drop_tx(nstat_common_handle, &(lp->tx.mbuf_out[port].array[n_pkts]), + n_mbufs - n_pkts, lcore_id); + + for (k = n_pkts; k < n_mbufs; k ++) { + struct rte_mbuf *pkt_to_free = lp->tx.mbuf_out[port].array[k]; + rte_pktmbuf_free(pkt_to_free); + } + } + lp->tx.mbuf_out[port].n_mbufs = 0; + lp->tx.mbuf_out_flush[port] = 0; + } + } +} + +static inline void +app_lcore_io_tx_flush(struct app_lcore_params_io *lp) +{ + uint8_t port; + unsigned lcore_id = rte_lcore_id(); + + for (port = 0; port < lp->tx.n_nic_ports; port ++) { + uint32_t n_pkts; + + if (likely((lp->tx.mbuf_out_flush[port] == 0) || + (lp->tx.mbuf_out[port].n_mbufs == 0))) { + lp->tx.mbuf_out_flush[port] = 1; + continue; + } + + n_pkts = rte_eth_tx_burst( + port, + 0, + lp->tx.mbuf_out[port].array, + (uint16_t) lp->tx.mbuf_out[port].n_mbufs); + + nstat_port_count_tx(nstat_common_handle, lp->tx.mbuf_out[port].array, + lp->tx.mbuf_out[port].n_mbufs, port); + + if (unlikely(n_pkts < lp->tx.mbuf_out[port].n_mbufs)) { + uint32_t k; + + nstat_port_count_remove_tx(nstat_common_handle, &(lp->tx.mbuf_out[port].array[n_pkts]), + lp->tx.mbuf_out[port].n_mbufs - n_pkts, port); 
+ nstat_io_drop_tx(nstat_common_handle, &(lp->tx.mbuf_out[port].array[n_pkts]), + lp->tx.mbuf_out[port].n_mbufs - n_pkts, lcore_id); + + for (k = n_pkts; k < lp->tx.mbuf_out[port].n_mbufs; k ++) { + struct rte_mbuf *pkt_to_free = lp->tx.mbuf_out[port].array[k]; + rte_pktmbuf_free(pkt_to_free); + } + } + + lp->tx.mbuf_out[port].n_mbufs = 0; + lp->tx.mbuf_out_flush[port] = 1; + } +} + +/* end ---------- at 2014-12-04 ------- */ + +static void +app_lcore_main_loop_io(void) +{ + uint32_t lcore = rte_lcore_id(); + struct app_lcore_params_io *lp = &app.lcore_params[lcore].io; + uint32_t n_workers = app_get_lcores_worker(); + uint32_t n_io_cores = app_get_lcores_io_rx(); + uint64_t i = 0; + + uint32_t bsz_rx_rd = app.burst_size_io_rx_read; + uint32_t bsz_rx_wr = app.burst_size_io_rx_write; + uint32_t bsz_tx_rd = app.burst_size_io_tx_read; + uint32_t bsz_tx_wr = app.burst_size_io_tx_write; + + for ( ; ; ) { + if (APP_LCORE_IO_FLUSH && (unlikely(i == APP_LCORE_IO_FLUSH))) { + if (likely(lp->rx.n_nic_queues > 0)) { + app_lcore_io_rx_flush(lp, n_workers); + } + i = 0; + } + + if (likely(lp->rx.n_nic_queues > 0)) { + app_lcore_io_rx(lp, n_workers, bsz_rx_rd, bsz_rx_wr); + } + i ++; + + if(APP_LCORE_IO_FLUSH && (unlikely(i == APP_LCORE_IO_FLUSH))) { + if(likely(lp->tx.n_nic_ports > 0)) { + app_lcore_io_tx_flush(lp); + } + i = 0; + } + + if (likely(lp->tx.n_nic_ports> 0)) { + app_lcore_io_tx(lp, n_workers, bsz_rx_rd, bsz_rx_wr); + } + i ++; + } +} + +int +app_lcore_main_loop(__attribute__((unused)) void *arg) +{ + struct app_lcore_params *lp; + unsigned lcore; + + lcore = rte_lcore_id(); + lp = &app.lcore_params[lcore]; + + if (lp->type == e_APP_LCORE_IO) { + printf("Logical core %u (I/O) main loop.\n", lcore); + app_lcore_main_loop_io(); + } + + return 0; +} diff --git a/driver/version.c b/driver/version.c new file mode 100644 index 0000000..1cd1c45 --- /dev/null +++ b/driver/version.c @@ -0,0 +1,5 @@ + +#include "version.h" + +const char __serial_driver_version_20141219__[] 
__attribute__((used)) = "__SERIAL_DRIVER_VERSION_20141219__"; +const char __SERIAL_DRIVER_VERSION_20141219__[] __attribute__((used)) = "__serial_driver_version_20141219__"; diff --git a/driver/version.h b/driver/version.h new file mode 100644 index 0000000..5c421b1 --- /dev/null +++ b/driver/version.h @@ -0,0 +1,9 @@ + +#ifndef __VERSION_LIB_INCLUDE_H__ +#define __VERSION_LIB_INCLUDE_H__ + +extern const char __serial_version_info__[]; +extern const char __serial_version_platform__[]; + +#endif + |
