#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "sc_trace.h" #include #include #include #include #include #include #include #include #include #include const char * mr_logo = " __ ___ _ _____ ______\n" " / |/ /___ ___________(_)__/__ / ___ _________ / ____/___ ____ __ __\n" " / /|_/ / __ `/ ___/ ___/ / __ \\/ / / _ \\/ ___/ __ \\/ / / __ \\/ __ \\/ / / /\n" " / / / / /_/ / / (__ ) / /_/ / /__/ __/ / / /_/ / /___/ /_/ / /_/ / /_/ /\n" " /_/ /_/\\__,_/_/ /____/_/\\____/____/\\___/_/ \\____/\\____/\\____/ .___/\\__, /\n" " /_/ /____/\n"; #ifdef GITVER const char service_git_version[] = GITVER; #else const char service_git_version[] = ""; #endif #ifndef MR_SERVICE_MAX_EAL_ARGC #define MR_SERVICE_MAX_EAL_ARGC 512 #endif #ifndef MR_SERVICE_DEFAULT_MONIT_FILE #define MR_SERVICE_DEFAULT_MONIT_FILE "/var/run/mrzcpd/mrmonit.daemon" #endif #ifndef MR_SERVICE_DEFAULT_HWFILE #define MR_SERVICE_DEFAULT_HWFILE "/var/run/mrzcpd/hwfile.json" #endif #ifndef MR_SERVICE_DEFAULT_CFG #define MR_SERVICE_DEFAULT_CFG "/etc/mrglobal.conf" #endif #ifndef MR_SERVICE_DEFAULT_DYNAMIC_CFG #define MR_SERVICE_DEFAULT_DYNAMIC_CFG "/etc/mrglobal.dynamic.conf" #endif #ifndef MR_SERVICE_DEFAULT_APPSYM #define MR_SERVICE_DEFAULT_APPSYM "service" #endif #ifndef MR_SERVICE_DEFAULT_PROGSYM #define MR_SERVICE_DEFAULT_PROGSYM "mrzcpd" #endif #ifndef MR_PMD_BASIC_LIB_PATH #define MR_PMD_BASIC_LIB_PATH "/opt/tsg/mrzcpd/lib/pmds" #endif #ifndef MR_PMD_FOR_MLX5_DRIVER_PATH #define MR_PMD_FOR_MLX5_DRIVER_PATH "/opt/tsg/mrzcpd/lib/pmds/mlx5" #endif #ifndef MR_SERVICE_DEFAULT_RX_BURST #define MR_SERVICE_DEFAULT_RX_BURST 32 #endif #ifndef MR_SERVICE_DEFAULT_TX_BURST #define MR_SERVICE_DEFAULT_TX_BURST 32 #endif #ifndef MR_SERVICE_DEFAULT_IDLE_THRESHOLD #define MR_SERVICE_DEFAULT_IDLE_THRESHOLD 1000000 #endif #ifndef MR_SERVICE_DEFAULT_CHECK_SPINLOCK #define MR_SERVICE_DEFAULT_CHECK_SPINLOCK 1 #endif #ifndef MR_SERVICE_DEFAULT_CHECK_CTRL_SPINLOCK #define MR_SERVICE_DEFAULT_CHECK_CTRL_SPINLOCK 1 #endif #ifndef MR_SERVICE_DEFAULT_CHECK_CTRL_SPINLOCK_INTERVAL #define MR_SERVICE_DEFAULT_CHECK_CTRL_SPINLOCK_INTERVAL 1000 #endif #ifndef MR_SERVICE_DEFAULT_CHECK_MEMLEAK #define MR_SERVICE_DEFAULT_CHECK_MEMLEAK 1 #endif #ifndef MR_SERVICE_DEFAULT_PKT_TIMESTAMP #define MR_SERVICE_DEFAULT_PKT_TIMESTAMP 0 #endif #ifndef MR_SERVICE_DEFAULT_PKT_DUMPER #define MR_SERVICE_DEFAULT_PKT_DUMPER 1 #endif #ifndef MR_SERVICE_DEFAULT_PKT_LATENCY #define MR_SERVICE_DEFAULT_PKT_LATENCY 0 #endif int mr_config_init_ready = 0; unsigned int g_logger_to_stdout = 1; unsigned int g_logger_level = LOG_DEBUG; unsigned int g_monit_interval = 1; unsigned int g_keep_running = 1; unsigned int g_in_protect_mode = 0; rte_spinlock_t g_in_protect_lock = RTE_SPINLOCK_INITIALIZER; static struct sc_main * g_sc_main = NULL; /* 写入Command参数 */ static void __write_arg(char * eal_argv[], unsigned int * eal_argc, unsigned int max_argc, const char * value) { assert(max_argc >= *eal_argc); char * mem = (char *)malloc(MR_STRING_MAX * sizeof(char)); assert(mem != NULL); snprintf(mem, MR_STRING_MAX * sizeof(char), "%s", value); eal_argv[(*eal_argc)++] = mem; return; } #define WRITE_ARG(x) \ do \ { \ __write_arg(eal_argv, &eal_argc, MR_SERVICE_MAX_EAL_ARGC, x); \ } while (0) static void sc_help() { fprintf(stdout, "\n"); exit(EXIT_SUCCESS); } static void sc_version() { fprintf(stdout, "%s\n", service_git_version); exit(EXIT_SUCCESS); } struct sc_main * sc_main_new() { struct sc_main * instance = malloc(sizeof(struct sc_main)); MR_VERIFY_MALLOC(instance); memset(instance, 0, sizeof(struct sc_main)); return instance; } struct sc_main * sc_main_get() { MR_VERIFY_2(g_sc_main != NULL, "Service Main Object is NULL"); return g_sc_main; } void sc_instance_init_progsym(struct sc_main * instance) { int ret = strncmp(instance->appsym, "", sizeof(instance->appsym)); if (ret == 0) snprintf(instance->appsym, sizeof(instance->appsym), "%s", MR_SERVICE_DEFAULT_APPSYM); return; } extern int sc_monit_loop(struct sc_main * sc_main); void sw_forward_rxtx_loop(struct sc_main * sc, unsigned int lcore_id); void sw_forward_idle_loop(struct sc_main * sc, unsigned int lcore_id); void sw_vlan_base_forward_rxtx_loop(struct sc_main * sc, unsigned int lcore_id); void smartoffload_rxtx_loop(struct sc_main * sc, unsigned int lcore_id); void * health_check_thread(void * args); int32_t sw_forward_service_entry(void * args); int32_t sw_vlan_base_forward_service_entry(void * args); int32_t smartoffload_service_entry(void * args); int32_t node_manager_pkt_graph_service_entry(void * args); int olp_config_reload(struct sc_main * sc_main); int classifier_rule_update(struct sc_main * sc); int sc_check_spinlock_loop(struct sc_main * sc_main) { if (sc_main->keepalive == NULL) return 0; rte_keepalive_dispatch_pings(NULL, sc_main->keepalive); return 0; } void sc_keepalive_failure_handler(void * data, const int id_core) { MR_ERROR("Thread %d is dead, exit. \n", id_core); /* print the stack trace */ abort(); } int sc_check_memleak_loop(struct sc_main * sc_main) { return mrb_buffer_leak_detect(sc_main); } void * sc_smartoffload_thread(void * args) { struct sc_main * sc_main = (struct sc_main *)args; pthread_detach(pthread_self()); if (sc_main->smartoffload_main == NULL) return (void *)NULL; while (g_keep_running) { // smartoffload_rxtx_loop(sc_main, 0); } return (void *)NULL; } void * sc_ctrlplane_thread(void * args) { struct sc_main * sc_main = (struct sc_main *)args; pthread_detach(pthread_self()); while (g_keep_running) { sc_monit_loop(sc_main); sc_check_spinlock_loop(sc_main); sc_check_memleak_loop(sc_main); sleep(1); } return (void *)NULL; } int sc_dataplane_thread(void * arg) { unsigned int lcore_id = rte_lcore_id(); struct sc_main * sc_main = (struct sc_main *)arg; uint64_t idle_counter = 0; MR_INFO("Dataplane thread %d is running ... ", lcore_id); while (g_keep_running) { /* 优化分支预测,以宏定义的默认行为为多数行为 */ #if MR_SERVICE_DEFAULT_KEEPALIVE if (likely(sc_main->keepalive != NULL)) #else if (unlikely(sc_main->keepalive != NULL)) #endif { rte_keepalive_mark_alive(sc_main->keepalive); } #if MR_SERVICE_DEFAULT_IDLE_THRESHOLD if (likely(sc_main->idle_threshold) && unlikely(idle_counter == sc_main->idle_threshold)) #else if (unlikely(sc_main->idle_threshold) && unlikely(idle_counter == sc_main->idle_threshold)) #endif { // sw_forward_idle_loop(sc_main, lcore_id); idle_counter = 0; } idle_counter++; // sw_forward_rxtx_loop(sc_main, lcore_id); // sw_vlan_base_forward_rxtx_loop(sc_main, lcore_id); #if MR_SERVICE_DEFAULT_PKT_LATENCY if (likely(sc_main->en_pkt_latency) #else if (unlikely(sc_main->en_pkt_latency) #endif && lcore_id == sc_main->pkt_latency_lcore_id) { rte_latencystats_update(); } } return 0; } #if 0 static const char * __str_disable_or_enable_ptr(void * ptr) { if (ptr == NULL) return "Disable"; else return "Enable"; } #endif static const char * __str_disable_or_enable_uint(unsigned int value) { if (value) return "Enable"; else return "Disable"; } static void __cpu_set_to_string(const cpu_set_t * cpu_set, char * output) { char str_cpu_set[MR_STRING_MAX] = {}; for (unsigned int lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { if (CPU_ISSET(lcore_id, cpu_set)) { unsigned int str_cpu_set_len = strlen(str_cpu_set); const char * format = str_cpu_set_len == 0 ? "%u" : ",%u"; snprintf(str_cpu_set + str_cpu_set_len, sizeof(str_cpu_set) - 1, format, lcore_id); } } if (strlen(str_cpu_set) > 0 && str_cpu_set[strlen(str_cpu_set) - 1] == ',') { str_cpu_set[strlen(str_cpu_set) - 1] = '\0'; } strcpy(output, str_cpu_set); } void sc_config_dump(struct sc_main * sc) { MR_INFO(" "); MR_INFO("Runtime Information"); MR_INFO(" Dataplane thread count : %d", CPU_COUNT(&sc->cpu_set_io)); MR_INFO(" Packet burst on RX direction : %d", sc->nr_rx_burst); MR_INFO(" Packet burst on TX direction : %d", sc->nr_tx_burst); MR_INFO(" Packet distribute mode : %s", ldbc_str_dist_mode(sc->dist_object)); MR_INFO(" Packet hash function mode : %s", ldbc_str_hash_mode(sc->dist_object)); MR_INFO(" Local configure file : %s", sc->local_cfgfile); MR_INFO(" Local hardware configure file : %s", sc->local_hwfile); MR_INFO(" Check spinlock : %s", __str_disable_or_enable_uint(sc->en_spinlock_check)); } /* EAL环境初始化 */ static void sc_eal_init(struct sc_main * sc, const char * cmd) { char * eal_argv[MR_SERVICE_MAX_EAL_ARGC]; unsigned int eal_argc = 0; int ret = 0; WRITE_ARG(cmd); unsigned int io_cores[RTE_MAX_LCORE] = {}; int nr_io_cores = 0; unsigned int smartoffload_cores[RTE_MAX_LCORE] = {}; int nr_smartoffload_cores = 0; unsigned int main_core = 0; MESA_load_profile_uint_def(sc->local_cfgfile, "service", "main_core", &main_core, 0); nr_io_cores = MESA_load_profile_uint_range(sc->local_cfgfile, "service", "iocore", RTE_DIM(io_cores), io_cores); nr_smartoffload_cores = MESA_load_profile_uint_range(sc->local_cfgfile, "service", "offload_core", RTE_DIM(smartoffload_cores), smartoffload_cores); if (nr_io_cores < 0) { nr_io_cores = 0; } if (nr_smartoffload_cores < 0) { nr_smartoffload_cores = 0; } cpu_set_t cpu_set_io; cpu_set_t cpu_set_offload; cpu_set_t cpu_set_eal; CPU_ZERO(&cpu_set_io); CPU_ZERO(&cpu_set_offload); CPU_ZERO(&cpu_set_eal); /* TODO: check main_core is existed in iocore or offload_cores or not. */ CPU_SET(main_core, &cpu_set_eal); for (unsigned int i = 0; i < nr_io_cores; i++) { CPU_SET(io_cores[i], &cpu_set_io); } for (unsigned int i = 0; i < nr_smartoffload_cores; i++) { CPU_SET(smartoffload_cores[i], &cpu_set_offload); } /* merge the cpu set and convert the cpu set to eal parameters */ CPU_OR(&cpu_set_eal, &cpu_set_eal, &cpu_set_io); CPU_OR(&cpu_set_eal, &cpu_set_eal, &cpu_set_offload); char str_eal_cores[MR_STRING_MAX] = {}; __cpu_set_to_string(&cpu_set_eal, str_eal_cores); WRITE_ARG("-l"); WRITE_ARG(str_eal_cores); sc->cpu_set_io = cpu_set_io; sc->cpu_set_offload = cpu_set_offload; char str_virtaddr[MR_STRING_MAX]; ret = MESA_load_profile_string_nodef(sc->local_cfgfile, "eal", "virtaddr", str_virtaddr, sizeof(str_virtaddr)); if (ret >= 0) { WRITE_ARG("--base-virtaddr"); WRITE_ARG(str_virtaddr); } unsigned int en_no_huge = 0; MESA_load_profile_uint_def(sc->local_cfgfile, "eal", "nohuge", &en_no_huge, 0); if (en_no_huge > 0) { WRITE_ARG("--no-huge"); } unsigned int en_no_pci = 0; MESA_load_profile_uint_def(sc->local_cfgfile, "eal", "nopci", &en_no_pci, 0); if (en_no_pci > 0) { WRITE_ARG("--no-pci"); } unsigned int mem = 0; ret = MESA_load_profile_uint_nodef(sc->local_cfgfile, "eal", "mem", &mem); char str_mem[MR_STRING_MAX]; snprintf(str_mem, sizeof(str_mem) - 1, "%u", mem); if (ret >= 0) { WRITE_ARG("-m"); WRITE_ARG(str_mem); } unsigned int mem_channel = 0; ret = MESA_load_profile_uint_nodef(sc->local_cfgfile, "eal", "mem_channel", &mem_channel); char str_mem_channel[MR_STRING_MAX]; snprintf(str_mem_channel, sizeof(str_mem_channel) - 1, "%u", mem_channel); if (ret >= 0) { WRITE_ARG("-n"); WRITE_ARG(str_mem_channel); } char str_socket_mem[MR_STRING_MAX]; ret = MESA_load_profile_string_nodef(sc->local_cfgfile, "eal", "socket-mem", str_socket_mem, sizeof(str_socket_mem)); if (ret > 0) { WRITE_ARG("--socket-mem"); WRITE_ARG(str_socket_mem); } char str_huge_dir[MR_STRING_MAX]; ret = MESA_load_profile_string_nodef(sc->local_cfgfile, "eal", "huge-dir", str_huge_dir, sizeof(str_huge_dir)); if (ret >= 0) { WRITE_ARG("--huge-dir"); WRITE_ARG(str_huge_dir); } char str_file_prefix[MR_STRING_MAX]; ret = MESA_load_profile_string_nodef(sc->local_cfgfile, "eal", "file-prefix", str_file_prefix, sizeof(str_file_prefix)); if (ret >= 0) { WRITE_ARG("--file-prefix"); WRITE_ARG(str_file_prefix); } unsigned int en_single_file_segments = 0; MESA_load_profile_uint_def(sc->local_cfgfile, "eal", "single-file-segments", &en_single_file_segments, 0); if (en_single_file_segments) { WRITE_ARG("--single-file-segments"); } unsigned int legacy_mem = 0; MESA_load_profile_uint_def(sc->local_cfgfile, "eal", "legacy_mem", &legacy_mem, 1); if (legacy_mem > 0) { WRITE_ARG("--legacy-mem"); } devmgr_eal_args_generate(sc->devmgr_main, eal_argv, &eal_argc, MR_SERVICE_MAX_EAL_ARGC); #if defined(__x86_64__) /* Force AVX512 */ unsigned int max_simd_bit_width = 0; MESA_load_profile_uint_def(sc->local_cfgfile, "eal", "max_simd_bit_width", &max_simd_bit_width, 0); if (max_simd_bit_width > 0) { rte_vect_set_max_simd_bitwidth(max_simd_bit_width); } #endif // DPDK和SYSTEMD的日志级别差1 unsigned int loglevel = g_logger_level + 1; MESA_load_profile_uint_def(sc->local_cfgfile, "eal", "loglevel", &loglevel, loglevel); /* 检查日志选项,必须在1~8之间 */ if (!(loglevel >= RTE_LOG_EMERG && loglevel <= RTE_LOG_DEBUG)) { MR_CFGERR_INVALID_VALUE(sc->local_cfgfile, "eal", "loglevel", "Must between LOG_DEBUG(8) and LOG_EMERG(1)"); exit(EXIT_FAILURE); } rte_log_set_global_level(loglevel); g_logger_level = loglevel - 1; char str_loglevel[MR_STRING_MAX]; snprintf(str_loglevel, sizeof(str_loglevel), "%d", loglevel); WRITE_ARG("--log-level"); WRITE_ARG(str_loglevel); char str_eal_cmdline[MR_STRING_MAX]; unsigned int curser_str_eal_cmdline = 0; for (int i = 0; i < eal_argc; i++) { curser_str_eal_cmdline += snprintf(str_eal_cmdline + curser_str_eal_cmdline, sizeof(str_eal_cmdline) - curser_str_eal_cmdline, "%s ", eal_argv[i]); } MR_INFO("EAL Parameters: %s", str_eal_cmdline); /* EAL初始化前的线程亲和性 */ ret = pthread_getaffinity_np(pthread_self(), sizeof(cpu_set_t), &sc->cpu_set_before_eal); MR_VERIFY_2(ret >= 0, "Cannot get init thread affinity: %s", strerror(errno)); #if RTE_VERSION >= RTE_VERSION_NUM(23, 11, 0, 0) #define MR_RTE_MAX_MEMZONE 51200 ret = rte_memzone_max_set(MR_RTE_MAX_MEMZONE); MR_VERIFY(rte_memzone_max_get() == MR_RTE_MAX_MEMZONE); #endif ret = rte_eal_init((int)eal_argc, eal_argv); if (ret < 0) { MR_ERROR("Cannot init EAL Environment, Failed."); exit(EXIT_FAILURE); } sc->nr_io_thread = CPU_COUNT(&sc->cpu_set_io); /* EAL初始化后的线程亲和性 */ ret = pthread_getaffinity_np(pthread_self(), sizeof(cpu_set_t), &sc->cpu_set_after_eal); MR_VERIFY_2(ret >= 0, "Cannot get init thread affinity: %s", strerror(errno)); /* 恢复CPU亲和性设置 */ ret = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &sc->cpu_set_before_eal); MR_VERIFY_2(ret >= 0, "Cannot set init thread affinity: %s", strerror(errno)); } static int sc_rpc_server_init(struct sc_main * sc) { char rpc_addr[MR_STRING_MAX] = {0}; unsigned int rpc_port = 0; struct sockaddr_in srv_sockaddr; MESA_load_profile_string_def(sc->local_cfgfile, "rpc", "addr", rpc_addr, sizeof(rpc_addr), RPC_DEFAULT_ADDR); MESA_load_profile_uint_def(sc->local_cfgfile, "rpc", "port", &rpc_port, RPC_DEFAULT_PORT); if (inet_pton(AF_INET, rpc_addr, &srv_sockaddr.sin_addr) <= 0) { MR_CFGERR_INVALID_FORMAT(sc->local_cfgfile, "rpc", "addr"); return RT_ERR; } srv_sockaddr.sin_port = htons(rpc_port); srv_sockaddr.sin_family = AF_INET; sc->rpc_srv_handler = rpc_server_alloc_handler(); if (sc->rpc_srv_handler == NULL) { return RT_ERR; } if (rpc_server_listen_accept(sc->rpc_srv_handler, srv_sockaddr) != 0) { return RT_ERR; } return RT_SUCCESS; } static int sc_distributer_init(struct sc_main * sc) { unsigned int distmode = LDBC_DIST_OUTER_TUPLE2; unsigned int hashmode = LDBC_HASH_SYM_CRC; MESA_load_profile_uint_def(sc->local_cfgfile, "service", "distmode", &distmode, LDBC_DIST_OUTER_TUPLE2); MESA_load_profile_uint_def(sc->local_cfgfile, "service", "hashmode", &hashmode, LDBC_HASH_SYM_CRC); if (distmode < 0 || distmode >= LDBC_DIST_MAX) { MR_CFGERR_INVALID_FORMAT(sc->local_cfgfile, "service", "distmode"); return RT_ERR; } if (hashmode < 0 || hashmode >= LDBC_HASH_MAX) { MR_CFGERR_INVALID_FORMAT(sc->local_cfgfile, "service", "hashmode"); return RT_ERR; } sc->dist_object = distributer_create(distmode, hashmode, 0); if (sc->dist_object == NULL) { MR_ERROR("Create distributer handler failed. "); return RT_ERR; } return RT_SUCCESS; } /* 加载共用参数 */ static int sc_g_config_init(struct sc_main * sc) { /* 数据面参数 --- RX侧Burst */ MESA_load_profile_uint_def(sc->local_cfgfile, "burst", "nr_rx_burst", &sc->nr_rx_burst, MR_SERVICE_DEFAULT_RX_BURST); /* 数据面参数 --- TX侧Burst */ MESA_load_profile_uint_def(sc->local_cfgfile, "burst", "nr_tx_burst", &sc->nr_tx_burst, MR_SERVICE_DEFAULT_TX_BURST); if (sc->nr_rx_burst > MR_BURST_MAX) { MR_CFGERR_INVALID_FORMAT(sc->local_cfgfile, "burst", "nr_rx_burst"); return RT_ERR; } if (sc->nr_tx_burst > MR_BURST_MAX) { MR_CFGERR_INVALID_FORMAT(sc->local_cfgfile, "burst", "nr_tx_burst"); return RT_ERR; } int ret = 0; /* 数据面idle调用门限 */ MESA_load_profile_uint_def(sc->local_cfgfile, "service", "idle_threshold", &sc->idle_threshold, MR_SERVICE_DEFAULT_IDLE_THRESHOLD); /* 数据面线程保活 */ MESA_load_profile_uint_def(sc->local_cfgfile, "keepalive", "check_spinlock", &sc->en_spinlock_check, MR_SERVICE_DEFAULT_CHECK_SPINLOCK); /* 控制面线程保活 */ MESA_load_profile_uint_def(sc->local_cfgfile, "keepalive", "check_ctrl_spinlock", &sc->en_ctrl_spinlock_check, MR_SERVICE_DEFAULT_CHECK_CTRL_SPINLOCK); /* 控制面线程保活间隔 */ MESA_load_profile_uint_def(sc->local_cfgfile, "keepalive", "check_ctrl_spinlock_interval", &sc->ctrl_spinlock_check_interval, MR_SERVICE_DEFAULT_CHECK_CTRL_SPINLOCK_INTERVAL); /* mpack metadata */ MESA_load_profile_uint_def(sc->local_cfgfile, "mpack", "metadata_enable", &sc->en_mpack_metadata, 0); /* mpack route ctx */ MESA_load_profile_uint_def(sc->local_cfgfile, "mpack", "route_ctx_enable", &sc->en_mpack_route_ctx, 0); /* 报文捕获 */ MESA_load_profile_uint_def(sc->local_cfgfile, "debug", "pkt_dumper", &sc->en_pkt_dumper, MR_SERVICE_DEFAULT_PKT_DUMPER); /* 报文时间戳 */ MESA_load_profile_uint_def(sc->local_cfgfile, "debug", "pkt_timestamp", &sc->en_pkt_timestamp, MR_SERVICE_DEFAULT_PKT_TIMESTAMP); /* 报文延迟统计 */ MESA_load_profile_uint_def(sc->local_cfgfile, "debug", "pkt_latency", &sc->en_pkt_latency, MR_SERVICE_DEFAULT_PKT_LATENCY); MESA_load_profile_uint_def(sc->local_cfgfile, "debug", "pkt_latency_sample_interval", &sc->pkt_latency_sample_interval, 10 * 1000 * 1000); /* 报文延迟统计核心 */ ret = MESA_load_profile_uint_nodef(sc->local_cfgfile, "debug", "pkt_latency_lcore_id", &sc->pkt_latency_lcore_id); /* not set, use the first io core as the latency stat core */ if (ret < 0) { sc->pkt_latency_lcore_id = cpu_set_location(&sc->cpu_set_io, 0); } return RT_SUCCESS; } extern char ** environ; static char ** __main_argv = NULL; /* pointer to argument vector */ static char * __main_last_argv = NULL; /* end of argv */ void __setproctitle_init(int argc, char ** argv, char ** envp) { int i; for (i = 0; envp[i] != NULL; i++) // calc envp num continue; environ = (char **)malloc(sizeof(char *) * (i + 1)); // malloc envp pointer for (i = 0; envp[i] != NULL; i++) { environ[i] = malloc(sizeof(char) * strlen(envp[i])); strcpy(environ[i], envp[i]); } environ[i] = NULL; __main_argv = argv; if (i > 0) __main_last_argv = envp[i - 1] + strlen(envp[i - 1]); else __main_last_argv = argv[argc - 1] + strlen(argv[argc - 1]); return; } struct service_performance_monit { uint32_t service_id; uint64_t service_last_cycles; uint64_t service_last_call_counts; }; void service_performance_calc_cycle_per_loop(struct service_performance_monit * handle, uint64_t * out_cycle_per_call_counts) { uint64_t cycles_this_time = 0; uint64_t call_count_this_time = 0; rte_service_attr_get(handle->service_id, RTE_SERVICE_ATTR_CYCLES, &cycles_this_time); rte_service_attr_get(handle->service_id, RTE_SERVICE_ATTR_CALL_COUNT, &call_count_this_time); uint64_t delta_cycles = cycles_this_time - handle->service_last_cycles; uint64_t delta_call_count = call_count_this_time - handle->service_last_call_counts; handle->service_last_cycles = cycles_this_time; handle->service_last_call_counts = call_count_this_time; if (delta_call_count > 0) { *out_cycle_per_call_counts = delta_cycles / delta_call_count; } else { *out_cycle_per_call_counts = 0; } } static struct service_performance_monit sw_info_node; cJSON * service_monit_loop(struct sc_main * sc) { uint64_t cycle_per_call_counts_node = 0; service_performance_calc_cycle_per_loop(&sw_info_node, &cycle_per_call_counts_node); cJSON * j_root = cJSON_CreateArray(); cJSON * j_serv_node = cJSON_CreateObject(); cJSON_AddStringToObject(j_serv_node, "symbol", "node"); cJSON_AddNumberToObject(j_serv_node, "cycles_per_call_count", cycle_per_call_counts_node); cJSON_AddItemToArray(j_root, j_serv_node); return j_root; } static int service_register_helper(const struct rte_service_spec * spec, unsigned int * out_service_id) { int ret = 0; uint32_t service_id = 0; ret = rte_service_component_register(spec, &service_id); if (unlikely(ret < 0)) { MR_ERROR("Failed at register service %s: ret = %d", spec->name, ret); return ret; } rte_service_component_runstate_set(service_id, 1); rte_service_set_stats_enable(service_id, 1); ret = rte_service_runstate_set(service_id, 1); if (unlikely(ret < 0)) { MR_ERROR("Failed at set runstate for service %s: ret = %d", spec->name, ret); return ret; } *out_service_id = service_id; return 0; } /* 检查本进程是否通过SYSTEMD启动 */ static int __check_is_notify() { char * notify_socket = getenv("NOTIFY_SOCKET"); return notify_socket == NULL ? 0 : 1; } __rte_unused static void signal_handler(int signum) { g_keep_running = 0; } void * config_reload_handler(void * arg) { ssize_t s; uintptr_t sfd = (uintptr_t)arg; struct signalfd_siginfo fdsi; pthread_detach(pthread_self()); while (!mr_config_init_ready) { sleep(1); } while (1) { s = read(sfd, &fdsi, sizeof(fdsi)); if (s != sizeof(fdsi)) { continue; } if (fdsi.ssi_signo != SIGHUP) { continue; } olp_config_reload(g_sc_main); classifier_rule_update(g_sc_main); dp_trace_config_update(g_sc_main); } close(sfd); return (void *)NULL; } int config_reload_thread() { int sfd; int ret = 0; sigset_t mask; pthread_t thread_id; sigemptyset(&mask); sigaddset(&mask, SIGHUP); if (sigprocmask(SIG_BLOCK, &mask, NULL) == -1) { MR_ERROR("SIGHUP: sigprocmask failed : %s", strerror(errno)); return RT_ERR; } sfd = signalfd(-1, &mask, 0); if (sfd == -1) { MR_ERROR("SIGHUP: signalfd failed : %s", strerror(errno)); return RT_ERR; } ret = pthread_create(&thread_id, NULL, config_reload_handler, (void *)(uintptr_t)sfd); if (ret != 0) { MR_ERROR("config reload thread failed : %s", strerror(ret)); return RT_ERR; } return RT_SUCCESS; } extern int hwinfo_init(struct sc_main * sc); extern int phydev_init(struct sc_main * sc); extern int devmgr_early_init(struct sc_main * sc); extern int devmgr_init(struct devmgr_main * devmgr_main); extern void devmgr_deinit(struct devmgr_main * devmgr_main); extern int sw_forward_init(struct sc_main * sc); extern int app_main_init(struct sc_main * sc); extern int vdev_dump(struct sc_main * sc); extern int sw_vlan_base_forward_init(struct sc_main * sc); extern int smartoffload_init(struct sc_main * sc); extern int flow_main_init(struct sc_main * sc); extern int node_manager_init(struct sc_main * sc); extern int eth_ingress_init(struct sc_main * sc); extern int node_eth_egress_init(struct sc_main * sc); extern int classifier_init(struct sc_main * sc); extern int lb_init(struct sc_main * sc); extern int ef_init(struct sc_main * sc); extern int tera_init(struct sc_main * sc); extern int vwire_init(struct sc_main * sc); extern int health_check_init(struct sc_main * sc); extern int bridge_init(struct sc_main * sc); extern int mr_pdump_init(struct sc_main * sc); extern int http_serv_init(struct sc_main * sc_main); extern int olp_manager_init(struct sc_main * sc_main); extern int lai_init(struct sc_main * sc); extern int bfd_init(struct sc_main * sc); int marsio_service_main(int argc, char * argv[]) { /* 解析命令行参数 */ struct sc_main * sc = sc_main_new(); g_sc_main = sc; /* Configure File */ snprintf(sc->local_cfgfile, sizeof(sc->local_logfile), "%s", MR_SERVICE_DEFAULT_CFG); /* Application Symbol */ snprintf(sc->appsym, sizeof(sc->appsym), "%s", MR_SERVICE_DEFAULT_APPSYM); /* Monit File */ snprintf(sc->local_monitfile, sizeof(sc->local_monitfile), "%s", MR_SERVICE_DEFAULT_MONIT_FILE); /* Hardware File */ snprintf(sc->local_hwfile, sizeof(sc->local_hwfile), "%s", MR_SERVICE_DEFAULT_HWFILE); /* Configure Dynamic File */ snprintf(sc->local_dyfile, sizeof(sc->local_dyfile), "%s", MR_SERVICE_DEFAULT_DYNAMIC_CFG); /* 通过Systemd启动,关闭标准输出,采用syslog方式记录日志 */ if (__check_is_notify()) { g_logger_to_stdout = 0; } else { g_logger_to_stdout = 1; } int ret = 0; int opt = 0; while ((opt = getopt(argc, argv, "hvc:w:d:s:")) != -1) { switch (opt) { case '?': case 'h': sc_help(); break; case 'v': sc_version(); break; case 'c': snprintf(sc->local_cfgfile, sizeof(sc->local_cfgfile), "%s", optarg); break; case 'w': snprintf(sc->local_logfile, sizeof(sc->local_logfile), "%s", optarg); break; case 'd': snprintf(sc->local_hwfile, sizeof(sc->local_hwfile), "%s", optarg); break; case 's': snprintf(sc->local_dyfile, sizeof(sc->local_dyfile), "%s", optarg); break; default: break; } } unsigned int pdump_inited = 0; if (g_logger_to_stdout) { MR_INFO("%s", mr_logo); } MR_INFO("MARSIO ZeroCopy Packet I/O Driver, Version: %s, build with %s", service_git_version, rte_version()); /* Check Configure File */ if (access(sc->local_cfgfile, R_OK) != 0) { MR_ERROR("Configure File %s is not existed. Failed. ", sc->local_cfgfile); ret = EXIT_FAILURE; goto quit; } /* Check Hardware File */ if (access(sc->local_hwfile, R_OK) != 0) { MR_WARNING("Hardware File %s is not existed. ", sc->local_hwfile); } /* Exegesis the signal for fast stop */ // signal(SIGINT, signal_handler); // signal(SIGTERM, signal_handler); config_reload_thread(); if (devmgr_early_init(sc) != RT_SUCCESS) { MR_ERROR("Device manager early initialization failed."); ret = EXIT_FAILURE; goto quit; } #if MR_TEST_SIGSEGV char * __ptr_null = NULL; *__ptr_null = 0; #endif /* 加载EAL选项 */ sc_eal_init(sc, argv[0]); if (sc_g_config_init(sc) != RT_SUCCESS) { MR_ERROR("Global configuration load failed. "); ret = EXIT_FAILURE; goto quit; } if (sc_rpc_server_init(sc) != RT_SUCCESS) { MR_ERROR("rpc server module initialization failed. "); ret = EXIT_FAILURE; goto quit; } if (sc_distributer_init(sc) != RT_SUCCESS) { MR_ERROR("Distributer module initialization failed. "); ret = EXIT_FAILURE; goto quit; } if (app_main_init(sc) != RT_SUCCESS) { MR_ERROR("App management module initialization failed. "); ret = EXIT_FAILURE; goto quit; } #if 0 if (flow_main_init(sc) != RT_SUCCESS) { MR_ERROR("offload management module initialization failed. "); ret = EXIT_FAILURE; goto quit; } #endif if (hwinfo_init(sc) != RT_SUCCESS) { MR_ERROR("Hardware information initialization failed. "); ret = EXIT_FAILURE; goto quit; } if (mrb_init(sc) != RT_SUCCESS) { MR_ERROR("Pktmbuf pools initialization failed."); ret = EXIT_FAILURE; goto quit; } if (vdev_main_init(sc) != RT_SUCCESS) { MR_ERROR("Virtual devices initialization failed. "); ret = EXIT_FAILURE; goto quit; } if (devmgr_init(sc->devmgr_main) != RT_SUCCESS) { MR_ERROR("Device manager initialization failed."); ret = EXIT_FAILURE; goto quit; } mr_dp_trace_init(sc); #if 0 if (smartoffload_init(sc) != RT_SUCCESS) { MR_ERROR("SmartOffload module initialization failed. "); ret = EXIT_FAILURE; goto quit; } #endif #if 0 if (sw_forward_init(sc) != RT_SUCCESS) { MR_ERROR("Direct switch module initialization failed. "); ret = EXIT_FAILURE; goto quit; } #endif if (node_manager_init(sc) != RT_SUCCESS) { MR_ERROR("Node Manager module initialization failed. "); ret = EXIT_FAILURE; goto quit; } /* Port adapter mapping init */ if (port_adapter_mapping_init() != RT_SUCCESS) { MR_ERROR("Port adapter mapping initialization failed. "); ret = EXIT_FAILURE; goto quit; } /* Must Before 'lb' init */ if (health_check_init(sc) != RT_SUCCESS) { MR_ERROR("Health check initialization failed. "); ret = EXIT_FAILURE; goto quit; } /* Must Before 'Classifier' init */ if (lb_init(sc) != RT_SUCCESS) { MR_ERROR("Load Balance initialization failed. "); ret = EXIT_FAILURE; goto quit; } /* Must Before 'Classifier' init */ if (ef_init(sc) != RT_SUCCESS) { MR_ERROR("Etherfabric initialization failed. "); ret = EXIT_FAILURE; goto quit; } if (tera_init(sc) != RT_SUCCESS) { MR_ERROR("Tera initialization failed. "); ret = EXIT_FAILURE; goto quit; } /* Must Before 'vwire' init */ if (olp_manager_init(sc) != RT_SUCCESS) { MR_ERROR("olp manager initialization failed. "); ret = EXIT_FAILURE; goto quit; } /* Must Before 'Classifier' init */ if (vwire_init(sc) != RT_SUCCESS) { MR_ERROR("Virtual Wire module initialization failed."); ret = EXIT_FAILURE; goto quit; } if (classifier_init(sc) != RT_SUCCESS) { MR_ERROR("Classifier Table initialization failed. "); ret = EXIT_FAILURE; goto quit; } if (bridge_init(sc) != RT_SUCCESS) { MR_ERROR("Bridge initialization failed. "); ret = EXIT_FAILURE; goto quit; } if (mr_pdump_init(sc) != RT_SUCCESS) { MR_ERROR("Mrpdump initialization failed. "); ret = EXIT_FAILURE; goto quit; } if (lai_init(sc) != RT_SUCCESS) { MR_ERROR("LAI(Link aware injector) initialization failed. "); ret = EXIT_FAILURE; goto quit; } if (bfd_init(sc) != RT_SUCCESS) { MR_ERROR("BFD initialization failed. "); ret = EXIT_FAILURE; goto quit; } sc_config_dump(sc); vdev_dump(sc); port_adapter_mapping_dump(); if (rte_pdump_init() != 0) { MR_WARNING("Packet dump library initialization failed."); } else { pdump_inited = 1; } rte_metrics_init(SOCKET_ID_ANY); /* 延迟监测 */ if (sc->en_pkt_latency) { ret = rte_latencystats_init(sc->pkt_latency_sample_interval, NULL); if (ret != 0) { MR_ERROR("packet latency stats module init failed, ret = %d", ret); ret = EXIT_FAILURE; goto quit; } MR_INFO("packet latency stats enabled, sample interval(ns)=%u", sc->pkt_latency_sample_interval); } if (rpc_server_dispatch_thread(sc->rpc_srv_handler) != RT_SUCCESS) { MR_ERROR("Launch rpc dispatch thread failed"); ret = EXIT_FAILURE; goto quit; } pthread_t ctrlplane_thread_id; ret = pthread_create(&ctrlplane_thread_id, NULL, sc_ctrlplane_thread, sc); if (ret != 0) { MR_ERROR("Launch ctrlplane thread failed : %s", strerror(ret)); ret = EXIT_FAILURE; goto quit; } pthread_t health_check_thread_id; ret = pthread_create(&health_check_thread_id, NULL, health_check_thread, sc); if (ret != 0) { MR_ERROR("Launch health check thread failed : %s", strerror(ret)); ret = EXIT_FAILURE; goto quit; } /* 采用NOTIFY方式启动,通知操作系统完成了初始化 */ if (__check_is_notify()) sd_notify(0, "READY=1"); mr_config_init_ready = 1; sleep(1); /* 死锁检测 */ if (sc->en_spinlock_check) { sc->keepalive = rte_keepalive_create(sc_keepalive_failure_handler, sc); if (sc->keepalive == NULL) { MR_ERROR("Create spinlock checker handler failed. "); ret = EXIT_FAILURE; goto quit; } } unsigned int lcore_id_iter = 0; RTE_LCORE_FOREACH(lcore_id_iter) { if (sc->keepalive) { rte_keepalive_register_core(sc->keepalive, (int)lcore_id_iter); MR_INFO("Keepalive register for thread %d successfully.", lcore_id_iter); } } http_serv_init(sc); /* 恢复CPU亲和性设置为EAL后的线程绑定参数 */ ret = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &sc->cpu_set_after_eal); MR_VERIFY_2(ret >= 0, "Cannot set init thread affinity: %s", strerror(errno)); #if 0 struct rte_service_spec smartoffload_spec = { .name = "smartoffload_service", .callback = smartoffload_service_entry, .callback_userdata = sc, .capabilities = RTE_SERVICE_CAP_MT_SAFE, .socket_id = 0, }; #endif struct rte_service_spec node_spec = { .name = "node_service", .callback = node_manager_pkt_graph_service_entry, .callback_userdata = sc, .capabilities = RTE_SERVICE_CAP_MT_SAFE, .socket_id = 0, }; ret = service_register_helper(&node_spec, &sw_info_node.service_id); if (unlikely(ret < 0)) { MR_ERROR("Failed at starting service %s", node_spec.name); goto quit; } for (unsigned int lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { if (!CPU_ISSET(lcore_id, &sc->cpu_set_io)) continue; ret = rte_service_lcore_add(lcore_id); if (ret && ret != -EALREADY) { MR_ERROR("Failed at add core %u to service cores: ret = %d", lcore_id, ret); goto quit; } ret = rte_service_map_lcore_set(sw_info_node.service_id, lcore_id, 1); if (rte_service_map_lcore_get(sw_info_node.service_id, lcore_id) == 1) { MR_INFO("Setup core %u for service %s...", lcore_id, node_spec.name); } else { MR_ERROR("Failed at set lcore map for service %s: lcore_id = %d, ret = %d", node_spec.name, lcore_id, ret); goto quit; } ret = rte_service_lcore_start(lcore_id); if (ret && ret != -EALREADY) { MR_ERROR("Failed at start service core %u: ret = %d", lcore_id, ret); goto quit; } } while (true) { sleep(1); sd_notify(0, "WATCHDOG=1"); } quit: if (sc->devmgr_main != NULL) { devmgr_deinit(sc->devmgr_main); } if (pdump_inited) rte_pdump_uninit(); return 0; }