From 1258872a1addb9983f51b15e609e3ce1d72fa8f0 Mon Sep 17 00:00:00 2001 From: Lu Qiuwen Date: Thu, 20 Jul 2023 21:16:47 +0500 Subject: 开启mlx5网卡的delay_drop功能,增加参与rss的因子,增加buffer leak的重复检测机制。 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/include/mrapp.h | 3 +++ app/src/marsio.c | 13 ++++++++----- infra/src/ctrlmsg.c | 6 +++--- service/src/devmgr.c | 8 +++----- service/src/mrb.c | 32 +++++++++++++++++++++++++------- 5 files changed, 42 insertions(+), 20 deletions(-) diff --git a/app/include/mrapp.h b/app/include/mrapp.h index 4d3de30..661e624 100644 --- a/app/include/mrapp.h +++ b/app/include/mrapp.h @@ -173,6 +173,9 @@ struct mr_instance /* mempool cache map */ struct mp_cache_map * mp_cache_map[MR_MEMPOOL_COUNT_MAX]; unsigned int nr_mp_cache_map; + + /* vdev buffer size */ + unsigned int sz_vdev_buffer; }; /* EAL环境是否初始化 */ diff --git a/app/src/marsio.c b/app/src/marsio.c index 71122bd..bc08bd3 100644 --- a/app/src/marsio.c +++ b/app/src/marsio.c @@ -58,6 +58,8 @@ struct mr_instance * _current_instance = NULL; #define MRAPP_DEFAULT_NEIGH_GRATUITOUS_ARP_SEND 3 #endif +#define MR_VDEV_BUFFER_SIZE 512 + /* 写入Command参数 */ static void __write_arg(char * eal_argv[], unsigned int * eal_argc, unsigned int max_argc, const char * value) { @@ -137,6 +139,8 @@ static void mrapp_rx_notify_init(struct mr_instance * instance) &instance->notify_throttle_threshold, 128); MESA_load_profile_uint_def(instance->g_cfgfile_path, "service", "poll_wait_enable", &instance->en_notify, 1); + MESA_load_profile_uint_def(instance->g_cfgfile_path, "service", "vdev_buffer_size", &instance->sz_vdev_buffer, + MR_VDEV_BUFFER_SIZE); } /* EAL环境初始化 */ @@ -822,19 +826,18 @@ struct mr_vdev * marsio_open_device(struct mr_instance * instance, const char * } } -#define MR_VDEV_BUFFER_SIZE 512 for (unsigned int i = 0; i < mr_vdev->nr_rxstream; i++) { - size_t sz_rx_buffer = sizeof(struct mr_vdev_rx_buffer) + sizeof(struct rte_mbuf *) * MR_VDEV_BUFFER_SIZE; + size_t sz_rx_buffer = sizeof(struct mr_vdev_rx_buffer) + sizeof(struct rte_mbuf *) * instance->sz_vdev_buffer; mr_vdev->rx_buffer[i] = rte_zmalloc(NULL, sz_rx_buffer, 0); - mr_vdev->rx_buffer[i]->size = MR_VDEV_BUFFER_SIZE; + mr_vdev->rx_buffer[i]->size = instance->sz_vdev_buffer; } for (unsigned int i = 0; i < mr_vdev->nr_txstream; i++) { - size_t sz_tx_buffer = sizeof(struct mr_vdev_tx_buffer) + sizeof(struct rte_mbuf *) * MR_VDEV_BUFFER_SIZE; + size_t sz_tx_buffer = sizeof(struct mr_vdev_tx_buffer) + sizeof(struct rte_mbuf *) * instance->sz_vdev_buffer; mr_vdev->tx_buffer[i] = rte_zmalloc(NULL, sz_tx_buffer, 0); - mr_vdev->tx_buffer[i]->size = MR_VDEV_BUFFER_SIZE; + mr_vdev->tx_buffer[i]->size = instance->sz_vdev_buffer; } strncpy(mr_vdev->devsym, (char *)msg_resp->devsym, MR_SYMBOL_MAX); diff --git a/infra/src/ctrlmsg.c b/infra/src/ctrlmsg.c index a6f11f9..ee1c8b1 100644 --- a/infra/src/ctrlmsg.c +++ b/infra/src/ctrlmsg.c @@ -472,7 +472,7 @@ static int __server_mode_create(struct ctrlmsg_handler * handle, struct sockaddr goto out; } - int tcp_keepcnt = 4; + int tcp_keepcnt = 2; if (setsockopt(listen_fd, IPPROTO_TCP, TCP_KEEPCNT, (const void*)&tcp_keepcnt, sizeof(int)) < 0) { MR_ERROR("Set crash event listen fd keepcnt failed : %s", strerror(errno)); @@ -486,14 +486,14 @@ static int __server_mode_create(struct ctrlmsg_handler * handle, struct sockaddr goto out; } - int tcp_keepidle = 5; + int tcp_keepidle = 1; if (setsockopt(listen_fd, IPPROTO_TCP, TCP_KEEPIDLE, (const void*)&tcp_keepidle, sizeof(int)) < 0) { MR_ERROR("Set crash event listen fd keepidle failed : %s", strerror(errno)); goto out; } - int tcp_user_timeout = 300; + int tcp_user_timeout = 100; if (setsockopt(listen_fd, IPPROTO_TCP, TCP_USER_TIMEOUT, (const void*)&tcp_user_timeout, sizeof(int)) < 0) { MR_ERROR("Set crash event listen fd user timeout failed : %s", strerror(errno)); diff --git a/service/src/devmgr.c b/service/src/devmgr.c index c472539..9778bd4 100644 --- a/service/src/devmgr.c +++ b/service/src/devmgr.c @@ -624,15 +624,13 @@ static int gen_dpdk_dev_ethconf(struct dpdk_dev * dev, unsigned nr_rxq_use, stru } else if (dev->rssmode == MR_DEV_RSSMODE_4TUPLE_SYM) { - eth_conf.rx_adv_conf.rss_conf.rss_hf = ETH_RSS_NONFRAG_IPV4_TCP | ETH_RSS_NONFRAG_IPV6_TCP | - ETH_RSS_NONFRAG_IPV4_UDP | ETH_RSS_NONFRAG_IPV6_UDP; + eth_conf.rx_adv_conf.rss_conf.rss_hf = RTE_ETH_RSS_IP | RTE_ETH_RSS_TCP | RTE_ETH_RSS_UDP; eth_conf.rx_adv_conf.rss_conf.rss_key = default_sym_rss_key; eth_conf.rx_adv_conf.rss_conf.rss_key_len = sizeof(default_sym_rss_key); } else if (dev->rssmode == MR_DEV_RSSMODE_4TUPLE_ASYM) { - eth_conf.rx_adv_conf.rss_conf.rss_hf = ETH_RSS_NONFRAG_IPV4_TCP | ETH_RSS_NONFRAG_IPV6_TCP | - ETH_RSS_NONFRAG_IPV4_UDP | ETH_RSS_NONFRAG_IPV6_UDP; + eth_conf.rx_adv_conf.rss_conf.rss_hf = RTE_ETH_RSS_IP | RTE_ETH_RSS_TCP | RTE_ETH_RSS_UDP; eth_conf.rx_adv_conf.rss_conf.rss_key = NULL; } @@ -1869,7 +1867,7 @@ void devmgr_eal_args_generate(struct devmgr_main * devmgr_main, char * eal_argv[ "mprq_en=1,rxqs_min_mprq=1,mprq_log_stride_num=9,txq_inline_mpw=128,allow_duplicate_pattern=0"); #endif - "rxq_pkt_pad_en=1,txq_inline_mpw=128,allow_duplicate_pattern=0"); + "rxq_pkt_pad_en=1,txq_inline_mpw=128,delay_drop=0x1,allow_duplicate_pattern=0"); } eal_argv[(*eal_argc)++] = str_dev_option; diff --git a/service/src/mrb.c b/service/src/mrb.c index e7615a9..1b533a5 100644 --- a/service/src/mrb.c +++ b/service/src/mrb.c @@ -84,6 +84,8 @@ struct mrb_pool struct rte_mempool * direct; struct rte_mempool * indirect; + + unsigned int leak_check_counter; }; TAILQ_HEAD(mrb_pool_list, mrb_pool); @@ -119,6 +121,8 @@ struct mrb_main float buffer_leak_warn_threshold; /* Buffer泄露检查,低低限参数 */ float buffer_leak_restart_threshold; + /* Buffer check retries */ + unsigned int buffer_leak_retry_times_threshold; }; struct mrb_pool_adapter @@ -567,6 +571,8 @@ int mrb_buffer_leak_config(struct sc_main * sc) MESA_load_profile_uint_def(cfgfile, "buffer_leak_detect", "restart_threshold", &__restart_threshold, __restart_threshold); + MESA_load_profile_uint_def(cfgfile, "buffer_leak_detect", "retry_times", &ctx->buffer_leak_retry_times_threshold, 5); + ctx->buffer_leak_warn_threshold = 1 - __warn_threshold / 1000.0; ctx->buffer_leak_restart_threshold = 1 - __restart_threshold / 1000.0; @@ -727,19 +733,31 @@ int mrb_buffer_leak_detect(struct sc_main * sc) d_avail, in_avail); #endif + /* 低限 */ + if (d_avail <= warn_threshold_d || in_avail <= warn_threshold_in) + { + MR_WARNING("Pktmbuf pool %s is nearly empty(d_avail = %u, in_avail = %d).", mrb_pool_iter->symbol, + d_avail, in_avail); + } + /* 低低限 */ if (d_avail <= restart_threshold_d || in_avail <= restart_threshold_in) { - MR_ERROR("Pktmbuf pool %s is empty(d_avail = %u, in_avail = %d), MRZCPD service exit. \n", - mrb_pool_iter->symbol, d_avail, in_avail); - exit(EXIT_FAILURE); + MR_ERROR("Pktmbuf pool %s is empty(d_avail = %u, in_avail = %d), retry times = %d", mrb_pool_iter->symbol, + d_avail, in_avail, mrb_pool_iter->leak_check_counter); + mrb_pool_iter->leak_check_counter++; + } + else + { + mrb_pool_iter->leak_check_counter = 0; } - /* 低限 */ - if (d_avail <= warn_threshold_d || in_avail <= warn_threshold_in) + if (mrb_pool_iter->leak_check_counter >= ctx->buffer_leak_retry_times_threshold) { - MR_WARNING("Pktmbuf pool %s is nearly empty(d_avail = %u, in_avail = %d).", mrb_pool_iter->symbol, d_avail, - in_avail); + MR_ERROR("Pktmbuf pool %s is empty(d_avail = %u, in_avail = %d), exceed the retries threshold, " + "MRZCPD service exit. \n", mrb_pool_iter->symbol, d_avail, in_avail); + + exit(EXIT_FAILURE); } } -- cgit v1.2.3