diff options
| author | Qiuwen Lu <[email protected]> | 2017-11-07 13:31:11 +0800 |
|---|---|---|
| committer | Qiuwen Lu <[email protected]> | 2017-11-07 13:31:11 +0800 |
| commit | a19a64fde99fe775d12e2341eb235d3677a7a220 (patch) | |
| tree | 55cd160085968c57ae4c25f400b14fbffe205f45 | |
| parent | 58db7c979a7b8899600c5d7c758e5a32d9eefa9f (diff) | |
Buffer泄露检测支持用户设置低限、低低限。v4.2.32-20171109
| -rw-r--r-- | service/include/sc_common.h | 2 | ||||
| -rw-r--r-- | service/include/sc_mrb.h | 2 | ||||
| -rw-r--r-- | service/src/core.c | 8 | ||||
| -rw-r--r-- | service/src/mrb.c | 143 |
4 files changed, 123 insertions, 32 deletions
diff --git a/service/include/sc_common.h b/service/include/sc_common.h index add2141..5f7b3b7 100644 --- a/service/include/sc_common.h +++ b/service/include/sc_common.h @@ -74,8 +74,6 @@ struct sc_main /* 控制死锁检测检测间隔 */ unsigned int ctrl_spinlock_check_interval; - /* 异常状态检测标志位,内存泄露检测 */ - unsigned int en_memleak_check; /* 软件报文时间戳 */ unsigned int en_pkt_timestamp; /* 捕包支持 */ diff --git a/service/include/sc_mrb.h b/service/include/sc_mrb.h index 260019b..21b9f74 100644 --- a/service/include/sc_mrb.h +++ b/service/include/sc_mrb.h @@ -10,4 +10,4 @@ struct rte_mempool * mrb_indirect_mempool_locate(struct mrb_main * ctx, int mrb_init(struct sc_main * sc); -int mrb_memleak_check(struct sc_main * sc);
\ No newline at end of file +int mrb_buffer_leak_detect(struct sc_main * sc);
\ No newline at end of file diff --git a/service/src/core.c b/service/src/core.c index 9dbaaa2..be8cd66 100644 --- a/service/src/core.c +++ b/service/src/core.c @@ -184,8 +184,7 @@ void sc_keepalive_failure_handler(void *data, const int id_core) int sc_check_memleak_loop(struct sc_main * sc_main) { - if (sc_main->en_memleak_check == 0) return 0; - return mrb_memleak_check(sc_main); + return mrb_buffer_leak_detect(sc_main); } void * sc_ctrlplane_thread(void * args) @@ -277,7 +276,6 @@ void sc_config_dump(struct sc_main * sc) MR_INFO(" Local configure file : %s", sc->local_cfgfile); MR_INFO(" Local hardware configure file : %s", sc->local_hwfile); MR_INFO(" Check spinlock : %s", __str_disable_or_enable_uint(sc->en_spinlock_check)); - MR_INFO(" Check memleak : %s", __str_disable_or_enable_uint(sc->en_memleak_check)); MR_INFO(" "); MR_INFO("Modules:"); @@ -493,10 +491,6 @@ static int sc_g_config_init(struct sc_main * sc) MESA_load_profile_uint_def(sc->local_cfgfile, "keepalive", "check_ctrl_spinlock_interval", &sc->ctrl_spinlock_check_interval, MR_SERVICE_DEFAULT_CHECK_CTRL_SPINLOCK_INTERVAL); - /* 内存泄露检测 */ - MESA_load_profile_uint_def(sc->local_cfgfile, "keepalive", "check_memleak", - &sc->en_memleak_check, MR_SERVICE_DEFAULT_CHECK_MEMLEAK); - /* 报文捕获 */ MESA_load_profile_uint_def(sc->local_cfgfile, "debug", "pkt_dumper", &sc->en_pkt_dumper, MR_SERVICE_DEFAULT_PKT_DUMPER); diff --git a/service/src/mrb.c b/service/src/mrb.c index 07120d4..304855b 100644 --- a/service/src/mrb.c +++ b/service/src/mrb.c @@ -58,6 +58,18 @@ enum mrb_create_mode #define MRB_POOL_COUNT_MAX 16 #endif +#ifndef MRB_POOL_BUFFER_LEAK_DETECT +#define MRB_POOL_BUFFER_LEAK_DETECT 1 +#endif + +#ifndef MRB_POOL_BUFFER_LEAK_DETECT_WARN_THRESHOLD +#define MRB_POOL_BUFFER_LEAK_DETECT_WARN_THRESHOLD 0.95 +#endif + +#ifndef MRB_POOL_BUFFER_LEAK_DETECT_RESTART_THRESHOLD +#define MRB_POOL_BUFFER_LEAK_DETECT_RESTART_THRESHOLD 1.00 +#endif + struct mrb_pool { TAILQ_ENTRY(mrb_pool) next; @@ -99,6 +111,13 @@ struct mrb_main /* 内存池寻址模式 */ struct mrb_pool_adapter * adapter; + + /* Buffer泄露检查 */ + unsigned int en_buffer_leak_detect; + /* Buffer泄露检查,低限参数 */ + float buffer_leak_warn_threshold; + /* Buffer泄露检查,低低限参数 */ + float buffer_leak_restart_threshold; }; struct mrb_pool_adapter @@ -531,6 +550,49 @@ int mrb_pool_config(struct sc_main * sc) return ctx->adapter->fn_config(ctx->adapter, ctx); } +/* Buffer池泄露告警 */ +int mrb_buffer_leak_config(struct sc_main * sc) +{ + const char * cfgfile = sc->local_cfgfile; + struct mrb_main * ctx = sc->mrb_pool_main; + + MESA_load_profile_uint_def(cfgfile, "buffer_leak_detect", "check_buffer_leak", + &ctx->en_buffer_leak_detect, MRB_POOL_BUFFER_LEAK_DETECT); + + /* 放大系数1000,避免读入浮点数 */ + unsigned int __warn_threshold = MRB_POOL_BUFFER_LEAK_DETECT_WARN_THRESHOLD * 1000; + unsigned int __restart_threshold = MRB_POOL_BUFFER_LEAK_DETECT_RESTART_THRESHOLD * 1000; + + MESA_load_profile_uint_def(cfgfile, "buffer_leak_detect", "warn_threshold", &__warn_threshold, + __warn_threshold); + MESA_load_profile_uint_def(cfgfile, "buffer_leak_detect", "restart_threshold", &__restart_threshold, + __restart_threshold); + + ctx->buffer_leak_warn_threshold = 1 - __warn_threshold / 1000.0; + ctx->buffer_leak_restart_threshold = 1 - __restart_threshold / 1000.0; + + /* 参数合法性检查,必须在0和1之间 */ + if (ctx->buffer_leak_warn_threshold < 0 || ctx->buffer_leak_warn_threshold > 1) + { + MR_CFGERR_INVALID_VALUE(cfgfile, "buffer_leak_detect", "warn_threshold", + "threshold must smaller than 1000"); return RT_ERR; + } + + if (ctx->buffer_leak_restart_threshold < 0 || ctx->buffer_leak_restart_threshold > 1) + { + MR_CFGERR_INVALID_VALUE(cfgfile, "buffer_leak_detect", "restart_threshold", + "threshold must smller than 1000"); return RT_ERR; + } + + if (ctx->en_buffer_leak_detect) + { + MR_INFO("Buffer leak detect is enable, warning threshold is %f, restart threshold is %f", + 1 - ctx->buffer_leak_warn_threshold, 1 - ctx->buffer_leak_restart_threshold); + } + + return RT_SUCCESS; +} + int mrb_init(struct sc_main * sc) { sc->mrb_pool_main = ZMALLOC(sizeof(struct mrb_main)); @@ -552,6 +614,10 @@ int mrb_init(struct sc_main * sc) ret = ctx->adapter->fn_create_pool(ctx->adapter, ctx); if (ret < 0) return ret; + /* Buffer泄露监测 */ + ret = mrb_buffer_leak_config(sc); + if (ret < 0) return ret; + return RT_SUCCESS; } @@ -595,31 +661,64 @@ cJSON * mrb_monit_loop(struct sc_main * sc) return j_mrb_array; } -int mrb_memleak_check(struct sc_main * sc) -{
+int mrb_buffer_leak_detect(struct sc_main * sc) +{ struct mrb_main * ctx = sc->mrb_pool_main; - struct mrb_pool * mrb_pool_iter = NULL; - TAILQ_FOREACH(mrb_pool_iter, &ctx->pool_list, next) - { - /* 此处,为什么没有采用rte_mempool_avail_count()来读取可用的MBUF数量? - rte_mempool_avail_count()计算了各核心中Cahce住的mbufs数量,这一部分 - mbufs虽然可用,但仅限于本核心,其他核心是无法使用的。 - - 如果采用这种方式读取数量,会发现数量虽然大于零,但收包的核心已经无法取得空闲 - 的MBUF,导致丢包。内存泄露监测模块就是要监测这种现象。 - - rte_mempool_ops_get_count()是DPDK的内部函数,不推荐外面的应用调用。 - 在后续的DPDK版本升级中,要注意关注这一函数行为的变化。 - */ - unsigned int d_avail = rte_mempool_ops_get_count(mrb_pool_iter->direct); - unsigned int in_avail = rte_mempool_ops_get_count(mrb_pool_iter->indirect); + struct mrb_pool * mrb_pool_iter = NULL; + + /* 探测开关 */ + if (!ctx->en_buffer_leak_detect) return RT_SUCCESS; - /* 此处,没有写0,因为malloc批量操作时,小于Burst剩余数量也会失败 */ - if (d_avail >= MR_BURST_MAX && in_avail >= MR_BURST_MAX) continue; + TAILQ_FOREACH(mrb_pool_iter, &ctx->pool_list, next) + { + /* 此处,为什么没有采用rte_mempool_avail_count()来读取可用的MBUF数量? + rte_mempool_avail_count()计算了各核心中Cahce住的mbufs数量,这一部分 + mbufs虽然可用,但仅限于本核心,其他核心是无法使用的。 + + 如果采用这种方式读取数量,会发现数量虽然大于零,但收包的核心已经无法取得空闲 + 的MBUF,导致丢包。内存泄露监测模块就是要监测这种现象。 + + rte_mempool_ops_get_count()是DPDK的内部函数,不推荐外面的应用调用。 + 在后续的DPDK版本升级中,要注意关注这一函数行为的变化。 + */ + unsigned int d_avail = rte_mempool_ops_get_count(mrb_pool_iter->direct); + unsigned int in_avail = rte_mempool_ops_get_count(mrb_pool_iter->indirect); + + /* 阈值,根据Buffer池大小计算 */ + unsigned int warn_threshold_d = (unsigned int)(mrb_pool_iter->direct->size * + ctx->buffer_leak_warn_threshold); + unsigned int warn_threshold_in = (unsigned int)(mrb_pool_iter->indirect->size * + ctx->buffer_leak_warn_threshold); + unsigned int restart_threshold_d = (unsigned int)(mrb_pool_iter->direct->size * + ctx->buffer_leak_restart_threshold); + unsigned int restart_threshold_in = (unsigned int)(mrb_pool_iter->indirect->size * + ctx->buffer_leak_restart_threshold); + + warn_threshold_d = RTE_MAX(warn_threshold_d, MR_BURST_MAX); + restart_threshold_d = RTE_MAX(restart_threshold_d, MR_BURST_MAX); + warn_threshold_in = RTE_MAX(warn_threshold_in, MR_BURST_MAX); + restart_threshold_in = RTE_MAX(restart_threshold_in, MR_BURST_MAX); + +#if 0 + MR_DEBUG("warn_threshold_d = %u, restart_threshold_d = %u, " + "warn_threshold_in = %u, restart_threshold_in = %u, d_avail = %u, in_avail = %u", + warn_threshold_d, restart_threshold_d, warn_threshold_in, restart_threshold_in, + d_avail, in_avail); +#endif - /* 检查失败,退出 */ - MR_ERROR("Pktmbuf pool %s is empty(d_avail = %u, in_avail = %d), MRZCPD service exit. \n", - mrb_pool_iter->symbol, d_avail, in_avail); exit(EXIT_FAILURE); + /* 低低限 */ + if (d_avail <= restart_threshold_d || in_avail <= restart_threshold_in) + { + MR_ERROR("Pktmbuf pool %s is empty(d_avail = %u, in_avail = %d), MRZCPD service exit. \n", + mrb_pool_iter->symbol, d_avail, in_avail); exit(EXIT_FAILURE); + } + + /* 低限 */ + if (d_avail <= warn_threshold_d || in_avail <= warn_threshold_in) + { + MR_WARNING("Pktmbuf pool %s is nearly empty(d_avail = %u, in_avail = %d).", + mrb_pool_iter->symbol, d_avail, in_avail); + } } return 0; |
