diff options
| author | Qiuwen Lu <[email protected]> | 2017-07-25 14:14:34 +0800 |
|---|---|---|
| committer | Qiuwen Lu <[email protected]> | 2017-07-25 14:14:34 +0800 |
| commit | a9d87e61409bfc75efee0ee8f5a550b0c4b0a430 (patch) | |
| tree | 5420efd311aa35a367c7af652013fee18cd39461 | |
| parent | fbff4ce9bc87eb2940435bdac972890c06dd6706 (diff) | |
增加内存泄露检测功能,修正tunnat的dir标志位。v4.2.7-20170725
- 增加PKTMBUF内存池的泄露监测。当PKTMBUF内存池耗尽后,驱动服务进程退出,避免因PKTMBUF耗尽导致无法收包。
- 修正tunnat置dir标志位的问题。测试环境发现置dir标志位无效,需要置online_test标志位,怀疑结构体写反了。
| -rw-r--r-- | service/include/sc_common.h | 5 | ||||
| -rw-r--r-- | service/include/sc_mrb.h | 4 | ||||
| -rw-r--r-- | service/src/core.c | 72 | ||||
| -rw-r--r-- | service/src/mrb.c | 30 | ||||
| -rw-r--r-- | tunnat/include/tunnel.h | 2 |
5 files changed, 87 insertions, 26 deletions
diff --git a/service/include/sc_common.h b/service/include/sc_common.h index a869454..e8a1c42 100644 --- a/service/include/sc_common.h +++ b/service/include/sc_common.h @@ -60,6 +60,11 @@ struct sc_main /* 数据面:IDLE操作门限 */ unsigned int idle_threshold; + /* 异常状态检测标志位,死锁检测 */ + unsigned int en_spinlock_check; + /* 异常状态检测标志位,内存泄露检测 */ + unsigned int en_memleak_check; + /* 负载均衡器 */ struct distributer * dist_object; /* keepalive监测 */ diff --git a/service/include/sc_mrb.h b/service/include/sc_mrb.h index d596106..260019b 100644 --- a/service/include/sc_mrb.h +++ b/service/include/sc_mrb.h @@ -8,4 +8,6 @@ struct rte_mempool * mrb_direct_mempool_locate(struct mrb_main * ctx, struct rte_mempool * mrb_indirect_mempool_locate(struct mrb_main * ctx, const char * symbol, socket_id_t socket_id, cpu_id_t cpu_id); -int mrb_init(struct sc_main * sc);
\ No newline at end of file +int mrb_init(struct sc_main * sc); + +int mrb_memleak_check(struct sc_main * sc);
\ No newline at end of file diff --git a/service/src/core.c b/service/src/core.c index 4a320dc..225290f 100644 --- a/service/src/core.c +++ b/service/src/core.c @@ -66,11 +66,15 @@ const char service_git_version[] = ""; #endif #ifndef MR_SERVICE_DEFAULT_IDLE_THRESHOLD -#define MR_SERVICE_DEFAULT_IDLE_THRESHOLD 1000000 +#define MR_SERVICE_DEFAULT_IDLE_THRESHOLD 1000000 #endif -#ifndef MR_SERVICE_DEFAULT_KEEPALIVE -#define MR_SERVICE_DEFAULT_KEEPALIVE 1 +#ifndef MR_SERVICE_DEFAULT_CHECK_SPINLOCK +#define MR_SERVICE_DEFAULT_CHECK_SPINLOCK 1 +#endif + +#ifndef MR_SERVICE_DEFAULT_CHECK_MEMLEAK +#define MR_SERVICE_DEFAULT_CHECK_MEMLEAK 1 #endif @@ -136,7 +140,7 @@ extern int sc_monit_loop(struct sc_main * sc_main); void sw_forward_rxtx_loop(struct sc_main * sc, unsigned int lcore_id); void sw_forward_idle_loop(struct sc_main * sc, unsigned int lcore_id); -int sc_keepalive_loop(struct sc_main * sc_main) +int sc_check_spinlock_loop(struct sc_main * sc_main) { if (sc_main->keepalive == NULL) return 0; rte_keepalive_dispatch_pings(NULL, sc_main->keepalive); @@ -150,6 +154,12 @@ void sc_keepalive_failure_handler(void *data, const int id_core) return; } +int sc_check_memleak_loop(struct sc_main * sc_main) +{ + if (sc_main->en_memleak_check == 0) return 0; + return mrb_memleak_check(sc_main); +} + void * sc_ctrlplane_thread(void * args) { struct sc_main * sc_main = (struct sc_main *)args; @@ -158,7 +168,8 @@ void * sc_ctrlplane_thread(void * args) while (g_keep_running) { sc_monit_loop(sc_main); - sc_keepalive_loop(sc_main); + sc_check_spinlock_loop(sc_main); + sc_check_memleak_loop(sc_main); sleep(1); } @@ -204,12 +215,18 @@ int sc_dataplane_thread(void * arg) return 0; } -static const char * __str_disable_or_enable(void * ptr) +static const char * __str_disable_or_enable_ptr(void * ptr) { if (ptr == NULL) return "Disable"; else return "Enable"; } +static const char * __str_disable_or_enable_uint(unsigned int value) +{ + if (value) return "Enable"; + else return "Disable"; +} 
+ void sc_config_dump(struct sc_main * sc) { MR_INFO(" "); @@ -221,14 +238,16 @@ void sc_config_dump(struct sc_main * sc) MR_INFO(" Packet hash function mode : %s", ldbc_str_hash_mode(sc->dist_object)); MR_INFO(" Local configure file : %s", sc->local_cfgfile); MR_INFO(" Local hardware configure file : %s", sc->local_hwfile); + MR_INFO(" Check spinlock : %s", __str_disable_or_enable_uint(sc->en_spinlock_check)); + MR_INFO(" Check memleak : %s", __str_disable_or_enable_uint(sc->en_memleak_check)); + MR_INFO(" "); MR_INFO("Modules:"); - MR_INFO(" Hardware information module : %s", __str_disable_or_enable(sc->hwinfo_main)); - MR_INFO(" Ctrlplane message module : %s", __str_disable_or_enable(sc->ctrlmsg_handler)); - MR_INFO(" PHY device module : %s", __str_disable_or_enable(sc->phydev_main)); - MR_INFO(" VIRT device module : %s", __str_disable_or_enable(sc->vdev_main)); - MR_INFO(" Forward module : %s", __str_disable_or_enable(sc->sw_forward_main)); - MR_INFO(" Keepalive module : %s", __str_disable_or_enable(sc->keepalive)); + MR_INFO(" Hardware information module : %s", __str_disable_or_enable_ptr(sc->hwinfo_main)); + MR_INFO(" Ctrlplane message module : %s", __str_disable_or_enable_ptr(sc->ctrlmsg_handler)); + MR_INFO(" PHY device module : %s", __str_disable_or_enable_ptr(sc->phydev_main)); + MR_INFO(" VIRT device module : %s", __str_disable_or_enable_ptr(sc->vdev_main)); + MR_INFO(" Forward module : %s", __str_disable_or_enable_ptr(sc->sw_forward_main)); return; } @@ -400,20 +419,13 @@ static int sc_g_config_init(struct sc_main * sc) &sc->idle_threshold, MR_SERVICE_DEFAULT_IDLE_THRESHOLD); /* 数据面线程保活 */ - unsigned int en_keepalive = MR_SERVICE_DEFAULT_KEEPALIVE; MESA_load_profile_uint_def(sc->local_cfgfile, "keepalive", "check_spinlock", - &en_keepalive, MR_SERVICE_DEFAULT_KEEPALIVE); + &sc->en_spinlock_check, MR_SERVICE_DEFAULT_CHECK_SPINLOCK); - if (en_keepalive) - { - sc->keepalive = rte_keepalive_create(sc_keepalive_failure_handler, sc); - if (sc->keepalive == 
NULL) - { - MR_ERROR("Create keepalive handler failed. "); - return RT_ERR; - } - } - + /* 内存泄露检测 */ + MESA_load_profile_uint_def(sc->local_cfgfile, "keepalive", "check_memleak", + &sc->en_memleak_check, MR_SERVICE_DEFAULT_CHECK_MEMLEAK); + return RT_SUCCESS; } @@ -694,6 +706,18 @@ int main(int argc, char * argv[]) if (__check_is_notify()) sd_notify(0, "READY=1"); sleep(1); + /* 死锁检测 */ + if (sc->en_spinlock_check) + { + sc->keepalive = rte_keepalive_create(sc_keepalive_failure_handler, sc); + } + + if (sc->en_spinlock_check && sc->keepalive == NULL) + { + MR_ERROR("Create spinlock checker handler failed. "); + ret = EXIT_FAILURE; goto quit; + } + unsigned int lcore_id; RTE_LCORE_FOREACH_SLAVE(lcore_id) { diff --git a/service/src/mrb.c b/service/src/mrb.c index 182f192..07120d4 100644 --- a/service/src/mrb.c +++ b/service/src/mrb.c @@ -593,4 +593,34 @@ cJSON * mrb_monit_loop(struct sc_main * sc) } return j_mrb_array; +} + +int mrb_memleak_check(struct sc_main * sc) +{
+ struct mrb_main * ctx = sc->mrb_pool_main; + struct mrb_pool * mrb_pool_iter = NULL; + TAILQ_FOREACH(mrb_pool_iter, &ctx->pool_list, next) + { + /* 此处,为什么没有采用rte_mempool_avail_count()来读取可用的MBUF数量? + rte_mempool_avail_count()计算了各核心中Cahce住的mbufs数量,这一部分 + mbufs虽然可用,但仅限于本核心,其他核心是无法使用的。 + + 如果采用这种方式读取数量,会发现数量虽然大于零,但收包的核心已经无法取得空闲 + 的MBUF,导致丢包。内存泄露监测模块就是要监测这种现象。 + + rte_mempool_ops_get_count()是DPDK的内部函数,不推荐外面的应用调用。 + 在后续的DPDK版本升级中,要注意关注这一函数行为的变化。 + */ + unsigned int d_avail = rte_mempool_ops_get_count(mrb_pool_iter->direct); + unsigned int in_avail = rte_mempool_ops_get_count(mrb_pool_iter->indirect); + + /* 此处,没有写0,因为malloc批量操作时,小于Burst剩余数量也会失败 */ + if (d_avail >= MR_BURST_MAX && in_avail >= MR_BURST_MAX) continue; + + /* 检查失败,退出 */ + MR_ERROR("Pktmbuf pool %s is empty(d_avail = %u, in_avail = %d), MRZCPD service exit. \n", + mrb_pool_iter->symbol, d_avail, in_avail); exit(EXIT_FAILURE); + } + + return 0; }
\ No newline at end of file diff --git a/tunnat/include/tunnel.h b/tunnat/include/tunnel.h index 5f95a85..4994eff 100644 --- a/tunnat/include/tunnel.h +++ b/tunnat/include/tunnel.h @@ -119,7 +119,7 @@ public: void ReverseAddress() { - vxlan_hdr_.dir = ~vxlan_hdr_.dir; + vxlan_hdr_.online_test = ~vxlan_hdr_.online_test; } static int PacketForwardModify(const char * pkt, unsigned int pkt_len); |
