summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorQiuwen Lu <[email protected]>2017-07-25 14:14:34 +0800
committerQiuwen Lu <[email protected]>2017-07-25 14:14:34 +0800
commita9d87e61409bfc75efee0ee8f5a550b0c4b0a430 (patch)
tree5420efd311aa35a367c7af652013fee18cd39461
parentfbff4ce9bc87eb2940435bdac972890c06dd6706 (diff)
增加内存泄露检测功能,修正tunnat的dir标志位。v4.2.7-20170725
- 增加PKTMBUF内存池的泄露监测。当PKTMBUF内存池耗尽后,驱动服务进程退出,避免因PKTMBUF耗尽导致无法收包。 - 修正tunnat置dir标志位的问题。测试环境发现置dir标志位无效,需要置online_test标志位,怀疑结构体写反了。
-rw-r--r--service/include/sc_common.h5
-rw-r--r--service/include/sc_mrb.h4
-rw-r--r--service/src/core.c72
-rw-r--r--service/src/mrb.c30
-rw-r--r--tunnat/include/tunnel.h2
5 files changed, 87 insertions, 26 deletions
diff --git a/service/include/sc_common.h b/service/include/sc_common.h
index a869454..e8a1c42 100644
--- a/service/include/sc_common.h
+++ b/service/include/sc_common.h
@@ -60,6 +60,11 @@ struct sc_main
/* 数据面:IDLE操作门限 */
unsigned int idle_threshold;
+ /* 异常状态检测标志位,死锁检测 */
+ unsigned int en_spinlock_check;
+ /* 异常状态检测标志位,内存泄露检测 */
+ unsigned int en_memleak_check;
+
/* 负载均衡器 */
struct distributer * dist_object;
/* keepalive监测 */
diff --git a/service/include/sc_mrb.h b/service/include/sc_mrb.h
index d596106..260019b 100644
--- a/service/include/sc_mrb.h
+++ b/service/include/sc_mrb.h
@@ -8,4 +8,6 @@ struct rte_mempool * mrb_direct_mempool_locate(struct mrb_main * ctx,
struct rte_mempool * mrb_indirect_mempool_locate(struct mrb_main * ctx,
const char * symbol, socket_id_t socket_id, cpu_id_t cpu_id);
-int mrb_init(struct sc_main * sc); \ No newline at end of file
+int mrb_init(struct sc_main * sc);
+
+int mrb_memleak_check(struct sc_main * sc); \ No newline at end of file
diff --git a/service/src/core.c b/service/src/core.c
index 4a320dc..225290f 100644
--- a/service/src/core.c
+++ b/service/src/core.c
@@ -66,11 +66,15 @@ const char service_git_version[] = "";
#endif
#ifndef MR_SERVICE_DEFAULT_IDLE_THRESHOLD
-#define MR_SERVICE_DEFAULT_IDLE_THRESHOLD 1000000
+#define MR_SERVICE_DEFAULT_IDLE_THRESHOLD 1000000
#endif
-#ifndef MR_SERVICE_DEFAULT_KEEPALIVE
-#define MR_SERVICE_DEFAULT_KEEPALIVE 1
+#ifndef MR_SERVICE_DEFAULT_CHECK_SPINLOCK
+#define MR_SERVICE_DEFAULT_CHECK_SPINLOCK 1
+#endif
+
+#ifndef MR_SERVICE_DEFAULT_CHECK_MEMLEAK
+#define MR_SERVICE_DEFAULT_CHECK_MEMLEAK 1
#endif
@@ -136,7 +140,7 @@ extern int sc_monit_loop(struct sc_main * sc_main);
void sw_forward_rxtx_loop(struct sc_main * sc, unsigned int lcore_id);
void sw_forward_idle_loop(struct sc_main * sc, unsigned int lcore_id);
-int sc_keepalive_loop(struct sc_main * sc_main)
+int sc_check_spinlock_loop(struct sc_main * sc_main)
{
if (sc_main->keepalive == NULL) return 0;
rte_keepalive_dispatch_pings(NULL, sc_main->keepalive);
@@ -150,6 +154,12 @@ void sc_keepalive_failure_handler(void *data, const int id_core)
return;
}
+/*
+ * One pass of the packet-buffer leak check.
+ *
+ * Returns 0 without doing anything when sc_main->en_memleak_check is 0;
+ * otherwise returns whatever mrb_memleak_check() returns.
+ */
+int sc_check_memleak_loop(struct sc_main * sc_main)
+{
+    /* Only run the check when it has been switched on. */
+    if (sc_main->en_memleak_check != 0)
+    {
+        return mrb_memleak_check(sc_main);
+    }
+
+    return 0;
+}
+
void * sc_ctrlplane_thread(void * args)
{
struct sc_main * sc_main = (struct sc_main *)args;
@@ -158,7 +168,8 @@ void * sc_ctrlplane_thread(void * args)
while (g_keep_running)
{
sc_monit_loop(sc_main);
- sc_keepalive_loop(sc_main);
+ sc_check_spinlock_loop(sc_main);
+ sc_check_memleak_loop(sc_main);
sleep(1);
}
@@ -204,12 +215,18 @@ int sc_dataplane_thread(void * arg)
return 0;
}
-static const char * __str_disable_or_enable(void * ptr)
+/* Map a pointer to a display string: "Disable" when ptr is NULL, "Enable" otherwise. */
+static const char * __str_disable_or_enable_ptr(void * ptr)
{
if (ptr == NULL) return "Disable";
else return "Enable";
}
+/* Map an unsigned flag to a display string: "Enable" for any non-zero value, "Disable" for zero. */
+static const char * __str_disable_or_enable_uint(unsigned int value)
+{
+    return (value != 0) ? "Enable" : "Disable";
+}
+
void sc_config_dump(struct sc_main * sc)
{
MR_INFO(" ");
@@ -221,14 +238,16 @@ void sc_config_dump(struct sc_main * sc)
MR_INFO(" Packet hash function mode : %s", ldbc_str_hash_mode(sc->dist_object));
MR_INFO(" Local configure file : %s", sc->local_cfgfile);
MR_INFO(" Local hardware configure file : %s", sc->local_hwfile);
+ MR_INFO(" Check spinlock : %s", __str_disable_or_enable_uint(sc->en_spinlock_check));
+ MR_INFO(" Check memleak : %s", __str_disable_or_enable_uint(sc->en_memleak_check));
+
MR_INFO(" ");
MR_INFO("Modules:");
- MR_INFO(" Hardware information module : %s", __str_disable_or_enable(sc->hwinfo_main));
- MR_INFO(" Ctrlplane message module : %s", __str_disable_or_enable(sc->ctrlmsg_handler));
- MR_INFO(" PHY device module : %s", __str_disable_or_enable(sc->phydev_main));
- MR_INFO(" VIRT device module : %s", __str_disable_or_enable(sc->vdev_main));
- MR_INFO(" Forward module : %s", __str_disable_or_enable(sc->sw_forward_main));
- MR_INFO(" Keepalive module : %s", __str_disable_or_enable(sc->keepalive));
+ MR_INFO(" Hardware information module : %s", __str_disable_or_enable_ptr(sc->hwinfo_main));
+ MR_INFO(" Ctrlplane message module : %s", __str_disable_or_enable_ptr(sc->ctrlmsg_handler));
+ MR_INFO(" PHY device module : %s", __str_disable_or_enable_ptr(sc->phydev_main));
+ MR_INFO(" VIRT device module : %s", __str_disable_or_enable_ptr(sc->vdev_main));
+ MR_INFO(" Forward module : %s", __str_disable_or_enable_ptr(sc->sw_forward_main));
return;
}
@@ -400,20 +419,13 @@ static int sc_g_config_init(struct sc_main * sc)
&sc->idle_threshold, MR_SERVICE_DEFAULT_IDLE_THRESHOLD);
/* 数据面线程保活 */
- unsigned int en_keepalive = MR_SERVICE_DEFAULT_KEEPALIVE;
MESA_load_profile_uint_def(sc->local_cfgfile, "keepalive", "check_spinlock",
- &en_keepalive, MR_SERVICE_DEFAULT_KEEPALIVE);
+ &sc->en_spinlock_check, MR_SERVICE_DEFAULT_CHECK_SPINLOCK);
- if (en_keepalive)
- {
- sc->keepalive = rte_keepalive_create(sc_keepalive_failure_handler, sc);
- if (sc->keepalive == NULL)
- {
- MR_ERROR("Create keepalive handler failed. ");
- return RT_ERR;
- }
- }
-
+ /* 内存泄露检测 */
+ MESA_load_profile_uint_def(sc->local_cfgfile, "keepalive", "check_memleak",
+ &sc->en_memleak_check, MR_SERVICE_DEFAULT_CHECK_MEMLEAK);
+
return RT_SUCCESS;
}
@@ -694,6 +706,18 @@ int main(int argc, char * argv[])
if (__check_is_notify()) sd_notify(0, "READY=1");
sleep(1);
+ /* 死锁检测 */
+ if (sc->en_spinlock_check)
+ {
+ sc->keepalive = rte_keepalive_create(sc_keepalive_failure_handler, sc);
+ }
+
+ if (sc->en_spinlock_check && sc->keepalive == NULL)
+ {
+ MR_ERROR("Create spinlock checker handler failed. ");
+ ret = EXIT_FAILURE; goto quit;
+ }
+
unsigned int lcore_id;
RTE_LCORE_FOREACH_SLAVE(lcore_id)
{
diff --git a/service/src/mrb.c b/service/src/mrb.c
index 182f192..07120d4 100644
--- a/service/src/mrb.c
+++ b/service/src/mrb.c
@@ -593,4 +593,34 @@ cJSON * mrb_monit_loop(struct sc_main * sc)
}
return j_mrb_array;
+}
+
+/*
+ * Check every pktmbuf pool registered in sc->mrb_pool_main for exhaustion.
+ *
+ * For each pool, the number of objects left in the direct and the indirect
+ * mempool is read. If either count is below MR_BURST_MAX, an error is logged
+ * and the process terminates with exit(EXIT_FAILURE).
+ *
+ * Returns 0 when every pool passes the check; does not return otherwise.
+ */
+int mrb_memleak_check(struct sc_main * sc)
+{
+    struct mrb_main * ctx = sc->mrb_pool_main;
+    struct mrb_pool * mrb_pool_iter = NULL;
+
+    TAILQ_FOREACH(mrb_pool_iter, &ctx->pool_list, next)
+    {
+        /* Why is rte_mempool_avail_count() not used to read the number of
+           available mbufs here? rte_mempool_avail_count() also counts the
+           mbufs held in each core's cache. Those mbufs are available, but
+           only to the core that owns the cache; other cores cannot use them.
+
+           Reading the count that way can report a value greater than zero
+           while the receiving core is already unable to obtain a free mbuf,
+           which leads to packet loss. Detecting exactly that situation is
+           the purpose of the memory-leak monitor.
+
+           rte_mempool_ops_get_count() is a DPDK-internal function that
+           applications are not encouraged to call. Watch for changes in its
+           behavior when upgrading DPDK.
+        */
+        unsigned int d_avail = rte_mempool_ops_get_count(mrb_pool_iter->direct);
+        unsigned int in_avail = rte_mempool_ops_get_count(mrb_pool_iter->indirect);
+
+        /* The threshold is MR_BURST_MAX rather than 0 because a bulk
+           allocation also fails when fewer than one burst of mbufs remain. */
+        if (d_avail >= MR_BURST_MAX && in_avail >= MR_BURST_MAX) continue;
+
+        /* Check failed: report and exit. Both counters are unsigned, so both
+           are printed with %u. */
+        MR_ERROR("Pktmbuf pool %s is empty(d_avail = %u, in_avail = %u), MRZCPD service exit. \n",
+            mrb_pool_iter->symbol, d_avail, in_avail);
+        exit(EXIT_FAILURE);
+    }
+
+    return 0;
} \ No newline at end of file
diff --git a/tunnat/include/tunnel.h b/tunnat/include/tunnel.h
index 5f95a85..4994eff 100644
--- a/tunnat/include/tunnel.h
+++ b/tunnat/include/tunnel.h
@@ -119,7 +119,7 @@ public:
void ReverseAddress()
{
- vxlan_hdr_.dir = ~vxlan_hdr_.dir;
+ // Inverts vxlan_hdr_.online_test with bitwise NOT (previously this inverted dir).
+ // NOTE(review): per the commit message, setting dir had no effect in the test
+ // environment and online_test had to be set instead; the two fields are suspected
+ // to be declared in the wrong order in the header struct - confirm against the
+ // struct definition before relying on this.
+ vxlan_hdr_.online_test = ~vxlan_hdr_.online_test;
}
static int PacketForwardModify(const char * pkt, unsigned int pkt_len);