summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--service/include/sc_common.h2
-rw-r--r--service/src/core.c79
-rw-r--r--service/src/monit.c46
-rwxr-xr-xtools/monit_device/monit_device.py25
4 files changed, 101 insertions, 51 deletions
diff --git a/service/include/sc_common.h b/service/include/sc_common.h
index 2959678..e9d1ea8 100644
--- a/service/include/sc_common.h
+++ b/service/include/sc_common.h
@@ -106,6 +106,8 @@ struct sc_main
unsigned int en_pkt_latency;
/* 延迟统计核心 */
unsigned int pkt_latency_lcore_id;
+ /* 延迟统计采样周期 */
+ unsigned int pkt_latency_sample_interval;
/* 负载均衡器 */
struct distributer * dist_object;
/* keepalive监测 */
diff --git a/service/src/core.c b/service/src/core.c
index fffc419..3b1ecae 100644
--- a/service/src/core.c
+++ b/service/src/core.c
@@ -109,14 +109,6 @@ const char service_git_version[] = "";
#define MR_SERVICE_DEFAULT_PKT_LATENCY 0
#endif
-#ifndef MR_SERVICE_DEFAULT_PKT_LATENCY_LCORE_ID
-#define MR_SERVICE_DEFAULT_PKT_LATENCY_LCORE_ID 0
-#endif
-
-#ifndef MR_SERVICE_DEFAULT_SIGSEGV_TAKEOVER
-#define MR_SERVICE_DEFAULT_SIGSEGV_TAKEOVER 0
-#endif
-
unsigned int g_logger_to_stdout = 1;
unsigned int g_logger_level = LOG_DEBUG;
unsigned int g_monit_interval = 1;
@@ -657,6 +649,8 @@ static int sc_g_config_init(struct sc_main * sc)
return RT_ERR;
}
+ int ret = 0;
+
/* 数据面idle调用门限 */
MESA_load_profile_uint_def(sc->local_cfgfile, "service", "idle_threshold", &sc->idle_threshold,
MR_SERVICE_DEFAULT_IDLE_THRESHOLD);
@@ -685,13 +679,17 @@ static int sc_g_config_init(struct sc_main * sc)
MESA_load_profile_uint_def(sc->local_cfgfile, "debug", "pkt_latency", &sc->en_pkt_latency,
MR_SERVICE_DEFAULT_PKT_LATENCY);
+ MESA_load_profile_uint_def(sc->local_cfgfile, "debug", "pkt_latency_sample_interval",
+ &sc->pkt_latency_sample_interval, 10 * 1000 * 1000);
+
/* 报文延迟统计核心 */
- MESA_load_profile_uint_def(sc->local_cfgfile, "debug", "pkt_latency_lcore_id", &sc->pkt_latency_lcore_id,
- MR_SERVICE_DEFAULT_PKT_LATENCY);
+ ret = MESA_load_profile_uint_nodef(sc->local_cfgfile, "debug", "pkt_latency_lcore_id", &sc->pkt_latency_lcore_id);
- /* SIGSERV接管选项 */
- MESA_load_profile_uint_def(sc->local_cfgfile, "debug", "sigsegv_takeover", &sc->en_sig_segv_takeover,
- MR_SERVICE_DEFAULT_SIGSEGV_TAKEOVER);
+ /* not set, use the first io core as the latency stat core */
+ if (ret < 0)
+ {
+ sc->pkt_latency_lcore_id = cpu_set_location(&sc->cpu_set_io, 0);
+ }
return RT_SUCCESS;
}
@@ -885,12 +883,6 @@ __rte_unused static void signal_handler(int signum)
g_keep_running = 0;
}
-static void signal_handler_sigsegv()
-{
- rte_panic("SIGSERV happens, MRZCPD is abort. \n");
- return;
-}
-
extern int hwinfo_init(struct sc_main * sc);
extern int phydev_init(struct sc_main * sc);
extern int devmgr_early_init(struct sc_main * sc);
@@ -1016,15 +1008,6 @@ int main(int argc, char * argv[])
goto quit;
}
- /* SIGSERV signal takeover
- if the options is opened, the program will print the backtrace while
- handle the signal.
- */
- if (sc->en_sig_segv_takeover)
- {
- signal(SIGSEGV, signal_handler_sigsegv);
- }
-
if (sc_ctrlmsg_init(sc) != RT_SUCCESS)
{
MR_ERROR("Ctrlmsg module initialization failed. ");
@@ -1188,12 +1171,29 @@ int main(int argc, char * argv[])
pdump_inited = 1;
}
+ rte_metrics_init(SOCKET_ID_ANY);
+
+ /* 延迟监测 */
+ if (sc->en_pkt_latency)
+ {
+ ret = rte_latencystats_init(sc->pkt_latency_sample_interval, NULL);
+ if (ret != 0)
+ {
+ MR_ERROR("packet latency stats module init failed, ret = %d", ret);
+ ret = EXIT_FAILURE;
+ goto quit;
+ }
+
+ MR_INFO("packet latency stats enabled, sample interval(ns)=%u",sc->pkt_latency_sample_interval);
+ }
+
if (ctrlmsg_thread_launch(sc->ctrlmsg_handler) != RT_SUCCESS)
{
MR_ERROR("Launch ctrlmsg thread failed");
ret = EXIT_FAILURE;
goto quit;
}
+
if (rpc_server_dispatch_thread(sc->rpc_srv_handler) != RT_SUCCESS)
{
MR_ERROR("Launch rpc dispatch thread failed");
@@ -1237,28 +1237,14 @@ int main(int argc, char * argv[])
goto quit;
}
- rte_metrics_init(SOCKET_ID_ANY);
http_serv_init(sc);
- /* 延迟监测 */
- if (sc->en_pkt_latency)
- {
- ret = rte_latencystats_init(1, NULL);
- }
-
- if (sc->en_pkt_latency && ret != 0)
- {
- MR_ERROR("Lantency stats module init failed, ret = %d", ret);
- ret = EXIT_FAILURE;
- goto quit;
- }
-
- unsigned int lcore_id;
- RTE_LCORE_FOREACH(lcore_id)
+ unsigned int lcore_id_iter = 0;
+ RTE_LCORE_FOREACH(lcore_id_iter)
{
if (sc->keepalive)
- rte_keepalive_register_core(sc->keepalive, lcore_id);
- MR_INFO("Keepalive register for thread %d successfully.", lcore_id);
+ rte_keepalive_register_core(sc->keepalive, (int)lcore_id_iter);
+ MR_INFO("Keepalive register for thread %d successfully.", lcore_id_iter);
}
/* 恢复CPU亲和性设置为EAL后的线程绑定参数 */
@@ -1284,7 +1270,6 @@ int main(int argc, char * argv[])
};
/* set eal all cores run as service cores */
- uint16_t lcore_id_iter = 0;
RTE_LCORE_FOREACH(lcore_id_iter)
{
ret = rte_service_lcore_add(lcore_id_iter);
diff --git a/service/src/monit.c b/service/src/monit.c
index 25c395f..8fe4793 100644
--- a/service/src/monit.c
+++ b/service/src/monit.c
@@ -6,11 +6,14 @@
#include <rte_ethdev.h>
#include <rte_pci.h>
+#include <rte_latencystats.h>
+#include <rte_malloc.h>
#include <sc_common.h>
#include <sc_devmgr.h>
#include <sc_vdev.h>
#include "cJSON.h"
+#include "common.h"
extern unsigned int g_monit_interval;
@@ -123,6 +126,47 @@ static cJSON * __create_vdev_stats(struct vdev * vdev, unsigned int nr_serv_thre
return j_vdev_stats;
}
+
+/*
+ * Build the "pkt_latency" monitoring object from the DPDK latencystats metrics.
+ *
+ * Returns NULL when latency statistics are disabled (sc->en_pkt_latency == 0),
+ * when the metric names or values cannot be fetched, or when an allocation
+ * fails. Otherwise returns a newly created cJSON object holding one number per
+ * metric, keyed by the metric name.
+ *
+ * The name/value tables are allocated once, kept in function-local statics and
+ * reused by every later call, so this function is not reentrant.
+ */
+static cJSON * monit_pkt_latency_global(struct sc_main * sc)
+{
+    if (sc->en_pkt_latency == 0)
+    {
+        return NULL;
+    }
+
+    static struct rte_metric_name * metric_names = NULL;
+    static struct rte_metric_value * metric_values = NULL;
+    static unsigned int nr_metrics = 0;
+    static int tables_ready = 0;
+
+    if (!tables_ready)
+    {
+        /* A NULL table only queries the metric count; keep the result signed
+         * so an error code is not mistaken for a huge count. */
+        int nr_names = rte_latencystats_get_names(NULL, 0);
+        if (nr_names <= 0)
+        {
+            return NULL;
+        }
+
+        /* Each table is allocated independently, so a partially failed
+         * attempt is completed by the next call instead of leaking. */
+        if (metric_names == NULL)
+        {
+            metric_names = ZMALLOC(sizeof(struct rte_metric_name) * (size_t)nr_names);
+        }
+
+        if (metric_values == NULL)
+        {
+            metric_values = ZMALLOC(sizeof(struct rte_metric_value) * (size_t)nr_names);
+        }
+
+        /* NOTE(review): defensive check; confirm whether ZMALLOC can return NULL */
+        if (metric_names == NULL || metric_values == NULL)
+        {
+            return NULL;
+        }
+
+        if (rte_latencystats_get_names(metric_names, (unsigned int)nr_names) != nr_names)
+        {
+            return NULL;
+        }
+
+        nr_metrics = (unsigned int)nr_names;
+        tables_ready = 1;
+    }
+
+    /* refresh the metrics before reading them */
+    rte_latencystats_update();
+
+    int nr_values = rte_latencystats_get(metric_values, nr_metrics);
+    if (nr_values < 0)
+    {
+        return NULL;
+    }
+
+    /* never read past the table, whatever count the library reports */
+    unsigned int nr_emit = (unsigned int)nr_values < nr_metrics ? (unsigned int)nr_values : nr_metrics;
+
+    cJSON * j_metric = cJSON_CreateObject();
+    if (j_metric == NULL)
+    {
+        return NULL;
+    }
+
+    for (unsigned int i = 0; i < nr_emit; i++)
+    {
+        cJSON_AddNumberToObject(j_metric, metric_names[i].name, (double)metric_values[i].value);
+    }
+
+    return j_metric;
+}
+
// 运行时原始报文设备统计计数
static cJSON * monit_vdev(struct sc_main * sc)
{
@@ -313,6 +357,8 @@ static cJSON * monit_root(struct sc_main * sc)
#endif
cJSON_AddItemToObject(j_root, "app", app_monit_loop(sc));
cJSON_AddItemToObject(j_root, "service", service_monit_loop(sc));
+ cJSON_AddItemToObject(j_root, "pkt_latency", monit_pkt_latency_global(sc));
+
// cJSON_AddItemToObject(j_root, "offload", smartoffload_monit_loop(sc));
cJSON_AddItemToObject(j_root, "eth-ingress", eth_ingress_node_monit_loop(sc));
cJSON_AddItemToObject(j_root, "bridge", bridge_node_monit_loop(sc));
diff --git a/tools/monit_device/monit_device.py b/tools/monit_device/monit_device.py
index 0f8d7e6..8b906c8 100755
--- a/tools/monit_device/monit_device.py
+++ b/tools/monit_device/monit_device.py
@@ -23,7 +23,6 @@ TITLE_VECTOR = ['PhyRXFrame', 'PhyRXBits', 'PhyRXMissed', 'PhyRXError',
'PhyRXNoBUF', 'PhyTXFrame', 'PhyTXBits', 'PhyTXError',
'UsrRXDrops', 'UsrTXDrops']
-
TITLE_MAP = {'PhyRXFrame': 'ipackets',
'PhyRXBits': 'ibytes',
'PhyRXMissed': 'imissed',
@@ -62,6 +61,11 @@ TITLE_MAP_PROMETHEUS = {
'usr_tx_drop_total': 'usertxdrop'
}
+TITLE_MAP_PKT_LATENCY_PROMETHEUS = {
+ "avg_latency_ns": "pkt_latency_avg_ns",
+ "jitter_ns": "pkt_latency_jitter_ns",
+}
+
def locate_vector_by_symbol(vector, symbol):
return [s for s in vector if s['symbol'] == symbol]
@@ -71,6 +75,17 @@ def list_all_phydev(json_fp):
return [s['symbol'] for s in json_fp['device']]
+def dump_pkt_latency_prometheus_output(json_fp):
+    """Render the packet-latency metrics as Prometheus text lines.
+
+    Reads json_fp['pkt_latency'] and, for every JSON key listed in
+    TITLE_MAP_PKT_LATENCY_PROMETHEUS that is present, emits one
+    '<prometheus_name> <value>' line using the mapped Prometheus name.
+
+    Returns an empty string when the 'pkt_latency' entry is absent or is not
+    an object. A metric missing from the entry is skipped without dropping
+    the others.
+    """
+    try:
+        latency = json_fp['pkt_latency']
+    except KeyError:
+        return ''
+
+    if not isinstance(latency, dict):
+        return ''
+
+    resp = ''
+    for json_key, metric_name in TITLE_MAP_PKT_LATENCY_PROMETHEUS.items():
+        if json_key in latency:
+            resp += '%s %d\n' % (metric_name, latency[json_key])
+    return resp
+
+
def phydev_value_read(json_fp, str_device, str_item):
phydevs = locate_vector_by_symbol(json_fp['device'], str_device)
return phydevs[0]['stats']['accumulative'][str_item]
@@ -95,7 +110,6 @@ def trans_to_human_readable(value):
def dump_human_table(json_fp, devsym, is_human_number=0):
-
print('\nTime: %s, Physical device: %s' % (time.strftime('%c'), devsym))
table_phydev = prettytable.PrettyTable([' '] + TITLE_VECTOR,
@@ -129,6 +143,7 @@ def dump_human_table(json_fp, devsym, is_human_number=0):
table_phydev.add_row(SpeedList)
print(table_phydev)
+
# APM sendlog format
def dump_prometheus_output(json_fp, devsym):
@@ -140,7 +155,6 @@ def dump_prometheus_output(json_fp, devsym):
def setup_argv_parser(phydev_list):
-
parser = argparse.ArgumentParser(
description='Marsio ZeroCopy Tools -- Monitor NIC devices')
@@ -181,11 +195,13 @@ class PrometheusClient(BaseHTTPRequestHandler):
BaseHTTPRequestHandler.__init__(self, request, client_address, server)
def do_GET(self):
- if (self.path == '/metrics'):
+ if self.path == '/metrics':
resp = ''
+
for devsym in self.phydev_list:
resp += dump_prometheus_output(self.json_fp, devsym)
+ resp += dump_pkt_latency_prometheus_output(self.json_fp)
self.send_response(200)
self.send_header('Content-type', 'text/plain; version=0.0.4')
self.end_headers()
@@ -198,6 +214,7 @@ class PrometheusClient(BaseHTTPRequestHandler):
def prometheus_client_init(json_fp, phydev_list, prometheus_client_port):
HTTPServer(("", prometheus_client_port), PrometheusClient).serve_forever()
+
def main():
signal.signal(signal.SIGINT, sigint_handler)