diff options
| -rw-r--r-- | service/include/sc_common.h | 2 | ||||
| -rw-r--r-- | service/src/core.c | 79 | ||||
| -rw-r--r-- | service/src/monit.c | 46 | ||||
| -rwxr-xr-x | tools/monit_device/monit_device.py | 25 |
4 files changed, 101 insertions, 51 deletions
diff --git a/service/include/sc_common.h b/service/include/sc_common.h index 2959678..e9d1ea8 100644 --- a/service/include/sc_common.h +++ b/service/include/sc_common.h @@ -106,6 +106,8 @@ struct sc_main unsigned int en_pkt_latency; /* 延迟统计核心 */ unsigned int pkt_latency_lcore_id; + /* 延迟统计采样周期 */ + unsigned int pkt_latency_sample_interval; /* 负载均衡器 */ struct distributer * dist_object; /* keepalive监测 */ diff --git a/service/src/core.c b/service/src/core.c index fffc419..3b1ecae 100644 --- a/service/src/core.c +++ b/service/src/core.c @@ -109,14 +109,6 @@ const char service_git_version[] = ""; #define MR_SERVICE_DEFAULT_PKT_LATENCY 0 #endif -#ifndef MR_SERVICE_DEFAULT_PKT_LATENCY_LCORE_ID -#define MR_SERVICE_DEFAULT_PKT_LATENCY_LCORE_ID 0 -#endif - -#ifndef MR_SERVICE_DEFAULT_SIGSEGV_TAKEOVER -#define MR_SERVICE_DEFAULT_SIGSEGV_TAKEOVER 0 -#endif - unsigned int g_logger_to_stdout = 1; unsigned int g_logger_level = LOG_DEBUG; unsigned int g_monit_interval = 1; @@ -657,6 +649,8 @@ static int sc_g_config_init(struct sc_main * sc) return RT_ERR; } + int ret = 0; + /* 数据面idle调用门限 */ MESA_load_profile_uint_def(sc->local_cfgfile, "service", "idle_threshold", &sc->idle_threshold, MR_SERVICE_DEFAULT_IDLE_THRESHOLD); @@ -685,13 +679,17 @@ static int sc_g_config_init(struct sc_main * sc) MESA_load_profile_uint_def(sc->local_cfgfile, "debug", "pkt_latency", &sc->en_pkt_latency, MR_SERVICE_DEFAULT_PKT_LATENCY); + MESA_load_profile_uint_def(sc->local_cfgfile, "debug", "pkt_latency_sample_interval", + &sc->pkt_latency_sample_interval, 10 * 1000 * 1000); + /* 报文延迟统计核心 */ - MESA_load_profile_uint_def(sc->local_cfgfile, "debug", "pkt_latency_lcore_id", &sc->pkt_latency_lcore_id, - MR_SERVICE_DEFAULT_PKT_LATENCY); + ret = MESA_load_profile_uint_nodef(sc->local_cfgfile, "debug", "pkt_latency_lcore_id", &sc->pkt_latency_lcore_id); - /* SIGSERV接管选项 */ - MESA_load_profile_uint_def(sc->local_cfgfile, "debug", "sigsegv_takeover", &sc->en_sig_segv_takeover, - MR_SERVICE_DEFAULT_SIGSEGV_TAKEOVER); + /* not set, use the first io core as the latency stat core */ + if (ret < 0) + { + sc->pkt_latency_lcore_id = cpu_set_location(&sc->cpu_set_io, 0); + } return RT_SUCCESS; } @@ -885,12 +883,6 @@ __rte_unused static void signal_handler(int signum) g_keep_running = 0; } -static void signal_handler_sigsegv() -{ - rte_panic("SIGSERV happens, MRZCPD is abort. \n"); - return; -} - extern int hwinfo_init(struct sc_main * sc); extern int phydev_init(struct sc_main * sc); extern int devmgr_early_init(struct sc_main * sc); @@ -1016,15 +1008,6 @@ int main(int argc, char * argv[]) goto quit; } - /* SIGSERV signal takeover - if the options is opened, the program will print the backtrace while - handle the signal. - */ - if (sc->en_sig_segv_takeover) - { - signal(SIGSEGV, signal_handler_sigsegv); - } - if (sc_ctrlmsg_init(sc) != RT_SUCCESS) { MR_ERROR("Ctrlmsg module initialization failed. "); @@ -1188,12 +1171,29 @@ int main(int argc, char * argv[]) pdump_inited = 1; } + rte_metrics_init(SOCKET_ID_ANY); + + /* 延迟监测 */ + if (sc->en_pkt_latency) + { + ret = rte_latencystats_init(sc->pkt_latency_sample_interval, NULL); + if (ret != 0) + { + MR_ERROR("packet latency stats module init failed, ret = %d", ret); + ret = EXIT_FAILURE; + goto quit; + } + + MR_INFO("packet latency stats enabled, sample interval(ns)=%u",sc->pkt_latency_sample_interval); + } + if (ctrlmsg_thread_launch(sc->ctrlmsg_handler) != RT_SUCCESS) { MR_ERROR("Launch ctrlmsg thread failed"); ret = EXIT_FAILURE; goto quit; } + if (rpc_server_dispatch_thread(sc->rpc_srv_handler) != RT_SUCCESS) { MR_ERROR("Launch rpc dispatch thread failed"); @@ -1237,28 +1237,14 @@ int main(int argc, char * argv[]) goto quit; } - rte_metrics_init(SOCKET_ID_ANY); http_serv_init(sc); - /* 延迟监测 */ - if (sc->en_pkt_latency) - { - ret = rte_latencystats_init(1, NULL); - } - - if (sc->en_pkt_latency && ret != 0) - { - MR_ERROR("Lantency stats module init failed, ret = %d", ret); - ret = EXIT_FAILURE; - goto quit; - } - - unsigned int lcore_id; - RTE_LCORE_FOREACH(lcore_id) + unsigned int lcore_id_iter = 0; + RTE_LCORE_FOREACH(lcore_id_iter) { if (sc->keepalive) - rte_keepalive_register_core(sc->keepalive, lcore_id); - MR_INFO("Keepalive register for thread %d successfully.", lcore_id); + rte_keepalive_register_core(sc->keepalive, (int)lcore_id_iter); + MR_INFO("Keepalive register for thread %d successfully.", lcore_id_iter); } /* 恢复CPU亲和性设置为EAL后的线程绑定参数 */ @@ -1284,7 +1270,6 @@ int main(int argc, char * argv[]) }; /* set eal all cores run as service cores */ - uint16_t lcore_id_iter = 0; RTE_LCORE_FOREACH(lcore_id_iter) { ret = rte_service_lcore_add(lcore_id_iter); diff --git a/service/src/monit.c b/service/src/monit.c index 25c395f..8fe4793 100644 --- a/service/src/monit.c +++ b/service/src/monit.c @@ -6,11 +6,14 @@ #include <rte_ethdev.h> #include <rte_pci.h> +#include <rte_latencystats.h> +#include <rte_malloc.h> #include <sc_common.h> #include <sc_devmgr.h> #include <sc_vdev.h> #include "cJSON.h" +#include "common.h" extern unsigned int g_monit_interval; @@ -123,6 +126,47 @@ static cJSON * __create_vdev_stats(struct vdev * vdev, unsigned int nr_serv_thre return j_vdev_stats; } + +static cJSON * monit_pkt_latency_global(struct sc_main * sc) +{ + if (sc->en_pkt_latency == 0) + { + return NULL; + } + + static struct rte_metric_name * metric_names = NULL; + static struct rte_metric_value * metric_values = NULL; + static unsigned int nr_metrics = 0; + + if (metric_names == NULL) + { + nr_metrics = rte_latencystats_get_names(NULL, 0); + metric_names = ZMALLOC(sizeof(struct rte_metric_name) * nr_metrics); + metric_values = ZMALLOC(sizeof(struct rte_metric_value) * nr_metrics); + + rte_latencystats_get_names(metric_names, nr_metrics); + rte_latencystats_get(metric_values, nr_metrics); + } + + assert(metric_names != NULL); + assert(metric_values != NULL); + + rte_latencystats_update(); + + /* get the metric value */ + cJSON * j_metric = cJSON_CreateObject(); + rte_latencystats_get(metric_values, nr_metrics); + + for (unsigned int i = 0; i < nr_metrics; i++) + { + struct rte_metric_name * name_iter = &metric_names[i]; + struct rte_metric_value * value_iter = &metric_values[i]; + cJSON_AddNumberToObject(j_metric, name_iter->name, value_iter->value); + } + + return j_metric; +} + // 运行时原始报文设备统计计数 static cJSON * monit_vdev(struct sc_main * sc) { @@ -313,6 +357,8 @@ static cJSON * monit_root(struct sc_main * sc) #endif cJSON_AddItemToObject(j_root, "app", app_monit_loop(sc)); cJSON_AddItemToObject(j_root, "service", service_monit_loop(sc)); + cJSON_AddItemToObject(j_root, "pkt_latency", monit_pkt_latency_global(sc)); + // cJSON_AddItemToObject(j_root, "offload", smartoffload_monit_loop(sc)); cJSON_AddItemToObject(j_root, "eth-ingress", eth_ingress_node_monit_loop(sc)); cJSON_AddItemToObject(j_root, "bridge", bridge_node_monit_loop(sc)); diff --git a/tools/monit_device/monit_device.py b/tools/monit_device/monit_device.py index 0f8d7e6..8b906c8 100755 --- a/tools/monit_device/monit_device.py +++ b/tools/monit_device/monit_device.py @@ -23,7 +23,6 @@ TITLE_VECTOR = ['PhyRXFrame', 'PhyRXBits', 'PhyRXMissed', 'PhyRXError', 'PhyRXNoBUF', 'PhyTXFrame', 'PhyTXBits', 'PhyTXError', 'UsrRXDrops', 'UsrTXDrops'] - TITLE_MAP = {'PhyRXFrame': 'ipackets', 'PhyRXBits': 'ibytes', 'PhyRXMissed': 'imissed', @@ -62,6 +61,11 @@ TITLE_MAP_PROMETHEUS = { 'usr_tx_drop_total': 'usertxdrop' } +TITLE_MAP_PKT_LATENCY_PROMETHEUS = { + "avg_latency_ns": "pkt_latency_avg_ns", + "jitter_ns": "pkt_latency_jitter_ns", +} + def locate_vector_by_symbol(vector, symbol): return [s for s in vector if s['symbol'] == symbol] @@ -71,6 +75,17 @@ def list_all_phydev(json_fp): return [s['symbol'] for s in json_fp['device']] +def dump_pkt_latency_prometheus_output(json_fp): + resp = '' + try: + for item in TITLE_MAP_PKT_LATENCY_PROMETHEUS: + value = json_fp['pkt_latency'][item] + resp += '%s %u\n' % (item, value) + except KeyError: + resp = '' + return resp + + def phydev_value_read(json_fp, str_device, str_item): phydevs = locate_vector_by_symbol(json_fp['device'], str_device) return phydevs[0]['stats']['accumulative'][str_item] @@ -95,7 +110,6 @@ def trans_to_human_readable(value): def dump_human_table(json_fp, devsym, is_human_number=0): - print('\nTime: %s, Physical device: %s' % (time.strftime('%c'), devsym)) table_phydev = prettytable.PrettyTable([' '] + TITLE_VECTOR, @@ -129,6 +143,7 @@ def dump_human_table(json_fp, devsym, is_human_number=0): table_phydev.add_row(SpeedList) print(table_phydev) + # APM sendlog format def dump_prometheus_output(json_fp, devsym): @@ -140,7 +155,6 @@ def dump_prometheus_output(json_fp, devsym): def setup_argv_parser(phydev_list): - parser = argparse.ArgumentParser( description='Marsio ZeroCopy Tools -- Monitor NIC devices') @@ -181,11 +195,13 @@ class PrometheusClient(BaseHTTPRequestHandler): BaseHTTPRequestHandler.__init__(self, request, client_address, server) def do_GET(self): - if (self.path == '/metrics'): + if self.path == '/metrics': resp = '' + for devsym in self.phydev_list: resp += dump_prometheus_output(self.json_fp, devsym) + resp += dump_pkt_latency_prometheus_output(self.json_fp) self.send_response(200) self.send_header('Content-type', 'text/plain; version=0.0.4') self.end_headers() @@ -198,6 +214,7 @@ class PrometheusClient(BaseHTTPRequestHandler): def prometheus_client_init(json_fp, phydev_list, prometheus_client_port): HTTPServer(("", prometheus_client_port), PrometheusClient).serve_forever() + def main(): signal.signal(signal.SIGINT, sigint_handler) |
