summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorlijia <[email protected]>2019-08-09 18:11:11 +0800
committerlijia <[email protected]>2019-08-09 18:11:11 +0800
commitc5820eb4b36985c9f0f5ea44eb57c13568e7c791 (patch)
treee87a08dc70f64b82256737dc8577978379b764f2
parentd5f4683fcc139c4e2b25c2f62548cd4215f3c51b (diff)
增加根据每块计算板的不同, 灵活判断应该重启哪些应用
-rw-r--r--py_cmd/common_modules_deploy.json12
-rw-r--r--py_cmd/common_modules_operator.json26
-rw-r--r--py_tools/get_traffic_by_marsio.py2
-rw-r--r--py_tools/get_traffic_by_proc.py115
-rw-r--r--py_tools/get_traffic_by_psutil.py24
-rw-r--r--py_tools/tsg_monit_interface.py77
6 files changed, 219 insertions, 37 deletions
diff --git a/py_cmd/common_modules_deploy.json b/py_cmd/common_modules_deploy.json
new file mode 100644
index 0000000..786644f
--- /dev/null
+++ b/py_cmd/common_modules_deploy.json
@@ -0,0 +1,12 @@
+#此文件定义四块计算板分别运行什么模块
+#先根据tsg_chassis_ip.json, 根据当前设备的ip地址, 知道当前设备的sled名称
+#然后根据设备类型名称, 找到当前运行的所有模块(应用)名称
+#然后根据模块名称, 到common_modules_operator.json查找每个模块的操作方法
+{
+    "modules_dispatch": {
+        "mcn0": ["kni"],
+        "mcn1": ["tfe"],
+        "mcn2": ["tfe"],
+        "mcn3": ["tfe"]
+    }
+} \ No newline at end of file
diff --git a/py_cmd/common_modules_operator.json b/py_cmd/common_modules_operator.json
new file mode 100644
index 0000000..1bd6a16
--- /dev/null
+++ b/py_cmd/common_modules_operator.json
@@ -0,0 +1,26 @@
+#此文件定义, 每个模块的启动、停止、检查运行状态的方法
+{
+ "modules":[
+ {sapp:
+ start:
+ "cwd"
+ "exe"
+ stop:
+ "killall -9 r3 sapp"
+ check healthy:
+ "ps -ef | grep sapp "
+ }
+
+ {telegraf:
+ start:
+ "cwd" : ""
+ "exe" : "service influxdb restart"
+ stop:
+ "service influxdb stop"
+ check healthy:
+ "service influxdb status | grep Active "
+ }
+
+ ]
+
+} \ No newline at end of file
diff --git a/py_tools/get_traffic_by_marsio.py b/py_tools/get_traffic_by_marsio.py
index 8bf1282..7dbc53f 100644
--- a/py_tools/get_traffic_by_marsio.py
+++ b/py_tools/get_traffic_by_marsio.py
@@ -117,7 +117,7 @@ def get_and_send_marsio_traffic(logger, json_fp, telegraf_client, devsym, arg_fl
if value == "":
logger.critical("can't get device:%s traffic!" %(devsym))
sys.exit(1)
- __metric_dict_speed[item] = value
+ __metric_dict_speed[item] = int(value)
telegraf_client.metric('interface', __metric_dict_speed, tags = metric_tag)
#logger.info("telegraf_client send metric")
diff --git a/py_tools/get_traffic_by_proc.py b/py_tools/get_traffic_by_proc.py
new file mode 100644
index 0000000..fec12bb
--- /dev/null
+++ b/py_tools/get_traffic_by_proc.py
@@ -0,0 +1,115 @@
+#coding=utf-8
+import sys
+import re
+import syslog
+import subprocess
+import time
+import telegraf
+import logging
+import logging.handlers
+from common_telegraf import *
+from common_logger import *
+
+Receive_bytes_index = 1
+Receive_packets_index = 2
+Receive_err_index = 3
+Receive_drop_index = 4
+
+Transmit_bytes_index = 9
+Transmit_packets_index = 10
+Transmit_err_index = 11
+Transmit_drop_index = 12
+
+def system_cmd_run(cmd_str):
+    """Run a shell command line and return ``(exitcode, output)``.
+
+    A command line that starts (after optional leading whitespace) with one
+    of the names in the deny list is refused: a message is printed and the
+    process exits with status 1. The check is a prefix match on the start of
+    the string only, so it also refuses longer names such as ``rmdir`` and it
+    does not inspect later parts of a compound command line.
+
+    Args:
+        cmd_str: the command line handed to subprocess.getstatusoutput().
+
+    Returns:
+        The (exitcode, output) pair from subprocess.getstatusoutput();
+        exitcode 0 means success. If running the command raises, the
+        exception is printed and (1, str(exception)) is returned.
+    """
+    dangerous_cmd = ("rm", "mv", "poweroff", "shutdown")
+
+    for cmd in dangerous_cmd:
+        # Raw string: "\s" inside a normal literal is an invalid escape sequence.
+        # re.escape keeps the name literal even if the list gains regex metachars.
+        if re.match(r"\s*%s" % (re.escape(cmd)), cmd_str) is not None:
+            print("can't run this cmd:%s" %(cmd_str))
+            sys.exit(1)
+
+    try:
+        exitcode, output = subprocess.getstatusoutput(cmd_str)
+    except Exception as e:
+        print(e)
+        # Return text, like the success path does, not the exception object.
+        return 1, str(e)
+
+    return exitcode, output
+
+def get_traffic_from_proc(logger, device_name):
+    """Read the current counters of one interface from /proc/net/dev.
+
+    The file is read directly instead of through ``cat | grep``: the device
+    name is compared exactly (a grep for "eth0:" would also match "veth0:")
+    and is never interpolated into a shell command.
+
+    Args:
+        logger: object providing debug() and error().
+        device_name: interface name as it appears before the ':' in
+            /proc/net/dev.
+
+    Returns:
+        An 8-tuple of strings
+        (packets_recv, bytes_recv, errin, dropin,
+         packets_sent, bytes_sent, errout, dropout).
+        When the file can't be read, the device is not listed, or the line
+        has too few fields, all eight items are "" so that callers which
+        unpack the result can still test for failure.
+    """
+    no_data = ("",) * 8
+
+    try:
+        with open("/proc/net/dev") as proc_fp:
+            proc_lines = proc_fp.readlines()
+    except (IOError, OSError) as e:
+        logger.error("can't read /proc/net/dev: %s" %(e))
+        return no_data
+
+    for line in proc_lines:
+        name, colon, counters = line.partition(":")
+        # The two header lines contain no ':'; other devices are skipped.
+        if not colon or name.strip() != device_name:
+            continue
+
+        logger.debug(line.strip())
+
+        # Keep the name at index 0 so the *_index constants stay valid.
+        sections_array = [name.strip()] + counters.split()
+        if len(sections_array) <= Transmit_drop_index:
+            return no_data
+
+        packets_recv = sections_array[Receive_packets_index]
+        bytes_recv = sections_array[Receive_bytes_index]
+        errin = sections_array[Receive_err_index]
+        dropin = sections_array[Receive_drop_index]
+
+        packets_sent = sections_array[Transmit_packets_index]
+        bytes_sent = sections_array[Transmit_bytes_index]
+        errout = sections_array[Transmit_err_index]
+        dropout = sections_array[Transmit_drop_index]
+
+        return packets_recv,bytes_recv,errin,dropin,packets_sent,bytes_sent,errout,dropout
+
+    return no_data
+
+def send_metrics_by_proc(logger, telegraf_client, dev_name, arg_flow_type, arg_node_name):
+    """Sample /proc/net/dev twice, one second apart, and send the deltas.
+
+    The counters come from get_traffic_from_proc(); the differences between
+    the two samples are sent as one 'interface' metric through
+    telegraf_client.metric(), tagged with device, flow_type and node.
+
+    Args:
+        logger: object providing debug() and error().
+        telegraf_client: object providing metric(name, values, tags=...).
+        dev_name: interface name to sample.
+        arg_flow_type: value for the 'flow_type' tag.
+        arg_node_name: value for the 'node' tag.
+
+    Returns:
+        1 when either sample can't be obtained; otherwise None.
+    """
+    metric_tag = {'device': dev_name, 'flow_type':arg_flow_type, 'node':arg_node_name}
+
+    old_stats = get_traffic_from_proc(logger, dev_name)
+    # Validate before unpacking: a failed read may come back as "" or as
+    # empty fields, and unpacking "" into eight names raises ValueError.
+    if len(old_stats) != 8 or len(old_stats[0]) <= 0:
+        logger.error("can't get %s status" %(dev_name))
+        return 1
+    time.sleep(1)
+    new_stats = get_traffic_from_proc(logger, dev_name)
+    if len(new_stats) != 8 or len(new_stats[0]) <= 0:
+        logger.error("can't get %s status" %(dev_name))
+        return 1
+
+    (old_packets_recv, old_bytes_recv, old_errin, old_dropin,
+     old_packets_sent, old_bytes_sent, old_errout, old_dropout) = [int(v) for v in old_stats]
+    (new_packets_recv, new_bytes_recv, new_errin, new_dropin,
+     new_packets_sent, new_bytes_sent, new_errout, new_dropout) = [int(v) for v in new_stats]
+
+    metrict_val = {}
+    metrict_val['PhyRXBits'] = 8 * (new_bytes_recv - old_bytes_recv)
+    metrict_val['PhyRXError'] = new_errin - old_errin
+    metrict_val['PhyRXFrame'] = new_packets_recv - old_packets_recv
+    # Not derived from the /proc counters here; always reported as 0.
+    metrict_val['PhyRXMissed'] = 0
+    metrict_val['PhyRXNoBUF'] = 0
+
+    # TX values must come from the transmit counters (they were previously
+    # copied from the receive side).
+    metrict_val['PhyTXBits'] = 8 * (new_bytes_sent - old_bytes_sent)
+    metrict_val['PhyTXError'] = new_errout - old_errout
+    metrict_val['PhyTXFrame'] = new_packets_sent - old_packets_sent
+
+    metrict_val['UsrRXDrops'] = new_dropin - old_dropin
+    metrict_val['UsrTXDrops'] = new_dropout - old_dropout
+
+    telegraf_client.metric('interface', metrict_val, tags = metric_tag)
+
+    logger.debug(metrict_val)
+
+if __name__ == '__main__':
+    # Stand-alone run: take the interval from argv (default 5 after printing
+    # the usage line), then send one sample for a fixed device/flow/node.
+    if len(sys.argv) > 1:
+        time_interval = int(sys.argv[1])
+    else:
+        print("Usage: %s timeinterval" %(sys.argv[0]))
+        time_interval = 5
+
+    tele_tags = {}
+    telegraf_client = telegraf_init('127.0.0.1', 8126, tele_tags)
+    logger = logger_init(10)
+
+    send_metrics_by_proc(logger, telegraf_client, 'ens33', 'inline', 'mcn1') \ No newline at end of file
diff --git a/py_tools/get_traffic_by_psutil.py b/py_tools/get_traffic_by_psutil.py
index 9fb77c2..263414e 100644
--- a/py_tools/get_traffic_by_psutil.py
+++ b/py_tools/get_traffic_by_psutil.py
@@ -4,7 +4,11 @@ import sys
import psutil
import time
import telegraf
-
+import logging
+import logging.handlers
+from common_telegraf import *
+from common_logger import *
+
def get_stats_for_device(dev_name):
devlist = psutil.net_io_counters(pernic=True).keys()
@@ -38,23 +42,23 @@ def send_metrics_by_psutil(logger, telegraf_client, dev_name, arg_flow_type, arg
metric_tag = {'device': dev_name, 'flow_type':arg_flow_type, 'node':arg_node_name}
old_packets_recv,old_bytes_recv,old_errin,old_dropin,old_packets_sent,old_bytes_sent,old_errout,old_dropout = get_stats_for_device(dev_name)
- time.sleep(3)
+ time.sleep(1)
new_packets_recv,new_bytes_recv,new_errin,new_dropin,new_packets_sent,new_bytes_sent,new_errout,new_dropout = get_stats_for_device(dev_name)
metrict_val = {}
metrict_val['PhyRXBits'] = int(8 *(new_bytes_recv - old_bytes_recv))
metrict_val['PhyRXError'] = int(new_errin-old_errin)
metrict_val['PhyRXFrame'] = int(new_packets_recv - old_packets_recv)
- metrict_val['PhyRXMissed'] = 0
- metrict_val['PhyRXNoBUF'] = 0
+ metrict_val['PhyRXMissed'] = int('0')
+ metrict_val['PhyRXNoBUF'] = int('0')
metrict_val['PhyTXBits'] = int(8 *(new_bytes_recv - old_bytes_recv))
metrict_val['PhyTXError'] = int(new_errin-old_errin)
metrict_val['PhyTXFrame'] = int(new_packets_recv - old_packets_recv)
- metrict_val['PhyRXMissed'] = 0
- metrict_val['PhyRXNoBUF'] = 0
+ metrict_val['PhyRXMissed'] = int('0')
+ metrict_val['PhyRXNoBUF'] = int('0')
- metrict_val['UsrRXDrops'] = int(new_dropin - new_dropin)
+ metrict_val['UsrRXDrops'] = int(new_dropin - old_dropin)
metrict_val['UsrTXDrops'] = int(new_dropout - old_dropout)
telegraf_client.metric('interface', metrict_val, tags = metric_tag)
@@ -71,8 +75,8 @@ if __name__ == '__main__':
else:
time_interval = int(sys.argv[1])
- telegraf_client = telegraf_init('127.0.0.1', 8126)
+ tele_tags = {}
+ telegraf_client = telegraf_init('127.0.0.1', 8126, tele_tags)
logger = logger_init(10)
- telegraf_client = telegraf_init()
- send_metrics_by_psutil(logger, telegraf_client, 'ens33', 'inline', 'mcn_1')
+ send_metrics_by_psutil(logger, telegraf_client, 'ens33', 'inline', 'mcn1')
diff --git a/py_tools/tsg_monit_interface.py b/py_tools/tsg_monit_interface.py
index 64a3729..62ccfbe 100644
--- a/py_tools/tsg_monit_interface.py
+++ b/py_tools/tsg_monit_interface.py
@@ -5,8 +5,10 @@ import psutil
import time
import json
import telegraf
+import logging
from get_traffic_by_psutil import *
from get_traffic_by_marsio import *
+from get_traffic_by_proc import *
from common_telegraf import *
from common_logger import *
from common_args import *
@@ -16,37 +18,60 @@ from common_get_tags import *
INTERFACE_JSON_PATH = '/opt/tsg/etc/tsg_chassis_interface.json'
-def get_local_node():
+#Open file_name and parse it as JSON. Any Exception raised by open() or
+#json.load() is caught and handed back in the return value instead of propagating.
+#return value:
+# 0, "", json_dict : succ
+# 1, exception object, "" : error
+def tsg_json_parse(file_name):
try:
- with open(INTERFACE_JSON_PATH) as json_fp:
- json_dict = json.load(json_fp)
- return json_dict['local_chassis_node']
- except IOError:
- return ""
+ with open(file_name) as json_fp:
+ try:
+ json_dict = json.load(json_fp)
+ return 0, "", json_dict
+ except Exception as e:
+ return 1, e, ""
+ except Exception as e:
+ return 1, e , ""
+
+#Return the 'local_chassis_node' entry of the JSON file at INTERFACE_JSON_PATH.
+#If tsg_json_parse() reports a failure, its message is logged as critical and the
+#process exits with status 1. Uses a `logger` that is not defined in this function.
+def get_local_node():
+ ret, msg, json_dict = tsg_json_parse(INTERFACE_JSON_PATH)
+
+ if ret == 0:
+ return json_dict['local_chassis_node']
+ else:
+ logger.critical("%s!" %(msg))
sys.exit(1)
def get_interface_list():
- try:
- with open(INTERFACE_JSON_PATH) as json_fp:
- json_dict = json.load(json_fp)
- interface_list = json_dict['interface_list']
- #print(interface_list)
- for dev in interface_list:
- if 'marsio' == dev['dev_type']:
- logger.debug("get traffic stat by %s for: %s" %(dev['dev_type'], dev['dev_name']))
- # call function in get_traffic_by_marsio.py
- send_metrics_by_marsio(logger, telegraf_client, dev['dev_name'], dev['flow_type'], node_name)
- elif 'pcap' == dev['dev_type']:
- logger.debug("get traffic stat by %s for: %s, please wait for 1s..." %(dev['dev_type'], dev['dev_name']))
- # call function in get_traffic_by_psutil.py
- send_metrics_by_psutil(logger, telegraf_client, dev['dev_name'], dev['flow_type'], node_name)
- else:
- logger.critical("not support driver type %s for: %s" %(dev['dev_type'], dev['dev_name']))
- sys.exit(1)
- except IOError:
- logger.critical("can't open file %s!" %(INTERFACE_JSON_PATH))
+ #Despite the name, nothing is returned: this reads 'interface_list' from the JSON
+ #file at INTERFACE_JSON_PATH and sends traffic metrics for every entry, choosing
+ #the collector by the entry's 'dev_type'.
+ #Uses logger, telegraf_client and node_name, none of which are defined in this function.
+ #Exits the process with status 1 when the file can't be parsed, the list is empty,
+ #or a dev_type is neither 'marsio' nor 'pcap'.
+ ret, msg, json_dict = tsg_json_parse(INTERFACE_JSON_PATH)
+
+ if ret == 0:
+ interface_list = json_dict['interface_list']
+ else:
+ logger.critical("%s!" %(msg))
sys.exit(1)
- return ""
+
+ if len(interface_list) <= 0:
+ logger.critical("can't get interface_list from json file %s!" %(INTERFACE_JSON_PATH))
+ sys.exit(1)
+
+ #print(interface_list)
+ for dev in interface_list:
+ if 'marsio' == dev['dev_type']:
+ logger.debug("get traffic stat by %s for: %s" %(dev['dev_type'], dev['dev_name']))
+ # call function in get_traffic_by_marsio.py
+ send_metrics_by_marsio(logger, telegraf_client, dev['dev_name'], dev['flow_type'], node_name)
+ elif 'pcap' == dev['dev_type']:
+ logger.debug("get traffic stat by %s for: %s, please wait for 1s..." %(dev['dev_type'], dev['dev_name']))
+ # call function in get_traffic_by_psutil.py
+ #send_metrics_by_psutil(logger, telegraf_client, dev['dev_name'], dev['flow_type'], node_name)
+
+ #Alternatively use the most basic approach: cat /proc/net/dev and parse it
+ #(original comment was mis-encoded; wording reconstructed)
+ send_metrics_by_proc(logger, telegraf_client, dev['dev_name'], dev['flow_type'], node_name)
+ else:
+ logger.critical("not support driver type %s for: %s" %(dev['dev_type'], dev['dev_name']))
+ sys.exit(1)
+
if __name__ == '__main__':