summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorlijia <[email protected]>2019-08-11 23:20:35 +0800
committerlijia <[email protected]>2019-08-11 23:20:35 +0800
commit78c989e3a3b7c475ae790436776c62e6fc7bbf7a (patch)
tree027944580877e70275c5fd5fa15c0b5a3e30bb87
parentc5820eb4b36985c9f0f5ea44eb57c13568e7c791 (diff)
完善相关命令, 增加diagnose命令.
-rw-r--r--.gitignore1
-rw-r--r--deploy_doc/deploy.md22
-rw-r--r--deploy_doc/oam_cli_deply.md (renamed from deploy_doc/cli部署.txt)0
-rw-r--r--deploy_doc/tsg_cli_deploy.md25
-rw-r--r--deploy_etc/tsg_module_deploy.json12
-rw-r--r--deploy_etc/tsg_modules_operator.json.bak6
-rw-r--r--py_cmd/.gitignore1
-rw-r--r--py_cmd/common_modules_deploy.json12
-rw-r--r--py_cmd/common_modules_operator.json26
-rw-r--r--py_cmd/tsg_monit_device.py.bak (renamed from py_cmd/tsg_monit_device.py)0
-rw-r--r--py_cmd/tsg_monit_device_by_telegraf.py.bak (renamed from py_cmd/tsg_monit_device_by_telegraf.py)0
-rw-r--r--py_cmd/tsg_monit_stream.py.bak (renamed from py_cmd/tsg_monit_stream.py)0
-rw-r--r--py_cmd/tsg_software_reboot.py150
-rw-r--r--py_cmd/tsg_software_reboot.py.bak230
-rw-r--r--py_common/common_args.py (renamed from py_tools/common_args.py)30
-rw-r--r--py_common/common_diagnose.py171
-rw-r--r--py_common/common_get_tags.py (renamed from py_tools/common_get_tags.py)58
-rw-r--r--py_common/common_influxdb.py41
-rw-r--r--py_common/common_json.py20
-rw-r--r--py_common/common_logger.py (renamed from py_tools/common_logger.py)0
-rw-r--r--py_common/common_modules_deploy.py48
-rw-r--r--py_common/common_modules_operator.py66
-rw-r--r--py_common/common_system_cmd.py28
-rw-r--r--py_common/common_telegraf.py (renamed from py_tools/common_telegraf.py)3
-rw-r--r--py_common/common_whoami.py54
-rw-r--r--py_tools/get_traffic_by_proc.py2
-rw-r--r--py_tools/tsg_diagnose_background.py23
-rw-r--r--py_tools/tsg_get_sn.py (renamed from py_cmd/tsg_get_sn.py)0
-rw-r--r--py_tools/tsg_update_tags.py (renamed from py_cmd/tsg_update_tags.py)2
29 files changed, 869 insertions, 162 deletions
diff --git a/.gitignore b/.gitignore
index a4fb0cc..17451b0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,2 @@
py_bin/*
+py_temp/*
diff --git a/deploy_doc/deploy.md b/deploy_doc/deploy.md
deleted file mode 100644
index aa7c573..0000000
--- a/deploy_doc/deploy.md
+++ /dev/null
@@ -1,22 +0,0 @@
-1.交换板文件
- /opt/tsg/bin
- /opt/tsg/etc
-
- 更新telegraf.conf的global tags, 本机sn.
-
-2.计算板文件(4台)
-
-
-3.服务
-启动交换板crontab服务:
-service crond start
-chkconfig crond on
-crontab -e, 输入以下内容:
-* * * * * /opt/tsg/bin/tsg_update_tags
-* * * * * sleep 10; /opt/tsg/bin/tsg_update_tags
-* * * * * sleep 20; /opt/tsg/bin/tsg_update_tags
-* * * * * sleep 30; /opt/tsg/bin/tsg_update_tags
-* * * * * sleep 40; /opt/tsg/bin/tsg_update_tags
-* * * * * sleep 50; /opt/tsg/bin/tsg_update_tags
-
-修改后要执行: service crond restart \ No newline at end of file
diff --git a/deploy_doc/cli部署.txt b/deploy_doc/oam_cli_deply.md
index 5e4a4d6..5e4a4d6 100644
--- a/deploy_doc/cli部署.txt
+++ b/deploy_doc/oam_cli_deply.md
diff --git a/deploy_doc/tsg_cli_deploy.md b/deploy_doc/tsg_cli_deploy.md
new file mode 100644
index 0000000..cacaf5e
--- /dev/null
+++ b/deploy_doc/tsg_cli_deploy.md
@@ -0,0 +1,25 @@
+1.交换板文件
+ /opt/tsg/bin
+ /opt/tsg/tools
+ /opt/tsg/etc
+
+ 更新telegraf.conf的global tags, 本机sn.
+
+2.计算板文件(4台)
+
+
+3.服务
+启动交换板crontab服务:
+service crond start
+chkconfig crond on
+crontab -e, 输入以下内容:
+* * * * * /opt/tsg/tools/tsg_update_tags
+* * * * * /opt/tsg/tools/tsg_monit_interface
+* * * * * /opt/tsg/tools/tsg_monit_stream
+* * * * * sleep 10; /opt/tsg/tools/tsg_update_tags
+* * * * * sleep 20; /opt/tsg/tools/tsg_update_tags
+* * * * * sleep 30; /opt/tsg/tools/tsg_update_tags
+* * * * * sleep 40; /opt/tsg/tools/tsg_update_tags
+* * * * * sleep 50; /opt/tsg/tools/tsg_update_tags
+
+修改后要执行: service crond restart \ No newline at end of file
diff --git a/deploy_etc/tsg_module_deploy.json b/deploy_etc/tsg_module_deploy.json
new file mode 100644
index 0000000..0afcdb3
--- /dev/null
+++ b/deploy_etc/tsg_module_deploy.json
@@ -0,0 +1,12 @@
+{
+ "modules_deploy": {
+ "mcn0": ["kni", "a.out"],
+ "mcn1": ["tfe", "a1.out"],
+ "mcn2": ["tfe", "a2.out"],
+ "mcn3": ["tfe", "a3.out"]
+ },
+ "modules_operator": {
+ "kni": ["sapp", "r3 sapp", "/home/tsg/kni", "r2", "killall", "exec", "ps"],
+ "telegraf": ["teleraf", "#", "#", "#", "systemctl_stop", "systemctl_start", "systemctl_status"]
+ }
+} \ No newline at end of file
diff --git a/deploy_etc/tsg_modules_operator.json.bak b/deploy_etc/tsg_modules_operator.json.bak
new file mode 100644
index 0000000..273e9fa
--- /dev/null
+++ b/deploy_etc/tsg_modules_operator.json.bak
@@ -0,0 +1,6 @@
+{
+ "modules_operator": {
+ "sapp": ["sapp", "r3", "/home/tsg/kni", "r2", "killall", "exec", "ps"],
+ "telegraf": ["teleraf", "#", "#", "#", "systemctl_stop", "systemctl_start", "systemctl_status"]
+ }
+} \ No newline at end of file
diff --git a/py_cmd/.gitignore b/py_cmd/.gitignore
new file mode 100644
index 0000000..bee8a64
--- /dev/null
+++ b/py_cmd/.gitignore
@@ -0,0 +1 @@
+__pycache__
diff --git a/py_cmd/common_modules_deploy.json b/py_cmd/common_modules_deploy.json
deleted file mode 100644
index 786644f..0000000
--- a/py_cmd/common_modules_deploy.json
+++ /dev/null
@@ -1,12 +0,0 @@
-#此文件定义四块计算板分别运行什么模块
-#先根据tsg_chassis_ip.json, 根据当前设备的ip地址, 知道当前设备的sled名称
-#然后根据设备类型名称, 找到当前运行的所有模块(应用)名称
-#然后根据模块名称, 到common_modules_operator.json查找每个模块的操作方法
-{
- "modules_dispatch":
-
- "mcn0": ["kni"]
- "mcn1": ["tfe"]
- "mcn2": ["tfe"]
- "mcn3": ["tfe"]
-} \ No newline at end of file
diff --git a/py_cmd/common_modules_operator.json b/py_cmd/common_modules_operator.json
deleted file mode 100644
index 1bd6a16..0000000
--- a/py_cmd/common_modules_operator.json
+++ /dev/null
@@ -1,26 +0,0 @@
-#此文件定义, 每个模块的启动、停止、检查运行状态的方法
-{
- "modules":[
- {sapp:
- start:
- "cwd"
- "exe"
- stop:
- "killall -9 r3 sapp"
- check healthy:
- "ps -ef | grep sapp "
- }
-
- {telegraf:
- start:
- "cwd" : ""
- "exe" : "service influxdb restart"
- stop:
- "service influxdb stop"
- check healthy:
- "service influxdb status | grep Active "
- }
-
- ]
-
-} \ No newline at end of file
diff --git a/py_cmd/tsg_monit_device.py b/py_cmd/tsg_monit_device.py.bak
index e8d0e63..e8d0e63 100644
--- a/py_cmd/tsg_monit_device.py
+++ b/py_cmd/tsg_monit_device.py.bak
diff --git a/py_cmd/tsg_monit_device_by_telegraf.py b/py_cmd/tsg_monit_device_by_telegraf.py.bak
index f2366a2..f2366a2 100644
--- a/py_cmd/tsg_monit_device_by_telegraf.py
+++ b/py_cmd/tsg_monit_device_by_telegraf.py.bak
diff --git a/py_cmd/tsg_monit_stream.py b/py_cmd/tsg_monit_stream.py.bak
index c34f217..c34f217 100644
--- a/py_cmd/tsg_monit_stream.py
+++ b/py_cmd/tsg_monit_stream.py.bak
diff --git a/py_cmd/tsg_software_reboot.py b/py_cmd/tsg_software_reboot.py
index efb592c..6320e83 100644
--- a/py_cmd/tsg_software_reboot.py
+++ b/py_cmd/tsg_software_reboot.py
@@ -5,6 +5,11 @@ import syslog
import subprocess
import time
import re
+from sys import path
+path.append(r'../py_common') #将存放module的路径添加进来
+from common_modules_operator import *
+from common_whoami import *
+from common_modules_deploy import *
##define KERN_EMERG "<0>" /* system is unusable */
##define KERN_ALERT "<1>" /* action must be taken immediately */
@@ -15,7 +20,7 @@ import re
##define KERN_INFO "<6>" /* informational */
##define KERN_DEBUG "<7>" /* debug-level messages */
-MSG_PREFIX = ['EMERG', 'ALERT', 'CRIT', 'ERR', 'WARNING', 'NOTICE', 'INFO', 'DEBUG']
+G_SYS_LOG_STRING = ['EMERG', 'ALERT', 'CRIT', 'ERR', 'WARNING', 'NOTICE', 'INFO', 'DEBUG']
G_LOCAL_NODE_NAME = "TSG_MXN"
@@ -23,18 +28,18 @@ class CommandException(Exception):
pass
def tsg_restart_err_log(error_num, user_msg):
- msg = "[%s] %s" %(MSG_PREFIX[syslog.LOG_ERR], user_msg)
+ msg = "[%s] %s" %(G_SYS_LOG_STRING[syslog.LOG_ERR], user_msg)
syslog.syslog(syslog.LOG_ERR, msg)
print (msg)
- msg = "[%s] %s" %(MSG_PREFIX[syslog.LOG_ERR], "tsg software reboot error")
+ msg = "[%s] %s" %(G_SYS_LOG_STRING[syslog.LOG_ERR], "tsg software reboot error")
syslog.syslog(syslog.LOG_ERR, msg)
print (msg)
sys.exit(error_num)
def tsg_restart_succ_log():
- msg = "[%s] %s" %(MSG_PREFIX[syslog.LOG_NOTICE], "tsg software reboot success")
+ msg = "[%s] %s" %(G_SYS_LOG_STRING[syslog.LOG_NOTICE], "tsg software reboot success")
syslog.syslog(syslog.LOG_NOTICE, msg)
print (msg)
sys.exit(0)
@@ -97,36 +102,20 @@ def tsg_kill_app_process_by_killall(module_name, extra_progs):
return 0
def tsg_kill_app_process_by_systemctl_stop(module_name):
- print("TODO")
+ print("tsg_kill_app_process_by_systemctl_stop TODO!!!")
sys.exit(1)
return 0
-def tsg_stop_app_process(module_name, extra_progs, stop_method, check_method):
+def tsg_stop_app_process(module_name, extra_progs, stop_func, check_func):
res_code = 0
running_flag = 0
- if check_method == 'ps':
- check_func = tsg_check_process_health_by_ps
- elif check_method == 'systemctl_status':
- check_func = tsg_check_process_health_by_systemctl_status
- else:
- print("not support check method:%s" %(check_method))
- sys.exit(1)
-
running_flag = check_func(module_name)
if running_flag == 0:
- #print("%s is not running, start it..." %(module_name))
+ logger.debug("%s is not running, start it..." %(module_name))
return 0
#ready to stop progcess, retry for 3 times
-
- if stop_method == 'killall':
- stop_func = tsg_kill_app_process_by_killall
- elif stop_method == 'systemctl_stop':
- stop_func = tsg_kill_app_process_by_systemctl_stop
- else:
- print("not support stop method:%s" %(check_method))
- sys.exit(1)
for times in range(3):
stop_func(module_name, extra_progs)
@@ -140,55 +129,42 @@ def tsg_stop_app_process(module_name, extra_progs, stop_method, check_method):
if res_code != 0:
errmsg = "can't stop process %s" %(module_name)
+ logger.error(errmsg)
tsg_restart_err_log(res_code, errmsg)
return res_code
-def tsg_start_app_process_by_exec_call(module_name, module_cwd, module_exe, check_method):
+def tsg_start_app_process_by_exec_call(module_name, module_cwd, module_exe):
+ logger.debug("try cd to dir:%s" %(module_cwd))
try:
os.chdir(module_cwd)
except Exception as e:
print("%s" %(e))
return 1
+ logger.debug("try call exec :%s" %(module_exe))
cmd_str = "./%s" %(module_exe)
ret_code, output = system_cmd_run(cmd_str)
if ret_code != 0:
errmsg = "start program %s error, call %s/%s failed" %(module_name, module_cwd, module_exe)
tsg_restart_err_log(ret_code, errmsg)
-
- if check_method == 'ps':
- check_func = tsg_check_process_health_by_ps
- elif check_method == 'systemctl_status':
- check_func = tsg_check_process_health_by_systemctl_status
- else:
- print("not support check method:%s" %(check_method))
- return 1
-
- running_flag = check_func(module_name)
- if running_flag == 0:
- errmsg = "start process %s error" %(module_name)
- tsg_restart_err_log(ret_code, errmsg)
-
+
return 0
def tsg_start_app_process_by_systemctl_start(module_name, module_cwd, module_exe, check_method):
- print("TODO")
+ print("tsg_start_app_process_by_systemctl_start TODO!!!!!")
sys.exit(1)
-def tsg_start_app_process(module_name, module_cwd, module_exe, start_method, check_method):
- if start_method == 'exec_call':
- start_func = tsg_start_app_process_by_exec_call
- elif start_method == 'systemctl_start':
- start_func = tsg_start_app_process_by_systemctl_start
- else:
- print("not support start method:%s" %(start_method))
- sys.exit(1)
-
- ret = start_func(module_name, module_cwd, module_exe, check_method)
+def tsg_start_app_process(module_name, module_cwd, module_exe, start_func, check_func):
+ ret = start_func(module_name, module_cwd, module_exe)
if ret != 0:
- sys.exit(1)
+ return 1
+ running_flag = check_func(module_name)
+ if running_flag == 0:
+ errmsg = "start process %s error" %(module_name)
+ return 1
+
return 0
#参数说明:
@@ -201,25 +177,82 @@ def tsg_start_app_process(module_name, module_cwd, module_exe, start_method, che
# start_method: 启动应用方法
# check_method: 检测应用是否运行方法
#
-def tsg_restart_app_process(module_name, extra_progs, module_cwd, module_exe, stop_method, start_method, check_method):
+def tsg_restart_app_process(module_name, extra_progs, module_cwd, module_exe, start_func, stop_func, check_func):
res_code = 0
- res_code = tsg_stop_app_process(module_name, extra_progs, stop_method, check_method)
+ res_code = tsg_stop_app_process(module_name, extra_progs, stop_func, check_func)
if res_code != 0:
return res_code
- res_code = tsg_start_app_process(module_name, module_cwd, module_exe, start_method, check_method)
+ res_code = tsg_start_app_process(module_name, module_cwd, module_exe, start_func, check_func)
if res_code != 0:
return res_code
return 0
+
+#根据配置文件的参数, 选择用哪种操作函数继续下一步
+#返回值, 函数指针:
+#ret_code, start_fun, stop_fun, check_health_fun
+def tsg_get_operator_by_config(module_operator):
+ if module_operator[TSG_OP_MODULE_START_INDEX] == 'exec':
+ start_func = tsg_start_app_process_by_exec_call
+ elif module_operator[TSG_OP_MODULE_START_INDEX] == 'systemctl_start':
+ start_func = tsg_start_app_process_by_systemctl_start
+ else:
+ errmsg = "not support start method:%s, only be [exec, systemctl_start]" %(module_operator[TSG_OP_MODULE_START_INDEX])
+ tsg_restart_err_log(1, errmsg)
+
+ if module_operator[TSG_OP_MODULE_STOP_INDEX] == 'killall':
+ stop_func = tsg_kill_app_process_by_killall
+ elif module_operator[TSG_OP_MODULE_STOP_INDEX] == 'systemctl_stop':
+ stop_func = tsg_kill_app_process_by_systemctl_stop
+ else:
+ errmsg = "not support stop method:%s, only be [killall, systemctl_stop]" %(module_operator[TSG_OP_MODULE_STOP_INDEX])
+ tsg_restart_err_log(1, errmsg)
+
+ if module_operator[TSG_OP_MODULE_STATUS_INDEX] == 'ps':
+ check_func = tsg_check_process_health_by_ps
+ elif module_operator[TSG_OP_MODULE_STATUS_INDEX] == 'systemctl_status':
+ check_func = tsg_check_process_health_by_systemctl_status
+ else:
+ errmsg = "not support check method:%s, only be [ps, systemctl_status]" %(module_operator[TSG_OP_MODULE_STATUS_INDEX])
+ tsg_restart_err_log(1, errmsg)
+
+ return 0, start_func, stop_func, check_func
def tsg_software_reboot():
#G_LOCAL_NODE_NAME = get_local_node_name()
log_handle = syslog.openlog(G_LOCAL_NODE_NAME)
-
- tsg_restart_app_process("sapp", "r3", "/home/tsg/kni", "r2", "killall", "exec_call", "ps")
+
+ sled_type,sled_id,sled_name = tsg_whoami()
+ if sled_name == "":
+ tsg_restart_err_log(1, "can't get local sled name")
+ sys.exit(1)
+
+ module_array = tsg_get_local_sled_modules(sled_name)
+ if len(module_array) <= 0:
+ tsg_restart_err_log(1, "can't get local sled modules")
+ sys.exit(1)
+
+ logger.debug("len(module_array) = %d" %(len(module_array)))
+ for module_name in module_array:
+ module_operator = tsg_get_module_opertor(module_name)
+ if len(module_operator) <= 0:
+ tsg_restart_err_log(1, "can't get local sled module operator for %s" %(module_name))
+ sys.exit(1)
+
+ ret, start_func, stop_func, check_func = tsg_get_operator_by_config(module_operator)
+ if ret != 0:
+ tsg_restart_err_log(1, "can't get operator for %s" %(module_operator[TSG_OP_MODULE_NAME_INDEX]))
+ sys.exit(1)
+
+ tsg_restart_app_process(module_operator[TSG_OP_MODULE_NAME_INDEX], module_operator[TSG_OP_MODULE_EXTRA_INDEX],
+ module_operator[TSG_OP_MODULE_CWD_INDEX],
+ module_operator[TSG_OP_MODULE_EXE_INDEX],
+ start_func, stop_func, check_func)
+
+ #tsg_restart_app_process("sapp", "r3", "/home/tsg/kni", "r2", "killall", "exec_call", "ps")
#tsg_restart_app_process("telegraf", "systemctl_stop", "systemctl_start", "systemctl_status")
#tsg_restart_app_process("marsio", "systemctl_stop", "systemctl_start", "systemctl_status")
#tsg_restart_app_process("influxd", "systemctl_stop", "systemctl_start", "systemctl_status")
@@ -227,4 +260,11 @@ def tsg_software_reboot():
tsg_restart_succ_log()
if __name__ == '__main__':
+ global logger
+
+ if len(sys.argv) >= 2 and sys.argv[1] == "debug":
+ logger = logger_init(logging.DEBUG)
+ else:
+ logger = logger_init(logging.CRITICAL)
+
tsg_software_reboot()
diff --git a/py_cmd/tsg_software_reboot.py.bak b/py_cmd/tsg_software_reboot.py.bak
new file mode 100644
index 0000000..efb592c
--- /dev/null
+++ b/py_cmd/tsg_software_reboot.py.bak
@@ -0,0 +1,230 @@
+#coding=utf-8
+import os
+import sys
+import syslog
+import subprocess
+import time
+import re
+
+##define KERN_EMERG "<0>" /* system is unusable */
+##define KERN_ALERT "<1>" /* action must be taken immediately */
+##define KERN_CRIT "<2>" /* critical conditions */
+##define KERN_ERR "<3>" /* error conditions */
+##define KERN_WARNING "<4>" /* warning conditions */
+##define KERN_NOTICE "<5>" /* normal but significant condition */
+##define KERN_INFO "<6>" /* informational */
+##define KERN_DEBUG "<7>" /* debug-level messages */
+
+MSG_PREFIX = ['EMERG', 'ALERT', 'CRIT', 'ERR', 'WARNING', 'NOTICE', 'INFO', 'DEBUG']
+
+G_LOCAL_NODE_NAME = "TSG_MXN"
+
+class CommandException(Exception):
+ pass
+
+def tsg_restart_err_log(error_num, user_msg):
+ msg = "[%s] %s" %(MSG_PREFIX[syslog.LOG_ERR], user_msg)
+ syslog.syslog(syslog.LOG_ERR, msg)
+ print (msg)
+
+ msg = "[%s] %s" %(MSG_PREFIX[syslog.LOG_ERR], "tsg software reboot error")
+ syslog.syslog(syslog.LOG_ERR, msg)
+ print (msg)
+
+ sys.exit(error_num)
+
+def tsg_restart_succ_log():
+ msg = "[%s] %s" %(MSG_PREFIX[syslog.LOG_NOTICE], "tsg software reboot success")
+ syslog.syslog(syslog.LOG_NOTICE, msg)
+ print (msg)
+ sys.exit(0)
+
+#return exitcode value + output message:
+# 0: succ
+# 1: error
+def system_cmd_run(cmd_str):
+ dangerous_cmd = {"rm", "mv", "poweroff", "shutdown"}
+
+ for cmd in dangerous_cmd:
+ pattern = "\s*%s" %(cmd)
+ match_str = re.match(pattern, cmd_str)
+ if not match_str is None:
+ print("can't run this cmd:%s" %(cmd_str))
+ sys.exit(1)
+
+ try:
+ exitcode, output = subprocess.getstatusoutput(cmd_str)
+ except Exception as e:
+ print(e)
+ print("###### %s" %(e.message))
+ #if exitcode != 0:
+ # output = ""
+ return 1, e.message
+
+ return exitcode, output
+
+#return value:
+# 1: progcess of prog_name is exist
+# 0: progcess of prog_name is not exist
+def tsg_check_process_health_by_ps(module_name):
+ cmd_str = "ps -afx | grep %s | grep -v grep" %(module_name)
+ exitcode, output = system_cmd_run(cmd_str)
+ if exitcode == 0:
+ return 1
+
+ return 0
+
+#return value:
+# 1: progcess of prog_name is exist
+# 0: progcess of prog_name is not exist
+def tsg_check_process_health_by_systemctl_status(module_name):
+ print("systemctl_status check method TODO!")
+ sys.exit(1)
+ return 0
+
+def tsg_kill_app_process_by_killall(module_name, extra_progs):
+ #todo , stop sapp, xxx, check process exist or not, maybe zombie, maybe very slow
+ command = "killall -9 %s %s" %(module_name, extra_progs)
+ try:
+ exitcode, output = subprocess.getstatusoutput(command)
+ #print("%d" %(exitcode))
+ except Exception as e:
+ pass
+
+ if exitcode != 0:
+ return 1
+
+ return 0
+
+def tsg_kill_app_process_by_systemctl_stop(module_name):
+ print("TODO")
+ sys.exit(1)
+ return 0
+
+def tsg_stop_app_process(module_name, extra_progs, stop_method, check_method):
+ res_code = 0
+ running_flag = 0
+
+ if check_method == 'ps':
+ check_func = tsg_check_process_health_by_ps
+ elif check_method == 'systemctl_status':
+ check_func = tsg_check_process_health_by_systemctl_status
+ else:
+ print("not support check method:%s" %(check_method))
+ sys.exit(1)
+
+ running_flag = check_func(module_name)
+
+ if running_flag == 0:
+ #print("%s is not running, start it..." %(module_name))
+ return 0
+ #ready to stop progcess, retry for 3 times
+
+ if stop_method == 'killall':
+ stop_func = tsg_kill_app_process_by_killall
+ elif stop_method == 'systemctl_stop':
+ stop_func = tsg_kill_app_process_by_systemctl_stop
+ else:
+ print("not support stop method:%s" %(check_method))
+ sys.exit(1)
+
+ for times in range(3):
+ stop_func(module_name, extra_progs)
+ #此处不判断stop_func的返回值, 可能程序不存在, 可能守护不存在,等原因
+ #直接用check()方法检测stop()的成功
+ res_code = tsg_check_process_health_by_ps(module_name)
+ if res_code != 0:
+ continue
+ else:
+ break
+
+ if res_code != 0:
+ errmsg = "can't stop process %s" %(module_name)
+ tsg_restart_err_log(res_code, errmsg)
+
+ return res_code
+
+def tsg_start_app_process_by_exec_call(module_name, module_cwd, module_exe, check_method):
+ try:
+ os.chdir(module_cwd)
+ except Exception as e:
+ print("%s" %(e))
+ return 1
+
+ cmd_str = "./%s" %(module_exe)
+ ret_code, output = system_cmd_run(cmd_str)
+ if ret_code != 0:
+ errmsg = "start program %s error, call %s/%s failed" %(module_name, module_cwd, module_exe)
+ tsg_restart_err_log(ret_code, errmsg)
+
+ if check_method == 'ps':
+ check_func = tsg_check_process_health_by_ps
+ elif check_method == 'systemctl_status':
+ check_func = tsg_check_process_health_by_systemctl_status
+ else:
+ print("not support check method:%s" %(check_method))
+ return 1
+
+ running_flag = check_func(module_name)
+ if running_flag == 0:
+ errmsg = "start process %s error" %(module_name)
+ tsg_restart_err_log(ret_code, errmsg)
+
+ return 0
+
+def tsg_start_app_process_by_systemctl_start(module_name, module_cwd, module_exe, check_method):
+ print("TODO")
+ sys.exit(1)
+
+def tsg_start_app_process(module_name, module_cwd, module_exe, start_method, check_method):
+ if start_method == 'exec_call':
+ start_func = tsg_start_app_process_by_exec_call
+ elif start_method == 'systemctl_start':
+ start_func = tsg_start_app_process_by_systemctl_start
+ else:
+ print("not support start method:%s" %(start_method))
+ sys.exit(1)
+
+ ret = start_func(module_name, module_cwd, module_exe, check_method)
+ if ret != 0:
+ sys.exit(1)
+
+ return 0
+
+#参数说明:
+#
+# module_name: 模块名称
+# extra_progs: 其他需要kill的附加程序, 如sapp的r3守护, 需要杀掉, 否则后台可能会重复启动sapp
+# module_cwd: 应用的绝对路径
+# module_exe: 启动应用的名称, 可能跟module_name不一样, 比如用r2启动sapp
+# stop_method: 停止应用方法
+# start_method: 启动应用方法
+# check_method: 检测应用是否运行方法
+#
+def tsg_restart_app_process(module_name, extra_progs, module_cwd, module_exe, stop_method, start_method, check_method):
+ res_code = 0
+
+ res_code = tsg_stop_app_process(module_name, extra_progs, stop_method, check_method)
+ if res_code != 0:
+ return res_code
+
+ res_code = tsg_start_app_process(module_name, module_cwd, module_exe, start_method, check_method)
+ if res_code != 0:
+ return res_code
+
+ return 0
+
+def tsg_software_reboot():
+ #G_LOCAL_NODE_NAME = get_local_node_name()
+
+ log_handle = syslog.openlog(G_LOCAL_NODE_NAME)
+
+ tsg_restart_app_process("sapp", "r3", "/home/tsg/kni", "r2", "killall", "exec_call", "ps")
+ #tsg_restart_app_process("telegraf", "systemctl_stop", "systemctl_start", "systemctl_status")
+ #tsg_restart_app_process("marsio", "systemctl_stop", "systemctl_start", "systemctl_status")
+ #tsg_restart_app_process("influxd", "systemctl_stop", "systemctl_start", "systemctl_status")
+
+ tsg_restart_succ_log()
+
+if __name__ == '__main__':
+ tsg_software_reboot()
diff --git a/py_tools/common_args.py b/py_common/common_args.py
index 52b93de..406a5b4 100644
--- a/py_tools/common_args.py
+++ b/py_common/common_args.py
@@ -1,16 +1,16 @@
-#coding=utf-8
-import argparse
-
-def setup_common_args():
- parser = argparse.ArgumentParser(description='TSG OAM Argument Parser')
-
- parser.add_argument('-g', '--debug', help = 'debug mode, default is disable',
- action='store_true', default = 0)
- parser.add_argument('-l', '--log-level', help = 'debug log level, support:10,20,30,40,50, default is:30',
- type=int, default = 30)
- parser.add_argument('--telegraf-ip', help = 'send log to telegraf ip address, default is:192.168.200.5',
- type=str, default = '192.168.200.5')
- parser.add_argument('--telegraf-port', help = 'send log to telegraf port, default is:8126',
- type=int, default = 8126)
-
+#coding=utf-8
+import argparse
+
+def setup_common_args():
+ parser = argparse.ArgumentParser(description='TSG OAM Argument Parser')
+
+ parser.add_argument('-g', '--debug', help = 'debug mode, default is disable',
+ action='store_true', default = 0)
+ parser.add_argument('-l', '--log-level', help = 'debug log level, support:10,20,30,40,50, default is:30',
+ type=int, default = 30)
+ parser.add_argument('--telegraf-ip', help = 'send log to telegraf ip address, default is:192.168.200.5',
+ type=str, default = '192.168.200.5')
+ parser.add_argument('--telegraf-port', help = 'send log to telegraf port, default is:8126',
+ type=int, default = 8126)
+
return parser \ No newline at end of file
diff --git a/py_common/common_diagnose.py b/py_common/common_diagnose.py
new file mode 100644
index 0000000..25f8234
--- /dev/null
+++ b/py_common/common_diagnose.py
@@ -0,0 +1,171 @@
+# coding: utf-8
+#diagnose公共模块, 供cli命令和background后台自动运行工具调用
+import sys
+import time
+import json
+import logging
+import syslog
+import prettytable
+from sys import path
+path.append(r'../py_common') #将存放module的路径添加进来
+path.append(r'../py_cmd') #将存放module的路径添加进来
+from common_telegraf import *
+from common_logger import *
+from common_args import *
+from common_logger import *
+from common_influxdb import *
+from common_whoami import *
+from common_system_cmd import *
+from tsg_software_reboot import *
+
+#syslog 级别:
+#define KERN_EMERG "<0>" /* system is unusable */
+#define KERN_ALERT "<1>" /* action must be taken immediately */
+#define KERN_CRIT "<2>" /* critical conditions */
+#define KERN_ERR "<3>" /* error conditions */
+#define KERN_WARNING "<4>" /* warning conditions */
+#define KERN_NOTICE "<5>" /* normal but significant condition */
+#define KERN_INFO "<6>" /* informational */
+#define KERN_DEBUG "<7>" /* debug-level messages */
+#参数log_level表示高于此级别的才输出, syslog值越小优先级越高,
+#即log_level <= diagnose_level时输出结果, 如果级别高于ERR, 同时写入syslog
+
+#如果没有任何警告和错误, 显示normal
+#所有检测项不能中途退出, 即使有错误, 显示当前检测项的错误后, 继续, 保证全检测一遍
+
+#根据优先级, 设置新的日志等级
+def tsg_set_log_level(old_level, new_level):
+ if new_level < old_level:
+ return new_level
+ else:
+ return old_level
+
+
+def tsg_diagnose_for_app(log_level):
+ cur_level = syslog.LOG_INFO
+ err_code = 0
+
+ sled_type,sled_id,sled_name = tsg_whoami()
+ if sled_name == "":
+ print("can't get local sled name")
+ return 1
+
+ module_array = tsg_get_local_sled_modules(sled_name)
+ if len(module_array) <= 0:
+ cur_level = tsg_set_log_level(cur_level, syslog.LOG_ERR)
+ ptable.add_row([g_local_sled_name,"common", G_SYS_LOG_STRING[cur_level],"can't get local sled modules"])
+ #print("can't get local sled modules")
+ return 1
+
+ for module_name in module_array:
+ module_operator = tsg_get_module_opertor(module_name)
+ if len(module_operator) <= 0:
+ cur_level = tsg_set_log_level(cur_level, syslog.LOG_ERR)
+ err_msg = "can't get local sled module operator for %s" %(module_name)
+ ptable.add_row([g_local_sled_name,"common", G_SYS_LOG_STRING[cur_level], err_msg])
+ #print(err_msg)
+ err_code += 1
+
+ ret, start_func, stop_func, check_func = tsg_get_operator_by_config(module_operator)
+ if ret != 0:
+ cur_level = tsg_set_log_level(cur_level, syslog.LOG_ERR)
+ err_msg = "can't get operator for %s" %(module_operator[TSG_OP_MODULE_NAME_INDEX])
+ ptable.add_row([g_local_sled_name,"common", G_SYS_LOG_STRING[cur_level],err_msg])
+ #print(err_msg)
+ err_code += 1
+
+ running_flag = check_func(module_operator[TSG_OP_MODULE_NAME_INDEX])
+ if running_flag == 0:
+ cur_level = tsg_set_log_level(cur_level, syslog.LOG_EMERG)
+ err_msg = "app module %s is not running" %(module_name)
+ ptable.add_row([g_local_sled_name,"app", G_SYS_LOG_STRING[cur_level],err_msg])
+ err_code += 1
+
+ #todo, check for restart time
+ if cur_level >= syslog.LOG_INFO:
+ ptable.add_row([g_local_sled_name,"app", G_SYS_LOG_STRING[cur_level], "normal"])
+
+ return err_code
+
+def tsg_diagnose_for_cpu(log_level):
+ cur_level = syslog.LOG_INFO
+ ptable.add_row([g_local_sled_name,"cpu", G_SYS_LOG_STRING[cur_level],"normal"])
+ return 0
+
+def tsg_diagnose_for_mem(log_level):
+ cur_level = syslog.LOG_INFO
+ ptable.add_row([g_local_sled_name,"memory", G_SYS_LOG_STRING[cur_level],"normal"])
+ return 0
+
+def tsg_diagnose_for_disk(log_level):
+ cur_level = syslog.LOG_INFO
+
+ ret, cmd_res = system_cmd_run("df | awk {'print $1,$5'}")
+
+ print(cmd_res.split())
+
+ print(len(cmd_res.split()))
+ print(cmd_res.split()[0])
+ print(cmd_res.split()[1])
+ print(cmd_res.split()[2])
+
+ ptable.add_row([g_local_sled_name,"disk", G_SYS_LOG_STRING[cur_level],"normal"])
+ return 0
+
+def tsg_diagnose_for_interface(log_level):
+ cur_level = syslog.LOG_INFO
+ sql_str = "select * from interface where PhyRXBits+PhyRXError+PhyRXMissed+PhyRXNoBUF > 0 and time > now() -5m limit 1"
+
+ ret, points, msg = tsg_influxb_query(influxdb_client, sql_str)
+ if ret == 0:
+ for point in points:
+ cur_level = tsg_set_log_level(cur_level, syslog.LOG_WARNING)
+ msg = "%s has droped %d packets" %(point['device'], int(point['PhyRXBits']) + int(point['PhyRXMissed']) + int(point['PhyRXNoBUF']) )
+ ptable.add_row([g_local_sled_name,"physical network", G_SYS_LOG_STRING[cur_level], msg])
+
+ if cur_level >= syslog.LOG_INFO:
+ ptable.add_row([g_local_sled_name,"physical network", G_SYS_LOG_STRING[cur_level], "normal"])
+ return 0
+
+ return 1
+
+def tsg_diagnose_for_app_stream(log_level):
+ cur_level = syslog.LOG_INFO
+ ptable.add_row([g_local_sled_name,"app network", G_SYS_LOG_STRING[cur_level],"normal"])
+ return 0
+
+
+def tsg_common_diagnose(log_level):
+ #检查app进程是否存在, 是否最近n分钟内重启过
+ tsg_diagnose_for_app(log_level)
+
+ #检查CPU占用率
+ tsg_diagnose_for_cpu(log_level)
+
+ #检查mem占用率
+ tsg_diagnose_for_mem(log_level)
+
+ #检查磁盘占用率
+ tsg_diagnose_for_disk(log_level)
+
+ #检查物理网络情况
+ tsg_diagnose_for_interface(log_level)
+
+ #检查应用流量
+ tsg_diagnose_for_app_stream(log_level)
+
+if __name__ == '__main__':
+ global ptable
+ global influxdb_client
+ global g_local_sled_name
+ type, id, g_local_sled_name = tsg_whoami()
+
+ ptable = prettytable.PrettyTable()
+ ptable.field_names = ["Sled", "Type", "Level", "Status"]
+
+ ret, influxdb_client, msg = tsg_influxdb_init('127.0.0.1', 8086, 'admin', 'tsg2019', 'tsg_stat')
+ if ret != 0:
+ ptable.add_row([g_local_sled_name,"common", G_SYS_LOG_STRING[syslog.LOG_ERR],"can't connect influxDB server"])
+
+ tsg_common_diagnose(syslog.LOG_DEBUG)
+ print(ptable) \ No newline at end of file
diff --git a/py_tools/common_get_tags.py b/py_common/common_get_tags.py
index f347171..7e3d148 100644
--- a/py_tools/common_get_tags.py
+++ b/py_common/common_get_tags.py
@@ -1,30 +1,30 @@
-import json
-
-SN_JSON_PATH = '/opt/tsg/etc/tsg_tags.json'
-
-def tsg_get_tags_json():
- try:
- with open(SN_JSON_PATH) as json_fp:
- json_dict = json.load(json_fp)
- return json_dict['tags']
- except IOError:
- return ""
-
-def tsg_get_tags():
- tags = {}
- json_fp = tsg_get_tags_json()
- if len(json_fp) > 0:
- for tag_item in json_fp:
- #print(tag_item)
- tags[tag_item['tag']] = tag_item['value']
-
- return tags
-
-def main():
- tags = {}
-
- tags = tsg_get_tags()
- print(tags)
-
-if __name__ == '__main__':
+import json
+
+SN_JSON_PATH = '/opt/tsg/etc/tsg_tags.json'
+
+def tsg_get_tags_json():
+ try:
+ with open(SN_JSON_PATH) as json_fp:
+ json_dict = json.load(json_fp)
+ return json_dict['tags']
+ except IOError:
+ return ""
+
+def tsg_get_tags():
+ tags = {}
+ json_fp = tsg_get_tags_json()
+ if len(json_fp) > 0:
+ for tag_item in json_fp:
+ #print(tag_item)
+ tags[tag_item['tag']] = tag_item['value']
+
+ return tags
+
+def main():
+ tags = {}
+
+ tags = tsg_get_tags()
+ print(tags)
+
+if __name__ == '__main__':
main() \ No newline at end of file
diff --git a/py_common/common_influxdb.py b/py_common/common_influxdb.py
new file mode 100644
index 0000000..c8ca799
--- /dev/null
+++ b/py_common/common_influxdb.py
@@ -0,0 +1,41 @@
+# coding: utf-8
+
+import sys
+from influxdb import InfluxDBClient
+
+
+#return value
+#ret, result_array, message
+def tsg_influxb_query(influxdb_client, sql_str):
+ try:
+ result = influxdb_client.query(sql_str)
+ except Exception as e:
+ return 1, {}, e
+
+ return 0, result.get_points(), "succ"
+
+#return value:
+#ret, client, error_msg
+def tsg_influxdb_init(arg_host,arg_port,arg_username,arg_password,arg_db):
+ try:
+ influxdb_client = InfluxDBClient(host=arg_host,port=arg_port,username=arg_username,password=arg_password,database=arg_db, timeout=3)
+ except Exception as e:
+ print("InfluxDBClient error: %s" %(e))
+ return 1, None, e
+
+ return 0, influxdb_client, "succ"
+
+if __name__ == '__main__':
+ ret, influxdb_client, msg = tsg_influxdb_init('127.0.0.1', 8086, 'admin', 'tsg2019', 'tsg_stat')
+ if ret != 0:
+ sys.exit(1)
+
+ ret, points,msg = tsg_influxb_query(influxdb_client, "select * from interface order by time desc limit 1")
+ if ret == 0:
+ print("--for xxx in points:-----------------")
+ for point in points:
+ print("--------%s, %s" %(point['PhyRXBits'], point['PhyRXMissed']))
+
+ else:
+ print("query error, %s" %(res))
+
diff --git a/py_common/common_json.py b/py_common/common_json.py
new file mode 100644
index 0000000..61aec60
--- /dev/null
+++ b/py_common/common_json.py
@@ -0,0 +1,20 @@
+# coding: utf-8
+
+import sys
+import psutil
+import time
+import json
+
def tsg_json_parse(file_name):
    """Open ``file_name`` and decode its content as JSON.

    Returns a ``(ret, error, json_dict)`` tuple:
        ``(0, "", parsed_object)`` on success;
        ``(1, exception, "")`` when opening or parsing the file raises.
    """
    try:
        with open(file_name) as json_fp:
            parsed = json.load(json_fp)
    except Exception as err:
        return 1, err, ""
    return 0, "", parsed
diff --git a/py_tools/common_logger.py b/py_common/common_logger.py
index e6e8f25..e6e8f25 100644
--- a/py_tools/common_logger.py
+++ b/py_common/common_logger.py
diff --git a/py_common/common_modules_deploy.py b/py_common/common_modules_deploy.py
new file mode 100644
index 0000000..301fae5
--- /dev/null
+++ b/py_common/common_modules_deploy.py
@@ -0,0 +1,48 @@
+#coding=utf-8
+import os
+import sys
+import syslog
+import subprocess
+import time
+import re
+import logging
+import logging.handlers
+from common_system_cmd import *
+from common_logger import *
+from common_json import *
+from common_modules_deploy import *
+
+# coding: utf-8
+#此文件定义四块计算板分别运行什么模块
+#先根据tsg_chassis_ip.json, 根据当前设备的ip地址, 知道当前设备的sled名称
+#然后根据设备类型名称, 找到当前运行的所有模块(应用)名称
+#然后根据模块名称, 到common_modules_operator.json查找每个模块的操作方法
+#{
+# "modules_deploy": {
+# "mcn0": ["kni", "a.out"],
+# "mcn1": ["tfe", "a1.out"],
+# "mcn2": ["tfe", "a2.out"],
+# "mcn3": ["tfe", "a3.out"]
+# }
+#}
+
+G_MODULE_DEPLOY_JSON = "/opt/tsg/etc/tsg_module_deploy.json"
+
def tsg_get_local_sled_modules(sled_name):
    """Return the list of modules deployed on the sled ``sled_name``.

    The mapping is read from the ``modules_deploy`` object of the JSON
    file named by ``G_MODULE_DEPLOY_JSON``.

    Args:
        sled_name: Key to look up inside ``modules_deploy``.

    Returns:
        The value configured for ``sled_name``, or an empty list when the
        file cannot be parsed, ``modules_deploy`` is missing or empty, or
        ``sled_name`` is not listed.
    """
    ret, err_msg, json_dict = tsg_json_parse(G_MODULE_DEPLOY_JSON)
    if ret != 0:
        return []

    try:
        return json_dict['modules_deploy'][sled_name]
    except (KeyError, TypeError):
        # Unknown sled or malformed file: behave like "nothing deployed"
        # instead of propagating the lookup error to the caller.
        return []
+
+
if __name__ == '__main__':
    # Manual check: print the modules configured for sled "mcn0".
    logger = logger_init(10)
    print(tsg_get_local_sled_modules("mcn0"))
diff --git a/py_common/common_modules_operator.py b/py_common/common_modules_operator.py
new file mode 100644
index 0000000..5d6d1b1
--- /dev/null
+++ b/py_common/common_modules_operator.py
@@ -0,0 +1,66 @@
+#coding=utf-8
+import os
+import sys
+import syslog
+import subprocess
+import time
+import re
+import logging
+import logging.handlers
+from common_system_cmd import *
+from common_logger import *
+from common_json import *
+
+# coding: utf-8
+#先根据tsg_chassis_ip.json, 根据当前设备的ip地址, 知道当前设备的sled名称
+#然后根据设备类型名称, 找到当前运行的所有模块(应用)名称
+#然后根据模块名称, 到common_modules_operator.json查找每个模块的操作方法
+#此文件定义, 每个模块的启动、停止、检查运行状态的方法
+#操作方法参数说明:
+#
+# prog_name: 实际运行进程名称, 如kni模块实际运行的是sapp
+# extra_progs: 其他需要kill的附加程序, 如sapp的r3守护, 需要杀掉, 否则后台可能会重复启动sapp
+# module_cwd: 应用的绝对路径
+# module_exe: 启动应用的名称, 可能跟module_name不一样, 比如用r2启动sapp
+# stop_method: 停止应用方法
+# start_method: 启动应用方法
+# check_method: 检测应用是否运行方法
+#
+#例如: "kni": ["sapp", "r3", "/home/tsg/kni", "r2", "killall", "exec", "ps"],
+
+G_MODULE_DEPLOY_JSON = "/opt/tsg/etc/tsg_module_deploy.json"
+
+TSG_OP_MODULE_NAME_INDEX = 0
+TSG_OP_MODULE_EXTRA_INDEX = 1
+TSG_OP_MODULE_CWD_INDEX = 2
+TSG_OP_MODULE_EXE_INDEX = 3
+TSG_OP_MODULE_STOP_INDEX = 4
+TSG_OP_MODULE_START_INDEX = 5
+TSG_OP_MODULE_STATUS_INDEX = 6
+
def tsg_get_module_opertor(module_name):
    """Look up the operation entry configured for ``module_name``.

    The lookup table is the ``modules_operator`` object of the JSON file
    named by ``G_MODULE_DEPLOY_JSON``.
    NOTE(review): entries are presumably lists indexed by the
    ``TSG_OP_MODULE_*_INDEX`` constants -- confirm against the JSON file.

    Args:
        module_name: Key to look up inside ``modules_operator``.

    Returns:
        The entry stored for ``module_name``, or an empty list when the
        file cannot be parsed, ``modules_operator`` is missing or empty,
        or the module is not listed.
    """
    empty = []

    ret, err_msg, json_dict = tsg_json_parse(G_MODULE_DEPLOY_JSON)
    if ret != 0:
        print("open or parse json file %s error, %s" %(G_MODULE_DEPLOY_JSON, err_msg))
        return empty

    # A missing or malformed table is reported the same way as an empty one
    module_oplist = json_dict.get('modules_operator') if isinstance(json_dict, dict) else None
    if not isinstance(module_oplist, dict) or not module_oplist:
        print("can't get modules_operator from %s" %(G_MODULE_DEPLOY_JSON))
        return empty

    return module_oplist.get(module_name, empty)
+
+
if __name__ == '__main__':
    # Manual check: print the operation entry configured for "kni".
    logger = logger_init(10)
    print(tsg_get_module_opertor("kni"))
diff --git a/py_common/common_system_cmd.py b/py_common/common_system_cmd.py
new file mode 100644
index 0000000..047fa40
--- /dev/null
+++ b/py_common/common_system_cmd.py
@@ -0,0 +1,28 @@
+#coding=utf-8
+import os
+import sys
+
def system_cmd_run(cmd_str):
    """Run ``cmd_str`` through the shell and return ``(exitcode, output)``.

    Commands that begin (after optional leading whitespace) with one of
    the blacklisted names are refused: a message is printed and the
    process exits with status 1.

    Args:
        cmd_str: Shell command line to execute.

    Returns:
        The ``(exitcode, output)`` pair from
        ``subprocess.getstatusoutput``, or ``(1, error_text)`` when
        running the command raises.

    Raises:
        SystemExit: when ``cmd_str`` starts with a blacklisted command.
    """
    # Imported here because this module's header only imports os and sys.
    import re
    import subprocess

    dangerous_cmd = ("rm", "mv", "poweroff", "shutdown")

    # Prefix match, so anything starting with these names is refused too.
    # NOTE(review): only the start of the command line is inspected;
    # something like "ls; rm x" is not caught by this check.
    blocked = re.compile(r"\s*(?:%s)" % "|".join(dangerous_cmd))
    if blocked.match(cmd_str) is not None:
        print("can't run this cmd:%s" %(cmd_str))
        sys.exit(1)

    try:
        exitcode, output = subprocess.getstatusoutput(cmd_str)
    except Exception as e:
        # Python 3 exceptions have no .message attribute; use str(e)
        message = str(e)
        print(e)
        print("###### %s" %(message))
        return 1, message

    return exitcode, output
+ \ No newline at end of file
diff --git a/py_tools/common_telegraf.py b/py_common/common_telegraf.py
index e1cdc43..e3e5744 100644
--- a/py_tools/common_telegraf.py
+++ b/py_common/common_telegraf.py
@@ -6,4 +6,5 @@ def telegraf_init(arg_host, arg_port, arg_tags):
#global telegraf_client
#global_tags = {'host' : "lijia", 'local_ip_addr': "127.0.0.1"}
telegraf_client = telegraf.TelegrafClient(host = arg_host, port = arg_port, tags = arg_tags)
- return telegraf_client \ No newline at end of file
+ return telegraf_client
+
diff --git a/py_common/common_whoami.py b/py_common/common_whoami.py
new file mode 100644
index 0000000..786ef2a
--- /dev/null
+++ b/py_common/common_whoami.py
@@ -0,0 +1,54 @@
+#coding=utf-8
+import os
+import sys
+import syslog
+import subprocess
+import time
+import re
+import logging
+import logging.handlers
+from common_system_cmd import *
+from common_logger import *
+from common_json import *
+from common_whoami import *
+
+G_CHASSIS_IP_JSON = "/opt/tsg/etc/tsg_chassis_ip.json"
+
+
+
def tsg_whoami():
    """Identify the local sled by matching host addresses with the chassis table.

    Runs ``hostname -I`` and compares every reported address with the
    ``ip`` field of the entries under ``tsg_chassis_ip`` in the JSON file
    named by ``G_CHASSIS_IP_JSON``.

    Returns:
        ``(type, id, name)`` of the first matching entry, for example
        ``("mcn", 1, "mcn1")``; ``("", -1, "")`` when the command fails,
        the JSON file cannot be parsed, the table is missing or empty, or
        no address matches.
    """
    # NOTE(review): `logger` is only assigned in this file's __main__
    # block; confirm that importers provide it before calling this.
    not_found = ("", -1, "")

    # system_cmd_run is the shell helper exported by common_system_cmd
    ret, iplist = system_cmd_run("hostname -I")
    if ret != 0:
        return not_found

    iparray = re.split(r'[:\t\s\r\n]\s*', iplist.strip())

    ret, err_msg, json_dict = tsg_json_parse(G_CHASSIS_IP_JSON)
    if ret != 0:
        return not_found

    # A missing table is handled the same way as an empty one
    sleds = json_dict.get('tsg_chassis_ip') if isinstance(json_dict, dict) else None
    if not sleds:
        logger.critical("can't get tsg_chassis_ip from json file %s!" %(G_CHASSIS_IP_JSON))
        return not_found

    for ipaddr in iparray:
        for sled in sleds:
            if ipaddr == sled['ip']:
                return sled['type'], sled['id'], sled['name']

    logger.critical("local ip address can't match any item in json file %s!" %(G_CHASSIS_IP_JSON))
    return not_found
+
if __name__ == '__main__':
    # Manual check: print the detected (type, id, name) triple.
    logger = logger_init(10)
    sled_type, sled_id, sled_name = tsg_whoami()
    print(sled_type, sled_id, sled_name)
diff --git a/py_tools/get_traffic_by_proc.py b/py_tools/get_traffic_by_proc.py
index fec12bb..ddaffd8 100644
--- a/py_tools/get_traffic_by_proc.py
+++ b/py_tools/get_traffic_by_proc.py
@@ -20,7 +20,7 @@ Transmit_packets_index = 10
Transmit_err_index = 11
Transmit_drop_index = 12
-#return exitcode value + output message:
+#return exitcode value, output message:
# 0: succ
# 1: error
def system_cmd_run(cmd_str):
diff --git a/py_tools/tsg_diagnose_background.py b/py_tools/tsg_diagnose_background.py
new file mode 100644
index 0000000..bcdd037
--- /dev/null
+++ b/py_tools/tsg_diagnose_background.py
@@ -0,0 +1,23 @@
+# coding: utf-8
+#与提供给用户执行的tsg_diagnose命令区别是:
+#此文件后台周期性自动运行
+#tsg_diagnose 最低级别是warning,
+import sys
+import time
+import json
+import logging
+from common_telegraf import *
+from common_logger import *
+from common_args import *
+from common_logger import *
+from common_influxdb import *
+
+
+
# TODO: the background diagnose routines are not implemented yet.


if __name__ == '__main__':
    # Placeholder body so the module can be parsed and imported.
    pass
+
+
diff --git a/py_cmd/tsg_get_sn.py b/py_tools/tsg_get_sn.py
index e0bf4f3..e0bf4f3 100644
--- a/py_cmd/tsg_get_sn.py
+++ b/py_tools/tsg_get_sn.py
diff --git a/py_cmd/tsg_update_tags.py b/py_tools/tsg_update_tags.py
index 9650429..c56ed12 100644
--- a/py_cmd/tsg_update_tags.py
+++ b/py_tools/tsg_update_tags.py
@@ -20,7 +20,7 @@ def tsg_get_device_list_info_by_sn(sn):
def tsg_get_tags_by_sn(sn):
dev_list_info = tsg_get_device_list_info_by_sn(sn)
if len(dev_list_info) <= 0:
- #写个空json
+ #返回空json
return "{}"
json_dict = json.loads(dev_list_info)