summaryrefslogtreecommitdiff
path: root/MPE/Nezha/OLAP Service Status.json
diff options
context:
space:
mode:
authorwangchengcheng <[email protected]>2023-07-27 15:43:51 +0800
committerwangchengcheng <[email protected]>2023-07-27 15:43:51 +0800
commit124f687daace8b85e5c74abac04bcd0a92744a8d (patch)
tree4f563326b1be67cfb51bf6a04f1ca4d953536e76 /MPE/Nezha/OLAP Service Status.json
parent08686ae87f9efe7a590f48db74ed133b481c85b1 (diff)
P19 23.07 online-configP19
Diffstat (limited to 'MPE/Nezha/OLAP Service Status.json')
-rw-r--r--MPE/Nezha/OLAP Service Status.json2779
1 files changed, 2779 insertions, 0 deletions
diff --git a/MPE/Nezha/OLAP Service Status.json b/MPE/Nezha/OLAP Service Status.json
new file mode 100644
index 0000000..9164b22
--- /dev/null
+++ b/MPE/Nezha/OLAP Service Status.json
@@ -0,0 +1,2779 @@
+[
+ {
+ "name":"OLAP Service Status",
+ "type":"dashboard",
+ "varType":0,
+ "param":{
+ "chartShare":"none",
+ "variables":[],
+ "report":{
+ "schedule":{
+ "repeat":1,
+ "etime":"",
+ "stime":"",
+ "type":2,
+ "nums":[]
+ },
+ "receivers":[],
+ "enable":false,
+ "range":{
+ "unit":"day",
+ "interval":1,
+ "type":"previous"
+ }
+ }
+ },
+ "remark":"",
+ "charts":[
+ {
+ "name":"Summary",
+ "span":"12.0",
+ "height":"1.0",
+ "type":"Group",
+ "unit":"short",
+ "weight":0,
+ "param":{
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "enable":{
+ "thresholds":false,
+ "legend":false,
+ "valueMapping":false
+ },
+ "collapse":false,
+ "nullType":"null"
+ },
+ "remark":"",
+ "x":"0.0",
+ "y":"1.0",
+ "datasource":"misc",
+ "children":[
+ {
+ "name":"Up Status",
+ "span":"12.0",
+ "height":"2.0",
+ "type":"Line chart",
+ "unit":"short",
+ "weight":7,
+ "param":{
+ "stack":false,
+ "legend":{
+ "values":[],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "legend":true,
+ "valueMapping":true
+ },
+ "style":"line",
+ "nullType":"zero"
+ },
+ "remark":"The number of live services",
+ "x":"0.0",
+ "y":"0.0",
+ "elements":[
+ {
+ "expression":"sum(up{module=\"Galaxy-gateway-nginx\"})",
+ "state":1,
+ "legend":"Gateway Nginx",
+ "name":"A"
+ },
+ {
+ "expression":"sum(up{module=\"Galaxy-qgw-service\"})",
+ "state":1,
+ "legend":"QGW Service",
+ "name":"B"
+ },
+ {
+ "expression":"sum(up{module=\"Galaxy-report-service\"})",
+ "state":1,
+ "legend":"Report Service",
+ "name":"C"
+ },
+ {
+ "expression":"sum(up{module=\"Galaxy-hos-service\"})",
+ "state":1,
+ "legend":"HOS Service",
+ "name":"D"
+ },
+ {
+ "expression":"sum(up{module=\"Galaxy-job-admin\"})",
+ "state":1,
+ "legend":"Job Admin Service",
+ "name":"E"
+ },
+ {
+ "expression":"sum(up{module=\"Galaxy-job-executor\"})",
+ "state":1,
+ "legend":"Job Executor Service",
+ "name":"F"
+ },
+ {
+ "expression":"sum(mysql_up{module=\"NC-Mariadb\"})",
+ "state":1,
+ "legend":"Mariadb",
+ "name":"G"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"QGW Response Latency",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"seconds(s)",
+ "weight":8,
+ "param":{
+ "stack":false,
+ "thresholds":[
+ {
+ "val":"60",
+ "color":"#d64f40",
+ "value":90
+ }
+ ],
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "legend":{
+ "values":[
+ "avg",
+ "last",
+ "max"
+ ],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "visibility":false,
+ "legend":true,
+ "valueMapping":true
+ },
+ "style":"line",
+ "nullType":"zero"
+ },
+ "remark":"QGW service response latency. Used to evaluate whether there is a slow query currently.",
+ "x":"0.0",
+ "y":"2.0",
+ "elements":[
+ {
+ "expression":"sum(rate(http_server_requests_seconds_sum{module=\"Galaxy-qgw-service\", status!~\"5..\"}[5m]))/sum(rate(http_server_requests_seconds_count{module=\"Galaxy-qgw-service\", status!~\"5..\"}[5m]))",
+ "state":1,
+ "legend":"AVG",
+ "name":"A"
+ },
+ {
+ "expression":"max(http_server_requests_seconds_max{module=\"Galaxy-qgw-service\", status!~\"5..\"})",
+ "state":1,
+ "legend":"MAX",
+ "name":"B"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"HOS Put Object Requests /sec",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"short",
+ "weight":9,
+ "param":{
+ "stack":false,
+ "legend":{
+ "values":[
+ "avg",
+ "last",
+ "max"
+ ],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "legend":true,
+ "valueMapping":true
+ },
+ "style":"line",
+ "nullType":"zero"
+ },
+ "remark":"The number of put object requests per second, including total, successful, and failed requests.",
+ "x":"6.0",
+ "y":"2.0",
+ "elements":[
+ {
+ "expression":"sum(irate(http_server_requests_seconds_count{method=\"PUT\",uri=\"/hos/{bucket}/**\"}[2m]))",
+ "state":1,
+ "legend":"Total",
+ "name":"A"
+ },
+ {
+ "expression":"sum(irate(http_server_requests_seconds_count{method=\"PUT\",status=~\"2..\",uri=\"/hos/{bucket}/**\"}[2m]))",
+ "state":1,
+ "legend":"Success",
+ "name":"B"
+ },
+ {
+ "expression":"sum(irate(http_server_requests_seconds_count{method=\"PUT\",status!~\"2..\",uri=\"/hos/{bucket}/**\"}[2m]))",
+ "state":1,
+ "legend":"Failure",
+ "name":"C"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Report Success",
+ "span":"3.0",
+ "height":"1.0",
+ "type":"Stat",
+ "unit":"short",
+ "weight":10,
+ "param":{
+ "enable":{
+ "thresholds":true,
+ "legend":true,
+ "valueMapping":true
+ },
+ "legend":{
+ "values":[],
+ "show":true,
+ "placement":"bottom"
+ },
+ "valueMapping":[
+ {
+ "color":{
+ "bac":"#E81616",
+ "text":"#000"
+ },
+ "display":"{{A.$value}}",
+ "show":false,
+ "text":"",
+ "type":"value",
+ "value":0
+ }
+ ],
+ "nullType":"connected",
+ "statistics":"last"
+ },
+ "remark":"Total number of report jobs successfully executed",
+ "x":"0.0",
+ "y":"5.0",
+ "elements":[
+ {
+ "expression":"sum(report_success_count_total{module=\"Galaxy-report-service\"})",
+ "state":1,
+ "legend":"Report Success",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Report Failure",
+ "span":"3.0",
+ "height":"1.0",
+ "type":"Stat",
+ "unit":"none",
+ "weight":11,
+ "param":{
+ "enable":{
+ "thresholds":true,
+ "legend":true,
+ "valueMapping":false
+ },
+ "legend":{
+ "values":[],
+ "show":true,
+ "placement":"bottom"
+ },
+ "valueMapping":[
+ {
+ "regx":">10",
+ "color":{
+ "bac":"#F78539FF",
+ "text":"#000"
+ },
+ "display":"{{value}}",
+ "show":true,
+ "from":1,
+ "text":"",
+ "to":20,
+ "type":"range",
+ "error":false,
+ "value":10
+ },
+ {
+ "color":{
+ "bac":"#E30521FF",
+ "text":"#000000FF"
+ },
+ "display":"{{value}}",
+ "show":true,
+ "from":20,
+ "to":10000,
+ "type":"range",
+ "error":false
+ }
+ ],
+ "text":"value",
+ "nullType":"connected",
+ "statistics":"last"
+ },
+ "remark":"Total number of report jobs failed",
+ "x":"3.0",
+ "y":"5.0",
+ "elements":[
+ {
+ "expression":"sum(report_fail_count_total{module=\"Galaxy-report-service\"})",
+ "state":1,
+ "legend":"Report fail sum",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Job Success(today)",
+ "span":"3.0",
+ "height":"1.0",
+ "type":"Stat",
+ "unit":"short",
+ "weight":12,
+ "param":{
+ "enable":{
+ "thresholds":true,
+ "legend":true,
+ "valueMapping":false
+ },
+ "legend":{
+ "values":[],
+ "show":true,
+ "placement":"bottom"
+ },
+ "valueMapping":[
+ {
+ "color":{
+ "bac":"#fff",
+ "text":"#000"
+ },
+ "display":"{{A.$value}}",
+ "show":false,
+ "text":"",
+ "type":"value",
+ "value":0
+ }
+ ],
+ "nullType":"connected",
+ "statistics":"last"
+ },
+ "remark":"The total number of jobs successfully executed today",
+ "x":"6.0",
+ "y":"5.0",
+ "elements":[
+ {
+ "expression":"sum(triggerCountSucTotal{module=\"Galaxy-job-admin\"}) / sum(up{module=\"Galaxy-job-admin\"})",
+ "state":1,
+ "legend":"Job Success",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Job Failure(today)",
+ "span":"3.0",
+ "height":"1.0",
+ "type":"Stat",
+ "unit":"short",
+ "weight":13,
+ "param":{
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "enable":{
+ "thresholds":true,
+ "visibility":false,
+ "legend":true,
+ "valueMapping":true
+ },
+ "legend":{
+ "values":[],
+ "show":true,
+ "placement":"bottom"
+ },
+ "valueMapping":[
+ {
+ "color":{
+ "bac":"#FF9C59FF",
+ "icon":"#FF9830FF",
+ "text":"#000"
+ },
+ "display":"{{value}}",
+ "show":true,
+ "from":1,
+ "text":"",
+ "to":50,
+ "type":"range",
+ "error":false,
+ "value":0
+ },
+ {
+ "color":{
+ "bac":"#E30521FF",
+ "icon":"#df2fefFF",
+ "text":"#000000FF"
+ },
+ "display":"{{value}}",
+ "show":true,
+ "from":51,
+ "to":10000,
+ "type":"range",
+ "error":false,
+ "value":50
+ }
+ ],
+ "text":"value",
+ "nullType":"connected",
+ "statistics":"last"
+ },
+ "remark":"The total number of jobs that failed today",
+ "x":"9.0",
+ "y":"5.0",
+ "elements":[
+ {
+ "expression":"sum(triggerCountFailTotal{module=\"Galaxy-job-admin\"}) / sum(up{module=\"Galaxy-job-admin\"})",
+ "state":1,
+ "legend":"Job Failure",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ }
+ ]
+ },
+ {
+ "name":"Gateway Nginx",
+ "span":"12.0",
+ "height":"1.0",
+ "type":"Group",
+ "unit":"short",
+ "weight":1,
+ "param":{
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "enable":{
+ "thresholds":false,
+ "visibility":false,
+ "legend":false,
+ "valueMapping":false
+ },
+ "collapse":true,
+ "nullType":"null"
+ },
+ "remark":"",
+ "x":"0.0",
+ "y":"1.0",
+ "datasource":"misc",
+ "children":[
+ {
+ "name":"Uptime",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Table",
+ "unit":"seconds(s)",
+ "weight":14,
+ "param":{
+ "enable":{
+ "thresholds":true,
+ "legend":true,
+ "valueMapping":true
+ },
+ "columns":[
+ {
+ "unit":2,
+ "display":"{{A.$legend}}",
+ "show":true,
+ "title":"Element"
+ },
+ {
+ "unit":30,
+ "display":"{{A.$value}}",
+ "show":true,
+ "title":"Value"
+ }
+ ],
+ "legend":{
+ "values":[],
+ "show":true,
+ "placement":"bottom"
+ },
+ "valueMapping":[
+ {
+ "color":{
+ "bac":"#fff",
+ "text":"#000"
+ },
+ "columns":"Value",
+ "display":"{{A.$value}}",
+ "show":false,
+ "column":"Value",
+ "text":"",
+ "type":"text",
+ "value":""
+ }
+ ],
+ "nullType":"zero",
+ "statistics":"last"
+ },
+ "remark":"Gateway Nginx running time of each asset",
+ "x":"0.0",
+ "y":"0.0",
+ "elements":[
+ {
+ "expression":"time() - nginx_vts_start_time_seconds{module=\"Galaxy-gateway-nginx\"}",
+ "state":1,
+ "legend":"{{asset}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Server Requests (asset,code)",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"short",
+ "weight":15,
+ "param":{
+ "stack":false,
+ "legend":{
+ "values":[],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "legend":true,
+ "valueMapping":true
+ },
+ "style":"line",
+ "nullType":"zero"
+ },
+ "remark":"The number of requests per second for each Gateway Nginx asset, by response code.",
+ "x":"6.0",
+ "y":"0.0",
+ "elements":[
+ {
+ "expression":"sum(irate(nginx_vts_server_requests_total{module=\"Galaxy-gateway-nginx\",code!=\"total\"}[5m])) by (code,asset)",
+ "state":1,
+ "legend":"{{asset}}_[{{code}}]",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Upstream Requests (asset,code)",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"short",
+ "weight":16,
+ "param":{
+ "stack":false,
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "legend":{
+ "values":[],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "visibility":false,
+ "legend":true,
+ "valueMapping":true
+ },
+ "style":"line",
+ "nullType":"zero"
+ },
+ "remark":"The number of upstream requests per second for each Gateway Nginx asset, by response code.",
+ "x":"0.0",
+ "y":"3.0",
+ "elements":[
+ {
+ "expression":"sum(irate(nginx_vts_upstream_requests_total{module=\"Galaxy-gateway-nginx\",code!=\"total\"}[5m])) by (code,asset)",
+ "state":1,
+ "legend":"{{asset}}_[{{code}}]",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Upstream Response Time (backend,upstream)",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"seconds(s)",
+ "weight":17,
+ "param":{
+ "stack":false,
+ "legend":{
+ "values":[],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "legend":true,
+ "valueMapping":true
+ },
+ "style":"line",
+ "nullType":"zero"
+ },
+ "remark":"The upstream-only response processing time for each backend and upstream",
+ "x":"6.0",
+ "y":"3.0",
+ "elements":[
+ {
+ "expression":"max(irate(nginx_vts_upstream_response_seconds_total{module=\"Galaxy-gateway-nginx\"}[5m])) by (backend, upstream)",
+ "state":1,
+ "legend":"{{backend}}-{{upstream}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Connections",
+ "span":"12.0",
+ "height":"4.0",
+ "type":"Line chart",
+ "unit":"none",
+ "weight":18,
+ "param":{
+ "stack":0,
+ "thresholds":[
+ {
+ "color":"#ed0937"
+ }
+ ],
+ "thresholdShow":true,
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "legend":{
+ "values":[],
+ "show":true,
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":false,
+ "visibility":false,
+ "legend":true,
+ "valueMapping":false
+ },
+ "nullType":"null"
+ },
+ "remark":"Number of connections for the nginx proxy in each asset",
+ "x":"0.0",
+ "y":"6.0",
+ "elements":[
+ {
+ "expression":"nginx_vts_main_connections{module=\"Galaxy-gateway-nginx\",status=\"active\"}",
+ "state":1,
+ "legend":"{{asset}}-{{status}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ }
+ ]
+ },
+ {
+ "name":"QGW Service",
+ "span":"12.0",
+ "height":"1.0",
+ "type":"Group",
+ "unit":"short",
+ "weight":2,
+ "param":{
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "enable":{
+ "thresholds":false,
+ "visibility":false,
+ "legend":false,
+ "valueMapping":false
+ },
+ "collapse":true,
+ "nullType":"null"
+ },
+ "remark":"",
+ "x":"0.0",
+ "y":"2.0",
+ "datasource":"misc",
+ "children":[
+ {
+ "name":"Uptime",
+ "span":"6.0",
+ "height":"2.0",
+ "type":"Table",
+ "unit":"seconds(s)",
+ "weight":19,
+ "param":{
+ "enable":{
+ "thresholds":true,
+ "legend":true,
+ "valueMapping":true
+ },
+ "columns":[
+ {
+ "unit":2,
+ "display":"{{A.$legend}}",
+ "show":true,
+ "title":"Element"
+ },
+ {
+ "unit":30,
+ "display":"{{A.$value}}",
+ "show":true,
+ "title":"Value"
+ }
+ ],
+ "legend":{
+ "values":[],
+ "show":true,
+ "placement":"bottom"
+ },
+ "valueMapping":[
+ {
+ "color":{
+ "bac":"#fff",
+ "text":"#000"
+ },
+ "columns":"Value",
+ "display":"{{A.$value}}",
+ "show":false,
+ "column":"Value",
+ "text":"",
+ "type":"text",
+ "value":""
+ }
+ ],
+ "nullType":"zero",
+ "statistics":"last"
+ },
+ "remark":"QGW Service running time of each asset",
+ "x":"0.0",
+ "y":"0.0",
+ "elements":[
+ {
+ "expression":"process_uptime_seconds{module='Galaxy-qgw-service'}",
+ "state":1,
+ "legend":"{{asset}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Http Error Responses",
+ "span":"6.0",
+ "height":"2.0",
+ "type":"Line chart",
+ "unit":"short",
+ "weight":20,
+ "param":{
+ "stack":0,
+ "thresholds":[
+ {
+ "color":"#ca8bf9"
+ }
+ ],
+ "thresholdShow":true,
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "legend":{
+ "values":[],
+ "show":true,
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":false,
+ "visibility":false,
+ "legend":true,
+ "valueMapping":false
+ },
+ "nullType":"zero"
+ },
+ "remark":"The number of failed requests for QGW.",
+ "x":"6.0",
+ "y":"0.0",
+ "elements":[
+ {
+ "expression":"count(http_server_requests_seconds_count{module=\"Galaxy-qgw-service\", status!~\"2..\",uri!~\".*/prometheus\"})",
+ "state":1,
+ "legend":"count",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Http Slow Query",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"short",
+ "weight":21,
+ "param":{
+ "stack":false,
+ "thresholds":[
+ {
+ "val":"300",
+ "color":"#d64f40"
+ }
+ ],
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "legend":{
+ "values":[],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":false,
+ "visibility":false,
+ "legend":true,
+ "valueMapping":true
+ },
+ "style":"line",
+ "nullType":"zero"
+ },
+ "remark":"The number of slow queries",
+ "x":"0.0",
+ "y":"2.0",
+ "elements":[
+ {
+ "expression":"count(http_server_requests_seconds_max{module=\"Galaxy-qgw-service\",uri!~\".*/prometheus\"}>90)",
+ "state":1,
+ "legend":">90s",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Memory Usage",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"percent(0.0-1.0)",
+ "weight":22,
+ "param":{
+ "stack":false,
+ "legend":{
+ "values":[
+ "avg",
+ "last",
+ "max"
+ ],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "legend":true,
+ "valueMapping":true
+ },
+ "style":"line",
+ "nullType":"connected"
+ },
+ "remark":"The memory usage in each asset",
+ "x":"6.0",
+ "y":"2.0",
+ "elements":[
+ {
+ "expression":"sum by(asset) (jvm_memory_used_bytes{module=\"Galaxy-qgw-service\"}) / sum by(asset) (jvm_memory_max_bytes{module=\"Galaxy-qgw-service\"})",
+ "state":1,
+ "legend":"{{asset}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ }
+ ]
+ },
+ {
+ "name":"Report Service",
+ "span":"12.0",
+ "height":"3.0",
+ "type":"Group",
+ "unit":"short",
+ "weight":3,
+ "param":{
+ "collapse":true,
+ "nullType":"null"
+ },
+ "remark":"",
+ "x":"0.0",
+ "y":"2.0",
+ "datasource":"misc",
+ "children":[
+ {
+ "name":"Uptime",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Table",
+ "unit":"seconds(s)",
+ "weight":23,
+ "param":{
+ "enable":{
+ "thresholds":true,
+ "legend":true,
+ "valueMapping":true
+ },
+ "columns":[
+ {
+ "unit":2,
+ "display":"{{A.$legend}}",
+ "show":true,
+ "title":"Element"
+ },
+ {
+ "unit":30,
+ "display":"{{A.$value}}",
+ "show":true,
+ "title":"Value"
+ }
+ ],
+ "legend":{
+ "values":[],
+ "show":true,
+ "placement":"bottom"
+ },
+ "valueMapping":[
+ {
+ "color":{
+ "bac":"#fff",
+ "text":"#000"
+ },
+ "columns":"Value",
+ "display":"{{A.$value}}",
+ "show":false,
+ "column":"Value",
+ "text":"",
+ "type":"value",
+ "value":0
+ }
+ ],
+ "nullType":"zero",
+ "statistics":"last"
+ },
+ "remark":"Report Service running time of each asset",
+ "x":"0.0",
+ "y":"0.0",
+ "elements":[
+ {
+ "expression":"process_uptime_seconds{module=\"Galaxy-report-service\"}",
+ "state":1,
+ "legend":"{{asset}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Execute time",
+ "span":"3.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"seconds(s)",
+ "weight":24,
+ "param":{
+ "stack":false,
+ "legend":{
+ "values":[
+ "avg",
+ "last",
+ "max"
+ ],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "legend":true,
+ "valueMapping":true
+ },
+ "style":"line",
+ "nullType":"zero"
+ },
+ "remark":"The execution time of report jobs",
+ "x":"6.0",
+ "y":"0.0",
+ "elements":[
+ {
+ "expression":"sum(rate(http_server_requests_seconds_sum{module=\"Galaxy-report-service\", status!~\"5..\"}[1m]))/sum(rate(http_server_requests_seconds_count{module=\"Galaxy-report-service\", status!~\"5..\"}[1m]))",
+ "state":1,
+ "legend":"AVG",
+ "name":"A"
+ },
+ {
+ "expression":"max(http_server_requests_seconds_max{module=\"Galaxy-report-service\", status!~\"5..\"})",
+ "state":1,
+ "legend":"MAX",
+ "name":"B"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Report Success / All",
+ "span":"3.0",
+ "height":"3.0",
+ "type":"Stat",
+ "unit":"percent(0-100)",
+ "weight":25,
+ "param":{
+ "enable":{
+ "thresholds":true,
+ "legend":true,
+ "valueMapping":true
+ },
+ "legend":{
+ "values":[],
+ "show":true,
+ "placement":"bottom"
+ },
+ "valueMapping":[
+ {
+ "color":{
+ "bac":"#fff",
+ "text":"#000"
+ },
+ "display":"{{A.$value}}",
+ "show":false,
+ "text":"",
+ "type":"value",
+ "value":0
+ }
+ ],
+ "nullType":"connected",
+ "statistics":"last"
+ },
+ "remark":"The success rate of Report jobs",
+ "x":"9.0",
+ "y":"0.0",
+ "elements":[
+ {
+ "expression":"sum(report_success_count_total{module=\"Galaxy-report-service\"}) / (sum(report_success_count_total{module=\"Galaxy-report-service\"}) + sum(report_fail_count_total{module=\"Galaxy-report-service\"})) * 100",
+ "state":1,
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Memory Usage",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"percent(0.0-1.0)",
+ "weight":26,
+ "param":{
+ "stack":false,
+ "legend":{
+ "values":[
+ "avg",
+ "last",
+ "max"
+ ],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "legend":true,
+ "valueMapping":true
+ },
+ "style":"line",
+ "nullType":"zero"
+ },
+ "remark":"The memory usage in each asset",
+ "x":"0.0",
+ "y":"3.0",
+ "elements":[
+ {
+ "expression":"sum by(asset) (jvm_memory_used_bytes{module=\"Galaxy-report-service\"}) / sum by(asset) (jvm_memory_max_bytes{module=\"Galaxy-report-service\"})",
+ "state":1,
+ "legend":"{{asset}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"CPU Usage",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"percent(0.0-1.0)",
+ "weight":27,
+ "param":{
+ "stack":false,
+ "legend":{
+ "values":[
+ "avg",
+ "max"
+ ],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "legend":true,
+ "valueMapping":true
+ },
+ "style":"line",
+ "nullType":"zero"
+ },
+ "remark":"The CPU usage in each asset",
+ "x":"6.0",
+ "y":"3.0",
+ "elements":[
+ {
+ "expression":"system_cpu_usage{module=\"Galaxy-report-service\"}",
+ "state":1,
+ "legend":"{{asset}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ }
+ ]
+ },
+ {
+ "name":"HOS Service",
+ "span":"12.0",
+ "height":"1.0",
+ "type":"Group",
+ "unit":"short",
+ "weight":4,
+ "param":{
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "enable":{
+ "thresholds":false,
+ "visibility":false,
+ "legend":false,
+ "valueMapping":false
+ },
+ "collapse":true,
+ "nullType":"null"
+ },
+ "remark":"",
+ "x":"0.0",
+ "y":"2.0",
+ "datasource":"misc",
+ "children":[
+ {
+ "name":"Uptime",
+ "span":"4.0",
+ "height":"3.0",
+ "type":"Table",
+ "unit":"seconds(s)",
+ "weight":28,
+ "param":{
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "enable":{
+ "thresholds":true,
+ "visibility":false,
+ "legend":true,
+ "valueMapping":true
+ },
+ "columns":[
+ {
+ "unit":2,
+ "display":"{{A.$legend}}",
+ "show":true,
+ "title":"Element"
+ },
+ {
+ "unit":30,
+ "display":"{{A.$value}}",
+ "show":true,
+ "title":"Value"
+ }
+ ],
+ "legend":{
+ "values":[],
+ "show":true,
+ "placement":"bottom"
+ },
+ "valueMapping":[
+ {
+ "color":{
+ "bac":"#fff",
+ "icon":"#f477a3FF",
+ "text":"#000"
+ },
+ "columns":"Value",
+ "display":"{{A.$value}}",
+ "show":false,
+ "column":"Value",
+ "text":"",
+ "type":"value",
+ "value":0
+ }
+ ],
+ "nullType":"zero",
+ "statistics":"last"
+ },
+ "remark":"HOS Service running time of each asset",
+ "x":"0.0",
+ "y":"0.0",
+ "elements":[
+ {
+ "expression":"process_uptime_seconds{module=\"Galaxy-hos-service\"}",
+ "state":1,
+ "legend":"{{asset}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Put Object Requests /sec",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"short",
+ "weight":29,
+ "param":{
+ "stack":false,
+ "legend":{
+ "values":[
+ "avg",
+ "last",
+ "max"
+ ],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "legend":true,
+ "valueMapping":true
+ },
+ "style":"line",
+ "nullType":"zero"
+ },
+ "remark":"The number of put objects per second",
+ "x":"0.0",
+ "y":"6.0",
+ "elements":[
+ {
+ "expression":"sum by (asset)(irate(http_server_requests_seconds_count{method=\"PUT\",uri=\"/hos/{bucket}/**\"}[5m]))",
+ "state":1,
+ "legend":"{{asset}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Error Requests /sec",
+ "span":"4.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"short",
+ "weight":30,
+ "param":{
+ "rightYAxis":{
+ "elementNames":[],
+ "unit":2,
+ "style":"line",
+ "label":""
+ },
+ "stack":false,
+ "thresholds":[],
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "legend":{
+ "values":[
+ "avg",
+ "last",
+ "max"
+ ],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "visibility":false,
+ "legend":true,
+ "valueMapping":true
+ },
+ "link":"",
+ "style":"line",
+ "valueMapping":[],
+ "nullType":"zero"
+ },
+ "remark":"The number of failed requests per second",
+ "x":"8.0",
+ "y":"0.0",
+ "elements":[
+ {
+ "expression":"sum by(status,datacenter)(irate(http_server_requests_seconds_count{uri=~\"/hos/.*\",status!~\"2..\"}[5m]))",
+ "state":1,
+ "legend":"{{status}}-{{datacenter}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Download File Requests /sec",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"short",
+ "weight":31,
+ "param":{
+ "stack":false,
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "legend":{
+ "values":[
+ "avg",
+ "last",
+ "max"
+ ],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "visibility":false,
+ "legend":true,
+ "valueMapping":true
+ },
+ "style":"line",
+ "nullType":"zero"
+ },
+ "remark":"The number of Download File requests per second",
+ "x":"6.0",
+ "y":"6.0",
+ "elements":[
+ {
+ "expression":"sum by (asset)(irate(http_server_requests_seconds_count{method=\"GET\",uri=\"/hos/{bucket}/**\"}[5m]))",
+ "state":1,
+ "legend":"{{asset}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Write Bytes /sec",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"short",
+ "weight":32,
+ "param":{
+ "stack":false,
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "legend":{
+ "values":[
+ "avg",
+ "max"
+ ],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "visibility":false,
+ "legend":true,
+ "valueMapping":true
+ },
+ "style":"line",
+ "nullType":"connected"
+ },
+ "remark":"The incoming byte rate of files collected and processed in each asset",
+ "x":"6.0",
+ "y":"9.0",
+ "elements":[
+ {
+ "expression":"irate(dashInfo{severity=\"fileBytes\",module=\"Galaxy-hos-service\"}[5m])",
+ "state":1,
+ "legend":"{{asset}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Put Requests By Size /sec",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"short",
+ "weight":33,
+ "param":{
+ "rightYAxis":{
+ "elementNames":[],
+ "unit":2,
+ "style":"line",
+ "label":""
+ },
+ "stack":false,
+ "thresholds":[],
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "legend":{
+ "values":[
+ "avg",
+ "max"
+ ],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "visibility":false,
+ "legend":true,
+ "valueMapping":true
+ },
+ "link":"",
+ "style":"line",
+ "valueMapping":[],
+ "nullType":"zero"
+ },
+ "remark":"The number of put requests per second with size distribution",
+ "x":"0.0",
+ "y":"15.0",
+ "elements":[
+ {
+ "expression":"sum by(datacenter)(irate(dashInfo{severity=\"smallFileCount\",module=\"Galaxy-hos-service\"}[5m]))",
+ "state":1,
+ "legend":"{{datacenter}}-<1MB",
+ "name":"A"
+ },
+ {
+ "expression":"sum by(datacenter)(irate(dashInfo{severity=\"middleFileCount\",module=\"Galaxy-hos-service\"}[5m]))",
+ "state":1,
+ "legend":"{{datacenter}}-1-10MB",
+ "name":"B"
+ },
+ {
+ "expression":"sum by(datacenter)(irate(dashInfo{severity=\"bigFileCount\",module=\"Galaxy-hos-service\"}[5m]))",
+ "state":1,
+ "legend":"{{datacenter}}->10MB",
+ "name":"C"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Memory Usage",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"percent(0.0-1.0)",
+ "weight":34,
+ "param":{
+ "stack":false,
+ "legend":{
+ "values":[
+ "avg",
+ "last",
+ "max"
+ ],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "legend":true,
+ "valueMapping":true
+ },
+ "style":"line",
+ "nullType":"zero"
+ },
+ "remark":"The memory usage in each asset",
+ "x":"0.0",
+ "y":"3.0",
+ "elements":[
+ {
+ "expression":"sum by(asset)(jvm_memory_used_bytes{module=\"Galaxy-hos-service\"}>0) / sum by(asset) (jvm_memory_max_bytes{module=\"Galaxy-hos-service\"}>0)",
+ "state":1,
+ "legend":"{{asset}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"CPU Usage",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"percent(0.0-1.0)",
+ "weight":35,
+ "param":{
+ "stack":false,
+ "legend":{
+ "values":[
+ "avg",
+ "last",
+ "max"
+ ],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "legend":true,
+ "valueMapping":true
+ },
+ "style":"line",
+ "nullType":"zero"
+ },
+ "remark":"The CPU usage in each asset",
+ "x":"6.0",
+ "y":"3.0",
+ "elements":[
+ {
+ "expression":"process_cpu_usage{module=\"Galaxy-hos-service\"}",
+ "state":1,
+ "legend":"{{asset}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"HOS Node Requests /sec",
+ "span":"4.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"short",
+ "weight":36,
+ "param":{
+ "stack":false,
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "legend":{
+ "values":[
+ "total",
+ "avg",
+ "max"
+ ],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "visibility":false,
+ "legend":true,
+ "valueMapping":true
+ },
+ "style":"line",
+ "nullType":"zero"
+ },
+ "remark":"The number of hos requests per second",
+ "x":"4.0",
+ "y":"0.0",
+ "elements":[
+ {
+ "expression":"sum(irate(http_server_requests_seconds_count{uri=\"/hos/{bucket}/**\"}[5m])) by (asset)",
+ "state":1,
+ "legend":"{{asset}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"HOS Put File /sec",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"short",
+ "weight":37,
+ "param":{
+ "stack":false,
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "legend":{
+ "values":[
+ "avg",
+ "max"
+ ],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":false,
+ "visibility":false,
+ "legend":false,
+ "valueMapping":true
+ },
+ "style":"line",
+ "nullType":"null"
+ },
+ "remark":"The number of files uploaded per second",
+ "x":"0.0",
+ "y":"9.0",
+ "elements":[
+ {
+ "expression":"sum by(datacenter)(irate(dashInfo{severity=\"fileCount\",module=\"Galaxy-hos-service\"}[5m]))",
+ "state":1,
+ "legend":"{{datacenter}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"HOS TTL Delete File /sec",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"short",
+ "weight":38,
+ "param":{
+ "stack":false,
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "legend":{
+ "values":[
+ "avg",
+ "max"
+ ],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":false,
+ "visibility":false,
+ "legend":false,
+ "valueMapping":true
+ },
+ "style":"line",
+ "nullType":"null"
+ },
+ "remark":"The number of ttl deleted files per second.",
+ "x":"6.0",
+ "y":"15.0",
+ "elements":[
+ {
+ "expression":"sum by(datacenter)(irate(dashInfo{severity=\"ttlDeleteFileCount\",module=\"Galaxy-hos-service\"}[5m]))",
+ "state":1,
+ "legend":"DeleteCount-{{datacenter}}",
+ "name":"A"
+ },
+ {
+ "expression":"sum by(datacenter)(irate(dashInfo{severity=\"ttlDeleteFileErrorCount\",module=\"Galaxy-hos-service\"}[5m]))",
+ "state":1,
+ "legend":"DeleteErrorCount-{{datacenter}}",
+ "name":"B"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"HOS Put File By Bucket /sec",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"short",
+ "weight":39,
+ "param":{
+ "rightYAxis":{
+ "elementNames":[],
+ "unit":2,
+ "style":"line",
+ "label":""
+ },
+ "stack":false,
+ "thresholds":[],
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "legend":{
+ "values":[
+ "avg",
+ "max"
+ ],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":false,
+ "visibility":false,
+ "legend":false,
+ "valueMapping":true
+ },
+ "link":"",
+ "style":"line",
+ "valueMapping":[],
+ "nullType":"null"
+ },
+ "remark":"The number of files uploaded per second for each bucket",
+ "x":"0.0",
+ "y":"12.0",
+ "elements":[
+ {
+ "expression":"sum by(parent,datacenter)(irate(Hadoop_HBase_metric_putCount{namespace=\"default\",type=\"index\",indextype=\"time\"}[5m]))",
+ "state":1,
+ "legend":"{{parent}}-{{datacenter}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"HOS Put Requests By Bucket /sec",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"short",
+ "weight":40,
+ "param":{
+ "rightYAxis":{
+ "elementNames":[],
+ "unit":2,
+ "style":"line",
+ "label":""
+ },
+ "stack":false,
+ "thresholds":[],
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "legend":{
+ "values":[
+ "avg",
+ "max"
+ ],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":false,
+ "visibility":false,
+ "legend":false,
+ "valueMapping":true
+ },
+ "link":"",
+ "style":"line",
+ "valueMapping":[],
+ "nullType":"null"
+ },
+ "remark":"The number of put requests per second for each bucket",
+ "x":"6.0",
+ "y":"12.0",
+ "elements":[
+ {
+ "expression":"sum by(table,datacenter)(irate(Hadoop_HBase_metric_putCount{namespace=\"default\",type=\"data\"}[5m]))",
+ "state":1,
+ "legend":"{{table}}-{{datacenter}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ }
+ ]
+ },
+ {
+ "name":"Job Service",
+ "span":"12.0",
+ "height":"3.0",
+ "type":"Group",
+ "unit":"short",
+ "weight":5,
+ "param":{
+ "collapse":true,
+ "nullType":"null"
+ },
+ "remark":"",
+ "x":"0.0",
+ "y":"21.0",
+ "datasource":"misc",
+ "children":[
+ {
+ "name":"Job Success/All",
+ "span":"3.0",
+ "height":"2.0",
+ "type":"Stat",
+ "unit":"percent(0.0-1.0)",
+ "weight":41,
+ "param":{
+ "enable":{
+ "thresholds":true,
+ "legend":true,
+ "valueMapping":false
+ },
+ "legend":{
+ "values":[],
+ "show":true,
+ "placement":"bottom"
+ },
+ "valueMapping":[
+ {
+ "color":{
+ "bac":"#fff",
+ "text":"#000"
+ },
+ "show":false,
+ "text":"",
+ "type":"value",
+ "value":0
+ }
+ ],
+ "nullType":"connected",
+ "statistics":"last"
+ },
+ "remark":"The success rate of jobs",
+ "x":"0.0",
+ "y":"0.0",
+ "elements":[
+ {
+ "expression":"sum(jobLogSuccessCount{module=\"Galaxy-job-admin\"})/sum(jobLogCount{module=\"Galaxy-job-admin\"})",
+ "state":1,
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Job Executor Sum",
+ "span":"3.0",
+ "height":"2.0",
+ "type":"Stat",
+ "unit":"short",
+ "weight":42,
+ "param":{
+ "enable":{
+ "thresholds":true,
+ "legend":true,
+ "valueMapping":true
+ },
+ "valueMapping":[
+ {
+ "color":{
+ "bac":"#fff",
+ "text":"#000"
+ },
+ "text":"",
+ "type":"value",
+ "value":""
+ }
+ ],
+ "nullType":"connected",
+ "statistics":"last"
+ },
+ "remark":"The total number of executed jobs",
+ "x":"3.0",
+ "y":"0.0",
+ "elements":[
+ {
+ "expression":"sum(jobLogCount{module=\"Galaxy-job-admin\"})/count(jobLogCount{module=\"Galaxy-job-admin\"})",
+ "state":1,
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Job Admin UP Time",
+ "span":"6.0",
+ "height":"2.0",
+ "type":"Table",
+ "unit":"seconds(s)",
+ "weight":43,
+ "param":{
+ "enable":{
+ "thresholds":true,
+ "legend":true,
+ "valueMapping":false
+ },
+ "columns":[
+ {
+ "unit":2,
+ "display":"{{A.$legend}}",
+ "show":true,
+ "title":"Element"
+ },
+ {
+ "unit":30,
+ "display":"{{A.$value}}",
+ "show":true,
+ "title":"Value"
+ }
+ ],
+ "legend":{
+ "values":[],
+ "show":true,
+ "placement":"bottom"
+ },
+ "valueMapping":[
+ {
+ "color":{
+ "bac":"#fff",
+ "text":"#000"
+ },
+ "columns":"Value",
+ "display":"{{A.$value}}",
+ "show":false,
+ "column":"Value",
+ "text":"",
+ "type":"text",
+ "error":true,
+ "value":""
+ }
+ ],
+ "nullType":"zero",
+ "statistics":"last"
+ },
+ "remark":"Job Admin Service running time of each asset",
+ "x":"6.0",
+ "y":"0.0",
+ "elements":[
+ {
+ "expression":"process_uptime_seconds{module=\"Galaxy-job-admin\"}",
+ "state":1,
+ "legend":"{{asset}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Today Executed Jobs",
+ "span":"3.0",
+ "height":"1.0",
+ "type":"Stat",
+ "unit":"short",
+ "weight":44,
+ "param":{
+ "enable":{
+ "thresholds":true,
+ "legend":true,
+ "valueMapping":true
+ },
+ "valueMapping":[
+ {
+ "color":{
+ "bac":"#fff",
+ "text":"#000"
+ },
+ "text":"",
+ "type":"value",
+ "value":""
+ }
+ ],
+ "nullType":"connected",
+ "statistics":"last"
+ },
+ "remark":"The total number of jobs executed today",
+ "x":"0.0",
+ "y":"2.0",
+ "elements":[
+ {
+ "expression":"(sum(triggerCountRunningTotal{module=\"Galaxy-job-admin\"})+sum(triggerDayCountSucList{module=\"Galaxy-job-admin\"})+sum(triggerDayCountFailList{module=\"Galaxy-job-admin\"}))/sum(up{module=\"Galaxy-job-admin\"})",
+ "state":1,
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Today Success Tasks",
+ "span":"3.0",
+ "height":"1.0",
+ "type":"Stat",
+ "unit":"short",
+ "weight":45,
+ "param":{
+ "enable":{
+ "thresholds":true,
+ "legend":true,
+ "valueMapping":true
+ },
+ "valueMapping":[
+ {
+ "color":{
+ "bac":"#fff",
+ "text":"#000"
+ },
+ "text":"",
+ "type":"value",
+ "value":""
+ }
+ ],
+ "nullType":"connected",
+ "statistics":"last"
+ },
+ "remark":"The total number of jobs successfully executed today",
+ "x":"3.0",
+ "y":"2.0",
+ "elements":[
+ {
+ "expression":"sum(triggerDayCountSucList{module=\"Galaxy-job-admin\"})/sum(up{module=\"Galaxy-job-admin\"})",
+ "state":1,
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Today Error Tasks",
+ "span":"3.0",
+ "height":"1.0",
+ "type":"Stat",
+ "unit":"short",
+ "weight":46,
+ "param":{
+ "enable":{
+ "thresholds":true,
+ "legend":true,
+ "valueMapping":true
+ },
+ "valueMapping":[
+ {
+ "color":{
+ "bac":"#fff",
+ "text":"#000"
+ },
+ "text":"",
+ "type":"value",
+ "value":""
+ }
+ ],
+ "nullType":"null",
+ "statistics":"last"
+ },
+ "remark":"The total number of jobs that failed today",
+ "x":"6.0",
+ "y":"2.0",
+ "elements":[
+ {
+ "expression":"sum(triggerDayCountFailList{module=\"Galaxy-job-admin\"})/sum(up{module=\"Galaxy-job-admin\"})",
+ "state":1,
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Today Running Tasks",
+ "span":"3.0",
+ "height":"1.0",
+ "type":"Stat",
+ "unit":"none",
+ "weight":47,
+ "param":{
+ "enable":{
+ "thresholds":true,
+ "legend":true,
+ "valueMapping":true
+ },
+ "valueMapping":[
+ {
+ "color":{
+ "bac":"#fff",
+ "text":"#000"
+ },
+ "text":"",
+ "type":"value",
+ "value":""
+ }
+ ],
+ "nullType":"connected",
+ "statistics":"last"
+ },
+ "remark":"The total number of jobs running today",
+ "x":"9.0",
+ "y":"2.0",
+ "elements":[
+ {
+ "expression":"sum(triggerCountRunningTotal{module=\"Galaxy-job-admin\"})/sum(up{module=\"Galaxy-job-admin\"})",
+ "state":1,
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Job Admin Response Latency",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"seconds(s)",
+ "weight":48,
+ "param":{
+ "stack":false,
+ "legend":{
+ "values":[
+ "avg",
+ "max"
+ ],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "legend":true,
+ "valueMapping":true
+ },
+ "style":"line",
+ "nullType":"zero"
+ },
+ "remark":"The response time of the Job Admin service",
+ "x":"0.0",
+ "y":"3.0",
+ "elements":[
+ {
+ "expression":"sum(rate(http_server_requests_seconds_sum{module=\"Galaxy-job-admin\", status!~\"5..\"}[5m]))/sum(rate(http_server_requests_seconds_count{module=\"Galaxy-job-admin\", status!~\"5..\"}[5m]))",
+ "state":1,
+ "legend":"AVG",
+ "name":"A"
+ },
+ {
+ "expression":"max(http_server_requests_seconds_max{module=\"Galaxy-job-admin\", status!~\"5..\"})",
+ "state":1,
+ "legend":"MAX",
+ "name":"B"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Job Admin Memory Usage",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"percent(0.0-1.0)",
+ "weight":49,
+ "param":{
+ "stack":false,
+ "legend":{
+ "values":[
+ "avg",
+ "last",
+ "max"
+ ],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "legend":true,
+ "valueMapping":true
+ },
+ "style":"line",
+ "nullType":"zero"
+ },
+ "remark":"The memory usage in each asset",
+ "x":"6.0",
+ "y":"3.0",
+ "elements":[
+ {
+ "expression":"sum by(asset) (jvm_memory_used_bytes{module=\"Galaxy-job-admin\"}) / sum by(asset) (jvm_memory_max_bytes{module=\"Galaxy-job-admin\"})",
+ "state":1,
+ "legend":"{{asset}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Job Admin Error Requests /sec",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"short",
+ "weight":50,
+ "param":{
+ "stack":false,
+ "legend":{
+ "values":[
+ "avg",
+ "max"
+ ],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "legend":true,
+ "valueMapping":true
+ },
+ "style":"line",
+ "nullType":"connected"
+ },
+ "remark":"The number of failed requests per second",
+ "x":"0.0",
+ "y":"6.0",
+ "elements":[
+ {
+ "expression":"sum(rate(http_server_requests_seconds_count{module=\"Galaxy-job-admin\", status!~\"2..\"}[1m]))",
+ "state":1,
+ "legend":"used_{{asset}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Job Admin CPU Usage",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"percent(0.0-1.0)",
+ "weight":51,
+ "param":{
+ "stack":false,
+ "legend":{
+ "values":[
+ "avg",
+ "last",
+ "max"
+ ],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "legend":true,
+ "valueMapping":true
+ },
+ "style":"line",
+ "nullType":"connected"
+ },
+ "remark":"The CPU usage in each asset",
+ "x":"6.0",
+ "y":"6.0",
+ "elements":[
+ {
+ "expression":"system_cpu_usage{module=\"Galaxy-job-admin\"}",
+ "state":1,
+ "legend":"{{asset}}_system_cpu_usage",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Job Executor UP Time",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Table",
+ "unit":"seconds(s)",
+ "weight":52,
+ "param":{
+ "enable":{
+ "thresholds":true,
+ "legend":true,
+ "valueMapping":true
+ },
+ "columns":[
+ {
+ "unit":2,
+ "display":"{{A.$legend}}",
+ "show":true,
+ "title":"Element"
+ },
+ {
+ "unit":30,
+ "display":"{{A.$value}}",
+ "show":true,
+ "title":"Value"
+ }
+ ],
+ "legend":{
+ "values":[],
+ "show":true,
+ "placement":"bottom"
+ },
+ "valueMapping":[
+ {
+ "color":{
+ "bac":"#fff",
+ "text":"#000"
+ },
+ "columns":"Value",
+ "display":"{{A.$value}}",
+ "show":false,
+ "column":"Value",
+ "text":"",
+ "type":"value",
+ "value":0
+ }
+ ],
+ "nullType":"zero",
+ "statistics":"last"
+ },
+ "remark":"Job Executor Service running time of each asset",
+ "x":"0.0",
+ "y":"9.0",
+ "elements":[
+ {
+ "expression":"process_uptime_seconds{module=\"Galaxy-job-executor\"}",
+ "state":1,
+ "legend":"{{asset}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Job Executor Memory Used",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"bytes",
+ "weight":53,
+ "param":{
+ "stack":false,
+ "legend":{
+ "values":[
+ "avg",
+ "max"
+ ],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "legend":true,
+ "valueMapping":true
+ },
+ "style":"line",
+ "nullType":"connected"
+ },
+ "remark":"The memory usage in each asset",
+ "x":"6.0",
+ "y":"9.0",
+ "elements":[
+ {
+ "expression":"sum(jvm_memory_used_bytes{module=\"Galaxy-job-executor\"})by(asset)",
+ "state":1,
+ "legend":"used_{{asset}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Job Executor Error Log",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"short",
+ "weight":54,
+ "param":{
+ "stack":false,
+ "legend":{
+ "values":[
+ "avg",
+ "max"
+ ],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "legend":true,
+ "valueMapping":true
+ },
+ "style":"line",
+ "nullType":"connected"
+ },
+ "remark":"The number of error-level log events over the last 5 minutes in each asset",
+ "x":"0.0",
+ "y":"12.0",
+ "elements":[
+ {
+ "expression":"increase(logback_events_total{module=\"Galaxy-job-executor\",level=\"error\"} [5m])",
+ "state":1,
+ "legend":" {{asset}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Job Executor CPU Usage",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"percent(0.0-1.0)",
+ "weight":55,
+ "param":{
+ "stack":false,
+ "legend":{
+ "values":[
+ "avg",
+ "last",
+ "max"
+ ],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "legend":true,
+ "valueMapping":true
+ },
+ "style":"line",
+ "nullType":"zero"
+ },
+ "remark":"The CPU usage in each asset",
+ "x":"6.0",
+ "y":"12.0",
+ "elements":[
+ {
+ "expression":"system_cpu_usage{module=\"Galaxy-job-executor\"}",
+ "state":1,
+ "legend":"{{asset}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ }
+ ]
+ },
+ {
+ "name":"HOS Nginx",
+ "span":"12.0",
+ "height":"1.0",
+ "type":"Group",
+ "unit":"short",
+ "weight":6,
+ "param":{
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "enable":{
+ "thresholds":false,
+ "visibility":false,
+ "legend":false,
+ "valueMapping":false
+ },
+ "collapse":true,
+ "nullType":"null"
+ },
+ "remark":"",
+ "x":"0.0",
+ "y":"20.0",
+ "datasource":"misc",
+ "children":[
+ {
+ "name":"Uptime",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Table",
+ "unit":"seconds(s)",
+ "weight":56,
+ "param":{
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "enable":{
+ "thresholds":true,
+ "visibility":false,
+ "legend":true,
+ "valueMapping":true
+ },
+ "columns":[
+ {
+ "unit":2,
+ "display":"{{A.$legend}}",
+ "show":true,
+ "title":"Element"
+ },
+ {
+ "unit":30,
+ "display":"{{A.$value}}",
+ "show":true,
+ "title":"Value"
+ }
+ ],
+ "legend":{
+ "values":[],
+ "show":true,
+ "placement":"bottom"
+ },
+ "valueMapping":[
+ {
+ "color":{
+ "bac":"#fff",
+ "icon":"#de18f4FF",
+ "text":"#000"
+ },
+ "columns":"Value",
+ "display":"{{A.$value}}",
+ "show":false,
+ "column":"Value",
+ "text":"",
+ "type":"text",
+ "value":""
+ }
+ ],
+ "nullType":"zero",
+ "statistics":"last"
+ },
+ "remark":"HOS Nginx running time of each asset",
+ "x":"0.0",
+ "y":"0.0",
+ "elements":[
+ {
+ "expression":"time() - nginx_vts_start_time_seconds{module=\"Galaxy-hos-nginx\"}",
+ "state":1,
+ "legend":"{{asset}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Connections",
+ "span":"12.0",
+ "height":"4.0",
+ "type":"Line chart",
+ "unit":"none",
+ "weight":57,
+ "param":{
+ "stack":0,
+ "thresholds":[
+ {
+ "color":"#ed0937"
+ }
+ ],
+ "thresholdShow":true,
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "legend":{
+ "values":[],
+ "show":true,
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":false,
+ "visibility":false,
+ "legend":true,
+ "valueMapping":false
+ },
+ "nullType":"null"
+ },
+ "remark":"Number of connections for the nginx proxy in each asset",
+ "x":"0.0",
+ "y":"6.0",
+ "elements":[
+ {
+ "expression":"nginx_vts_main_connections{module=\"Galaxy-hos-nginx\",status=\"active\"}",
+ "state":1,
+ "legend":"{{asset}}-{{status}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Server Requests (asset,code)",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"short",
+ "weight":58,
+ "param":{
+ "stack":false,
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "legend":{
+ "values":[],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "legend":true,
+ "valueMapping":true
+ },
+ "style":"line",
+ "nullType":"zero"
+ },
+ "remark":"The number of requests per second for each HOS Nginx asset, by response code.",
+ "x":"6.0",
+ "y":"0.0",
+ "elements":[
+ {
+ "expression":"sum(irate(nginx_vts_server_requests_total{module=\"Galaxy-hos-nginx\",code!=\"total\"}[5m])) by (code,asset)",
+ "state":1,
+ "legend":"{{asset}}_[{{code}}]",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Upstream Requests (asset,code)",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"short",
+ "weight":59,
+ "param":{
+ "stack":false,
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "legend":{
+ "values":[],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "visibility":false,
+ "legend":true,
+ "valueMapping":true
+ },
+ "style":"line",
+ "nullType":"zero"
+ },
+ "remark":"The number of upstream requests per second for each HOS Nginx asset, by response code.",
+ "x":"0.0",
+ "y":"3.0",
+ "elements":[
+ {
+ "expression":"sum(irate(nginx_vts_upstream_requests_total{module=\"Galaxy-hos-nginx\",code!=\"total\"}[5m])) by (code,asset)",
+ "state":1,
+ "legend":"{{asset}}_[{{code}}]",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Upstream Response Time (backend,upstream)",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"seconds(s)",
+ "weight":60,
+ "param":{
+ "stack":false,
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "legend":{
+ "values":[],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "legend":true,
+ "valueMapping":true
+ },
+ "style":"line",
+ "nullType":"zero"
+ },
+ "remark":"The upstream-only response processing time for each backend and upstream",
+ "x":"6.0",
+ "y":"3.0",
+ "elements":[
+ {
+ "expression":"max(irate(nginx_vts_upstream_response_seconds_total{module=\"Galaxy-hos-nginx\"}[5m])) by (backend, upstream)",
+ "state":1,
+ "legend":"{{backend}}-{{upstream}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ }
+ ]
+ }
+ ],
+ "children":[]
+ }
+] \ No newline at end of file