diff options
| author | wangchengcheng <[email protected]> | 2023-07-27 15:43:51 +0800 |
|---|---|---|
| committer | wangchengcheng <[email protected]> | 2023-07-27 15:43:51 +0800 |
| commit | 124f687daace8b85e5c74abac04bcd0a92744a8d (patch) | |
| tree | 4f563326b1be67cfb51bf6a04f1ca4d953536e76 /MPE/Nezha/OLAP Componet Status.json | |
| parent | 08686ae87f9efe7a590f48db74ed133b481c85b1 (diff) | |
P19 23.07 online-configP19
Diffstat (limited to 'MPE/Nezha/OLAP Componet Status.json')
| -rw-r--r-- | MPE/Nezha/OLAP Componet Status.json | 4597 |
1 files changed, 4597 insertions, 0 deletions
diff --git a/MPE/Nezha/OLAP Componet Status.json b/MPE/Nezha/OLAP Componet Status.json new file mode 100644 index 0000000..a7d846f --- /dev/null +++ b/MPE/Nezha/OLAP Componet Status.json @@ -0,0 +1,4597 @@ +[ + { + "name":"OLAP Componet Status", + "type":"dashboard", + "varType":0, + "param":{ + "chartShare":"none", + "variables":[], + "report":{ + "schedule":{ + "repeat":1, + "etime":"", + "stime":"", + "type":2, + "nums":[] + }, + "receivers":[], + "enable":false, + "range":{ + "unit":"day", + "interval":1, + "type":"previous" + } + } + }, + "remark":"", + "charts":[ + { + "name":"Summary", + "span":"12.0", + "height":"1.0", + "type":"Group", + "unit":"short", + "weight":0, + "param":{ + "thresholds":[], + "visibility":{ + "result":"", + "varName":"", + "varValue":"", + "operator":"" + }, + "enable":{ + "thresholds":false, + "legend":false, + "valueMapping":false + }, + "repeat":{ + "variable":"" + }, + "link":"", + "valueMapping":[], + "collapse":true, + "nullType":"null" + }, + "remark":"", + "x":"0.0", + "y":"1.0", + "datasource":"misc", + "children":[ + { + "name":"zookeeper live Status", + "span":"4.0", + "height":"2.0", + "type":"Line chart", + "unit":"short", + "weight":1, + "param":{ + "stack":false, + "thresholds":[ + { + "color":"#47e88d" + } + ], + "thresholdShow":true, + "visibility":{ + "result":"", + "varName":"", + "varValue":"", + "operator":"" + }, + "legend":{ + "values":[], + "show":true, + "placement":"bottom" + }, + "enable":{ + "thresholds":false, + "visibility":false, + "legend":true, + "valueMapping":false + }, + "nullType":"null" + }, + "remark":"The number of lived Zookeeper in each Data Center", + "x":"0.0", + "y":"0.0", + "elements":[ + { + "expression":"sum by (datacenter) (up{module=~\".*Zookeeper\"})", + "state":1, + "legend":"{{datacenter}}", + "name":"A" + } + ], + "datasource":"metrics" + }, + { + "name":"Nacos Live Status", + "span":"4.0", + "height":"2.0", + "type":"Line chart", + "unit":"short", + "weight":2, + "param":{ + 
"stack":false, + "visibility":{ + "result":"", + "varName":"", + "varValue":"", + "operator":"" + }, + "legend":{ + "values":[], + "placement":"bottom" + }, + "enable":{ + "thresholds":true, + "visibility":false, + "legend":true, + "valueMapping":true + }, + "style":"line", + "nullType":"zero" + }, + "remark":"The number of lived Nacos in each Data Center", + "x":"4.0", + "y":"0.0", + "elements":[ + { + "expression":"sum by(datacenter) (up{module=~\".*Nacos\"})", + "state":1, + "legend":"{{datacenter}}", + "name":"A" + } + ], + "datasource":"metrics" + }, + { + "name":"Kafka live Status", + "span":"4.0", + "height":"2.0", + "type":"Line chart", + "unit":"short", + "weight":3, + "param":{ + "stack":false, + "visibility":{ + "result":"", + "varName":"", + "varValue":"", + "operator":"" + }, + "legend":{ + "values":[], + "placement":"bottom" + }, + "enable":{ + "thresholds":true, + "visibility":false, + "legend":true, + "valueMapping":true + }, + "style":"line", + "nullType":"zero" + }, + "remark":"The number of lived Kafka in each Data Center", + "x":"8.0", + "y":"0.0", + "elements":[ + { + "expression":"sum by (datacenter) (up{module=~\".*Kafka\"})", + "state":1, + "legend":"{{datacenter}}", + "name":"A" + } + ], + "datasource":"metrics" + }, + { + "name":"Hadoop Yarn Live Status", + "span":"4.0", + "height":"2.0", + "type":"Line chart", + "unit":"short", + "weight":4, + "param":{ + "stack":false, + "thresholds":[], + "visibility":{ + "result":"", + "varName":"", + "varValue":"", + "operator":"" + }, + "legend":{ + "values":[], + "placement":"bottom" + }, + "enable":{ + "thresholds":true, + "visibility":false, + "legend":true, + "valueMapping":true + }, + "link":"", + "style":"line", + "valueMapping":[], + "nullType":"zero" + }, + "remark":"The number of lived Yarn in each Data Center", + "x":"8.0", + "y":"4.0", + "elements":[ + { + "expression":"sum by(datacenter)(up{module=~\".*Yarn\",endpoint=~\".*ResourceManager.*\"})", + "state":1, + 
"legend":"{{datacenter}}-ResourceManager", + "name":"A" + }, + { + "expression":"sum by(datacenter)(up{module=~\".*Yarn\",endpoint=~\".*NodeManager.*\"})", + "state":1, + "legend":"{{datacenter}}-NodeManager", + "name":"B" + } + ], + "datasource":"metrics" + }, + { + "name":"Druid live Status", + "span":"4.0", + "height":"2.0", + "type":"Line chart", + "unit":"short", + "weight":5, + "param":{ + "stack":false, + "legend":{ + "values":[], + "placement":"bottom" + }, + "enable":{ + "thresholds":true, + "legend":true, + "valueMapping":true + }, + "style":"line", + "nullType":"zero" + }, + "remark":"The number of lived Druid in each Data Center", + "x":"4.0", + "y":"2.0", + "elements":[ + { + "expression":"count(sys_swap_page_in{service=\"broker\"})", + "state":1, + "legend":"broker", + "name":"A" + }, + { + "expression":"count(sys_swap_page_in{service=\"router\",host=~\".*8088\"})", + "state":1, + "legend":"router", + "name":"B" + }, + { + "expression":"count(sys_swap_page_in{service=\"coordinator\"})", + "state":1, + "legend":"coordinator", + "name":"C" + }, + { + "expression":"count (sys_swap_page_in{service=\"middleManager\",host=~\".*8091\"})", + "state":1, + "legend":"middleManager", + "name":"D" + }, + { + "expression":"count (sys_swap_page_in{service=\"historical\"})", + "state":1, + "legend":"historical", + "name":"E" + } + ], + "datasource":"metrics" + }, + { + "name":"Clickhouse live Status", + "span":"4.0", + "height":"2.0", + "type":"Line chart", + "unit":"short", + "weight":6, + "param":{ + "stack":false, + "legend":{ + "values":[], + "placement":"bottom" + }, + "enable":{ + "thresholds":true, + "legend":true, + "valueMapping":true + }, + "style":"line", + "nullType":"zero" + }, + "remark":"The number of lived ClickHouse in each Data Center", + "x":"8.0", + "y":"2.0", + "elements":[ + { + "expression":"sum by(datacenter) (up{module=\"NC-Clickhouse\"})", + "state":1, + "legend":"{{datacenter}}", + "name":"A" + } + ], + "datasource":"metrics" + }, + { + 
"name":"Hbase Hmaster live Status", + "span":"4.0", + "height":"2.0", + "type":"Line chart", + "unit":"short", + "weight":7, + "param":{ + "stack":false, + "visibility":{ + "result":"", + "varName":"", + "varValue":"", + "operator":"" + }, + "legend":{ + "values":[], + "placement":"bottom" + }, + "enable":{ + "thresholds":true, + "visibility":false, + "legend":true, + "valueMapping":true + }, + "style":"line", + "nullType":"zero" + }, + "remark":"The number of lived Hbase Hmaster in each Data Center", + "x":"0.0", + "y":"6.0", + "elements":[ + { + "expression":"count by (datacenter) (Hadoop_HBase_numMasterWALs{module=~\".*HBase\"})", + "state":1, + "legend":"{{datacenter}}", + "name":"A" + } + ], + "datasource":"metrics" + }, + { + "name":"Hbase Region Server live Status", + "span":"4.0", + "height":"2.0", + "type":"Line chart", + "unit":"short", + "weight":8, + "param":{ + "stack":false, + "visibility":{ + "result":"", + "varName":"", + "varValue":"", + "operator":"" + }, + "legend":{ + "values":[], + "placement":"bottom" + }, + "enable":{ + "thresholds":true, + "visibility":false, + "legend":true, + "valueMapping":true + }, + "style":"line", + "nullType":"zero" + }, + "remark":"The number of lived Hbase Region in each Data Center", + "x":"4.0", + "y":"6.0", + "elements":[ + { + "expression":"sum by (datacenter) (Hadoop_HBase_numRegionServers{module=~\".*HBase\"})", + "state":1, + "legend":"{{datacenter}}", + "name":"A" + } + ], + "datasource":"metrics" + }, + { + "name":"Hadoop Namenode live Status", + "span":"4.0", + "height":"2.0", + "type":"Line chart", + "unit":"short", + "weight":9, + "param":{ + "stack":false, + "visibility":{ + "result":"", + "varName":"", + "varValue":"", + "operator":"" + }, + "legend":{ + "values":[], + "placement":"bottom" + }, + "enable":{ + "thresholds":true, + "visibility":false, + "legend":true, + "valueMapping":true + }, + "style":"line", + "nullType":"zero" + }, + "remark":"The number of lived Hadoop Namenode in each Data 
Center", + "x":"0.0", + "y":"4.0", + "elements":[ + { + "expression":"count by (datacenter) (Hadoop_NameNode_Total{module=~\".*Hadoop\"})", + "state":1, + "legend":"{{datacenter}}", + "name":"A" + } + ], + "datasource":"metrics" + }, + { + "name":"Hadoop Datanode live Status", + "span":"4.0", + "height":"2.0", + "type":"Line chart", + "unit":"short", + "weight":10, + "param":{ + "stack":false, + "visibility":{ + "result":"", + "varName":"", + "varValue":"", + "operator":"" + }, + "legend":{ + "values":[], + "placement":"bottom" + }, + "enable":{ + "thresholds":true, + "visibility":false, + "legend":true, + "valueMapping":true + }, + "style":"line", + "nullType":"zero" + }, + "remark":"The number of lived Hadoop Datanode in each Data Center", + "x":"4.0", + "y":"4.0", + "elements":[ + { + "expression":"count by (datacenter) (Hadoop_DataNode_HeartbeatsNumOps{module=~\".*Hadoop\"})", + "state":1, + "legend":"{{datacenter}}", + "name":"A" + } + ], + "datasource":"metrics" + }, + { + "name":"DC-GoHangout live Status", + "span":"4.0", + "height":"2.0", + "type":"Line chart", + "unit":"short", + "weight":11, + "param":{ + "rightYAxis":{ + "elementNames":[], + "unit":2, + "style":"line", + "label":"" + }, + "stack":false, + "thresholds":[], + "visibility":{ + "result":"", + "varName":"", + "varValue":"", + "operator":"" + }, + "legend":{ + "values":[], + "placement":"bottom" + }, + "enable":{ + "thresholds":true, + "visibility":false, + "legend":true, + "valueMapping":true + }, + "link":"", + "style":"line", + "valueMapping":[], + "nullType":"zero" + }, + "remark":"The number of lived GoHangout in each Data Center", + "x":"0.0", + "y":"2.0", + "elements":[ + { + "expression":"sum by(datacenter) (gohangout_status{module=\"DC-Gohangout\"})", + "state":1, + "legend":"{{datacenter}}", + "name":"A" + } + ], + "datasource":"metrics" + } + ] + }, + { + "name":"Zookeeper Status", + "span":"12.0", + "height":"1.0", + "type":"Group", + "unit":"short", + "weight":12, + "param":{ + 
"enable":{ + "thresholds":false, + "legend":false, + "valueMapping":false + }, + "collapse":true, + "nullType":"null" + }, + "remark":"", + "x":"0.0", + "y":"1.0", + "datasource":"misc", + "children":[ + { + "name":"Zookeeper Leader", + "span":"6.0", + "height":"3.0", + "type":"Line chart", + "unit":"short", + "weight":13, + "param":{ + "stack":false, + "legend":{ + "values":[], + "placement":"bottom" + }, + "enable":{ + "thresholds":true, + "legend":true, + "valueMapping":true + }, + "style":"line", + "nullType":"zero" + }, + "remark":"Zookeeper leader. Describe whether the zookeeper leader has changed and determine the stability of the current cluster.", + "x":"0.0", + "y":"0.0", + "elements":[ + { + "expression":"(zookeeper_leader>0)", + "state":1, + "legend":"{{datacenter}}-{{asset}}", + "name":"A" + } + ], + "datasource":"metrics" + }, + { + "name":"Disk IO Usage", + "span":"6.0", + "height":"3.0", + "type":"Line chart", + "unit":"percent(0-100)", + "weight":14, + "param":{ + "stack":false, + "thresholds":[ + { + "val":"80", + "color":"#d64f40", + "id":"3099ecf1", + "value":80 + } + ], + "visibility":{ + "result":"", + "varName":"", + "varValue":"", + "operator":"" + }, + "legend":{ + "values":[ + "avg", + "last", + "max" + ], + "placement":"bottom" + }, + "enable":{ + "thresholds":true, + "visibility":false, + "legend":true, + "valueMapping":true + }, + "link":"", + "style":"line", + "valueMapping":[], + "nullType":"zero" + }, + "remark":"The maximum percentage of DISK IO in each asset.", + "x":"6.0", + "y":"0.0", + "elements":[ + { + "expression":"sum by(asset)(rate(node_disk_io_time_seconds_total{olap_node_exporter=~\".*zookeeper.*\"}[5m]))*100", + "state":1, + "legend":"{{asset}}", + "name":"A" + } + ], + "datasource":"metrics" + }, + { + "name":"Outstanding Requests", + "span":"6.0", + "height":"3.0", + "type":"Line chart", + "unit":"short", + "weight":15, + "param":{ + "stack":false, + "thresholds":[ + { + "val":"100", + "color":"#d64f40", + "value":100 
+ } + ], + "legend":{ + "values":[ + "min", + "avg", + "last", + "max" + ], + "placement":"bottom" + }, + "enable":{ + "thresholds":true, + "legend":true, + "valueMapping":true + }, + "style":"line", + "nullType":"zero" + }, + "remark":"Number of queued requests in each Data Center. This goes up when the server receives more requests than it can process. When count > 100.", + "x":"0.0", + "y":"3.0", + "elements":[ + { + "expression":"sum by(datacenter) (zookeeper_outstanding_requests{module=\"NC-Zookeeper\"})", + "state":1, + "legend":"{{datacenter}}", + "name":"A" + }, + { + "expression":"sum by(datacenter) (zookeeper_outstanding_requests{module=\"DC-Zookeeper\"})", + "state":1, + "legend":"{{datacenter}}", + "name":"B" + } + ], + "datasource":"metrics" + }, + { + "name":"Connections /sec", + "span":"6.0", + "height":"3.0", + "type":"Line chart", + "unit":"short", + "weight":16, + "param":{ + "stack":false, + "legend":{ + "values":[ + "min", + "avg", + "last", + "max" + ], + "placement":"bottom" + }, + "enable":{ + "thresholds":true, + "legend":true, + "valueMapping":true + }, + "style":"line", + "nullType":"zero" + }, + "remark":"The number of connections per second in each Data Center", + "x":"6.0", + "y":"3.0", + "elements":[ + { + "expression":"sum by(datacenter) (rate(zookeeper_connections{module=\"NC-Zookeeper\"}[5m]))", + "state":1, + "legend":"{{datacenter}}", + "name":"A" + }, + { + "expression":"sum by(datacenter) (rate(zookeeper_connections{module=\"DC-Zookeeper\"}[5m]))", + "state":1, + "legend":"{{datacenter}}", + "name":"B" + } + ], + "datasource":"metrics" + }, + { + "name":"Znodes", + "span":"6.0", + "height":"3.0", + "type":"Table", + "unit":"short", + "weight":17, + "param":{ + "visibility":{ + "result":"", + "varName":"", + "varValue":"", + "operator":"" + }, + "columns":[ + { + "unit":2, + "display":"{{A.$legend}}", + "show":true, + "title":"Datacenter", + "error":false + }, + { + "unit":2, + "display":"{{A.$value}}", + "show":true, + 
"title":"Znodes", + "error":false + } + ], + "enable":{ + "thresholds":false, + "visibility":false, + "legend":true, + "valueMapping":false + }, + "legend":{ + "values":[], + "show":true, + "placement":"bottom" + }, + "nullType":"zero", + "statistics":"last" + }, + "remark":"Number of znodes in the Zookeeper namespace in each data center", + "x":"0.0", + "y":"6.0", + "elements":[ + { + "expression":"sum by(datacenter) (zookeeper_znode_count{module=~\"NC-Zookeeper|DC-Zookeeper\"})", + "state":1, + "legend":"{{datacenter}}", + "name":"A" + } + ], + "datasource":"metrics" + }, + { + "name":"Avg Latency", + "span":"6.0", + "height":"3.0", + "type":"Line chart", + "unit":"milliseconds", + "weight":18, + "param":{ + "stack":false, + "thresholds":[ + { + "val":"100000", + "color":"#d64f40", + "value":100000 + } + ], + "visibility":{ + "result":"", + "varName":"", + "varValue":"", + "operator":"" + }, + "legend":{ + "values":[], + "placement":"bottom" + }, + "enable":{ + "thresholds":true, + "visibility":false, + "legend":true, + "valueMapping":true + }, + "style":"line", + "nullType":"zero" + }, + "remark":"Amount of time it takes for the server to respond to a client request (since the server was started) in each data center. 
When latency > 10 (Ticks).", + "x":"6.0", + "y":"6.0", + "elements":[ + { + "expression":"sum by(datacenter) (irate(zookeeper_latency_avg_ms{module=~\".*Zookeeper\"}[5m]))", + "state":1, + "legend":"{{datacenter}}", + "name":"A" + } + ], + "datasource":"metrics" + }, + { + "name":"Max Latency", + "span":"6.0", + "height":"3.0", + "type":"Line chart", + "unit":"milliseconds", + "weight":19, + "param":{ + "stack":false, + "thresholds":[ + { + "val":"100000", + "color":"#d64f40" + } + ], + "visibility":{ + "result":"", + "varName":"", + "varValue":"", + "operator":"" + }, + "legend":{ + "values":[], + "placement":"bottom" + }, + "enable":{ + "thresholds":false, + "visibility":false, + "legend":true, + "valueMapping":true + }, + "style":"line", + "nullType":"zero" + }, + "remark":"Amount of time it takes for the server to respond to a client request (since the server was started) in each data center. When latency > 10 (Ticks).", + "x":"0.0", + "y":"9.0", + "elements":[ + { + "expression":"max by(datacenter) (irate(zookeeper_latency_max_ms{module=~\"NC-Zookeeper|DC-Zookeeper\"}[5m])) ", + "state":1, + "legend":"{{datacenter}}", + "name":"A" + } + ], + "datasource":"metrics" + }, + { + "name":"Sent Packets /sec", + "span":"6.0", + "height":"3.0", + "type":"Line chart", + "unit":"packets/sec", + "weight":20, + "param":{ + "stack":false, + "legend":{ + "values":[ + "avg", + "last", + "max" + ], + "placement":"bottom" + }, + "enable":{ + "thresholds":true, + "legend":true, + "valueMapping":true + }, + "style":"line", + "nullType":"zero" + }, + "remark":"Number of client packets sent (responses and notifications) in each data center.", + "x":"6.0", + "y":"9.0", + "elements":[ + { + "expression":"sum by(datacenter) (irate(zookeeper_packets_sent{module=\"NC-Zookeeper\"}[5m]))", + "state":1, + "legend":"{{datacenter}}", + "name":"A" + }, + { + "expression":"sum by(datacenter) (irate(zookeeper_packets_sent{module=\"DC-Zookeeper\"}[5m]))", + "state":1, + 
"legend":"{{datacenter}}", + "name":"B" + } + ], + "datasource":"metrics" + }, + { + "name":"Received Packets /sec", + "span":"6.0", + "height":"3.0", + "type":"Line chart", + "unit":"packets/sec", + "weight":21, + "param":{ + "stack":false, + "legend":{ + "values":[ + "total", + "min", + "avg", + "last", + "max" + ], + "placement":"bottom" + }, + "enable":{ + "thresholds":true, + "legend":true, + "valueMapping":true + }, + "style":"line", + "nullType":"zero" + }, + "remark":"Number of client requests (typically operations) received in each data center.", + "x":"0.0", + "y":"12.0", + "elements":[ + { + "expression":"sum by(datacenter) (irate(zookeeper_packets_received{module=\"NC-Zookeeper\"}[5m]))", + "state":1, + "legend":"{{datacenter}}", + "name":"A" + }, + { + "expression":"sum by(datacenter) (irate(zookeeper_packets_received{module=\"DC-Zookeeper\"}[5m]))", + "state":1, + "legend":"{{datacenter}}", + "name":"B" + } + ], + "datasource":"metrics" + } + ] + }, + { + "name":"Nacos Status", + "span":"12.0", + "height":"3.0", + "type":"Group", + "unit":"short", + "weight":22, + "param":{ + "collapse":true, + "nullType":"null" + }, + "remark":"", + "x":"0.0", + "y":"2.0", + "datasource":"misc", + "children":[ + { + "name":"QPS", + "span":"6.0", + "height":"3.0", + "type":"Line chart", + "unit":"short", + "weight":23, + "param":{ + "stack":false, + "legend":{ + "values":[ + "avg", + "last", + "max" + ], + "placement":"bottom" + }, + "enable":{ + "thresholds":true, + "legend":true, + "valueMapping":true + }, + "style":"line", + "nullType":"zero" + }, + "remark":"Nacos queries per second", + "x":"0.0", + "y":"0.0", + "elements":[ + { + "expression":"sum(rate(http_server_requests_seconds_count{uri=~'/v1/cs/configs|/nacos/v1/ns/instance|/nacos/v1/ns/health', module=\"NC-Nacos\"}[5m])) by (method,uri)", + "state":1, + "legend":"{{method}}", + "name":"A" + } + ], + "datasource":"metrics" + }, + { + "name":"Request Errors", + "span":"6.0", + "height":"3.0", + "type":"Line 
chart", + "unit":"short", + "weight":24, + "param":{ + "stack":false, + "legend":{ + "values":[ + "avg", + "last", + "max" + ], + "placement":"bottom" + }, + "enable":{ + "thresholds":true, + "legend":true, + "valueMapping":true + }, + "style":"line", + "nullType":"zero" + }, + "remark":"The requests failed counter per second of each http method.", + "x":"6.0", + "y":"0.0", + "elements":[ + { + "expression":"sum(irate(http_server_requests_seconds_count{exception!='None',module=\"NC-Nacos\"}[5m])) by (method,uri)", + "state":1, + "legend":"{{method}}-{{uri}}", + "name":"A" + } + ], + "datasource":"metrics" + }, + { + "name":"Memory Usage", + "span":"6.0", + "height":"3.0", + "type":"Line chart", + "unit":"percent(0.0-1.0)", + "weight":25, + "param":{ + "stack":false, + "legend":{ + "values":[ + "avg", + "last", + "max" + ], + "placement":"bottom" + }, + "enable":{ + "thresholds":true, + "legend":true, + "valueMapping":true + }, + "style":"line", + "nullType":"zero" + }, + "remark":"The memory usage in each asset", + "x":"0.0", + "y":"3.0", + "elements":[ + { + "expression":"sum by(asset) (jvm_memory_used_bytes{module=\"NC-Nacos\"}) / sum by(asset) (jvm_memory_max_bytes{module=\"NC-Nacos\"})", + "state":1, + "legend":"{{asset}}", + "name":"A" + } + ], + "datasource":"metrics" + }, + { + "name":"CPU Usage", + "span":"6.0", + "height":"3.0", + "type":"Line chart", + "unit":"percent(0.0-1.0)", + "weight":26, + "param":{ + "stack":false, + "legend":{ + "values":[ + "avg", + "last", + "max" + ], + "placement":"bottom" + }, + "enable":{ + "thresholds":true, + "legend":true, + "valueMapping":true + }, + "style":"line", + "nullType":"zero" + }, + "remark":"The CPU usage in each asset", + "x":"6.0", + "y":"3.0", + "elements":[ + { + "expression":"system_cpu_usage{module=\"NC-Nacos\"}", + "state":1, + "legend":"{{asset}}", + "name":"A" + } + ], + "datasource":"metrics" + } + ] + }, + { + "name":"Kafka Status", + "span":"12.0", + "height":"3.0", + "type":"Group", + 
"unit":"short", + "weight":27, + "param":{ + "collapse":true, + "nullType":"null" + }, + "remark":"", + "x":"0.0", + "y":"2.0", + "datasource":"misc", + "children":[ + { + "name":"Requests /sec", + "span":"6.0", + "height":"3.0", + "type":"Line chart", + "unit":"short", + "weight":28, + "param":{ + "stack":false, + "legend":{ + "values":[ + "avg", + "last", + "max" + ], + "placement":"bottom" + }, + "enable":{ + "thresholds":true, + "legend":true, + "valueMapping":true + }, + "style":"line", + "nullType":"zero" + }, + "remark":"Number of (producer|consumer|follower) requests per second in each Data Center.", + "x":"6.0", + "y":"6.0", + "elements":[ + { + "expression":"sum by(datacenter) (kafka_server_socket_server_metrics_request_rate)", + "state":1, + "legend":"{{datacenter}}", + "name":"A" + } + ], + "datasource":"metrics" + }, + { + "name":"In Bytes /sec", + "span":"6.0", + "height":"3.0", + "type":"Line chart", + "unit":"bytes", + "weight":29, + "param":{ + "stack":false, + "legend":{ + "values":[ + "total", + "min", + "avg", + "last", + "max" + ], + "placement":"bottom" + }, + "enable":{ + "thresholds":true, + "legend":true, + "valueMapping":true + }, + "style":"line", + "nullType":"zero" + }, + "remark":"Collecting and processing Traffic Records with incoming byte rate in each Data Center.", + "x":"0.0", + "y":"0.0", + "elements":[ + { + "expression":"sum by(datacenter) (kafka_server_BrokerTopicMetrics_OneMinuteRate{name=\"BytesInPerSec\",topic=\"\"})", + "state":1, + "legend":"{{datacenter}}", + "name":"A" + } + ], + "datasource":"metrics" + }, + { + "name":"Request Type Erros /sec", + "span":"6.0", + "height":"3.0", + "type":"Line chart", + "unit":"short", + "weight":30, + "param":{ + "stack":false, + "legend":{ + "values":[ + "avg", + "last", + "max" + ], + "placement":"bottom" + }, + "enable":{ + "thresholds":true, + "legend":true, + "valueMapping":true + }, + "style":"line", + "nullType":"null" + }, + "remark":"The number of request type errors per second
in each data center.", + "x":"0.0", + "y":"9.0", + "elements":[ + { + "expression":"sum by(datacenter,error) \n(rate(kafka_network_RequestMetrics_Errors_total{module=\"NC-Kafka\",error!=\"NONE\"}[5m]))", + "state":1, + "legend":"{{error}}", + "name":"A" + } + ], + "datasource":"metrics" + }, + { + "name":"Out Bytes /sec", + "span":"6.0", + "height":"3.0", + "type":"Line chart", + "unit":"bytes", + "weight":31, + "param":{ + "stack":false, + "legend":{ + "values":[ + "total", + "min", + "avg", + "last", + "max" + ], + "placement":"bottom" + }, + "enable":{ + "thresholds":true, + "legend":true, + "valueMapping":true + }, + "style":"line", + "nullType":"zero" + }, + "remark":"Collecting and processing Traffic Records with outgoing byte rate in each Data Center.", + "x":"6.0", + "y":"0.0", + "elements":[ + { + "expression":"sum by(datacenter) (kafka_server_BrokerTopicMetrics_OneMinuteRate{name=\"BytesOutPerSec\",topic=\"\"})", + "state":1, + "legend":"{{datacenter}}", + "name":"A" + } + ], + "datasource":"metrics" + }, + { + "name":"Failed fetch request /sec", + "span":"6.0", + "height":"3.0", + "type":"Line chart", + "unit":"short", + "weight":32, + "param":{ + "stack":false, + "legend":{ + "values":[ + "avg", + "last", + "max" + ], + "placement":"bottom" + }, + "enable":{ + "thresholds":true, + "legend":true, + "valueMapping":true + }, + "style":"line", + "nullType":"zero" + }, + "remark":"Number of data read requests from consumers that brokers failed to process for this topic in each Data Center.", + "x":"6.0", + "y":"9.0", + "elements":[ + { + "expression":"sum by(datacenter) (kafka_server_BrokerTopicMetrics_OneMinuteRate{name=\"FailedFetchRequestsPerSec\",topic=\"\"})", + "state":1, + "legend":"{{datacenter}}", + "name":"A" + } + ], + "datasource":"metrics" + }, + { + "name":"Failed produce request /sec", + "span":"6.0", + "height":"3.0", + "type":"Line chart", + "unit":"short", + "weight":33, + "param":{ + "stack":false, + "legend":{ + "values":[ + "avg", + 
"last", + "max" + ], + "placement":"bottom" + }, + "enable":{ + "thresholds":true, + "legend":true, + "valueMapping":true + }, + "style":"line", + "nullType":"zero" + }, + "remark":"Number of data produce requests from producers that brokers failed to process for this topic in each Data Center.", + "x":"0.0", + "y":"12.0", + "elements":[ + { + "expression":"sum by(datacenter) (kafka_server_BrokerTopicMetrics_OneMinuteRate{name=\"FailedProduceRequestsPerSec\",topic=\"\"})", + "state":1, + "legend":"{{datacenter}}", + "name":"A" + } + ], + "datasource":"metrics" + }, + { + "name":"Rejected Bytes /sec", + "span":"6.0", + "height":"3.0", + "type":"Line chart", + "unit":"short", + "weight":34, + "param":{ + "stack":false, + "legend":{ + "values":[ + "avg", + "last", + "max" + ], + "placement":"bottom" + }, + "enable":{ + "thresholds":true, + "legend":true, + "valueMapping":true + }, + "style":"line", + "nullType":"zero" + }, + "remark":"Amount of data in messages rejected by broker for this topic in each Data Center.", + "x":"6.0", + "y":"12.0", + "elements":[ + { + "expression":"sum by(datacenter) (kafka_server_BrokerTopicMetrics_OneMinuteRate{name=\"BytesRejectedPerSec\",topic=\"\"})", + "state":1, + "legend":"{{datacenter}}", + "name":"A" + } + ], + "datasource":"metrics" + }, + { + "name":"Asset Disk IO Usage", + "span":"6.0", + "height":"3.0", + "type":"Line chart", + "unit":"percent(0-100)", + "weight":35, + "param":{ + "stack":false, + "thresholds":[ + { + "val":"80", + "color":"#d64f40", + "id":"6954aa6b", + "value":80 + } + ], + "visibility":{ + "result":"", + "varName":"", + "varValue":"", + "operator":"" + }, + "legend":{ + "values":[ + "avg", + "last", + "max" + ], + "placement":"bottom" + }, + "enable":{ + "thresholds":true, + "visibility":false, + "legend":true, + "valueMapping":true + }, + "link":"", + "style":"line", + "valueMapping":[], + "nullType":"zero" + }, + "remark":"The maximum percentage of DISK IO in each data center and asset.", + "x":"0.0", +
"y":"3.0", + "elements":[ + { + "expression":"sum by(asset)(rate(node_disk_io_time_seconds_total{olap_node_exporter=~\".*kafka.*\"}[5m]))*100", + "state":1, + "legend":"{{asset}}", + "name":"A" + } + ], + "datasource":"metrics" + }, + { + "name":"Avg Zookeeper Request Latency", + "span":"6.0", + "height":"3.0", + "type":"Line chart", + "unit":"milliseconds", + "weight":36, + "param":{ + "stack":false, + "legend":{ + "values":[ + "min", + "avg", + "last", + "max" + ], + "placement":"bottom" + }, + "enable":{ + "thresholds":true, + "legend":true, + "valueMapping":true + }, + "style":"line", + "nullType":"zero" + }, + "remark":"The average zookeeper request latency(ms) in each data center and asset.", + "x":"0.0", + "y":"6.0", + "elements":[ + { + "expression":"avg by (datacenter,asset)(kafka_server_ZooKeeperClientMetrics_Mean{module=\"NC-Kafka\"}) ", + "state":1, + "legend":"{{datacenter}} - {{asset}}", + "name":"A" + } + ], + "datasource":"metrics" + }, + { + "name":"Open File Descriptors(Max)", + "span":"6.0", + "height":"3.0", + "type":"Line chart", + "unit":"short", + "weight":37, + "param":{ + "stack":false, + "thresholds":[ + { + "val":"52428", + "color":"#d64f40" + } + ], + "legend":{ + "values":[ + "min", + "avg", + "last", + "max" + ], + "placement":"bottom" + }, + "enable":{ + "thresholds":false, + "legend":true, + "valueMapping":true + }, + "style":"line", + "nullType":"zero" + }, + "remark":"The maximum number of open file descriptors in each data center and asset.", + "x":"6.0", + "y":"3.0", + "elements":[ + { + "expression":"max by (datacenter,asset)(process_open_fds{module=~\"NC-Kafka\"}) ", + "state":1, + "legend":"{{datacenter}} - {{asset}}", + "name":"A" + } + ], + "datasource":"metrics" + } + ] + }, + { + "name":"Hadoop Yarn Status", + "span":"12.0", + "height":"1.0", + "type":"Group", + "unit":"short", + "weight":38, + "param":{ + "thresholds":[], + "visibility":{ + "result":"", + "varName":"", + "varValue":"", + "operator":"" + }, + "enable":{ +
"thresholds":false, + "legend":false, + "valueMapping":false + }, + "repeat":{ + "variable":"" + }, + "link":"", + "valueMapping":[], + "collapse":false, + "nullType":"null" + }, + "remark":"", + "x":"0.0", + "y":"2.0", + "datasource":"misc", + "children":[ + { + "name":"National Center Yarn active nodes", + "span":"6.0", + "height":"3.0", + "type":"Line chart", + "unit":"short", + "weight":39, + "param":{ + "stack":0, + "thresholds":[ + { + "color":"#6f22e2", + "id":"1a7db7a" + } + ], + "thresholdShow":true, + "visibility":{ + "result":"", + "varName":"", + "varValue":"", + "operator":"" + }, + "legend":{ + "values":[], + "show":true, + "placement":"bottom" + }, + "enable":{ + "thresholds":false, + "visibility":false, + "legend":true, + "valueMapping":false + }, + "link":"", + "valueMapping":[], + "nullType":"zero" + }, + "remark":"The active num of Hadoop Yarn nodes in the National Center.", + "x":"0.0", + "y":"3.0", + "elements":[ + { + "expression":"up{module=\"NC-Yarn\",endpoint=~\".*ResourceManager.*\"}", + "state":1, + "legend":"{{asset}}-ResourceManager", + "name":"A" + }, + { + "expression":"up{module=~\"NC-Yarn\",endpoint=~\".*NodeManager.*\"}", + "state":1, + "legend":"{{asset}}-NodeManager", + "name":"B" + } + ], + "datasource":"metrics" + }, + { + "name":"Application And Container Running Num", + "span":"6.0", + "height":"3.0", + "type":"Line chart", + "unit":"short", + "weight":40, + "param":{ + "stack":0, + "thresholds":[ + { + "color":"#933be5", + "id":"59117b59" + } + ], + "thresholdShow":true, + "visibility":{ + "result":"", + "varName":"", + "varValue":"", + "operator":"" + }, + "legend":{ + "values":[], + "show":true, + "placement":"bottom" + }, + "enable":{ + "thresholds":false, + "visibility":false, + "legend":true, + "valueMapping":false + }, + "link":"", + "valueMapping":[], + "nullType":"zero" + }, + "remark":"The number of running application and container in each Data center.", + "x":"6.0", + "y":"0.0", + "elements":[ + { + 
"expression":"sum by(datacenter) (Hadoop_ResourceManager_AppsRunning{module=~\".*-Yarn\",endpoint=~\".*ResourceManager.*\",q1=\"default\"})", + "state":1, + "legend":"{{datacenter}}-Application", + "name":"A" + }, + { + "expression":"sum by(datacenter) (Hadoop_ResourceManager_AllocatedContainers{module=~\".*-Yarn\",endpoint=~\".*ResourceManager.*\",q1=\"default\"})", + "state":1, + "legend":"{{datacenter}}-Container", + "name":"B" + } + ], + "datasource":"metrics" + }, + { + "name":"NodeManager Unhealthy", + "span":"6.0", + "height":"3.0", + "type":"Line chart", + "unit":"none", + "weight":41, + "param":{ + "stack":false, + "thresholds":[], + "visibility":{ + "result":"", + "varName":"", + "varValue":"", + "operator":"" + }, + "legend":{ + "values":[ + "max", + "average" + ], + "placement":"bottom" + }, + "enable":{ + "thresholds":false, + "visibility":false, + "legend":false, + "valueMapping":true + }, + "link":"", + "style":"line", + "valueMapping":[], + "nullType":"zero" + }, + "remark":"Unhealthy NodeManager nodes in each Data center Hadoop yarn cluster.", + "x":"0.0", + "y":"0.0", + "elements":[ + { + "expression":"sum by(asset)(Hadoop_ResourceManager_NumUnhealthyNMs{module=~\".*Yarn\"})", + "state":1, + "legend":"{{asset}}", + "name":"A" + } + ], + "datasource":"metrics" + }, + { + "name":"Resourcemanager Memory Usage", + "span":"6.0", + "height":"3.0", + "type":"Line chart", + "unit":"percent(0.0-1.0)", + "weight":42, + "param":{ + "stack":false, + "thresholds":[ + { + "color":"#C4162AFF", + "id":"97bd450", + "value":0.8 + } + ], + "visibility":{ + "result":"", + "varName":"", + "varValue":"", + "operator":"" + }, + "legend":{ + "values":[ + "avg", + "max" + ], + "placement":"bottom" + }, + "enable":{ + "thresholds":true, + "visibility":false, + "legend":true, + "valueMapping":true + }, + "link":"", + "style":"line", + "valueMapping":[], + "nullType":"zero" + }, + "remark":"The Hadoop Yarn ResourceManager nodes memory Utilization.", + "x":"6.0", + "y":"6.0", 
+ "elements":[ + { + "expression":"sum by(asset) (Hadoop_ResourceManager_MemHeapUsedM{module=~\".*Yarn\",endpoint=~\".*ResourceManager.*\"}/Hadoop_ResourceManager_MemHeapMaxM{module=~\".*Yarn\",endpoint=~\".*ResourceManager.*\"})", + "state":1, + "legend":"{{asset}}", + "name":"A" + } + ], + "datasource":"metrics" + }, + { + "name":"ResourceManager CPU Usage", + "span":"6.0", + "height":"3.0", + "type":"Line chart", + "unit":"percent(0.0-1.0)", + "weight":43, + "param":{ + "stack":false, + "thresholds":[ + { + "val":"0.7", + "color":"#d64f40", + "id":"5f03a027", + "value":0.8 + } + ], + "visibility":{ + "result":"", + "varName":"", + "varValue":"", + "operator":"" + }, + "legend":{ + "values":[ + "avg", + "max" + ], + "placement":"bottom" + }, + "enable":{ + "thresholds":true, + "visibility":false, + "legend":true, + "valueMapping":true + }, + "link":"", + "style":"line", + "valueMapping":[], + "nullType":"zero" + }, + "remark":"The Hadoop Yarn ResourceManager nodes CPU Utilization.", + "x":"0.0", + "y":"6.0", + "elements":[ + { + "expression":"sum by (asset)(java_lang_OperatingSystem_ProcessCpuLoad{module=~\".*Yarn\",endpoint=~\".*ResourceManager.*\"})", + "state":1, + "legend":"{{asset}}", + "name":"A" + } + ], + "datasource":"metrics" + }, + { + "name":"Data Center Yarn active nodes", + "span":"6.0", + "height":"3.0", + "type":"Line chart", + "unit":"short", + "weight":44, + "param":{ + "stack":0, + "thresholds":[ + { + "color":"#6f22e2", + "id":"0c2cf76" + } + ], + "thresholdShow":true, + "visibility":{ + "result":"", + "varName":"", + "varValue":"", + "operator":"" + }, + "legend":{ + "values":[], + "show":true, + "placement":"bottom" + }, + "enable":{ + "thresholds":false, + "visibility":false, + "legend":true, + "valueMapping":false + }, + "link":"", + "valueMapping":[], + "nullType":"zero" + }, + "remark":"The active num of Hadoop Yarn nodes in the Data Center.", + "x":"6.0", + "y":"3.0", + "elements":[ + { + 
"expression":"up{module=\"DC-Yarn\",endpoint=~\".*ResourceManager.*\"}", + "state":1, + "legend":"{{asset}}-ResourceManager", + "name":"A" + }, + { + "expression":"up{module=~\"DC-Yarn\",endpoint=~\".*NodeManager.*\"}", + "state":1, + "legend":"{{asset}}-NodeManager", + "name":"B" + } + ], + "datasource":"metrics" + }, + { + "name":"NodeManager CPU Usage", + "span":"6.0", + "height":"3.0", + "type":"Line chart", + "unit":"percent(0.0-1.0)", + "weight":45, + "param":{ + "stack":false, + "thresholds":[ + { + "val":"0.7", + "color":"#d64f40", + "id":"14c9ce25", + "value":0.8 + } + ], + "visibility":{ + "result":"", + "varName":"", + "varValue":"", + "operator":"" + }, + "legend":{ + "values":[ + "avg", + "max" + ], + "placement":"bottom" + }, + "enable":{ + "thresholds":true, + "visibility":false, + "legend":true, + "valueMapping":true + }, + "link":"", + "style":"line", + "valueMapping":[], + "nullType":"zero" + }, + "remark":"The Hadoop Yarn NodeManager nodes CPU Utilization.", + "x":"0.0", + "y":"12.0", + "elements":[ + { + "expression":"sum by (asset)(java_lang_OperatingSystem_ProcessCpuLoad{module=~\".*Yarn\",endpoint=~\".*NodeManager.*\"})", + "state":1, + "legend":"{{asset}}", + "name":"A" + } + ], + "datasource":"metrics" + }, + { + "name":"NodeManager Memory Usage", + "span":"6.0", + "height":"3.0", + "type":"Line chart", + "unit":"percent(0.0-1.0)", + "weight":46, + "param":{ + "stack":false, + "thresholds":[ + { + "val":"0.7", + "color":"#d64f40", + "id":"7919a181", + "value":0.8 + } + ], + "visibility":{ + "result":"", + "varName":"", + "varValue":"", + "operator":"" + }, + "legend":{ + "values":[ + "avg", + "max" + ], + "placement":"bottom" + }, + "enable":{ + "thresholds":true, + "visibility":false, + "legend":true, + "valueMapping":true + }, + "link":"", + "style":"line", + "valueMapping":[], + "nullType":"zero" + }, + "remark":"The Hadoop Yarn NodeManager nodes memory Utilization.", + "x":"6.0", + "y":"12.0", + "elements":[ + { + "expression":"sum by 
(asset)(Hadoop_NodeManager_MemHeapUsedM{module=~\".*Yarn\",endpoint=~\".*NodeManager.*\"}/Hadoop_NodeManager_MemHeapMaxM{module=~\".*Yarn\",endpoint=~\".*NodeManager.*\"})", + "state":1, + "legend":"{{asset}}", + "name":"A" + } + ], + "datasource":"metrics" + }, + { + "name":"NodeManager RpcProcessing AvgTime", + "span":"6.0", + "height":"3.0", + "type":"Line chart", + "unit":"milliseconds", + "weight":47, + "param":{ + "stack":0, + "thresholds":[ + { + "color":"#7f4ee8", + "id":"57feb110" + } + ], + "thresholdShow":true, + "visibility":{ + "result":"", + "varName":"", + "varValue":"", + "operator":"" + }, + "legend":{ + "values":[ + "avg", + "max" + ], + "show":true, + "placement":"bottom" + }, + "enable":{ + "thresholds":false, + "visibility":false, + "legend":true, + "valueMapping":false + }, + "showHeader":1, + "link":"", + "valueMapping":[], + "nullType":"null" + }, + "remark":"The average RPC call time of the NodeManager.", + "x":"0.0", + "y":"18.0", + "elements":[ + { + "expression":"sum by(asset)(Hadoop_NodeManager_RpcProcessingTimeAvgTime{module=~\".*Yarn\",endpoint=~\".*NodeManager.*\",name=\"RpcActivityForPort9923\"})", + "state":1, + "legend":"{{asset}}", + "name":"A" + } + ], + "datasource":"metrics" + }, + { + "name":"ResourceManager GC Count", + "span":"6.0", + "height":"3.0", + "type":"Line chart", + "unit":"none", + "weight":48, + "param":{ + "stack":0, + "thresholds":[ + { + "color":"#bd00f2", + "id":"1eace07" + } + ], + "thresholdShow":true, + "visibility":{ + "result":"", + "varName":"", + "varValue":"", + "operator":"" + }, + "legend":{ + "values":[ + "avg", + "max" + ], + "show":true, + "placement":"bottom" + }, + "enable":{ + "thresholds":false, + "visibility":false, + "legend":true, + "valueMapping":false + }, + "showHeader":1, + "link":"", + "valueMapping":[], + "nullType":"null" + }, + "remark":"Number of GC times of the ResourceManager node", + "x":"0.0", + "y":"9.0", + "elements":[ + { + "expression":"sum by(asset,name) 
(irate(java_lang_GarbageCollector_CollectionCount{module=~\".*Yarn\",endpoint=~\".*ResourceManager.*\",name=~\"PS MarkSweep|PS Scavenge\"}[5m]))", + "state":1, + "legend":"{{asset}}-{{name}}", + "name":"A" + } + ], + "datasource":"metrics" + }, + { + "name":"NodeManager GC Count", + "span":"6.0", + "height":"3.0", + "type":"Line chart", + "unit":"none", + "weight":49, + "param":{ + "stack":0, + "thresholds":[ + { + "color":"#bd00f2", + "id":"df390e7" + } + ], + "thresholdShow":true, + "visibility":{ + "result":"", + "varName":"", + "varValue":"", + "operator":"" + }, + "legend":{ + "values":[ + "avg", + "max" + ], + "show":true, + "placement":"bottom" + }, + "enable":{ + "thresholds":false, + "visibility":false, + "legend":true, + "valueMapping":false + }, + "showHeader":1, + "link":"", + "valueMapping":[], + "nullType":"null" + }, + "remark":"Number of GC times of the NodeManager node", + "x":"0.0", + "y":"15.0", + "elements":[ + { + "expression":"sum by(asset,name) (irate(java_lang_GarbageCollector_CollectionCount{module=~\".*Yarn\",endpoint=~\".*NodeManager.*\",name=~\"PS MarkSweep|PS Scavenge\"}[5m]))", + "state":1, + "legend":"{{asset}}-{{name}}", + "name":"A" + } + ], + "datasource":"metrics" + }, + { + "name":"ResourceManager GC Time", + "span":"6.0", + "height":"3.0", + "type":"Line chart", + "unit":"milliseconds", + "weight":50, + "param":{ + "stack":0, + "thresholds":[ + { + "color":"#bd00f2", + "id":"7897043" + } + ], + "thresholdShow":true, + "visibility":{ + "result":"", + "varName":"", + "varValue":"", + "operator":"" + }, + "legend":{ + "values":[ + "avg", + "max" + ], + "show":true, + "placement":"bottom" + }, + "enable":{ + "thresholds":false, + "visibility":false, + "legend":true, + "valueMapping":false + }, + "showHeader":1, + "link":"", + "valueMapping":[], + "nullType":"null" + }, + "remark":"ResourceManager GC duration", + "x":"6.0", + "y":"9.0", + "elements":[ + { + "expression":"sum by(asset,name) 
(irate(java_lang_GarbageCollector_CollectionTime{module=~\".*Yarn\",endpoint=~\".*ResourceManager.*\",name=~\"PS MarkSweep|PS Scavenge\"}[5m]))", + "state":1, + "legend":"{{asset}}-{{name}}", + "name":"A" + } + ], + "datasource":"metrics" + }, + { + "name":"NodeManager GC Duration", + "span":"6.0", + "height":"3.0", + "type":"Line chart", + "unit":"milliseconds", + "weight":51, + "param":{ + "stack":0, + "thresholds":[ + { + "color":"#bd00f2", + "id":"7ba98f29" + } + ], + "thresholdShow":true, + "visibility":{ + "result":"", + "varName":"", + "varValue":"", + "operator":"" + }, + "legend":{ + "values":[ + "avg", + "max" + ], + "show":true, + "placement":"bottom" + }, + "enable":{ + "thresholds":false, + "visibility":false, + "legend":true, + "valueMapping":false + }, + "showHeader":1, + "link":"", + "valueMapping":[], + "nullType":"null" + }, + "remark":"NodeManager GC duration", + "x":"6.0", + "y":"15.0", + "elements":[ + { + "expression":"sum by(asset,name) (irate(java_lang_GarbageCollector_CollectionTime{module=~\".*Yarn\",endpoint=~\".*NodeManager.*\",name=~\"PS MarkSweep|PS Scavenge\"}[5m]))", + "state":1, + "legend":"{{asset}}-{{name}}", + "name":"A" + } + ], + "datasource":"metrics" + } + ] + }, + { + "name":"Druid Status", + "span":"12.0", + "height":"3.0", + "type":"Group", + "unit":"short", + "weight":52, + "param":{ + "collapse":true, + "nullType":"null" + }, + "remark":"", + "x":"0.0", + "y":"23.0", + "datasource":"misc", + "children":[ + { + "name":"Druid Consumer Lag", + "span":"12.0", + "height":"3.0", + "type":"Line chart", + "unit":"short", + "weight":53, + "param":{ + "stack":false, + "legend":{ + "values":[ + "avg", + "max" + ], + "placement":"bottom" + }, + "enable":{ + "thresholds":true, + "legend":true, + "valueMapping":true + }, + "style":"line", + "nullType":"zero" + }, + "remark":"The Consumer Lag in each Datasource. Consumer Lag tells us how far behind each Consumer (Group) is in each Partition. 
The smaller the lag the more real-time the data consumption.", + "x":"0.0", + "y":"0.0", + "elements":[ + { + "expression":"sum by(dataSource)(ingest_kafka_lag{service=\"coordinator\"})", + "state":1, + "legend":"{{dataSource}}", + "name":"A" + } + ], + "datasource":"metrics" + }, + { + "name":"Index Status", + "span":"6.0", + "height":"3.0", + "type":"Line chart", + "unit":"short", + "weight":54, + "param":{ + "rightYAxis":{ + "elementNames":[], + "unit":2, + "style":"line", + "label":"" + }, + "stack":false, + "thresholds":[], + "visibility":{ + "result":"", + "varName":"", + "varValue":"", + "operator":"" + }, + "legend":{ + "values":[], + "placement":"bottom" + }, + "enable":{ + "thresholds":true, + "visibility":false, + "legend":true, + "valueMapping":true + }, + "link":"", + "style":"line", + "valueMapping":[], + "nullType":"zero" + }, + "remark":"", + "x":"0.0", + "y":"13.0", + "elements":[ + { + "expression":"avg(druid_index_running_task_num{module=\"OLAP-Node-Exporter\"})", + "state":1, + "legend":"Running", + "name":"A" + }, + { + "expression":"avg(druid_index_waiting_task_num{module=\"OLAP-Node-Exporter\"})", + "state":1, + "legend":"Waiting", + "name":"B" + }, + { + "expression":"avg(druid_index_pending_task_num{module=\"OLAP-Node-Exporter\"})", + "state":1, + "legend":"Pending", + "name":"C" + } + ], + "datasource":"metrics" + }, + { + "name":"Compact Status", + "span":"6.0", + "height":"3.0", + "type":"Line chart", + "unit":"short", + "weight":55, + "param":{ + "stack":false, + "legend":{ + "values":[], + "placement":"bottom" + }, + "enable":{ + "thresholds":true, + "legend":true, + "valueMapping":true + }, + "style":"line", + "nullType":"zero" + }, + "remark":"", + "x":"6.0", + "y":"13.0", + "elements":[ + { + "expression":"druid_compact_waiting_task_num{module=\"OLAP-Node-Exporter\"}", + "state":1, + "legend":"Waiting", + "name":"A" + }, + { + "expression":"druid_compact_pending_task_num{module=\"OLAP-Node-Exporter\"}", + "state":1, + 
"legend":"Pending", + "name":"B" + } + ], + "datasource":"metrics" + }, + { + "name":"Segments", + "span":"6.0", + "height":"4.0", + "type":"Table", + "unit":"short", + "weight":56, + "param":{ + "enable":{ + "thresholds":true, + "legend":true, + "valueMapping":false + }, + "columns":[ + { + "unit":2, + "display":"{{A.$legend}}", + "show":true, + "title":"Element", + "error":false + }, + { + "unit":1, + "display":"{{A.$value}}", + "show":true, + "title":"Value", + "error":false + } + ], + "legend":{ + "values":[], + "show":true, + "placement":"bottom" + }, + "valueMapping":[ + { + "color":{ + "bac":"#fff", + "text":"#000" + }, + "columns":"Value", + "show":false, + "text":"", + "type":"value", + "error":true, + "value":0 + } + ], + "nullType":"null", + "statistics":"last" + }, + "remark":"The segment number in each Datasource.", + "x":"0.0", + "y":"3.0", + "elements":[ + { + "expression":"sum by (dataSource) (coordinator_segment_count{service=\"coordinator\"})", + "state":1, + "legend":"{{dataSource}}", + "name":"A" + } + ], + "datasource":"metrics" + }, + { + "name":"Segment Size", + "span":"6.0", + "height":"4.0", + "type":"Table", + "unit":"bytes", + "weight":57, + "param":{ + "visibility":{ + "result":"", + "varName":"", + "varValue":"", + "operator":"" + }, + "enable":{ + "thresholds":true, + "visibility":false, + "legend":true, + "valueMapping":false + }, + "columns":[ + { + "unit":2, + "display":"{{A.$legend}}", + "show":true, + "title":"Element", + "error":false + }, + { + "unit":7, + "display":"{{A.$value}}", + "show":true, + "title":"Value", + "error":false + } + ], + "legend":{ + "values":[], + "show":true, + "placement":"bottom" + }, + "valueMapping":[ + { + "regx":">1000", + "color":{ + "bac":"#C4162AFF", + "icon":"#d16eefFF", + "text":"#000" + }, + "columns":"Value", + "display":"Warn", + "show":true, + "column":"Value", + "from":100, + "text":"", + "to":100000000, + "type":"range", + "error":false + } + ], + "nullType":"null", + "statistics":"last" + 
}, + "remark":"The total segment size in each Datasource.", + "x":"6.0", + "y":"3.0", + "elements":[ + { + "expression":"sum by(dataSource)(coordinator_segment_size{service=\"coordinator\"})", + "state":1, + "legend":"{{dataSource}}", + "name":"A" + } + ], + "datasource":"metrics" + }, + { + "name":"Query CPU Usage", + "span":"6.0", + "height":"3.0", + "type":"Line chart", + "unit":"percent(0-100)", + "weight":58, + "param":{ + "stack":false, + "thresholds":[ + { + "val":"0.8", + "color":"#d64f40", + "value":80 + } + ], + "legend":{ + "values":[ + "min", + "avg", + "last", + "max" + ], + "placement":"bottom" + }, + "enable":{ + "thresholds":true, + "legend":true, + "valueMapping":true + }, + "style":"line", + "nullType":"zero" + }, + "remark":"The CPU usage in each asset.", + "x":"6.0", + "y":"7.0", + "elements":[ + { + "expression":"(1-avg(irate(node_cpu_seconds_total{mode=\"idle\",olap_node_exporter=~\".*druid_query.*\"}[5m])) by(asset)) * 100", + "state":1, + "legend":"{{asset}}", + "name":"A" + } + ], + "datasource":"metrics" + }, + { + "name":"Query Memory Usage", + "span":"6.0", + "height":"3.0", + "type":"Line chart", + "unit":"percent(0.0-1.0)", + "weight":59, + "param":{ + "stack":false, + "thresholds":[ + { + "val":"0.8", + "color":"#d64f40", + "value":80 + } + ], + "visibility":{ + "result":"", + "varName":"", + "varValue":"", + "operator":"" + }, + "legend":{ + "values":[ + "avg", + "last", + "max" + ], + "placement":"bottom" + }, + "enable":{ + "thresholds":true, + "visibility":false, + "legend":true, + "valueMapping":true + }, + "style":"line", + "nullType":"zero" + }, + "remark":"The memory usage in each asset. 
Runs Coordinator and Overlord processes, which manages data availability and ingestion.", + "x":"0.0", + "y":"7.0", + "elements":[ + { + "expression":"1 - (node_memory_MemAvailable_bytes{olap_node_exporter=~\".*druid_query.*\"} / node_memory_MemTotal_bytes{olap_node_exporter=~\".*druid_query.*\"}) ", + "state":1, + "legend":"{{asset}}", + "name":"A" + } + ], + "datasource":"metrics" + }, + { + "name":"Data Memory Usage", + "span":"6.0", + "height":"3.0", + "type":"Line chart", + "unit":"percent(0.0-1.0)", + "weight":60, + "param":{ + "stack":false, + "thresholds":[ + { + "val":"80", + "color":"#d64f40", + "value":80 + } + ], + "visibility":{ + "result":"", + "varName":"", + "varValue":"", + "operator":"" + }, + "legend":{ + "values":[], + "placement":"bottom" + }, + "enable":{ + "thresholds":true, + "visibility":false, + "legend":true, + "valueMapping":true + }, + "style":"line", + "nullType":"zero" + }, + "remark":"The memory usage in each asset. Runs Historical and MiddleManager processes, which executes ingestion workloads and stores all queryable data.", + "x":"0.0", + "y":"10.0", + "elements":[ + { + "expression":"1 - (node_memory_MemAvailable_bytes{olap_node_exporter=~\".*druid_data.*\"} / node_memory_MemTotal_bytes{olap_node_exporter=~\".*druid_data.*\"}) ", + "state":1, + "legend":"{{asset}}", + "name":"A" + } + ], + "datasource":"metrics" + }, + { + "name":"Data CPU Usage", + "span":"6.0", + "height":"3.0", + "type":"Line chart", + "unit":"percent(0-100)", + "weight":61, + "param":{ + "stack":false, + "thresholds":[ + { + "val":"90", + "color":"#d64f40", + "value":80 + } + ], + "legend":{ + "values":[], + "placement":"bottom" + }, + "enable":{ + "thresholds":true, + "legend":true, + "valueMapping":true + }, + "style":"line", + "nullType":"connected" + }, + "remark":"The CPU usage in each asset.", + "x":"6.0", + "y":"10.0", + "elements":[ + { + "expression":"(1-avg(irate(node_cpu_seconds_total{mode=\"idle\",olap_node_exporter=~\".*druid_data.*\"}[5m])) 
by(asset)) * 100", + "state":1, + "legend":"{{asset}}", + "name":"A" + } + ], + "datasource":"metrics" + }, + { + "name":"Data Network receive Bytes", + "span":"6.0", + "height":"3.0", + "type":"Line chart", + "unit":"bytes", + "weight":62, + "param":{ + "rightYAxis":{ + "elementNames":[], + "unit":2, + "style":"line", + "label":"" + }, + "stack":false, + "thresholds":[], + "visibility":{ + "result":"", + "varName":"", + "varValue":"", + "operator":"" + }, + "legend":{ + "values":[], + "placement":"bottom" + }, + "enable":{ + "thresholds":true, + "visibility":false, + "legend":true, + "valueMapping":true + }, + "link":"", + "style":"line", + "valueMapping":[], + "nullType":"connected" + }, + "remark":"Collecting and processing Traffic Records with incoming byte rate in each Druid-Data nodes.", + "x":"0.0", + "y":"16.0", + "elements":[ + { + "expression":"irate(node_network_receive_bytes_total{olap_node_exporter=~\".*druid_data.*\",device=\"em1\"}[5m])", + "state":1, + "legend":"{{asset}}", + "name":"A" + } + ], + "datasource":"metrics" + }, + { + "name":"Data Network send Bytes", + "span":"6.0", + "height":"3.0", + "type":"Line chart", + "unit":"bytes", + "weight":63, + "param":{ + "rightYAxis":{ + "elementNames":[], + "unit":2, + "style":"line", + "label":"" + }, + "stack":false, + "thresholds":[], + "visibility":{ + "result":"", + "varName":"", + "varValue":"", + "operator":"" + }, + "legend":{ + "values":[ + "avg", + "max" + ], + "placement":"bottom" + }, + "enable":{ + "thresholds":true, + "visibility":false, + "legend":true, + "valueMapping":true + }, + "link":"", + "style":"line", + "valueMapping":[], + "nullType":"connected" + }, + "remark":"Collecting and processing Traffic Records with outgoing byte rate in each Druid-Data nodes.", + "x":"6.0", + "y":"16.0", + "elements":[ + { + "expression":"irate(node_network_transmit_bytes_total{olap_node_exporter=~\".*druid_data.*\",device=\"em1\"}[5m])", + "state":1, + "legend":"{{asset}}", + "name":"A" + } + ], + 
"datasource":"metrics" + } + ] + }, + { + "name":"Clickhouse Status", + "span":"12.0", + "height":"1.0", + "type":"Group", + "unit":"short", + "weight":64, + "param":{ + "enable":{ + "thresholds":false, + "legend":false, + "valueMapping":false + }, + "collapse":true, + "nullType":"null" + }, + "remark":"", + "x":"0.0", + "y":"24.0", + "datasource":"misc", + "children":[ + { + "name":"Insert Rows /sec", + "span":"6.0", + "height":"3.0", + "type":"Line chart", + "unit":"short", + "weight":65, + "param":{ + "stack":false, + "visibility":{ + "result":"", + "varName":"", + "varValue":"", + "operator":"" + }, + "legend":{ + "values":[ + "total", + "avg", + "last", + "max" + ], + "placement":"bottom" + }, + "enable":{ + "thresholds":true, + "visibility":false, + "legend":true, + "valueMapping":true + }, + "style":"line", + "nullType":"zero" + }, + "remark":"Number of rows inserted to all tables", + "x":"0.0", + "y":"0.0", + "elements":[ + { + "expression":"sum by(datacenter) (irate(clickhouse_inserted_rows_total[5m]))", + "state":1, + "legend":"{{datacenter}}", + "name":"A" + } + ], + "datasource":"metrics" + }, + { + "name":"Insert Bytes /sec", + "span":"6.0", + "height":"3.0", + "type":"Line chart", + "unit":"bytes/sec", + "weight":66, + "param":{ + "stack":false, + "visibility":{ + "result":"", + "varName":"", + "varValue":"", + "operator":"" + }, + "legend":{ + "values":[ + "total", + "avg", + "last", + "max" + ], + "placement":"bottom" + }, + "enable":{ + "thresholds":true, + "visibility":false, + "legend":true, + "valueMapping":true + }, + "style":"line", + "nullType":"zero" + }, + "remark":"Number of uncompressed bytes inserted to all tables in each Data center.", + "x":"6.0", + "y":"0.0", + "elements":[ + { + "expression":"sum by(datacenter) (irate(clickhouse_inserted_bytes_total[5m]))", + "state":1, + "legend":"{{datacenter}}", + "name":"A" + } + ], + "datasource":"metrics" + }, + { + "name":"Total Requests /sec", + "span":"6.0", + "height":"3.0", + "type":"Line 
chart", + "unit":"short", + "weight":67, + "param":{ + "stack":false, + "thresholds":[ + { + "color":"#c1e835" + } + ], + "thresholdShow":true, + "legend":{ + "values":[], + "show":true, + "placement":"bottom" + }, + "enable":{ + "thresholds":false, + "legend":true, + "valueMapping":false + }, + "nullType":"zero" + }, + "remark":"The total requests per second in each datacenter.", + "x":"0.0", + "y":"3.0", + "elements":[ + { + "expression":"sum by(datacenter) (irate(request_sum_total[5m]))", + "state":1, + "legend":"{{datacenter}}", + "name":"A" + } + ], + "datasource":"metrics" + }, + { + "name":"Merged rate /sec", + "span":"6.0", + "height":"3.0", + "type":"Line chart", + "unit":"short", + "weight":68, + "param":{ + "stack":false, + "visibility":{ + "result":"", + "varName":"", + "varValue":"", + "operator":"" + }, + "legend":{ + "values":[ + "min", + "avg", + "last", + "max" + ], + "placement":"bottom" + }, + "enable":{ + "thresholds":true, + "visibility":false, + "legend":true, + "valueMapping":true + }, + "style":"line", + "nullType":"zero" + }, + "remark":"The merging data parts per second in each Data center.", + "x":"0.0", + "y":"6.0", + "elements":[ + { + "expression":"sum by(datacenter) (irate(clickhouse_merge[5m]))", + "state":1, + "legend":"{{datacenter}}", + "name":"A" + } + ], + "datasource":"metrics" + }, + { + "name":"Bad Requests /sec", + "span":"6.0", + "height":"3.0", + "type":"Line chart", + "unit":"short", + "weight":69, + "param":{ + "stack":false, + "legend":{ + "values":[], + "placement":"bottom" + }, + "enable":{ + "thresholds":true, + "legend":true, + "valueMapping":true + }, + "style":"line", + "nullType":"zero" + }, + "remark":"The bad requests per second in each Data center.", + "x":"6.0", + "y":"3.0", + "elements":[ + { + "expression":"sum by(asset) (irate(bad_requests_total[5m]))", + "state":1, + "legend":"{{asset}}", + "name":"A" + } + ], + "datasource":"metrics" + }, + { + "name":"Memory Usage", + "span":"6.0", + "height":"3.0", + 
"type":"Line chart", + "unit":"percent(0.0-1.0)", + "weight":70, + "param":{ + "stack":false, + "legend":{ + "values":[ + "avg", + "last", + "max" + ], + "placement":"bottom" + }, + "enable":{ + "thresholds":true, + "legend":true, + "valueMapping":true + }, + "style":"line", + "nullType":"zero" + }, + "remark":"The memory usage in each asset.", + "x":"6.0", + "y":"15.0", + "elements":[ + { + "expression":"1 - (node_memory_MemAvailable_bytes{olap_node_exporter=~\".*clickhouse.*\"} / node_memory_MemTotal_bytes{olap_node_exporter=~\".*clickhouse.*\"}) ", + "state":1, + "legend":"{{asset}}", + "name":"A" + } + ], + "datasource":"metrics" + }, + { + "name":"CPU Usage", + "span":"6.0", + "height":"3.0", + "type":"Line chart", + "unit":"percent(0.0-1.0)", + "weight":71, + "param":{ + "stack":false, + "legend":{ + "values":[ + "avg", + "last", + "max" + ], + "placement":"bottom" + }, + "enable":{ + "thresholds":true, + "legend":true, + "valueMapping":true + }, + "style":"line", + "nullType":"zero" + }, + "remark":"The CPU usage in each asset.", + "x":"0.0", + "y":"15.0", + "elements":[ + { + "expression":"(1-avg(irate(node_cpu_seconds_total{mode=\"idle\",olap_node_exporter=~\".*clickhouse.*\"}[5m])) by(asset))", + "state":1, + "legend":"{{asset}}", + "name":"A" + } + ], + "datasource":"metrics" + }, + { + "name":"TCP Connection Time", + "span":"6.0", + "height":"3.0", + "type":"Line chart", + "unit":"seconds(s)", + "weight":72, + "param":{ + "stack":false, + "legend":{ + "values":[ + "max", + "avg" + ], + "placement":"bottom" + }, + "enable":{ + "thresholds":true, + "legend":true, + "valueMapping":true + }, + "style":"line", + "nullType":"zero" + }, + "remark":"The current Tcp Connection time in each asset.", + "x":"6.0", + "y":"9.0", + "elements":[ + { + "expression":"max by(asset) (ck_connect_time{olap_node_exporter=~\".*clickhouse.*\"})", + "state":1, + "legend":"{{asset}}", + "name":"A" + } + ], + "datasource":"metrics" + }, + { + "name":"Active Query Count", + 
"span":"6.0", + "height":"3.0", + "type":"Line chart", + "unit":"short", + "weight":73, + "param":{ + "stack":false, + "thresholds":[ + { + "color":"#7dd1ed" + } + ], + "thresholdShow":true, + "legend":{ + "values":[ + "max", + "avg" + ], + "show":true, + "placement":"bottom" + }, + "enable":{ + "thresholds":false, + "legend":true, + "valueMapping":false + }, + "nullType":"zero" + }, + "remark":"The current number of queries in each asset.", + "x":"0.0", + "y":"9.0", + "elements":[ + { + "expression":"max by(asset) (ck_processes_count{olap_node_exporter=~\".*clickhouse.*\"})", + "state":1, + "legend":"{{asset}}", + "name":"A" + } + ], + "datasource":"metrics" + }, + { + "name":"Mutiation part Count", + "span":"6.0", + "height":"3.0", + "type":"Line chart", + "unit":"short", + "weight":74, + "param":{ + "stack":false, + "visibility":{ + "result":"", + "varName":"", + "varValue":"", + "operator":"" + }, + "legend":{ + "values":[ + "avg", + "last", + "max" + ], + "placement":"bottom" + }, + "enable":{ + "thresholds":true, + "visibility":false, + "legend":true, + "valueMapping":true + }, + "style":"line", + "nullType":"zero" + }, + "remark":"The current number of mutiation part in each asset.", + "x":"6.0", + "y":"12.0", + "elements":[ + { + "expression":"max by(asset)(clickhouse_part_mutation)", + "state":1, + "legend":"{{asset}}", + "name":"A" + } + ], + "datasource":"metrics" + }, + { + "name":"Asset Disk IO Usage", + "span":"6.0", + "height":"3.0", + "type":"Line chart", + "unit":"percent(0-100)", + "weight":75, + "param":{ + "stack":false, + "thresholds":[], + "visibility":{ + "result":"", + "varName":"", + "varValue":"", + "operator":"" + }, + "legend":{ + "values":[ + "avg", + "last", + "max" + ], + "placement":"bottom" + }, + "enable":{ + "thresholds":true, + "visibility":false, + "legend":true, + "valueMapping":true + }, + "link":"", + "style":"line", + "valueMapping":[], + "nullType":"zero" + }, + "remark":"The maxium percentage of DISK IO in each data center 
and asset.", + "x":"0.0", + "y":"18.0", + "elements":[ + { + "expression":"sum by(asset)(rate(node_disk_io_time_seconds_total{olap_node_exporter=~\".*clickhouse.*\"}[5m]))*100", + "state":1, + "legend":"{{asset}}", + "name":"A" + } + ], + "datasource":"metrics" + } + ] + }, + { + "name":"HBase Status", + "span":"12.0", + "height":"1.0", + "type":"Group", + "unit":"short", + "weight":76, + "param":{ + "visibility":{ + "result":"", + "varName":"", + "varValue":"", + "operator":"" + }, + "enable":{ + "thresholds":false, + "legend":false, + "valueMapping":false + }, + "collapse":true, + "nullType":"null" + }, + "remark":"", + "x":"0.0", + "y":"24.0", + "datasource":"misc", + "children":[ + { + "name":"Request /sec", + "span":"4.0", + "height":"3.0", + "type":"Line chart", + "unit":"short", + "weight":77, + "param":{ + "rightYAxis":{ + "elementNames":[], + "unit":2, + "style":"line", + "label":"" + }, + "stack":0, + "thresholds":[ + { + "color":"#0c4dff", + "id":"25815c1f" + } + ], + "thresholdShow":true, + "visibility":{ + "result":"", + "varName":"", + "varValue":"", + "operator":"" + }, + "legend":{ + "values":[ + "avg", + "max" + ], + "show":true, + "placement":"bottom" + }, + "enable":{ + "thresholds":false, + "visibility":false, + "legend":true, + "valueMapping":false + }, + "link":"", + "valueMapping":[], + "nullType":"null" + }, + "remark":"Number of requests per second of each type,Read/Write/Total", + "x":"0.0", + "y":"0.0", + "elements":[ + { + "expression":"sum by (module) (irate(Hadoop_HBase_totalRequestCount{module=\"DC-HBase\"}[5m]))", + "state":1, + "legend":"Total", + "name":"A" + }, + { + "expression":"sum by (module)(irate(Hadoop_HBase_readRequestCount{module=\"DC-HBase\"}[5m]))", + "state":1, + "legend":"Read", + "name":"B" + }, + { + "expression":"sum by (module)(irate(Hadoop_HBase_writeRequestCount{module=\"DC-HBase\"}[5m]))", + "state":1, + "legend":"Write", + "name":"C" + } + ], + "datasource":"metrics" + }, + { + "name":"Region Count", + 
"span":"4.0", + "height":"3.0", + "type":"Line chart", + "unit":"short", + "weight":78, + "param":{ + "stack":0, + "thresholds":[ + { + "color":"#0c4dff" + } + ], + "thresholdShow":true, + "legend":{ + "values":[], + "show":true, + "placement":"bottom" + }, + "enable":{ + "thresholds":false, + "legend":true, + "valueMapping":false + }, + "showHeader":true, + "nullType":"null" + }, + "remark":"Number of HBase cluster regions", + "x":"0.0", + "y":"3.0", + "elements":[ + { + "expression":"sum by (datacenter) (Hadoop_HBase_regionCount{module=~\".*HBase\"})", + "state":1, + "legend":"{{datacenter}}", + "name":"A" + } + ], + "datasource":"metrics" + }, + { + "name":"RitRegion(Unhealthy) Count", + "span":"4.0", + "height":"3.0", + "type":"Line chart", + "unit":"short", + "weight":79, + "param":{ + "stack":0, + "thresholds":[ + { + "color":"#0c4dff" + } + ], + "thresholdShow":true, + "legend":{ + "values":[], + "show":true, + "placement":"bottom" + }, + "enable":{ + "thresholds":false, + "legend":true, + "valueMapping":false + }, + "showHeader":true, + "nullType":"null" + }, + "remark":"Number of HBase cluster rit(Region in transition) regions.", + "x":"4.0", + "y":"3.0", + "elements":[ + { + "expression":"sum by (datacenter) (Hadoop_HBase_ritCount{module=~\".*HBase\"})", + "state":1, + "legend":"{{datacenter}}", + "name":"A" + } + ], + "datasource":"metrics" + }, + { + "name":"Memory Usage", + "span":"4.0", + "height":"3.0", + "type":"Line chart", + "unit":"percent(0.0-1.0)", + "weight":80, + "param":{ + "stack":0, + "thresholds":[ + { + "color":"#C4162AFF", + "value":80 + } + ], + "thresholdShow":true, + "visibility":{ + "result":"", + "varName":"", + "varValue":"", + "operator":"" + }, + "legend":{ + "values":[ + "avg", + "max" + ], + "show":true, + "placement":"bottom" + }, + "enable":{ + "thresholds":true, + "visibility":false, + "legend":true, + "valueMapping":false + }, + "showHeader":true, + "nullType":"null" + }, + "remark":"The Memory usage in each asset.", + 
"x":"8.0", + "y":"0.0", + "elements":[ + { + "expression":"((Hadoop_HBase_MemHeapUsedM{module=~\".*HBase\"})/((Hadoop_HBase_MemHeapMaxM{module=~\".*HBase\"})))", + "state":1, + "legend":"{{endpoint}}", + "name":"A" + } + ], + "datasource":"metrics" + }, + { + "name":"CPU Usage", + "span":"4.0", + "height":"3.0", + "type":"Line chart", + "unit":"percent(0.0-1.0)", + "weight":81, + "param":{ + "stack":0, + "thresholds":[ + { + "color":"#C4162AFF", + "value":80 + } + ], + "thresholdShow":true, + "visibility":{ + "result":"", + "varName":"", + "varValue":"", + "operator":"" + }, + "legend":{ + "values":[ + "avg", + "max" + ], + "show":true, + "placement":"bottom" + }, + "enable":{ + "thresholds":true, + "visibility":false, + "legend":true, + "valueMapping":false + }, + "showHeader":true, + "nullType":"null" + }, + "remark":"The CPU usage in each asset.", + "x":"4.0", + "y":"0.0", + "elements":[ + { + "expression":"sum by (asset)(java_lang_OperatingSystem_ProcessCpuLoad{module=~\".*HBase\"})", + "state":1, + "legend":"{{asset}}", + "name":"A" + } + ], + "datasource":"metrics" + }, + { + "name":"Request Queue", + "span":"4.0", + "height":"3.0", + "type":"Line chart", + "unit":"short", + "weight":82, + "param":{ + "stack":0, + "thresholds":[ + { + "color":"#0c4dff" + } + ], + "thresholdShow":true, + "visibility":{ + "result":"", + "varName":"", + "varValue":"", + "operator":"" + }, + "legend":{ + "values":[], + "show":true, + "placement":"bottom" + }, + "enable":{ + "thresholds":false, + "visibility":false, + "legend":true, + "valueMapping":false + }, + "showHeader":true, + "nullType":"null" + }, + "remark":"Number of requests in the HBase request queue.", + "x":"8.0", + "y":"3.0", + "elements":[ + { + "expression":"sum by(datacenter)(Hadoop_HBase_numCallsInGeneralQueue{name=\"RegionServer\"})", + "state":1, + "legend":"{{datacenter}}", + "name":"A" + } + ], + "datasource":"metrics" + }, + { + "name":"Slow request", + "span":"4.0", + "height":"3.0", + "type":"Line chart", 
+ "unit":"short", + "weight":83, + "param":{ + "stack":0, + "thresholds":[ + { + "color":"#0c4dff" + } + ], + "thresholdShow":true, + "visibility":{ + "result":"", + "varName":"", + "varValue":"", + "operator":"" + }, + "legend":{ + "values":[], + "show":true, + "placement":"bottom" + }, + "enable":{ + "thresholds":false, + "visibility":false, + "legend":true, + "valueMapping":false + }, + "showHeader":true, + "nullType":"null" + }, + "remark":"Number of slow requests (put,append,get) in the HBase cluster.", + "x":"0.0", + "y":"6.0", + "elements":[ + { + "expression":"sum(irate(Hadoop_HBase_slowPutCount{name=\"RegionServer\",sub=\"Server\"}[5m])) by (datacenter)", + "state":1, + "legend":"put-{{datacenter}}", + "name":"A" + }, + { + "expression":"sum(irate(Hadoop_HBase_slowAppendCount{name=\"RegionServer\",sub=\"Server\"}[5m])) by (datacenter)", + "state":1, + "legend":"append-{{datacenter}}", + "name":"B" + }, + { + "expression":"sum(irate(Hadoop_HBase_slowGetCount{name=\"RegionServer\",sub=\"Server\"}[5m])) by (datacenter)", + "state":1, + "legend":"get-{{datacenter}}", + "name":"C" + } + ], + "datasource":"metrics" + }, + { + "name":"Memstore Size", + "span":"4.0", + "height":"3.0", + "type":"Line chart", + "unit":"bytes", + "weight":84, + "param":{ + "stack":0, + "thresholds":[ + { + "color":"#0c4dff" + } + ], + "thresholdShow":true, + "visibility":{ + "result":"", + "varName":"", + "varValue":"", + "operator":"" + }, + "legend":{ + "values":[], + "show":true, + "placement":"bottom" + }, + "enable":{ + "thresholds":false, + "visibility":false, + "legend":false, + "valueMapping":false + }, + "showHeader":true, + "nullType":"null" + }, + "remark":"Data size in the HBase node memory", + "x":"4.0", + "y":"6.0", + "elements":[ + { + "expression":"sum by (asset) (Hadoop_HBase_metric_memStoreSize)", + "state":1, + "legend":"{{asset}}", + "name":"A" + } + ], + "datasource":"metrics" + }, + { + "name":"GC Count /sec", + "span":"4.0", + "height":"3.0", + "type":"Line 
chart", + "unit":"none", + "weight":85, + "param":{ + "stack":0, + "thresholds":[ + { + "color":"#0c4dff" + } + ], + "thresholdShow":true, + "visibility":{ + "result":"", + "varName":"", + "varValue":"", + "operator":"" + }, + "legend":{ + "values":[], + "show":true, + "placement":"bottom" + }, + "enable":{ + "thresholds":false, + "visibility":false, + "legend":false, + "valueMapping":false + }, + "showHeader":true, + "nullType":"null" + }, + "remark":"GC times per second of the HBase process.", + "x":"8.0", + "y":"6.0", + "elements":[ + { + "expression":"sum by (datacenter)(irate(Hadoop_HBase_GcCount[5m]))", + "state":1, + "legend":"GCCount-{{datacenter}}", + "name":"A" + }, + { + "expression":"sum by (datacenter)(irate(Hadoop_HBase_GcCountParNew[5m]))", + "state":1, + "legend":"ParNewGcCount-{{datacenter}}", + "name":"B" + }, + { + "expression":"sum by (datacenter)(irate(Hadoop_HBase_GcCountConcurrentMarkSweep[5m]))", + "state":1, + "legend":"CMSGcCount-{{datacenter}}", + "name":"C" + } + ], + "datasource":"metrics" + }, + { + "name":"GC Time", + "span":"4.0", + "height":"3.0", + "type":"Line chart", + "unit":"milliseconds", + "weight":86, + "param":{ + "stack":0, + "thresholds":[ + { + "color":"#0c4dff" + } + ], + "thresholdShow":true, + "visibility":{ + "result":"", + "varName":"", + "varValue":"", + "operator":"" + }, + "legend":{ + "values":[], + "show":true, + "placement":"bottom" + }, + "enable":{ + "thresholds":false, + "visibility":false, + "legend":false, + "valueMapping":false + }, + "showHeader":true, + "nullType":"null" + }, + "remark":"The GC duration of the HBase process.", + "x":"0.0", + "y":"9.0", + "elements":[ + { + "expression":"sum by (datacenter)(irate(Hadoop_HBase_GcCount[5m]))", + "state":1, + "legend":"GC-{{datacenter}}", + "name":"A" + }, + { + "expression":"sum by (datacenter)(irate(Hadoop_HBase_GcCountParNew[5m]))", + "state":1, + "legend":"ParNewGc-{{datacenter}}", + "name":"B" + }, + { + "expression":"sum by 
(datacenter)(irate(Hadoop_HBase_GcCountConcurrentMarkSweep[5m]))", + "state":1, + "legend":"CMSGc-{{datacenter}}", + "name":"C" + } + ], + "datasource":"metrics" + }, + { + "name":"Put Request By HOS Table /sec", + "span":"4.0", + "height":"3.0", + "type":"Line chart", + "unit":"short", + "weight":87, + "param":{ + "stack":0, + "thresholds":[ + { + "color":"#0c4dff" + } + ], + "thresholdShow":true, + "visibility":{ + "result":"", + "varName":"", + "varValue":"", + "operator":"" + }, + "legend":{ + "values":[], + "show":true, + "placement":"bottom" + }, + "enable":{ + "thresholds":false, + "visibility":false, + "legend":false, + "valueMapping":false + }, + "showHeader":true, + "nullType":"null" + }, + "remark":"Number of put requests for hos table per second.", + "x":"4.0", + "y":"9.0", + "elements":[ + { + "expression":"sum by (table) (irate(Hadoop_HBase_metric_putCount{namespace=\"default\",type=\"data\",module=~\".*HBase\"}[2m]))", + "state":1, + "legend":"{{table}}", + "name":"A" + } + ], + "datasource":"metrics" + }, + { + "name":"Get Request By HOS Table /sec", + "span":"4.0", + "height":"3.0", + "type":"Line chart", + "unit":"short", + "weight":88, + "param":{ + "stack":0, + "thresholds":[ + { + "color":"#0c4dff" + } + ], + "thresholdShow":true, + "visibility":{ + "result":"", + "varName":"", + "varValue":"", + "operator":"" + }, + "legend":{ + "values":[], + "show":true, + "placement":"bottom" + }, + "enable":{ + "thresholds":false, + "visibility":false, + "legend":false, + "valueMapping":false + }, + "showHeader":true, + "nullType":"null" + }, + "remark":"Number of get requests for hos table per second.", + "x":"8.0", + "y":"9.0", + "elements":[ + { + "expression":"sum by (table) (irate(Hadoop_HBase_metric_getCount{namespace=\"default\",type=\"data\",module=~\".*HBase\"}[2m]))", + "state":1, + "legend":"{{table}}", + "name":"A" + } + ], + "datasource":"metrics" + }, + { + "name":"Append Request by HOS Table /sec", + "span":"4.0", + "height":"3.0", + 
"type":"Line chart", + "unit":"short", + "weight":89, + "param":{ + "stack":0, + "thresholds":[ + { + "color":"#0c4dff" + } + ], + "thresholdShow":true, + "visibility":{ + "result":"", + "varName":"", + "varValue":"", + "operator":"" + }, + "legend":{ + "values":[], + "show":true, + "placement":"bottom" + }, + "enable":{ + "thresholds":false, + "visibility":false, + "legend":false, + "valueMapping":false + }, + "showHeader":true, + "nullType":"null" + }, + "remark":"Number of append requests for hos table per second.", + "x":"0.0", + "y":"12.0", + "elements":[ + { + "expression":"sum by (table) (irate(Hadoop_HBase_metric_appendCount{namespace=\"default\",type=\"data\",module=~\".*HBase\"}[2m]))", + "state":1, + "legend":"{{table}}", + "name":"A" + } + ], + "datasource":"metrics" + }, + { + "name":"HOS TTL Scan Count by Table /sec", + "span":"4.0", + "height":"3.0", + "type":"Line chart", + "unit":"short", + "weight":90, + "param":{ + "stack":0, + "thresholds":[ + { + "color":"#0c4dff" + } + ], + "thresholdShow":true, + "visibility":{ + "result":"", + "varName":"", + "varValue":"", + "operator":"" + }, + "legend":{ + "values":[], + "show":true, + "placement":"bottom" + }, + "enable":{ + "thresholds":false, + "visibility":false, + "legend":false, + "valueMapping":false + }, + "showHeader":true, + "nullType":"null" + }, + "remark":"Number of scan times in ttl for hos table per second.", + "x":"4.0", + "y":"12.0", + "elements":[ + { + "expression":"sum by (table) (irate(Hadoop_HBase_metric_scanCount{namespace=\"default\",type=\"data\",module=~\".*HBase\"}[2m]))", + "state":1, + "legend":"{{table}}", + "name":"A" + } + ], + "datasource":"metrics" + }, + { + "name":"Scan Count by Business Table /sec", + "span":"4.0", + "height":"3.0", + "type":"Line chart", + "unit":"short", + "weight":91, + "param":{ + "stack":0, + "thresholds":[ + { + "color":"#0c4dff" + } + ], + "thresholdShow":true, + "visibility":{ + "result":"", + "varName":"", + "varValue":"", + "operator":"" + }, 
+ "legend":{ + "values":[], + "show":true, + "placement":"bottom" + }, + "enable":{ + "thresholds":false, + "visibility":false, + "legend":false, + "valueMapping":false + }, + "showHeader":true, + "nullType":"null" + }, + "remark":"Number of scan times for business table per second.", + "x":"8.0", + "y":"12.0", + "elements":[ + { + "expression":"sum by (table) (irate(Hadoop_HBase_metric_scanCount{namespace=\"tsg_galaxy\",module=~\".*HBase\"}[2m]))", + "state":1, + "legend":"{{table}}", + "name":"A" + } + ], + "datasource":"metrics" + } + ] + }, + { + "name":"Hadoop HDFS Status", + "span":"12.0", + "height":"1.0", + "type":"Group", + "unit":"short", + "weight":92, + "param":{ + "thresholds":[], + "visibility":{ + "result":"", + "varName":"", + "varValue":"", + "operator":"" + }, + "showHeader":true, + "enable":{ + "thresholds":false, + "legend":false, + "valueMapping":false + }, + "repeat":{ + "variable":"" + }, + "link":"", + "valueMapping":[], + "collapse":true, + "nullType":"null" + }, + "remark":"", + "x":"0.0", + "y":"24.0", + "datasource":"misc", + "children":[ + { + "name":"Storage Capacity Usage", + "span":"4.0", + "height":"3.0", + "type":"Line chart", + "unit":"percent(0-100)", + "weight":93, + "param":{ + "stack":0, + "thresholds":[ + { + "color":"#C4162AFF", + "value":80 + } + ], + "thresholdShow":true, + "legend":{ + "values":[], + "show":true, + "placement":"bottom" + }, + "enable":{ + "thresholds":true, + "legend":true, + "valueMapping":false + }, + "nullType":"null" + }, + "remark":"Hadoop cluster storage capacity Usage.", + "x":"4.0", + "y":"6.0", + "elements":[ + { + "expression":"(sum by (datacenter)(Hadoop_NameNode_PercentUsed)) / 2", + "state":1, + "legend":"{{datacenter}}", + "name":"A" + } + ], + "datasource":"metrics" + }, + { + "name":"Namenode Memory Usage", + "span":"4.0", + "height":"3.0", + "type":"Line chart", + "unit":"percent(0.0-1.0)", + "weight":94, + "param":{ + "stack":0, + "thresholds":[ + { + "color":"#1c1984" + } + ], +
"thresholdShow":true, + "legend":{ + "values":[ + "avg", + "max" + ], + "show":true, + "placement":"bottom" + }, + "enable":{ + "thresholds":false, + "legend":true, + "valueMapping":false + }, + "showHeader":true, + "nullType":"zero" + }, + "remark":"The Memory usage in each Namenode.", + "x":"4.0", + "y":"0.0", + "elements":[ + { + "expression":"Hadoop_NameNode_MemHeapUsedM{module=~\".*Hadoop\"}/Hadoop_NameNode_MemHeapMaxM{module=~\".*Hadoop\"}", + "state":1, + "legend":"{{datacenter}}-{{asset}}", + "name":"A" + } + ], + "datasource":"metrics" + }, + { + "name":"Datanode Memory Usage", + "span":"4.0", + "height":"3.0", + "type":"Line chart", + "unit":"percent(0.0-1.0)", + "weight":95, + "param":{ + "stack":0, + "thresholds":[ + { + "color":"#1c1984" + } + ], + "thresholdShow":true, + "legend":{ + "values":[ + "avg", + "max" + ], + "show":true, + "placement":"bottom" + }, + "enable":{ + "thresholds":false, + "legend":true, + "valueMapping":false + }, + "showHeader":true, + "nullType":"zero" + }, + "remark":"The Memory usage in each Datanode.", + "x":"4.0", + "y":"3.0", + "elements":[ + { + "expression":"Hadoop_DataNode_MemHeapUsedM{module=~\".*Hadoop\"}/Hadoop_DataNode_MemHeapMaxM{module=~\".*Hadoop\"}", + "state":1, + "legend":"{{datacenter}}-{{asset}}", + "name":"A" + } + ], + "datasource":"metrics" + }, + { + "name":"Missing Blocks", + "span":"4.0", + "height":"3.0", + "type":"Line chart", + "unit":"short", + "weight":96, + "param":{ + "stack":0, + "thresholds":[ + { + "color":"#1c1984" + } + ], + "thresholdShow":true, + "legend":{ + "values":[], + "show":true, + "placement":"bottom" + }, + "enable":{ + "thresholds":false, + "legend":true, + "valueMapping":false + }, + "showHeader":true, + "nullType":"null" + }, + "remark":"Number of missing blocks in the Hadoop cluster.", + "x":"8.0", + "y":"3.0", + "elements":[ + { + "expression":"(sum by (datacenter)(Hadoop_NameNode_NumberOfMissingBlocks))/2", + "state":1, + "legend":"{{datacenter}}", + "name":"A" + } +
], + "datasource":"metrics" + }, + { + "name":"Namenode CPU Usage", + "span":"4.0", + "height":"3.0", + "type":"Line chart", + "unit":"percent(0.0-1.0)", + "weight":97, + "param":{ + "stack":0, + "thresholds":[ + { + "color":"#1c1984" + } + ], + "thresholdShow":true, + "visibility":{ + "result":"", + "varName":"", + "varValue":"", + "operator":"" + }, + "legend":{ + "values":[ + "avg", + "max" + ], + "show":true, + "placement":"bottom" + }, + "enable":{ + "thresholds":false, + "visibility":false, + "legend":true, + "valueMapping":false + }, + "showHeader":true, + "nullType":"null" + }, + "remark":"The CPU usage in each Namenode.", + "x":"0.0", + "y":"0.0", + "elements":[ + { + "expression":"java_lang_OperatingSystem_ProcessCpuLoad{module=~\".*Hadoop\",endpoint=~\".*Name.*\"}", + "state":1, + "legend":"{{asset}}", + "name":"A" + } + ], + "datasource":"metrics" + }, + { + "name":"Corrupt Blocks", + "span":"4.0", + "height":"3.0", + "type":"Line chart", + "unit":"short", + "weight":98, + "param":{ + "stack":0, + "thresholds":[ + { + "color":"#1c1984" + } + ], + "thresholdShow":true, + "legend":{ + "values":[], + "show":true, + "placement":"bottom" + }, + "enable":{ + "thresholds":false, + "legend":true, + "valueMapping":false + }, + "showHeader":true, + "nullType":"zero" + }, + "remark":"Number of blocks with corrupt replicas in the Hadoop cluster.", + "x":"8.0", + "y":"0.0", + "elements":[ + { + "expression":"(sum by (datacenter)(Hadoop_NameNode_CorruptBlocks))/2", + "state":1, + "legend":"{{datacenter}}", + "name":"B" + } + ], + "datasource":"metrics" + }, + { + "name":"Under Replicated Blocks", + "span":"4.0", + "height":"3.0", + "type":"Line chart", + "unit":"short", + "weight":99, + "param":{ + "stack":0, + "thresholds":[ + { + "color":"#1c1984" + } + ], + "thresholdShow":true, + "legend":{ + "values":[], + "show":true, + "placement":"bottom" + }, + "enable":{ + "thresholds":false, + "legend":true, + "valueMapping":false + }, + "showHeader":true, +
"nullType":"null" + }, + "remark":"Number of under-replicated blocks in the Hadoop cluster.", + "x":"0.0", + "y":"6.0", + "elements":[ + { + "expression":"(sum by (datacenter)(Hadoop_NameNode_UnderReplicatedBlocks))/2", + "state":1, + "legend":"{{datacenter}}", + "name":"C" + } + ], + "datasource":"metrics" + }, + { + "name":"Datanode CPU Usage", + "span":"4.0", + "height":"3.0", + "type":"Line chart", + "unit":"percent(0.0-1.0)", + "weight":100, + "param":{ + "stack":0, + "thresholds":[ + { + "color":"#1c1984" + } + ], + "thresholdShow":true, + "visibility":{ + "result":"", + "varName":"", + "varValue":"", + "operator":"" + }, + "legend":{ + "values":[ + "avg", + "max" + ], + "show":true, + "placement":"bottom" + }, + "enable":{ + "thresholds":false, + "visibility":false, + "legend":true, + "valueMapping":false + }, + "showHeader":true, + "nullType":"null" + }, + "remark":"The CPU usage in each DataNode.", + "x":"0.0", + "y":"3.0", + "elements":[ + { + "expression":"java_lang_OperatingSystem_ProcessCpuLoad{module=~\".*Hadoop\",endpoint=~\".*Data.*\"}", + "state":1, + "legend":"{{asset}}", + "name":"A" + } + ], + "datasource":"metrics" + } + ] + } + ], + "children":[] + } +]
\ No newline at end of file |
