summaryrefslogtreecommitdiff
path: root/MPE/Nezha/OLAP Componet Status.json
diff options
context:
space:
mode:
authorwangchengcheng <[email protected]>2023-07-27 15:43:51 +0800
committerwangchengcheng <[email protected]>2023-07-27 15:43:51 +0800
commit124f687daace8b85e5c74abac04bcd0a92744a8d (patch)
tree4f563326b1be67cfb51bf6a04f1ca4d953536e76 /MPE/Nezha/OLAP Componet Status.json
parent08686ae87f9efe7a590f48db74ed133b481c85b1 (diff)
P19 23.07 online-configP19
Diffstat (limited to 'MPE/Nezha/OLAP Componet Status.json')
-rw-r--r--MPE/Nezha/OLAP Componet Status.json4597
1 files changed, 4597 insertions, 0 deletions
diff --git a/MPE/Nezha/OLAP Componet Status.json b/MPE/Nezha/OLAP Componet Status.json
new file mode 100644
index 0000000..a7d846f
--- /dev/null
+++ b/MPE/Nezha/OLAP Componet Status.json
@@ -0,0 +1,4597 @@
+[
+ {
+ "name":"OLAP Componet Status",
+ "type":"dashboard",
+ "varType":0,
+ "param":{
+ "chartShare":"none",
+ "variables":[],
+ "report":{
+ "schedule":{
+ "repeat":1,
+ "etime":"",
+ "stime":"",
+ "type":2,
+ "nums":[]
+ },
+ "receivers":[],
+ "enable":false,
+ "range":{
+ "unit":"day",
+ "interval":1,
+ "type":"previous"
+ }
+ }
+ },
+ "remark":"",
+ "charts":[
+ {
+ "name":"Summary",
+ "span":"12.0",
+ "height":"1.0",
+ "type":"Group",
+ "unit":"short",
+ "weight":0,
+ "param":{
+ "thresholds":[],
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "enable":{
+ "thresholds":false,
+ "legend":false,
+ "valueMapping":false
+ },
+ "repeat":{
+ "variable":""
+ },
+ "link":"",
+ "valueMapping":[],
+ "collapse":true,
+ "nullType":"null"
+ },
+ "remark":"",
+ "x":"0.0",
+ "y":"1.0",
+ "datasource":"misc",
+ "children":[
+ {
+ "name":"zookeeper live Status",
+ "span":"4.0",
+ "height":"2.0",
+ "type":"Line chart",
+ "unit":"short",
+ "weight":1,
+ "param":{
+ "stack":false,
+ "thresholds":[
+ {
+ "color":"#47e88d"
+ }
+ ],
+ "thresholdShow":true,
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "legend":{
+ "values":[],
+ "show":true,
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":false,
+ "visibility":false,
+ "legend":true,
+ "valueMapping":false
+ },
+ "nullType":"null"
+ },
+ "remark":"The number of live Zookeeper instances in each Data Center",
+ "x":"0.0",
+ "y":"0.0",
+ "elements":[
+ {
+ "expression":"sum by (datacenter) (up{module=~\".*Zookeeper\"})",
+ "state":1,
+ "legend":"{{datacenter}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Nacos Live Status",
+ "span":"4.0",
+ "height":"2.0",
+ "type":"Line chart",
+ "unit":"short",
+ "weight":2,
+ "param":{
+ "stack":false,
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "legend":{
+ "values":[],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "visibility":false,
+ "legend":true,
+ "valueMapping":true
+ },
+ "style":"line",
+ "nullType":"zero"
+ },
+ "remark":"The number of live Nacos instances in each Data Center",
+ "x":"4.0",
+ "y":"0.0",
+ "elements":[
+ {
+ "expression":"sum by(datacenter) (up{module=~\".*Nacos\"})",
+ "state":1,
+ "legend":"{{datacenter}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Kafka live Status",
+ "span":"4.0",
+ "height":"2.0",
+ "type":"Line chart",
+ "unit":"short",
+ "weight":3,
+ "param":{
+ "stack":false,
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "legend":{
+ "values":[],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "visibility":false,
+ "legend":true,
+ "valueMapping":true
+ },
+ "style":"line",
+ "nullType":"zero"
+ },
+ "remark":"The number of live Kafka instances in each Data Center",
+ "x":"8.0",
+ "y":"0.0",
+ "elements":[
+ {
+ "expression":"sum by (datacenter) (up{module=~\".*Kafka\"})",
+ "state":1,
+ "legend":"{{datacenter}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Hadoop Yarn Live Status",
+ "span":"4.0",
+ "height":"2.0",
+ "type":"Line chart",
+ "unit":"short",
+ "weight":4,
+ "param":{
+ "stack":false,
+ "thresholds":[],
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "legend":{
+ "values":[],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "visibility":false,
+ "legend":true,
+ "valueMapping":true
+ },
+ "link":"",
+ "style":"line",
+ "valueMapping":[],
+ "nullType":"zero"
+ },
+ "remark":"The number of live Yarn ResourceManager and NodeManager instances in each Data Center",
+ "x":"8.0",
+ "y":"4.0",
+ "elements":[
+ {
+ "expression":"sum by(datacenter)(up{module=~\".*Yarn\",endpoint=~\".*ResourceManager.*\"})",
+ "state":1,
+ "legend":"{{datacenter}}-ResourceManager",
+ "name":"A"
+ },
+ {
+ "expression":"sum by(datacenter)(up{module=~\".*Yarn\",endpoint=~\".*NodeManager.*\"})",
+ "state":1,
+ "legend":"{{datacenter}}-NodeManager",
+ "name":"B"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Druid live Status",
+ "span":"4.0",
+ "height":"2.0",
+ "type":"Line chart",
+ "unit":"short",
+ "weight":5,
+ "param":{
+ "stack":false,
+ "legend":{
+ "values":[],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "legend":true,
+ "valueMapping":true
+ },
+ "style":"line",
+ "nullType":"zero"
+ },
+ "remark":"The number of live Druid instances per service (broker, router, coordinator, middleManager, historical)",
+ "x":"4.0",
+ "y":"2.0",
+ "elements":[
+ {
+ "expression":"count(sys_swap_page_in{service=\"broker\"})",
+ "state":1,
+ "legend":"broker",
+ "name":"A"
+ },
+ {
+ "expression":"count(sys_swap_page_in{service=\"router\",host=~\".*8088\"})",
+ "state":1,
+ "legend":"router",
+ "name":"B"
+ },
+ {
+ "expression":"count(sys_swap_page_in{service=\"coordinator\"})",
+ "state":1,
+ "legend":"coordinator",
+ "name":"C"
+ },
+ {
+ "expression":"count (sys_swap_page_in{service=\"middleManager\",host=~\".*8091\"})",
+ "state":1,
+ "legend":"middleManager",
+ "name":"D"
+ },
+ {
+ "expression":"count (sys_swap_page_in{service=\"historical\"})",
+ "state":1,
+ "legend":"historical",
+ "name":"E"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Clickhouse live Status",
+ "span":"4.0",
+ "height":"2.0",
+ "type":"Line chart",
+ "unit":"short",
+ "weight":6,
+ "param":{
+ "stack":false,
+ "legend":{
+ "values":[],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "legend":true,
+ "valueMapping":true
+ },
+ "style":"line",
+ "nullType":"zero"
+ },
+ "remark":"The number of live ClickHouse instances in each Data Center",
+ "x":"8.0",
+ "y":"2.0",
+ "elements":[
+ {
+ "expression":"sum by(datacenter) (up{module=\"NC-Clickhouse\"})",
+ "state":1,
+ "legend":"{{datacenter}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Hbase Hmaster live Status",
+ "span":"4.0",
+ "height":"2.0",
+ "type":"Line chart",
+ "unit":"short",
+ "weight":7,
+ "param":{
+ "stack":false,
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "legend":{
+ "values":[],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "visibility":false,
+ "legend":true,
+ "valueMapping":true
+ },
+ "style":"line",
+ "nullType":"zero"
+ },
+ "remark":"The number of live HBase HMaster instances in each Data Center",
+ "x":"0.0",
+ "y":"6.0",
+ "elements":[
+ {
+ "expression":"count by (datacenter) (Hadoop_HBase_numMasterWALs{module=~\".*HBase\"})",
+ "state":1,
+ "legend":"{{datacenter}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Hbase Region Server live Status",
+ "span":"4.0",
+ "height":"2.0",
+ "type":"Line chart",
+ "unit":"short",
+ "weight":8,
+ "param":{
+ "stack":false,
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "legend":{
+ "values":[],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "visibility":false,
+ "legend":true,
+ "valueMapping":true
+ },
+ "style":"line",
+ "nullType":"zero"
+ },
+ "remark":"The number of live HBase Region Servers in each Data Center",
+ "x":"4.0",
+ "y":"6.0",
+ "elements":[
+ {
+ "expression":"sum by (datacenter) (Hadoop_HBase_numRegionServers{module=~\".*HBase\"})",
+ "state":1,
+ "legend":"{{datacenter}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Hadoop Namenode live Status",
+ "span":"4.0",
+ "height":"2.0",
+ "type":"Line chart",
+ "unit":"short",
+ "weight":9,
+ "param":{
+ "stack":false,
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "legend":{
+ "values":[],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "visibility":false,
+ "legend":true,
+ "valueMapping":true
+ },
+ "style":"line",
+ "nullType":"zero"
+ },
+ "remark":"The number of live Hadoop NameNode instances in each Data Center",
+ "x":"0.0",
+ "y":"4.0",
+ "elements":[
+ {
+ "expression":"count by (datacenter) (Hadoop_NameNode_Total{module=~\".*Hadoop\"})",
+ "state":1,
+ "legend":"{{datacenter}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Hadoop Datanode live Status",
+ "span":"4.0",
+ "height":"2.0",
+ "type":"Line chart",
+ "unit":"short",
+ "weight":10,
+ "param":{
+ "stack":false,
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "legend":{
+ "values":[],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "visibility":false,
+ "legend":true,
+ "valueMapping":true
+ },
+ "style":"line",
+ "nullType":"zero"
+ },
+ "remark":"The number of live Hadoop DataNode instances in each Data Center",
+ "x":"4.0",
+ "y":"4.0",
+ "elements":[
+ {
+ "expression":"count by (datacenter) (Hadoop_DataNode_HeartbeatsNumOps{module=~\".*Hadoop\"})",
+ "state":1,
+ "legend":"{{datacenter}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"DC-GoHangout live Status",
+ "span":"4.0",
+ "height":"2.0",
+ "type":"Line chart",
+ "unit":"short",
+ "weight":11,
+ "param":{
+ "rightYAxis":{
+ "elementNames":[],
+ "unit":2,
+ "style":"line",
+ "label":""
+ },
+ "stack":false,
+ "thresholds":[],
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "legend":{
+ "values":[],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "visibility":false,
+ "legend":true,
+ "valueMapping":true
+ },
+ "link":"",
+ "style":"line",
+ "valueMapping":[],
+ "nullType":"zero"
+ },
+ "remark":"The number of live GoHangout instances in each Data Center",
+ "x":"0.0",
+ "y":"2.0",
+ "elements":[
+ {
+ "expression":"sum by(datacenter) (gohangout_status{module=\"DC-Gohangout\"})",
+ "state":1,
+ "legend":"{{datacenter}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ }
+ ]
+ },
+ {
+ "name":"Zookeeper Status",
+ "span":"12.0",
+ "height":"1.0",
+ "type":"Group",
+ "unit":"short",
+ "weight":12,
+ "param":{
+ "enable":{
+ "thresholds":false,
+ "legend":false,
+ "valueMapping":false
+ },
+ "collapse":true,
+ "nullType":"null"
+ },
+ "remark":"",
+ "x":"0.0",
+ "y":"1.0",
+ "datasource":"misc",
+ "children":[
+ {
+ "name":"Zookeeper Leader",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"short",
+ "weight":13,
+ "param":{
+ "stack":false,
+ "legend":{
+ "values":[],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "legend":true,
+ "valueMapping":true
+ },
+ "style":"line",
+ "nullType":"zero"
+ },
+ "remark":"Current Zookeeper leader. Shows whether the Zookeeper leader has changed, which helps determine the stability of the current cluster.",
+ "x":"0.0",
+ "y":"0.0",
+ "elements":[
+ {
+ "expression":"(zookeeper_leader>0)",
+ "state":1,
+ "legend":"{{datacenter}}-{{asset}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Disk IO Usage",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"percent(0-100)",
+ "weight":14,
+ "param":{
+ "stack":false,
+ "thresholds":[
+ {
+ "val":"80",
+ "color":"#d64f40",
+ "id":"3099ecf1",
+ "value":80
+ }
+ ],
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "legend":{
+ "values":[
+ "avg",
+ "last",
+ "max"
+ ],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "visibility":false,
+ "legend":true,
+ "valueMapping":true
+ },
+ "link":"",
+ "style":"line",
+ "valueMapping":[],
+ "nullType":"zero"
+ },
+ "remark":"The maximum percentage of DISK IO in each asset.",
+ "x":"6.0",
+ "y":"0.0",
+ "elements":[
+ {
+ "expression":"sum by(asset)(rate(node_disk_io_time_seconds_total{olap_node_exporter=~\".*zookeeper.*\"}[5m]))*100",
+ "state":1,
+ "legend":"{{asset}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Outstanding Requests",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"short",
+ "weight":15,
+ "param":{
+ "stack":false,
+ "thresholds":[
+ {
+ "val":"100",
+ "color":"#d64f40",
+ "value":100
+ }
+ ],
+ "legend":{
+ "values":[
+ "min",
+ "avg",
+ "last",
+ "max"
+ ],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "legend":true,
+ "valueMapping":true
+ },
+ "style":"line",
+ "nullType":"zero"
+ },
+ "remark":"Number of queued requests in each Data Center. This goes up when the server receives more requests than it can process. The threshold line is set at a count of 100.",
+ "x":"0.0",
+ "y":"3.0",
+ "elements":[
+ {
+ "expression":"sum by(datacenter) (zookeeper_outstanding_requests{module=\"NC-Zookeeper\"})",
+ "state":1,
+ "legend":"{{datacenter}}",
+ "name":"A"
+ },
+ {
+ "expression":"sum by(datacenter) (zookeeper_outstanding_requests{module=\"DC-Zookeeper\"})",
+ "state":1,
+ "legend":"{{datacenter}}",
+ "name":"B"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Connections /sec",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"short",
+ "weight":16,
+ "param":{
+ "stack":false,
+ "legend":{
+ "values":[
+ "min",
+ "avg",
+ "last",
+ "max"
+ ],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "legend":true,
+ "valueMapping":true
+ },
+ "style":"line",
+ "nullType":"zero"
+ },
+ "remark":"The number of connections per second in each Data Center",
+ "x":"6.0",
+ "y":"3.0",
+ "elements":[
+ {
+ "expression":"sum by(datacenter) (rate(zookeeper_connections{module=\"NC-Zookeeper\"}[5m]))",
+ "state":1,
+ "legend":"{{datacenter}}",
+ "name":"A"
+ },
+ {
+ "expression":"sum by(datacenter) (rate(zookeeper_connections{module=\"DC-Zookeeper\"}[5m]))",
+ "state":1,
+ "legend":"{{datacenter}}",
+ "name":"B"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Znodes",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Table",
+ "unit":"short",
+ "weight":17,
+ "param":{
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "columns":[
+ {
+ "unit":2,
+ "display":"{{A.$legend}}",
+ "show":true,
+ "title":"Datacenter",
+ "error":false
+ },
+ {
+ "unit":2,
+ "display":"{{A.$value}}",
+ "show":true,
+ "title":"Znodes",
+ "error":false
+ }
+ ],
+ "enable":{
+ "thresholds":false,
+ "visibility":false,
+ "legend":true,
+ "valueMapping":false
+ },
+ "legend":{
+ "values":[],
+ "show":true,
+ "placement":"bottom"
+ },
+ "nullType":"zero",
+ "statistics":"last"
+ },
+ "remark":"Number of znodes in the Zookeeper namespace in each data center",
+ "x":"0.0",
+ "y":"6.0",
+ "elements":[
+ {
+ "expression":"sum by(datacenter) (zookeeper_znode_count{module=~\"NC-Zookeeper|DC-Zookeeper\"})",
+ "state":1,
+ "legend":"{{datacenter}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Avg Latency",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"milliseconds",
+ "weight":18,
+ "param":{
+ "stack":false,
+ "thresholds":[
+ {
+ "val":"100000",
+ "color":"#d64f40",
+ "value":100000
+ }
+ ],
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "legend":{
+ "values":[],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "visibility":false,
+ "legend":true,
+ "valueMapping":true
+ },
+ "style":"line",
+ "nullType":"zero"
+ },
+ "remark":"Average amount of time it takes for the server to respond to a client request (since the server was started) in each data center. Watch for latency > 10 ticks.",
+ "x":"6.0",
+ "y":"6.0",
+ "elements":[
+ {
+ "expression":"sum by(datacenter) (irate(zookeeper_latency_avg_ms{module=~\".*Zookeeper\"}[5m]))",
+ "state":1,
+ "legend":"{{datacenter}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Max Latency",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"milliseconds",
+ "weight":19,
+ "param":{
+ "stack":false,
+ "thresholds":[
+ {
+ "val":"100000",
+ "color":"#d64f40"
+ }
+ ],
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "legend":{
+ "values":[],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":false,
+ "visibility":false,
+ "legend":true,
+ "valueMapping":true
+ },
+ "style":"line",
+ "nullType":"zero"
+ },
+ "remark":"Maximum amount of time it takes for the server to respond to a client request (since the server was started) in each data center. Watch for latency > 10 ticks.",
+ "x":"0.0",
+ "y":"9.0",
+ "elements":[
+ {
+ "expression":"max by(datacenter) (irate(zookeeper_latency_max_ms{module=~\"NC-Zookeeper|DC-Zookeeper\"}[5m])) ",
+ "state":1,
+ "legend":"{{datacenter}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Sent Packets /sec",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"packets/sec",
+ "weight":20,
+ "param":{
+ "stack":false,
+ "legend":{
+ "values":[
+ "avg",
+ "last",
+ "max"
+ ],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "legend":true,
+ "valueMapping":true
+ },
+ "style":"line",
+ "nullType":"zero"
+ },
+ "remark":"Number of client packets sent (responses and notifications) in each data center.",
+ "x":"6.0",
+ "y":"9.0",
+ "elements":[
+ {
+ "expression":"sum by(datacenter) (irate(zookeeper_packets_sent{module=\"NC-Zookeeper\"}[5m]))",
+ "state":1,
+ "legend":"{{datacenter}}",
+ "name":"A"
+ },
+ {
+ "expression":"sum by(datacenter) (irate(zookeeper_packets_sent{module=\"DC-Zookeeper\"}[5m]))",
+ "state":1,
+ "legend":"{{datacenter}}",
+ "name":"B"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Received Packets /sec",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"packets/sec",
+ "weight":21,
+ "param":{
+ "stack":false,
+ "legend":{
+ "values":[
+ "total",
+ "min",
+ "avg",
+ "last",
+ "max"
+ ],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "legend":true,
+ "valueMapping":true
+ },
+ "style":"line",
+ "nullType":"zero"
+ },
+ "remark":"Number of client requests (typically operations) received in each data center.",
+ "x":"0.0",
+ "y":"12.0",
+ "elements":[
+ {
+ "expression":"sum by(datacenter) (irate(zookeeper_packets_received{module=\"NC-Zookeeper\"}[5m]))",
+ "state":1,
+ "legend":"{{datacenter}}",
+ "name":"A"
+ },
+ {
+ "expression":"sum by(datacenter) (irate(zookeeper_packets_received{module=\"DC-Zookeeper\"}[5m]))",
+ "state":1,
+ "legend":"{{datacenter}}",
+ "name":"B"
+ }
+ ],
+ "datasource":"metrics"
+ }
+ ]
+ },
+ {
+ "name":"Nacos Status",
+ "span":"12.0",
+ "height":"3.0",
+ "type":"Group",
+ "unit":"short",
+ "weight":22,
+ "param":{
+ "collapse":true,
+ "nullType":"null"
+ },
+ "remark":"",
+ "x":"0.0",
+ "y":"2.0",
+ "datasource":"misc",
+ "children":[
+ {
+ "name":"QPS",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"short",
+ "weight":23,
+ "param":{
+ "stack":false,
+ "legend":{
+ "values":[
+ "avg",
+ "last",
+ "max"
+ ],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "legend":true,
+ "valueMapping":true
+ },
+ "style":"line",
+ "nullType":"zero"
+ },
+ "remark":"Nacos queries per second",
+ "x":"0.0",
+ "y":"0.0",
+ "elements":[
+ {
+ "expression":"sum(rate(http_server_requests_seconds_count{uri=~'/v1/cs/configs|/nacos/v1/ns/instance|/nacos/v1/ns/health', module=\"NC-Nacos\"}[5m])) by (method,uri)",
+ "state":1,
+ "legend":"{{method}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Request Errors",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"short",
+ "weight":24,
+ "param":{
+ "stack":false,
+ "legend":{
+ "values":[
+ "avg",
+ "last",
+ "max"
+ ],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "legend":true,
+ "valueMapping":true
+ },
+ "style":"line",
+ "nullType":"zero"
+ },
+ "remark":"The number of failed requests per second for each HTTP method and URI.",
+ "x":"6.0",
+ "y":"0.0",
+ "elements":[
+ {
+ "expression":"sum(irate(http_server_requests_seconds_count{exception!='None',module=\"NC-Nacos\"}[5m])) by (method,uri)",
+ "state":1,
+ "legend":"{{method}}-{{uri}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Memory Usage",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"percent(0.0-1.0)",
+ "weight":25,
+ "param":{
+ "stack":false,
+ "legend":{
+ "values":[
+ "avg",
+ "last",
+ "max"
+ ],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "legend":true,
+ "valueMapping":true
+ },
+ "style":"line",
+ "nullType":"zero"
+ },
+ "remark":"The memory usage in each asset",
+ "x":"0.0",
+ "y":"3.0",
+ "elements":[
+ {
+ "expression":"sum by(asset) (jvm_memory_used_bytes{module=\"NC-Nacos\"}) / sum by(asset) (jvm_memory_max_bytes{module=\"NC-Nacos\"})",
+ "state":1,
+ "legend":"{{asset}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"CPU Usage",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"percent(0.0-1.0)",
+ "weight":26,
+ "param":{
+ "stack":false,
+ "legend":{
+ "values":[
+ "avg",
+ "last",
+ "max"
+ ],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "legend":true,
+ "valueMapping":true
+ },
+ "style":"line",
+ "nullType":"zero"
+ },
+ "remark":"The CPU usage in each asset",
+ "x":"6.0",
+ "y":"3.0",
+ "elements":[
+ {
+ "expression":"system_cpu_usage{module=\"NC-Nacos\"}",
+ "state":1,
+ "legend":"{{asset}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ }
+ ]
+ },
+ {
+ "name":"Kafka Status",
+ "span":"12.0",
+ "height":"3.0",
+ "type":"Group",
+ "unit":"short",
+ "weight":27,
+ "param":{
+ "collapse":true,
+ "nullType":"null"
+ },
+ "remark":"",
+ "x":"0.0",
+ "y":"2.0",
+ "datasource":"misc",
+ "children":[
+ {
+ "name":"Requests /sec",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"short",
+ "weight":28,
+ "param":{
+ "stack":false,
+ "legend":{
+ "values":[
+ "avg",
+ "last",
+ "max"
+ ],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "legend":true,
+ "valueMapping":true
+ },
+ "style":"line",
+ "nullType":"zero"
+ },
+ "remark":"Number of (producer|consumer|follower) requests per second in each Data Center.",
+ "x":"6.0",
+ "y":"6.0",
+ "elements":[
+ {
+ "expression":"sum by(datacenter) (kafka_server_socket_server_metrics_request_rate)",
+ "state":1,
+ "legend":"{{datacenter}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"In Bytes /sec",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"bytes",
+ "weight":29,
+ "param":{
+ "stack":false,
+ "legend":{
+ "values":[
+ "total",
+ "min",
+ "avg",
+ "last",
+ "max"
+ ],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "legend":true,
+ "valueMapping":true
+ },
+ "style":"line",
+ "nullType":"zero"
+ },
+ "remark":"Collecting and processing Traffic Records with incoming byte rate in each Data Center.",
+ "x":"0.0",
+ "y":"0.0",
+ "elements":[
+ {
+ "expression":"sum by(datacenter) (kafka_server_BrokerTopicMetrics_OneMinuteRate{name=\"BytesInPerSec\",topic=\"\"})",
+ "state":1,
+ "legend":"{{datacenter}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Request Type Errors /sec",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"short",
+ "weight":30,
+ "param":{
+ "stack":false,
+ "legend":{
+ "values":[
+ "avg",
+ "last",
+ "max"
+ ],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "legend":true,
+ "valueMapping":true
+ },
+ "style":"line",
+ "nullType":"null"
+ },
+ "remark":"The number of request type errors per second in each data center.",
+ "x":"0.0",
+ "y":"9.0",
+ "elements":[
+ {
+ "expression":"sum by(datacenter,error) \n(rate(kafka_network_RequestMetrics_Errors_total{module=\"NC-Kafka\",error!=\"NONE\"}[5m]))",
+ "state":1,
+ "legend":"{{error}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Out Bytes /sec",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"bytes",
+ "weight":31,
+ "param":{
+ "stack":false,
+ "legend":{
+ "values":[
+ "total",
+ "min",
+ "avg",
+ "last",
+ "max"
+ ],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "legend":true,
+ "valueMapping":true
+ },
+ "style":"line",
+ "nullType":"zero"
+ },
+ "remark":"Collecting and processing Traffic Records with outgoing byte rate in each Data Center.",
+ "x":"6.0",
+ "y":"0.0",
+ "elements":[
+ {
+ "expression":"sum by(datacenter) (kafka_server_BrokerTopicMetrics_OneMinuteRate{name=\"BytesOutPerSec\",topic=\"\"})",
+ "state":1,
+ "legend":"{{datacenter}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Failed fetch request /sec",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"short",
+ "weight":32,
+ "param":{
+ "stack":false,
+ "legend":{
+ "values":[
+ "avg",
+ "last",
+ "max"
+ ],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "legend":true,
+ "valueMapping":true
+ },
+ "style":"line",
+ "nullType":"zero"
+ },
+ "remark":"Number of data read requests from consumers that brokers failed to process for this topic in each Data Center.",
+ "x":"6.0",
+ "y":"9.0",
+ "elements":[
+ {
+ "expression":"sum by(datacenter) (kafka_server_BrokerTopicMetrics_OneMinuteRate{name=\"FailedFetchRequestsPerSec\",topic=\"\"})",
+ "state":1,
+ "legend":"{{datacenter}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Failed produce request /sec",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"short",
+ "weight":33,
+ "param":{
+ "stack":false,
+ "legend":{
+ "values":[
+ "avg",
+ "last",
+ "max"
+ ],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "legend":true,
+ "valueMapping":true
+ },
+ "style":"line",
+ "nullType":"zero"
+ },
+ "remark":"Number of data produce requests from producers that brokers failed to process for this topic in each Data Center.",
+ "x":"0.0",
+ "y":"12.0",
+ "elements":[
+ {
+ "expression":"sum by(datacenter) (kafka_server_BrokerTopicMetrics_OneMinuteRate{name=\"FailedProduceRequestsPerSec\",topic=\"\"})",
+ "state":1,
+ "legend":"{{datacenter}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Rejected Bytes /sec",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"short",
+ "weight":34,
+ "param":{
+ "stack":false,
+ "legend":{
+ "values":[
+ "avg",
+ "last",
+ "max"
+ ],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "legend":true,
+ "valueMapping":true
+ },
+ "style":"line",
+ "nullType":"zero"
+ },
+ "remark":"Amount of data in messages rejected by broker for this topic in each Data Center.",
+ "x":"6.0",
+ "y":"12.0",
+ "elements":[
+ {
+ "expression":"sum by(datacenter) (kafka_server_BrokerTopicMetrics_OneMinuteRate{name=\"BytesRejectedPerSec\",topic=\"\"})",
+ "state":1,
+ "legend":"{{datacenter}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Asset Disk IO Usage",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"percent(0-100)",
+ "weight":35,
+ "param":{
+ "stack":false,
+ "thresholds":[
+ {
+ "val":"80",
+ "color":"#d64f40",
+ "id":"6954aa6b",
+ "value":80
+ }
+ ],
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "legend":{
+ "values":[
+ "avg",
+ "last",
+ "max"
+ ],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "visibility":false,
+ "legend":true,
+ "valueMapping":true
+ },
+ "link":"",
+ "style":"line",
+ "valueMapping":[],
+ "nullType":"zero"
+ },
+ "remark":"The maximum percentage of DISK IO in each data center and asset.",
+ "x":"0.0",
+ "y":"3.0",
+ "elements":[
+ {
+ "expression":"sum by(asset)(rate(node_disk_io_time_seconds_total{olap_node_exporter=~\".*kafka.*\"}[5m]))*100",
+ "state":1,
+ "legend":"{{asset}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Avg Zookeeper Request Latency",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"milliseconds",
+ "weight":36,
+ "param":{
+ "stack":false,
+ "legend":{
+ "values":[
+ "min",
+ "avg",
+ "last",
+ "max"
+ ],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "legend":true,
+ "valueMapping":true
+ },
+ "style":"line",
+ "nullType":"zero"
+ },
+ "remark":"The average zookeeper request latency(ms) in each data center and asset.",
+ "x":"0.0",
+ "y":"6.0",
+ "elements":[
+ {
+ "expression":"avg by (datacenter,asset)(kafka_server_ZooKeeperClientMetrics_Mean{module=\"NC-Kafka\"}) ",
+ "state":1,
+ "legend":"{{datacenter}} - {{asset}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Open File Descriptors(Max)",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"short",
+ "weight":37,
+ "param":{
+ "stack":false,
+ "thresholds":[
+ {
+ "val":"52428",
+ "color":"#d64f40"
+ }
+ ],
+ "legend":{
+ "values":[
+ "min",
+ "avg",
+ "last",
+ "max"
+ ],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":false,
+ "legend":true,
+ "valueMapping":true
+ },
+ "style":"line",
+ "nullType":"zero"
+ },
+ "remark":"The maximum number of open file descriptors in each data center and asset.",
+ "x":"6.0",
+ "y":"3.0",
+ "elements":[
+ {
+ "expression":"max by (datacenter,asset)(process_open_fds{module=~\"NC-Kafka\"}) ",
+ "state":1,
+ "legend":"{{datacenter}} - {{asset}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ }
+ ]
+ },
+ {
+ "name":"Hadoop Yarn Status",
+ "span":"12.0",
+ "height":"1.0",
+ "type":"Group",
+ "unit":"short",
+ "weight":38,
+ "param":{
+ "thresholds":[],
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "enable":{
+ "thresholds":false,
+ "legend":false,
+ "valueMapping":false
+ },
+ "repeat":{
+ "variable":""
+ },
+ "link":"",
+ "valueMapping":[],
+ "collapse":false,
+ "nullType":"null"
+ },
+ "remark":"",
+ "x":"0.0",
+ "y":"2.0",
+ "datasource":"misc",
+ "children":[
+ {
+ "name":"National Center Yarn active nodes",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"short",
+ "weight":39,
+ "param":{
+ "stack":0,
+ "thresholds":[
+ {
+ "color":"#6f22e2",
+ "id":"1a7db7a"
+ }
+ ],
+ "thresholdShow":true,
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "legend":{
+ "values":[],
+ "show":true,
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":false,
+ "visibility":false,
+ "legend":true,
+ "valueMapping":false
+ },
+ "link":"",
+ "valueMapping":[],
+ "nullType":"zero"
+ },
+ "remark":"The number of active Hadoop Yarn nodes in the National Center.",
+ "x":"0.0",
+ "y":"3.0",
+ "elements":[
+ {
+ "expression":"up{module=\"NC-Yarn\",endpoint=~\".*ResourceManager.*\"}",
+ "state":1,
+ "legend":"{{asset}}-ResourceManager",
+ "name":"A"
+ },
+ {
+ "expression":"up{module=~\"NC-Yarn\",endpoint=~\".*NodeManager.*\"}",
+ "state":1,
+ "legend":"{{asset}}-NodeManager",
+ "name":"B"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Application And Container Running Num",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"short",
+ "weight":40,
+ "param":{
+ "stack":0,
+ "thresholds":[
+ {
+ "color":"#933be5",
+ "id":"59117b59"
+ }
+ ],
+ "thresholdShow":true,
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "legend":{
+ "values":[],
+ "show":true,
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":false,
+ "visibility":false,
+ "legend":true,
+ "valueMapping":false
+ },
+ "link":"",
+ "valueMapping":[],
+ "nullType":"zero"
+ },
+ "remark":"The number of running application and container in each Data center.",
+ "x":"6.0",
+ "y":"0.0",
+ "elements":[
+ {
+ "expression":"sum by(datacenter) (Hadoop_ResourceManager_AppsRunning{module=~\".*-Yarn\",endpoint=~\".*ResourceManager.*\",q1=\"default\"})",
+ "state":1,
+ "legend":"{{datacenter}}-Application",
+ "name":"A"
+ },
+ {
+ "expression":"sum by(datacenter) (Hadoop_ResourceManager_AllocatedContainers{module=~\".*-Yarn\",endpoint=~\".*ResourceManager.*\",q1=\"default\"})",
+ "state":1,
+ "legend":"{{datacenter}}-Container",
+ "name":"B"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"NodeManager Unhealthy",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"none",
+ "weight":41,
+ "param":{
+ "stack":false,
+ "thresholds":[],
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "legend":{
+ "values":[
+ "max",
+ "average"
+ ],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":false,
+ "visibility":false,
+ "legend":false,
+ "valueMapping":true
+ },
+ "link":"",
+ "style":"line",
+ "valueMapping":[],
+ "nullType":"zero"
+ },
+ "remark":"Unhealthy NodeManager nodes in each Data center Hadoop yarn cluster.",
+ "x":"0.0",
+ "y":"0.0",
+ "elements":[
+ {
+ "expression":"sum by(asset)(Hadoop_ResourceManager_NumUnhealthyNMs{module=~\".*Yarn\"})",
+ "state":1,
+ "legend":"{{asset}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Resourcemanager Memory Usage",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"percent(0.0-1.0)",
+ "weight":42,
+ "param":{
+ "stack":false,
+ "thresholds":[
+ {
+ "color":"#C4162AFF",
+ "id":"97bd450",
+ "value":0.8
+ }
+ ],
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "legend":{
+ "values":[
+ "avg",
+ "max"
+ ],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "visibility":false,
+ "legend":true,
+ "valueMapping":true
+ },
+ "link":"",
+ "style":"line",
+ "valueMapping":[],
+ "nullType":"zero"
+ },
+ "remark":"The Hadoop Yarn ResourceManager nodes memory Utilization.",
+ "x":"6.0",
+ "y":"6.0",
+ "elements":[
+ {
+ "expression":"sum by(asset) (Hadoop_ResourceManager_MemHeapUsedM{module=~\".*Yarn\",endpoint=~\".*ResourceManager.*\"}/Hadoop_ResourceManager_MemHeapMaxM{module=~\".*Yarn\",endpoint=~\".*ResourceManager.*\"})",
+ "state":1,
+ "legend":"{{asset}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"ResourceManager CPU Usage",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"percent(0.0-1.0)",
+ "weight":43,
+ "param":{
+ "stack":false,
+ "thresholds":[
+ {
+ "val":"0.7",
+ "color":"#d64f40",
+ "id":"5f03a027",
+ "value":0.8
+ }
+ ],
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "legend":{
+ "values":[
+ "avg",
+ "max"
+ ],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "visibility":false,
+ "legend":true,
+ "valueMapping":true
+ },
+ "link":"",
+ "style":"line",
+ "valueMapping":[],
+ "nullType":"zero"
+ },
+ "remark":"The Hadoop Yarn ResourceManager nodes CPU Utilization.",
+ "x":"0.0",
+ "y":"6.0",
+ "elements":[
+ {
+ "expression":"sum by (asset)(java_lang_OperatingSystem_ProcessCpuLoad{module=~\".*Yarn\",endpoint=~\".*ResourceManager.*\"})",
+ "state":1,
+ "legend":"{{asset}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Data Center Yarn active nodes",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"short",
+ "weight":44,
+ "param":{
+ "stack":0,
+ "thresholds":[
+ {
+ "color":"#6f22e2",
+ "id":"0c2cf76"
+ }
+ ],
+ "thresholdShow":true,
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "legend":{
+ "values":[],
+ "show":true,
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":false,
+ "visibility":false,
+ "legend":true,
+ "valueMapping":false
+ },
+ "link":"",
+ "valueMapping":[],
+ "nullType":"zero"
+ },
+ "remark":"The number of active Hadoop Yarn nodes in the Data Center.",
+ "x":"6.0",
+ "y":"3.0",
+ "elements":[
+ {
+ "expression":"up{module=\"DC-Yarn\",endpoint=~\".*ResourceManager.*\"}",
+ "state":1,
+ "legend":"{{asset}}-ResourceManager",
+ "name":"A"
+ },
+ {
+ "expression":"up{module=~\"DC-Yarn\",endpoint=~\".*NodeManager.*\"}",
+ "state":1,
+ "legend":"{{asset}}-NodeManager",
+ "name":"B"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"NodeManager CPU Usage",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"percent(0.0-1.0)",
+ "weight":45,
+ "param":{
+ "stack":false,
+ "thresholds":[
+ {
+ "val":"0.7",
+ "color":"#d64f40",
+ "id":"14c9ce25",
+ "value":0.8
+ }
+ ],
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "legend":{
+ "values":[
+ "avg",
+ "max"
+ ],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "visibility":false,
+ "legend":true,
+ "valueMapping":true
+ },
+ "link":"",
+ "style":"line",
+ "valueMapping":[],
+ "nullType":"zero"
+ },
+ "remark":"The Hadoop Yarn NodeManager nodes CPU Utilization.",
+ "x":"0.0",
+ "y":"12.0",
+ "elements":[
+ {
+ "expression":"sum by (asset)(java_lang_OperatingSystem_ProcessCpuLoad{module=~\".*Yarn\",endpoint=~\".*NodeManager.*\"})",
+ "state":1,
+ "legend":"{{asset}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"NodeManager Memory Usage",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"percent(0.0-1.0)",
+ "weight":46,
+ "param":{
+ "stack":false,
+ "thresholds":[
+ {
+ "val":"0.7",
+ "color":"#d64f40",
+ "id":"7919a181",
+ "value":0.8
+ }
+ ],
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "legend":{
+ "values":[
+ "avg",
+ "max"
+ ],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "visibility":false,
+ "legend":true,
+ "valueMapping":true
+ },
+ "link":"",
+ "style":"line",
+ "valueMapping":[],
+ "nullType":"zero"
+ },
+ "remark":"The Hadoop Yarn NodeManager nodes memory Utilization.",
+ "x":"6.0",
+ "y":"12.0",
+ "elements":[
+ {
+ "expression":"sum by (asset)(Hadoop_NodeManager_MemHeapUsedM{module=~\".*Yarn\",endpoint=~\".*NodeManager.*\"}/Hadoop_NodeManager_MemHeapMaxM{module=~\".*Yarn\",endpoint=~\".*NodeManager.*\"})",
+ "state":1,
+ "legend":"{{asset}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"NodeManager RpcProcessing AvgTime",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"milliseconds",
+ "weight":47,
+ "param":{
+ "stack":0,
+ "thresholds":[
+ {
+ "color":"#7f4ee8",
+ "id":"57feb110"
+ }
+ ],
+ "thresholdShow":true,
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "legend":{
+ "values":[
+ "avg",
+ "max"
+ ],
+ "show":true,
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":false,
+ "visibility":false,
+ "legend":true,
+ "valueMapping":false
+ },
+ "showHeader":1,
+ "link":"",
+ "valueMapping":[],
+ "nullType":"null"
+ },
+ "remark":"The average RPC call time of the NodeManager.",
+ "x":"0.0",
+ "y":"18.0",
+ "elements":[
+ {
+ "expression":"sum by(asset)(Hadoop_NodeManager_RpcProcessingTimeAvgTime{module=~\".*Yarn\",endpoint=~\".*NodeManager.*\",name=\"RpcActivityForPort9923\"})",
+ "state":1,
+ "legend":"{{asset}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"ResourceManager GC Count",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"none",
+ "weight":48,
+ "param":{
+ "stack":0,
+ "thresholds":[
+ {
+ "color":"#bd00f2",
+ "id":"1eace07"
+ }
+ ],
+ "thresholdShow":true,
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "legend":{
+ "values":[
+ "avg",
+ "max"
+ ],
+ "show":true,
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":false,
+ "visibility":false,
+ "legend":true,
+ "valueMapping":false
+ },
+ "showHeader":1,
+ "link":"",
+ "valueMapping":[],
+ "nullType":"null"
+ },
+ "remark":"Number of GC times of the ResourceManager node",
+ "x":"0.0",
+ "y":"9.0",
+ "elements":[
+ {
+ "expression":"sum by(asset,name) (irate(java_lang_GarbageCollector_CollectionCount{module=~\".*Yarn\",endpoint=~\".*ResourceManager.*\",name=~\"PS MarkSweep|PS Scavenge\"}[5m]))",
+ "state":1,
+ "legend":"{{asset}}-{{name}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"NodeManager GC Count",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"none",
+ "weight":49,
+ "param":{
+ "stack":0,
+ "thresholds":[
+ {
+ "color":"#bd00f2",
+ "id":"df390e7"
+ }
+ ],
+ "thresholdShow":true,
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "legend":{
+ "values":[
+ "avg",
+ "max"
+ ],
+ "show":true,
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":false,
+ "visibility":false,
+ "legend":true,
+ "valueMapping":false
+ },
+ "showHeader":1,
+ "link":"",
+ "valueMapping":[],
+ "nullType":"null"
+ },
+ "remark":"Number of GC times of the NodeManager node",
+ "x":"0.0",
+ "y":"15.0",
+ "elements":[
+ {
+ "expression":"sum by(asset,name) (irate(java_lang_GarbageCollector_CollectionCount{module=~\".*Yarn\",endpoint=~\".*NodeManager.*\",name=~\"PS MarkSweep|PS Scavenge\"}[5m]))",
+ "state":1,
+ "legend":"{{asset}}-{{name}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"ResourceManager GC Time",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"milliseconds",
+ "weight":50,
+ "param":{
+ "stack":0,
+ "thresholds":[
+ {
+ "color":"#bd00f2",
+ "id":"7897043"
+ }
+ ],
+ "thresholdShow":true,
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "legend":{
+ "values":[
+ "avg",
+ "max"
+ ],
+ "show":true,
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":false,
+ "visibility":false,
+ "legend":true,
+ "valueMapping":false
+ },
+ "showHeader":1,
+ "link":"",
+ "valueMapping":[],
+ "nullType":"null"
+ },
+ "remark":"ResourceManager GC duration",
+ "x":"6.0",
+ "y":"9.0",
+ "elements":[
+ {
+ "expression":"sum by(asset,name) (irate(java_lang_GarbageCollector_CollectionTime{module=~\".*Yarn\",endpoint=~\".*ResourceManager.*\",name=~\"PS MarkSweep|PS Scavenge\"}[5m]))",
+ "state":1,
+ "legend":"{{asset}}-{{name}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"NodeManager GC Duration",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"milliseconds",
+ "weight":51,
+ "param":{
+ "stack":0,
+ "thresholds":[
+ {
+ "color":"#bd00f2",
+ "id":"7ba98f29"
+ }
+ ],
+ "thresholdShow":true,
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "legend":{
+ "values":[
+ "avg",
+ "max"
+ ],
+ "show":true,
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":false,
+ "visibility":false,
+ "legend":true,
+ "valueMapping":false
+ },
+ "showHeader":1,
+ "link":"",
+ "valueMapping":[],
+ "nullType":"null"
+ },
+ "remark":"NodeManager GC duration",
+ "x":"6.0",
+ "y":"15.0",
+ "elements":[
+ {
+ "expression":"sum by(asset,name) (irate(java_lang_GarbageCollector_CollectionTime{module=~\".*Yarn\",endpoint=~\".*NodeManager.*\",name=~\"PS MarkSweep|PS Scavenge\"}[5m]))",
+ "state":1,
+ "legend":"{{asset}}-{{name}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ }
+ ]
+ },
+ {
+ "name":"Druid Status",
+ "span":"12.0",
+ "height":"3.0",
+ "type":"Group",
+ "unit":"short",
+ "weight":52,
+ "param":{
+ "collapse":true,
+ "nullType":"null"
+ },
+ "remark":"",
+ "x":"0.0",
+ "y":"23.0",
+ "datasource":"misc",
+ "children":[
+ {
+ "name":"Druid Consumer Lag",
+ "span":"12.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"short",
+ "weight":53,
+ "param":{
+ "stack":false,
+ "legend":{
+ "values":[
+ "avg",
+ "max"
+ ],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "legend":true,
+ "valueMapping":true
+ },
+ "style":"line",
+ "nullType":"zero"
+ },
+ "remark":"The Consumer Lag in each Datasource. Consumer Lag tells us how far behind each Consumer (Group) is in each Partition.  The smaller the lag the more real-time the data consumption.",
+ "x":"0.0",
+ "y":"0.0",
+ "elements":[
+ {
+ "expression":"sum by(dataSource)(ingest_kafka_lag{service=\"coordinator\"})",
+ "state":1,
+ "legend":"{{dataSource}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Index Status",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"short",
+ "weight":54,
+ "param":{
+ "rightYAxis":{
+ "elementNames":[],
+ "unit":2,
+ "style":"line",
+ "label":""
+ },
+ "stack":false,
+ "thresholds":[],
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "legend":{
+ "values":[],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "visibility":false,
+ "legend":true,
+ "valueMapping":true
+ },
+ "link":"",
+ "style":"line",
+ "valueMapping":[],
+ "nullType":"zero"
+ },
+ "remark":"",
+ "x":"0.0",
+ "y":"13.0",
+ "elements":[
+ {
+ "expression":"avg(druid_index_running_task_num{module=\"OLAP-Node-Exporter\"})",
+ "state":1,
+ "legend":"Running",
+ "name":"A"
+ },
+ {
+ "expression":"avg(druid_index_waiting_task_num{module=\"OLAP-Node-Exporter\"})",
+ "state":1,
+ "legend":"Waiting",
+ "name":"B"
+ },
+ {
+ "expression":"avg(druid_index_pending_task_num{module=\"OLAP-Node-Exporter\"})",
+ "state":1,
+ "legend":"Pending",
+ "name":"C"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Compact Status",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"short",
+ "weight":55,
+ "param":{
+ "stack":false,
+ "legend":{
+ "values":[],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "legend":true,
+ "valueMapping":true
+ },
+ "style":"line",
+ "nullType":"zero"
+ },
+ "remark":"",
+ "x":"6.0",
+ "y":"13.0",
+ "elements":[
+ {
+ "expression":"druid_compact_waiting_task_num{module=\"OLAP-Node-Exporter\"}",
+ "state":1,
+ "legend":"Waiting",
+ "name":"A"
+ },
+ {
+ "expression":"druid_compact_pending_task_num{module=\"OLAP-Node-Exporter\"}",
+ "state":1,
+ "legend":"Pending",
+ "name":"B"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Segments",
+ "span":"6.0",
+ "height":"4.0",
+ "type":"Table",
+ "unit":"short",
+ "weight":56,
+ "param":{
+ "enable":{
+ "thresholds":true,
+ "legend":true,
+ "valueMapping":false
+ },
+ "columns":[
+ {
+ "unit":2,
+ "display":"{{A.$legend}}",
+ "show":true,
+ "title":"Element",
+ "error":false
+ },
+ {
+ "unit":1,
+ "display":"{{A.$value}}",
+ "show":true,
+ "title":"Value",
+ "error":false
+ }
+ ],
+ "legend":{
+ "values":[],
+ "show":true,
+ "placement":"bottom"
+ },
+ "valueMapping":[
+ {
+ "color":{
+ "bac":"#fff",
+ "text":"#000"
+ },
+ "columns":"Value",
+ "show":false,
+ "text":"",
+ "type":"value",
+ "error":true,
+ "value":0
+ }
+ ],
+ "nullType":"null",
+ "statistics":"last"
+ },
+ "remark":"The segment number in each Datasource.",
+ "x":"0.0",
+ "y":"3.0",
+ "elements":[
+ {
+ "expression":"sum by (dataSource) (coordinator_segment_count{service=\"coordinator\"})",
+ "state":1,
+ "legend":"{{dataSource}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Segment Size",
+ "span":"6.0",
+ "height":"4.0",
+ "type":"Table",
+ "unit":"bytes",
+ "weight":57,
+ "param":{
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "enable":{
+ "thresholds":true,
+ "visibility":false,
+ "legend":true,
+ "valueMapping":false
+ },
+ "columns":[
+ {
+ "unit":2,
+ "display":"{{A.$legend}}",
+ "show":true,
+ "title":"Element",
+ "error":false
+ },
+ {
+ "unit":7,
+ "display":"{{A.$value}}",
+ "show":true,
+ "title":"Value",
+ "error":false
+ }
+ ],
+ "legend":{
+ "values":[],
+ "show":true,
+ "placement":"bottom"
+ },
+ "valueMapping":[
+ {
+ "regx":">1000",
+ "color":{
+ "bac":"#C4162AFF",
+ "icon":"#d16eefFF",
+ "text":"#000"
+ },
+ "columns":"Value",
+ "display":"Warn",
+ "show":true,
+ "column":"Value",
+ "from":100,
+ "text":"",
+ "to":100000000,
+ "type":"range",
+ "error":false
+ }
+ ],
+ "nullType":"null",
+ "statistics":"last"
+ },
+ "remark":"The total segment size in each Datasource.",
+ "x":"6.0",
+ "y":"3.0",
+ "elements":[
+ {
+ "expression":"sum by(dataSource)(coordinator_segment_size{service=\"coordinator\"})",
+ "state":1,
+ "legend":"{{dataSource}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Query CPU Usage",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"percent(0-100)",
+ "weight":58,
+ "param":{
+ "stack":false,
+ "thresholds":[
+ {
+ "val":"0.8",
+ "color":"#d64f40",
+ "value":80
+ }
+ ],
+ "legend":{
+ "values":[
+ "min",
+ "avg",
+ "last",
+ "max"
+ ],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "legend":true,
+ "valueMapping":true
+ },
+ "style":"line",
+ "nullType":"zero"
+ },
+ "remark":"The CPU usage in each asset.",
+ "x":"6.0",
+ "y":"7.0",
+ "elements":[
+ {
+ "expression":"(1-avg(irate(node_cpu_seconds_total{mode=\"idle\",olap_node_exporter=~\".*druid_query.*\"}[5m])) by(asset)) * 100",
+ "state":1,
+ "legend":"{{asset}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Query Memory Usage",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"percent(0.0-1.0)",
+ "weight":59,
+ "param":{
+ "stack":false,
+ "thresholds":[
+ {
+ "val":"0.8",
+ "color":"#d64f40",
+ "value":80
+ }
+ ],
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "legend":{
+ "values":[
+ "avg",
+ "last",
+ "max"
+ ],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "visibility":false,
+ "legend":true,
+ "valueMapping":true
+ },
+ "style":"line",
+ "nullType":"zero"
+ },
+ "remark":"The memory usage in each asset. These nodes run the Coordinator and Overlord processes, which manage data availability and ingestion.",
+ "x":"0.0",
+ "y":"7.0",
+ "elements":[
+ {
+ "expression":"1 - (node_memory_MemAvailable_bytes{olap_node_exporter=~\".*druid_query.*\"} / node_memory_MemTotal_bytes{olap_node_exporter=~\".*druid_query.*\"}) ",
+ "state":1,
+ "legend":"{{asset}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Data Memory Usage",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"percent(0.0-1.0)",
+ "weight":60,
+ "param":{
+ "stack":false,
+ "thresholds":[
+ {
+ "val":"80",
+ "color":"#d64f40",
+ "value":80
+ }
+ ],
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "legend":{
+ "values":[],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "visibility":false,
+ "legend":true,
+ "valueMapping":true
+ },
+ "style":"line",
+ "nullType":"zero"
+ },
+ "remark":"The memory usage in each asset. These nodes run the Historical and MiddleManager processes, which execute ingestion workloads and store all queryable data.",
+ "x":"0.0",
+ "y":"10.0",
+ "elements":[
+ {
+ "expression":"1 - (node_memory_MemAvailable_bytes{olap_node_exporter=~\".*druid_data.*\"} / node_memory_MemTotal_bytes{olap_node_exporter=~\".*druid_data.*\"}) ",
+ "state":1,
+ "legend":"{{asset}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Data CPU Usage",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"percent(0-100)",
+ "weight":61,
+ "param":{
+ "stack":false,
+ "thresholds":[
+ {
+ "val":"90",
+ "color":"#d64f40",
+ "value":80
+ }
+ ],
+ "legend":{
+ "values":[],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "legend":true,
+ "valueMapping":true
+ },
+ "style":"line",
+ "nullType":"connected"
+ },
+ "remark":"The CPU usage in each asset.",
+ "x":"6.0",
+ "y":"10.0",
+ "elements":[
+ {
+ "expression":"(1-avg(irate(node_cpu_seconds_total{mode=\"idle\",olap_node_exporter=~\".*druid_data.*\"}[5m])) by(asset)) * 100",
+ "state":1,
+ "legend":"{{asset}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Data Network receive Bytes",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"bytes",
+ "weight":62,
+ "param":{
+ "rightYAxis":{
+ "elementNames":[],
+ "unit":2,
+ "style":"line",
+ "label":""
+ },
+ "stack":false,
+ "thresholds":[],
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "legend":{
+ "values":[],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "visibility":false,
+ "legend":true,
+ "valueMapping":true
+ },
+ "link":"",
+ "style":"line",
+ "valueMapping":[],
+ "nullType":"connected"
+ },
+ "remark":"Incoming byte rate on each Druid-Data node while collecting and processing Traffic Records.",
+ "x":"0.0",
+ "y":"16.0",
+ "elements":[
+ {
+ "expression":"irate(node_network_receive_bytes_total{olap_node_exporter=~\".*druid_data.*\",device=\"em1\"}[5m])",
+ "state":1,
+ "legend":"{{asset}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Data Network send Bytes",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"bytes",
+ "weight":63,
+ "param":{
+ "rightYAxis":{
+ "elementNames":[],
+ "unit":2,
+ "style":"line",
+ "label":""
+ },
+ "stack":false,
+ "thresholds":[],
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "legend":{
+ "values":[
+ "avg",
+ "max"
+ ],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "visibility":false,
+ "legend":true,
+ "valueMapping":true
+ },
+ "link":"",
+ "style":"line",
+ "valueMapping":[],
+ "nullType":"connected"
+ },
+ "remark":"Outgoing byte rate on each Druid-Data node while collecting and processing Traffic Records.",
+ "x":"6.0",
+ "y":"16.0",
+ "elements":[
+ {
+ "expression":"irate(node_network_transmit_bytes_total{olap_node_exporter=~\".*druid_data.*\",device=\"em1\"}[5m])",
+ "state":1,
+ "legend":"{{asset}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ }
+ ]
+ },
+ {
+ "name":"Clickhouse Status",
+ "span":"12.0",
+ "height":"1.0",
+ "type":"Group",
+ "unit":"short",
+ "weight":64,
+ "param":{
+ "enable":{
+ "thresholds":false,
+ "legend":false,
+ "valueMapping":false
+ },
+ "collapse":true,
+ "nullType":"null"
+ },
+ "remark":"",
+ "x":"0.0",
+ "y":"24.0",
+ "datasource":"misc",
+ "children":[
+ {
+ "name":"Insert Rows /sec",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"short",
+ "weight":65,
+ "param":{
+ "stack":false,
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "legend":{
+ "values":[
+ "total",
+ "avg",
+ "last",
+ "max"
+ ],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "visibility":false,
+ "legend":true,
+ "valueMapping":true
+ },
+ "style":"line",
+ "nullType":"zero"
+ },
+ "remark":"Number of rows inserted to all tables",
+ "x":"0.0",
+ "y":"0.0",
+ "elements":[
+ {
+ "expression":"sum by(datacenter) (irate(clickhouse_inserted_rows_total[5m]))",
+ "state":1,
+ "legend":"{{datacenter}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Insert Bytes /sec",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"bytes/sec",
+ "weight":66,
+ "param":{
+ "stack":false,
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "legend":{
+ "values":[
+ "total",
+ "avg",
+ "last",
+ "max"
+ ],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "visibility":false,
+ "legend":true,
+ "valueMapping":true
+ },
+ "style":"line",
+ "nullType":"zero"
+ },
+ "remark":"Number of uncompressed bytes inserted to all tables in each Data center.",
+ "x":"6.0",
+ "y":"0.0",
+ "elements":[
+ {
+ "expression":"sum by(datacenter) (irate(clickhouse_inserted_bytes_total[5m]))",
+ "state":1,
+ "legend":"{{datacenter}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Total Requests /sec",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"short",
+ "weight":67,
+ "param":{
+ "stack":false,
+ "thresholds":[
+ {
+ "color":"#c1e835"
+ }
+ ],
+ "thresholdShow":true,
+ "legend":{
+ "values":[],
+ "show":true,
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":false,
+ "legend":true,
+ "valueMapping":false
+ },
+ "nullType":"zero"
+ },
+ "remark":"The total requests per second in each datacenter.",
+ "x":"0.0",
+ "y":"3.0",
+ "elements":[
+ {
+ "expression":"sum by(datacenter) (irate(request_sum_total[5m]))",
+ "state":1,
+ "legend":"{{datacenter}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Merged rate /sec",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"short",
+ "weight":68,
+ "param":{
+ "stack":false,
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "legend":{
+ "values":[
+ "min",
+ "avg",
+ "last",
+ "max"
+ ],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "visibility":false,
+ "legend":true,
+ "valueMapping":true
+ },
+ "style":"line",
+ "nullType":"zero"
+ },
+ "remark":"The merging data parts per second in each Data center.",
+ "x":"0.0",
+ "y":"6.0",
+ "elements":[
+ {
+ "expression":"sum by(datacenter) (irate(clickhouse_merge[5m]))",
+ "state":1,
+ "legend":"{{datacenter}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Bad Requests /sec",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"short",
+ "weight":69,
+ "param":{
+ "stack":false,
+ "legend":{
+ "values":[],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "legend":true,
+ "valueMapping":true
+ },
+ "style":"line",
+ "nullType":"zero"
+ },
+ "remark":"The bad requests per second in each Data center.",
+ "x":"6.0",
+ "y":"3.0",
+ "elements":[
+ {
+ "expression":"sum by(asset) (irate(bad_requests_total[5m]))",
+ "state":1,
+ "legend":"{{asset}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Memory Usage",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"percent(0.0-1.0)",
+ "weight":70,
+ "param":{
+ "stack":false,
+ "legend":{
+ "values":[
+ "avg",
+ "last",
+ "max"
+ ],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "legend":true,
+ "valueMapping":true
+ },
+ "style":"line",
+ "nullType":"zero"
+ },
+ "remark":"The memory usage in each asset.",
+ "x":"6.0",
+ "y":"15.0",
+ "elements":[
+ {
+ "expression":"1 - (node_memory_MemAvailable_bytes{olap_node_exporter=~\".*clickhouse.*\"} / node_memory_MemTotal_bytes{olap_node_exporter=~\".*clickhouse.*\"}) ",
+ "state":1,
+ "legend":"{{asset}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"CPU Usage",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"percent(0.0-1.0)",
+ "weight":71,
+ "param":{
+ "stack":false,
+ "legend":{
+ "values":[
+ "avg",
+ "last",
+ "max"
+ ],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "legend":true,
+ "valueMapping":true
+ },
+ "style":"line",
+ "nullType":"zero"
+ },
+ "remark":"The CPU usage in each asset.",
+ "x":"0.0",
+ "y":"15.0",
+ "elements":[
+ {
+ "expression":"(1-avg(irate(node_cpu_seconds_total{mode=\"idle\",olap_node_exporter=~\".*clickhouse.*\"}[5m])) by(asset))",
+ "state":1,
+ "legend":"{{asset}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"TCP Connection Time",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"seconds(s)",
+ "weight":72,
+ "param":{
+ "stack":false,
+ "legend":{
+ "values":[
+ "max",
+ "avg"
+ ],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "legend":true,
+ "valueMapping":true
+ },
+ "style":"line",
+ "nullType":"zero"
+ },
+ "remark":"The current TCP connection time in each asset.",
+ "x":"6.0",
+ "y":"9.0",
+ "elements":[
+ {
+ "expression":"max by(asset) (ck_connect_time{olap_node_exporter=~\".*clickhouse.*\"})",
+ "state":1,
+ "legend":"{{asset}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Active Query Count",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"short",
+ "weight":73,
+ "param":{
+ "stack":false,
+ "thresholds":[
+ {
+ "color":"#7dd1ed"
+ }
+ ],
+ "thresholdShow":true,
+ "legend":{
+ "values":[
+ "max",
+ "avg"
+ ],
+ "show":true,
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":false,
+ "legend":true,
+ "valueMapping":false
+ },
+ "nullType":"zero"
+ },
+ "remark":"The current number of queries in each asset.",
+ "x":"0.0",
+ "y":"9.0",
+ "elements":[
+ {
+ "expression":"max by(asset) (ck_processes_count{olap_node_exporter=~\".*clickhouse.*\"})",
+ "state":1,
+ "legend":"{{asset}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Mutation part Count",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"short",
+ "weight":74,
+ "param":{
+ "stack":false,
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "legend":{
+ "values":[
+ "avg",
+ "last",
+ "max"
+ ],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "visibility":false,
+ "legend":true,
+ "valueMapping":true
+ },
+ "style":"line",
+ "nullType":"zero"
+ },
+ "remark":"The current number of mutation parts in each asset.",
+ "x":"6.0",
+ "y":"12.0",
+ "elements":[
+ {
+ "expression":"max by(asset)(clickhouse_part_mutation)",
+ "state":1,
+ "legend":"{{asset}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Asset Disk IO Usage",
+ "span":"6.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"percent(0-100)",
+ "weight":75,
+ "param":{
+ "stack":false,
+ "thresholds":[],
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "legend":{
+ "values":[
+ "avg",
+ "last",
+ "max"
+ ],
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "visibility":false,
+ "legend":true,
+ "valueMapping":true
+ },
+ "link":"",
+ "style":"line",
+ "valueMapping":[],
+ "nullType":"zero"
+ },
+ "remark":"The maximum percentage of DISK IO in each data center and asset.",
+ "x":"0.0",
+ "y":"18.0",
+ "elements":[
+ {
+ "expression":"sum by(asset)(rate(node_disk_io_time_seconds_total{olap_node_exporter=~\".*clickhouse.*\"}[5m]))*100",
+ "state":1,
+ "legend":"{{asset}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ }
+ ]
+ },
+ {
+ "name":"HBase Status",
+ "span":"12.0",
+ "height":"1.0",
+ "type":"Group",
+ "unit":"short",
+ "weight":76,
+ "param":{
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "enable":{
+ "thresholds":false,
+ "legend":false,
+ "valueMapping":false
+ },
+ "collapse":true,
+ "nullType":"null"
+ },
+ "remark":"",
+ "x":"0.0",
+ "y":"24.0",
+ "datasource":"misc",
+ "children":[
+ {
+ "name":"Request /sec",
+ "span":"4.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"short",
+ "weight":77,
+ "param":{
+ "rightYAxis":{
+ "elementNames":[],
+ "unit":2,
+ "style":"line",
+ "label":""
+ },
+ "stack":0,
+ "thresholds":[
+ {
+ "color":"#0c4dff",
+ "id":"25815c1f"
+ }
+ ],
+ "thresholdShow":true,
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "legend":{
+ "values":[
+ "avg",
+ "max"
+ ],
+ "show":true,
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":false,
+ "visibility":false,
+ "legend":true,
+ "valueMapping":false
+ },
+ "link":"",
+ "valueMapping":[],
+ "nullType":"null"
+ },
+ "remark":"Number of requests per second of each type: Read, Write, and Total.",
+ "x":"0.0",
+ "y":"0.0",
+ "elements":[
+ {
+ "expression":"sum by (module) (irate(Hadoop_HBase_totalRequestCount{module=\"DC-HBase\"}[5m]))",
+ "state":1,
+ "legend":"Total",
+ "name":"A"
+ },
+ {
+ "expression":"sum by (module)(irate(Hadoop_HBase_readRequestCount{module=\"DC-HBase\"}[5m]))",
+ "state":1,
+ "legend":"Read",
+ "name":"B"
+ },
+ {
+ "expression":"sum by (module)(irate(Hadoop_HBase_writeRequestCount{module=\"DC-HBase\"}[5m]))",
+ "state":1,
+ "legend":"Write",
+ "name":"C"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Region Count",
+ "span":"4.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"short",
+ "weight":78,
+ "param":{
+ "stack":0,
+ "thresholds":[
+ {
+ "color":"#0c4dff"
+ }
+ ],
+ "thresholdShow":true,
+ "legend":{
+ "values":[],
+ "show":true,
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":false,
+ "legend":true,
+ "valueMapping":false
+ },
+ "showHeader":true,
+ "nullType":"null"
+ },
+ "remark":"Number of HBase cluster regions",
+ "x":"0.0",
+ "y":"3.0",
+ "elements":[
+ {
+ "expression":"sum by (datacenter) (Hadoop_HBase_regionCount{module=~\".*HBase\"})",
+ "state":1,
+ "legend":"{{datacenter}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"RitRegion(Unhealthy) Count",
+ "span":"4.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"short",
+ "weight":79,
+ "param":{
+ "stack":0,
+ "thresholds":[
+ {
+ "color":"#0c4dff"
+ }
+ ],
+ "thresholdShow":true,
+ "legend":{
+ "values":[],
+ "show":true,
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":false,
+ "legend":true,
+ "valueMapping":false
+ },
+ "showHeader":true,
+ "nullType":"null"
+ },
+ "remark":"Number of HBase cluster RIT (Region In Transition) regions.",
+ "x":"4.0",
+ "y":"3.0",
+ "elements":[
+ {
+ "expression":"sum by (datacenter) (Hadoop_HBase_ritCount{module=~\".*HBase\"})",
+ "state":1,
+ "legend":"{{datacenter}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Memory Usage",
+ "span":"4.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"percent(0.0-1.0)",
+ "weight":80,
+ "param":{
+ "stack":0,
+ "thresholds":[
+ {
+ "color":"#C4162AFF",
+ "value":80
+ }
+ ],
+ "thresholdShow":true,
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "legend":{
+ "values":[
+ "avg",
+ "max"
+ ],
+ "show":true,
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "visibility":false,
+ "legend":true,
+ "valueMapping":false
+ },
+ "showHeader":true,
+ "nullType":"null"
+ },
+ "remark":"The Memory usage in each asset.",
+ "x":"8.0",
+ "y":"0.0",
+ "elements":[
+ {
+ "expression":"((Hadoop_HBase_MemHeapUsedM{module=~\".*HBase\"})/((Hadoop_HBase_MemHeapMaxM{module=~\".*HBase\"})))",
+ "state":1,
+ "legend":"{{endpoint}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"CPU Usage",
+ "span":"4.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"percent(0.0-1.0)",
+ "weight":81,
+ "param":{
+ "stack":0,
+ "thresholds":[
+ {
+ "color":"#C4162AFF",
+ "value":80
+ }
+ ],
+ "thresholdShow":true,
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "legend":{
+ "values":[
+ "avg",
+ "max"
+ ],
+ "show":true,
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "visibility":false,
+ "legend":true,
+ "valueMapping":false
+ },
+ "showHeader":true,
+ "nullType":"null"
+ },
+ "remark":"The CPU usage in each asset.",
+ "x":"4.0",
+ "y":"0.0",
+ "elements":[
+ {
+ "expression":"sum by (asset)(java_lang_OperatingSystem_ProcessCpuLoad{module=~\".*HBase\"})",
+ "state":1,
+ "legend":"{{asset}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Request Queue",
+ "span":"4.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"short",
+ "weight":82,
+ "param":{
+ "stack":0,
+ "thresholds":[
+ {
+ "color":"#0c4dff"
+ }
+ ],
+ "thresholdShow":true,
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "legend":{
+ "values":[],
+ "show":true,
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":false,
+ "visibility":false,
+ "legend":true,
+ "valueMapping":false
+ },
+ "showHeader":true,
+ "nullType":"null"
+ },
+ "remark":"Number of requests in the HBase request queue.",
+ "x":"8.0",
+ "y":"3.0",
+ "elements":[
+ {
+ "expression":"sum by(datacenter)(Hadoop_HBase_numCallsInGeneralQueue{name=\"RegionServer\"})",
+ "state":1,
+ "legend":"{{datacenter}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Slow request",
+ "span":"4.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"short",
+ "weight":83,
+ "param":{
+ "stack":0,
+ "thresholds":[
+ {
+ "color":"#0c4dff"
+ }
+ ],
+ "thresholdShow":true,
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "legend":{
+ "values":[],
+ "show":true,
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":false,
+ "visibility":false,
+ "legend":true,
+ "valueMapping":false
+ },
+ "showHeader":true,
+ "nullType":"null"
+ },
+ "remark":"Number of slow requests (put,append,get) in the HBase cluster.",
+ "x":"0.0",
+ "y":"6.0",
+ "elements":[
+ {
+ "expression":"sum(irate(Hadoop_HBase_slowPutCount{name=\"RegionServer\",sub=\"Server\"}[5m])) by (datacenter)",
+ "state":1,
+ "legend":"put-{{datacenter}}",
+ "name":"A"
+ },
+ {
+ "expression":"sum(irate(Hadoop_HBase_slowAppendCount{name=\"RegionServer\",sub=\"Server\"}[5m])) by (datacenter)",
+ "state":1,
+ "legend":"append-{{datacenter}}",
+ "name":"B"
+ },
+ {
+ "expression":"sum(irate(Hadoop_HBase_slowGetCount{name=\"RegionServer\",sub=\"Server\"}[5m])) by (datacenter)",
+ "state":1,
+ "legend":"get-{{datacenter}}",
+ "name":"C"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Memstore Size",
+ "span":"4.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"bytes",
+ "weight":84,
+ "param":{
+ "stack":0,
+ "thresholds":[
+ {
+ "color":"#0c4dff"
+ }
+ ],
+ "thresholdShow":true,
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "legend":{
+ "values":[],
+ "show":true,
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":false,
+ "visibility":false,
+ "legend":false,
+ "valueMapping":false
+ },
+ "showHeader":true,
+ "nullType":"null"
+ },
+ "remark":"Size of the data held in memory (memstore) on each HBase node.",
+ "x":"4.0",
+ "y":"6.0",
+ "elements":[
+ {
+ "expression":"sum by (asset) (Hadoop_HBase_metric_memStoreSize)",
+ "state":1,
+ "legend":"{{asset}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"GC Count /sec",
+ "span":"4.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"none",
+ "weight":85,
+ "param":{
+ "stack":0,
+ "thresholds":[
+ {
+ "color":"#0c4dff"
+ }
+ ],
+ "thresholdShow":true,
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "legend":{
+ "values":[],
+ "show":true,
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":false,
+ "visibility":false,
+ "legend":false,
+ "valueMapping":false
+ },
+ "showHeader":true,
+ "nullType":"null"
+ },
+ "remark":"Number of GCs per second for the HBase process.",
+ "x":"8.0",
+ "y":"6.0",
+ "elements":[
+ {
+ "expression":"sum by (datacenter)(irate(Hadoop_HBase_GcCount[5m]))",
+ "state":1,
+ "legend":"GCCount-{{datacenter}}",
+ "name":"A"
+ },
+ {
+ "expression":"sum by (datacenter)(irate(Hadoop_HBase_GcCountParNew[5m]))",
+ "state":1,
+ "legend":"ParNewGcCount-{{datacenter}}",
+ "name":"B"
+ },
+ {
+ "expression":"sum by (datacenter)(irate(Hadoop_HBase_GcCountConcurrentMarkSweep[5m]))",
+ "state":1,
+ "legend":"CMSGcCount-{{datacenter}}",
+ "name":"C"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"GC Time",
+ "span":"4.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"milliseconds",
+ "weight":86,
+ "param":{
+ "stack":0,
+ "thresholds":[
+ {
+ "color":"#0c4dff"
+ }
+ ],
+ "thresholdShow":true,
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "legend":{
+ "values":[],
+ "show":true,
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":false,
+ "visibility":false,
+ "legend":false,
+ "valueMapping":false
+ },
+ "showHeader":true,
+ "nullType":"null"
+ },
+ "remark":"The GC duration of the HBase process.",
+ "x":"0.0",
+ "y":"9.0",
+ "elements":[
+ {
+ "expression":"sum by (datacenter)(irate(Hadoop_HBase_GcCount[5m]))",
+ "state":1,
+ "legend":"GC-{{datacenter}}",
+ "name":"A"
+ },
+ {
+ "expression":"sum by (datacenter)(irate(Hadoop_HBase_GcCountParNew[5m]))",
+ "state":1,
+ "legend":"ParNewGc-{{datacenter}}",
+ "name":"B"
+ },
+ {
+ "expression":"sum by (datacenter)(irate(Hadoop_HBase_GcCountConcurrentMarkSweep[5m]))",
+ "state":1,
+ "legend":"CMSGc-{{datacenter}}",
+ "name":"C"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Put Request By HOS Table /sec",
+ "span":"4.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"short",
+ "weight":87,
+ "param":{
+ "stack":0,
+ "thresholds":[
+ {
+ "color":"#0c4dff"
+ }
+ ],
+ "thresholdShow":true,
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "legend":{
+ "values":[],
+ "show":true,
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":false,
+ "visibility":false,
+ "legend":false,
+ "valueMapping":false
+ },
+ "showHeader":true,
+ "nullType":"null"
+ },
+ "remark":"Number of put requests per second for each HOS table.",
+ "x":"4.0",
+ "y":"9.0",
+ "elements":[
+ {
+ "expression":"sum by (table) (irate(Hadoop_HBase_metric_putCount{namespace=\"default\",type=\"data\",module=~\".*HBase\"}[2m]))",
+ "state":1,
+ "legend":"{{table}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Get Request By HOS Table /sec",
+ "span":"4.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"short",
+ "weight":88,
+ "param":{
+ "stack":0,
+ "thresholds":[
+ {
+ "color":"#0c4dff"
+ }
+ ],
+ "thresholdShow":true,
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "legend":{
+ "values":[],
+ "show":true,
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":false,
+ "visibility":false,
+ "legend":false,
+ "valueMapping":false
+ },
+ "showHeader":true,
+ "nullType":"null"
+ },
+ "remark":"Number of get requests per second for each HOS table.",
+ "x":"8.0",
+ "y":"9.0",
+ "elements":[
+ {
+ "expression":"sum by (table) (irate(Hadoop_HBase_metric_getCount{namespace=\"default\",type=\"data\",module=~\".*HBase\"}[2m]))",
+ "state":1,
+ "legend":"{{table}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Append Request by HOS Table /sec",
+ "span":"4.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"short",
+ "weight":89,
+ "param":{
+ "stack":0,
+ "thresholds":[
+ {
+ "color":"#0c4dff"
+ }
+ ],
+ "thresholdShow":true,
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "legend":{
+ "values":[],
+ "show":true,
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":false,
+ "visibility":false,
+ "legend":false,
+ "valueMapping":false
+ },
+ "showHeader":true,
+ "nullType":"null"
+ },
+ "remark":"Number of append requests per second for each HOS table.",
+ "x":"0.0",
+ "y":"12.0",
+ "elements":[
+ {
+ "expression":"sum by (table) (irate(Hadoop_HBase_metric_appendCount{namespace=\"default\",type=\"data\",module=~\".*HBase\"}[2m]))",
+ "state":1,
+ "legend":"{{table}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"HOS TTL Scan Count by Table /sec",
+ "span":"4.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"short",
+ "weight":90,
+ "param":{
+ "stack":0,
+ "thresholds":[
+ {
+ "color":"#0c4dff"
+ }
+ ],
+ "thresholdShow":true,
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "legend":{
+ "values":[],
+ "show":true,
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":false,
+ "visibility":false,
+ "legend":false,
+ "valueMapping":false
+ },
+ "showHeader":true,
+ "nullType":"null"
+ },
+ "remark":"Number of TTL scans per second for each HOS table.",
+ "x":"4.0",
+ "y":"12.0",
+ "elements":[
+ {
+ "expression":"sum by (table) (irate(Hadoop_HBase_metric_scanCount{namespace=\"default\",type=\"data\",module=~\".*HBase\"}[2m]))",
+ "state":1,
+ "legend":"{{table}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Scan Count by Business Table /sec",
+ "span":"4.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"short",
+ "weight":91,
+ "param":{
+ "stack":0,
+ "thresholds":[
+ {
+ "color":"#0c4dff"
+ }
+ ],
+ "thresholdShow":true,
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "legend":{
+ "values":[],
+ "show":true,
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":false,
+ "visibility":false,
+ "legend":false,
+ "valueMapping":false
+ },
+ "showHeader":true,
+ "nullType":"null"
+ },
+ "remark":"Number of scans per second for each business table.",
+ "x":"8.0",
+ "y":"12.0",
+ "elements":[
+ {
+ "expression":"sum by (table) (irate(Hadoop_HBase_metric_scanCount{namespace=\"tsg_galaxy\",module=~\".*HBase\"}[2m]))",
+ "state":1,
+ "legend":"{{table}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ }
+ ]
+ },
+ {
+ "name":"Hadoop HDFS Status",
+ "span":"12.0",
+ "height":"1.0",
+ "type":"Group",
+ "unit":"short",
+ "weight":92,
+ "param":{
+ "thresholds":[],
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "showHeader":true,
+ "enable":{
+ "thresholds":false,
+ "legend":false,
+ "valueMapping":false
+ },
+ "repeat":{
+ "variable":""
+ },
+ "link":"",
+ "valueMapping":[],
+ "collapse":true,
+ "nullType":"null"
+ },
+ "remark":"",
+ "x":"0.0",
+ "y":"24.0",
+ "datasource":"misc",
+ "children":[
+ {
+ "name":"Storage Capacity Usage",
+ "span":"4.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"percent(0-100)",
+ "weight":93,
+ "param":{
+ "stack":0,
+ "thresholds":[
+ {
+ "color":"#C4162AFF",
+ "value":80
+ }
+ ],
+ "thresholdShow":true,
+ "legend":{
+ "values":[],
+ "show":true,
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":true,
+ "legend":true,
+ "valueMapping":false
+ },
+ "nullType":"null"
+ },
+ "remark":"Hadoop cluster storage capacity usage.",
+ "x":"4.0",
+ "y":"6.0",
+ "elements":[
+ {
+ "expression":"(sum by (datacenter)(Hadoop_NameNode_PercentUsed)) / 2",
+ "state":1,
+ "legend":"{{datacenter}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Namenode Memory Usage",
+ "span":"4.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"percent(0.0-1.0)",
+ "weight":94,
+ "param":{
+ "stack":0,
+ "thresholds":[
+ {
+ "color":"#1c1984"
+ }
+ ],
+ "thresholdShow":true,
+ "legend":{
+ "values":[
+ "avg",
+ "max"
+ ],
+ "show":true,
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":false,
+ "legend":true,
+ "valueMapping":false
+ },
+ "showHeader":true,
+ "nullType":"zero"
+ },
+ "remark":"The Memory usage in each Namenode.",
+ "x":"4.0",
+ "y":"0.0",
+ "elements":[
+ {
+ "expression":"Hadoop_NameNode_MemHeapUsedM{module=~\".*Hadoop\"}/Hadoop_NameNode_MemHeapMaxM{module=~\".*Hadoop\"}",
+ "state":1,
+ "legend":"{{datacenter}}-{{asset}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Datanode Memory Usage",
+ "span":"4.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"percent(0.0-1.0)",
+ "weight":95,
+ "param":{
+ "stack":0,
+ "thresholds":[
+ {
+ "color":"#1c1984"
+ }
+ ],
+ "thresholdShow":true,
+ "legend":{
+ "values":[
+ "avg",
+ "max"
+ ],
+ "show":true,
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":false,
+ "legend":true,
+ "valueMapping":false
+ },
+ "showHeader":true,
+ "nullType":"zero"
+ },
+ "remark":"The Memory usage in each Datanode.",
+ "x":"4.0",
+ "y":"3.0",
+ "elements":[
+ {
+ "expression":"Hadoop_DataNode_MemHeapUsedM{module=~\".*Hadoop\"}/Hadoop_DataNode_MemHeapMaxM{module=~\".*Hadoop\"}",
+ "state":1,
+ "legend":"{{datacenter}}-{{asset}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Missing Blocks",
+ "span":"4.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"short",
+ "weight":96,
+ "param":{
+ "stack":0,
+ "thresholds":[
+ {
+ "color":"#1c1984"
+ }
+ ],
+ "thresholdShow":true,
+ "legend":{
+ "values":[],
+ "show":true,
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":false,
+ "legend":true,
+ "valueMapping":false
+ },
+ "showHeader":true,
+ "nullType":"null"
+ },
+ "remark":"Number of missing blocks in the Hadoop cluster.",
+ "x":"8.0",
+ "y":"3.0",
+ "elements":[
+ {
+ "expression":"(sum by (datacenter)(Hadoop_NameNode_NumberOfMissingBlocks))/2",
+ "state":1,
+ "legend":"{{datacenter}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Namenode CPU Usage",
+ "span":"4.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"percent(0.0-1.0)",
+ "weight":97,
+ "param":{
+ "stack":0,
+ "thresholds":[
+ {
+ "color":"#1c1984"
+ }
+ ],
+ "thresholdShow":true,
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "legend":{
+ "values":[
+ "avg",
+ "max"
+ ],
+ "show":true,
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":false,
+ "visibility":false,
+ "legend":true,
+ "valueMapping":false
+ },
+ "showHeader":true,
+ "nullType":"null"
+ },
+ "remark":"The CPU usage in each Namenode.",
+ "x":"0.0",
+ "y":"0.0",
+ "elements":[
+ {
+ "expression":"java_lang_OperatingSystem_ProcessCpuLoad{module=~\".*Hadoop\",endpoint=~\".*Name.*\"}",
+ "state":1,
+ "legend":"{{asset}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Corrupt Blocks",
+ "span":"4.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"short",
+ "weight":98,
+ "param":{
+ "stack":0,
+ "thresholds":[
+ {
+ "color":"#1c1984"
+ }
+ ],
+ "thresholdShow":true,
+ "legend":{
+ "values":[],
+ "show":true,
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":false,
+ "legend":true,
+ "valueMapping":false
+ },
+ "showHeader":true,
+ "nullType":"zero"
+ },
+ "remark":"Number of blocks with corrupt replicas in the Hadoop cluster.",
+ "x":"8.0",
+ "y":"0.0",
+ "elements":[
+ {
+ "expression":"(sum by (datacenter)(Hadoop_NameNode_CorruptBlocks))/2",
+ "state":1,
+ "legend":"{{datacenter}}",
+ "name":"B"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Under Replicated Blocks",
+ "span":"4.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"short",
+ "weight":99,
+ "param":{
+ "stack":0,
+ "thresholds":[
+ {
+ "color":"#1c1984"
+ }
+ ],
+ "thresholdShow":true,
+ "legend":{
+ "values":[],
+ "show":true,
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":false,
+ "legend":true,
+ "valueMapping":false
+ },
+ "showHeader":true,
+ "nullType":"null"
+ },
+ "remark":"Number of under-replicated blocks in the Hadoop cluster.",
+ "x":"0.0",
+ "y":"6.0",
+ "elements":[
+ {
+ "expression":"(sum by (datacenter)(Hadoop_NameNode_UnderReplicatedBlocks))/2",
+ "state":1,
+ "legend":"{{datacenter}}",
+ "name":"C"
+ }
+ ],
+ "datasource":"metrics"
+ },
+ {
+ "name":"Datanode CPU Usage",
+ "span":"4.0",
+ "height":"3.0",
+ "type":"Line chart",
+ "unit":"percent(0.0-1.0)",
+ "weight":100,
+ "param":{
+ "stack":0,
+ "thresholds":[
+ {
+ "color":"#1c1984"
+ }
+ ],
+ "thresholdShow":true,
+ "visibility":{
+ "result":"",
+ "varName":"",
+ "varValue":"",
+ "operator":""
+ },
+ "legend":{
+ "values":[
+ "avg",
+ "max"
+ ],
+ "show":true,
+ "placement":"bottom"
+ },
+ "enable":{
+ "thresholds":false,
+ "visibility":false,
+ "legend":true,
+ "valueMapping":false
+ },
+ "showHeader":true,
+ "nullType":"null"
+ },
+ "remark":"The CPU usage in each DataNode.",
+ "x":"0.0",
+ "y":"3.0",
+ "elements":[
+ {
+ "expression":"java_lang_OperatingSystem_ProcessCpuLoad{module=~\".*Hadoop\",endpoint=~\".*Data.*\"}",
+ "state":1,
+ "legend":"{{asset}}",
+ "name":"A"
+ }
+ ],
+ "datasource":"metrics"
+ }
+ ]
+ }
+ ],
+ "children":[]
+ }
+] \ No newline at end of file