summary | refs | log | tree | commit | diff
path: root/groot-examples
diff options
context:
space:
mode:
author: doufenghu <[email protected]> 2024-03-16 19:32:42 +0800
committer: doufenghu <[email protected]> 2024-03-16 19:32:42 +0800
commit: 25994fade7720a43021b25004ade13c71f941e88 (patch)
tree: 9c91a57d526f0579f241add718d8cb114ff04468 /groot-examples
parent: 9ff68b2c631606cf06a7001036ff16475c52371c (diff)
[Improve][Docs] Add some help information for connector schema and knowledge base files.
Diffstat (limited to 'groot-examples')
-rw-r--r-- groot-examples/end-to-end-example/src/main/resources/examples/inline_to_clickhouse.yaml | 21
-rw-r--r-- groot-examples/end-to-end-example/src/main/resources/examples/inline_to_print_avro_schema.yaml | 73
-rw-r--r-- groot-examples/end-to-end-example/src/main/resources/examples/schema/sink_schema.json | 302
-rw-r--r-- groot-examples/end-to-end-example/src/main/resources/grootstream.yaml | 17
4 files changed, 412 insertions, 1 deletions
diff --git a/groot-examples/end-to-end-example/src/main/resources/examples/inline_to_clickhouse.yaml b/groot-examples/end-to-end-example/src/main/resources/examples/inline_to_clickhouse.yaml
index 829741d..c7072a4 100644
--- a/groot-examples/end-to-end-example/src/main/resources/examples/inline_to_clickhouse.yaml
+++ b/groot-examples/end-to-end-example/src/main/resources/examples/inline_to_clickhouse.yaml
@@ -27,7 +27,7 @@ sources: # [object] Define connector source
#
# [string] Event Data, it will be parsed to Map<String, Object> by the specified format.
#
- data: '{"recv_time": 1705565615, "tcp_rtt_ms":128,"decoded_as":"HTTP", "http_version":"http1","http_request_line":"GET / HTTP/1.1","http_host":"www.ct.cn","http_url":"www.ct.cn/","http_user_agent":"curl/8.0.1","http_status_code":200,"http_response_line":"HTTP/1.1 200 OK","http_response_content_type":"text/html; charset=UTF-8","http_response_latency_ms":31,"http_session_duration_ms":5451,"in_src_mac":"ba:bb:a7:3c:67:1c","in_dest_mac":"86:dd:7a:8f:ae:e2","out_src_mac":"86:dd:7a:8f:ae:e2","out_dest_mac":"ba:bb:a7:3c:67:1c","tcp_client_isn":678677906,"tcp_server_isn":1006700307,"address_type":4,"client_ip":"192.11.22.22","server_ip":"8.8.8.8","client_port":42751,"server_port":80,"in_link_id":65535,"out_link_id":65535,"start_timestamp_ms":1703646546127,"end_timestamp_ms":1703646551702,"duration_ms":5575,"sent_pkts":97,"sent_bytes":5892,"received_pkts":250,"received_bytes":333931,"tcp_c2s_ip_fragments":0,"tcp_s2c_ip_fragments":0,"tcp_c2s_rtx_pkts":0,"tcp_c2s_rtx_bytes":0,"tcp_s2c_rtx_pkts":0,"tcp_s2c_rtx_bytes":0,"tcp_c2s_o3_pkts":0,"tcp_s2c_o3_pkts":0,"tcp_c2s_lost_bytes":0,"tcp_s2c_lost_bytes":0,"flags":26418,"flags_identify_info":[100,1,100,60,150,100,1,2],"app_transition":"http.1111.test_1_1","decoded_as":"HTTP","server_fqdn":"www.ct.cn","app":"test_1_1","decoded_path":"ETHERNET.IPv4.TCP.http","fqdn_category_list":[1767],"t_vsys_id":1,"vsys_id":1,"session_id":290538039798223400,"tcp_handshake_latency_ms":41,"client_os_desc":"Windows","server_os_desc":"Linux","data_center":"center-xxg-tsgx","device_group":"group-xxg-tsgx","device_tag":"{\"tags\":[{\"tag\":\"data_center\",\"value\":\"center-xxg-tsgx\"},{\"tag\":\"device_group\",\"value\":\"group-xxg-tsgx\"}]}","device_id":"9800165603247024","sled_ip":"192.168.40.39","dup_traffic_flag":0}'
+ data: '{"recv_time": 1705565615, "tcp_rtt_ms":128,"decoded_as":"HTTP", "http_version":"http1","http_request_line":"GET / HTTP/1.1","http_host":"www.ct.cn","http_url":"www.ct.cn/","http_user_agent":"curl/8.0.1","http_status_code":200,"http_response_line":"HTTP/1.1 200 OK","http_response_content_type":"text/html; charset=UTF-8","http_response_latency_ms":31,"http_session_duration_ms":5451,"in_src_mac":"ba:bb:a7:3c:67:1c","in_dest_mac":"86:dd:7a:8f:ae:e2","out_src_mac":"86:dd:7a:8f:ae:e2","out_dest_mac":"ba:bb:a7:3c:67:1c","tcp_client_isn":678677906,"tcp_server_isn":1006700307,"address_type":4,"client_ip":"86.111.175.128","server_ip":"12.16.98.1","client_port":42751,"server_port":80,"in_link_id":65535,"out_link_id":65535,"start_timestamp_ms":1703646546127,"end_timestamp_ms":1703646551702,"duration_ms":5575,"sent_pkts":97,"sent_bytes":5892,"received_pkts":250,"received_bytes":333931,"tcp_c2s_ip_fragments":0,"tcp_s2c_ip_fragments":0,"tcp_c2s_rtx_pkts":0,"tcp_c2s_rtx_bytes":0,"tcp_s2c_rtx_pkts":0,"tcp_s2c_rtx_bytes":0,"tcp_c2s_o3_pkts":0,"tcp_s2c_o3_pkts":0,"tcp_c2s_lost_bytes":0,"tcp_s2c_lost_bytes":0,"flags":26418,"flags_identify_info":[100,1,100,60,150,100,1,2],"app_transition":"http.1111.test_1_1","decoded_as":"HTTP","server_fqdn":"www.ct.cn","app":"test_1_1","decoded_path":"ETHERNET.IPv4.TCP.http","fqdn_category_list":[1767],"t_vsys_id":1,"vsys_id":1,"session_id":290538039798223400,"tcp_handshake_latency_ms":41,"client_os_desc":"Windows","server_os_desc":"Linux","data_center":"center-xxg-tsgx","device_group":"group-xxg-tsgx","device_tag":"{\"tags\":[{\"tag\":\"data_center\",\"value\":\"center-xxg-tsgx\"},{\"tag\":\"device_group\",\"value\":\"group-xxg-tsgx\"}]}","device_id":"9800165603247024","sled_ip":"192.168.40.39","dup_traffic_flag":0}'
format: json
json.ignore.parse.errors: false
@@ -44,10 +44,29 @@ processing_pipelines:
output_fields: [ recv_time ]
parameters:
precision: seconds
+ - function: DOMAIN
+ lookup_fields: [ http_host,ssl_sni,dtls_sni,quic_sni ]
+ output_fields: [ server_domain ]
+ parameters:
+ option: FIRST_SIGNIFICANT_SUBDOMAIN
+ - function: ASN_LOOKUP
+ lookup_fields: [ server_ip ]
+ output_fields: [ server_asn ]
+ parameters:
+ option: IP_TO_ASN
+ kb_name: tsg_ip_asn
+ - function: GEOIP_LOOKUP
+ lookup_fields: [ client_ip ]
+ output_fields: [ client_geolocation ]
+ parameters:
+ kb_name: tsg_ip_location
+ option: IP_TO_COUNTRY
sinks:
clickhouse_sink:
type: clickhouse
+ schema:
+ local_file: /Users/darnell/IdeaProjects/groot-stream/groot-examples/end-to-end-example/src/main/resources/examples/schema/sink_schema.json
properties:
host: 192.168.44.12:9001
table: tsg_galaxy_v3.inline_source_test_local
diff --git a/groot-examples/end-to-end-example/src/main/resources/examples/inline_to_print_avro_schema.yaml b/groot-examples/end-to-end-example/src/main/resources/examples/inline_to_print_avro_schema.yaml
new file mode 100644
index 0000000..a88e36c
--- /dev/null
+++ b/groot-examples/end-to-end-example/src/main/resources/examples/inline_to_print_avro_schema.yaml
@@ -0,0 +1,73 @@
+sources:
+ inline_source:
+ type: inline
+ properties:
+ data: '[{"tcp_rtt_ms":128,"decoded_as":"HTTP","http_version":"http1","http_request_line":"GET / HTTP/1.1","http_host":"www.ct.cn","http_url":"www.ct.cn/","http_user_agent":"curl/8.0.1","http_status_code":200,"http_response_line":"HTTP/1.1 200 OK","http_response_content_type":"text/html; charset=UTF-8","http_response_latency_ms":31,"http_session_duration_ms":5451,"in_src_mac":"ba:bb:a7:3c:67:1c","in_dest_mac":"86:dd:7a:8f:ae:e2","out_src_mac":"86:dd:7a:8f:ae:e2","out_dest_mac":"ba:bb:a7:3c:67:1c","tcp_client_isn":678677906,"tcp_server_isn":1006700307,"address_type":4,"client_ip":"86.111.175.128","server_ip":"12.16.98.1","client_port":42751,"server_port":80,"in_link_id":65535,"out_link_id":65535,"start_timestamp_ms":1703646546127,"end_timestamp_ms":1703646551702,"duration_ms":5575,"sent_pkts":97,"sent_bytes":5892,"received_pkts":250,"received_bytes":333931},{"tcp_rtt_ms":256,"decoded_as":"HTTP","http_version":"http1","http_request_line":"GET / HTTP/1.1","http_host":"www.abc.cn","http_url":"www.cabc.cn/","http_user_agent":"curl/8.0.1","http_status_code":200,"http_response_line":"HTTP/1.1 200 OK","http_response_content_type":"text/html; charset=UTF-8","http_response_latency_ms":31,"http_session_duration_ms":5451,"in_src_mac":"ba:bb:a7:3c:67:1c","in_dest_mac":"86:dd:7a:8f:ae:e2","out_src_mac":"86:dd:7a:8f:ae:e2","out_dest_mac":"ba:bb:a7:3c:67:1c","tcp_client_isn":678677906,"tcp_server_isn":1006700307,"address_type":4,"client_ip":"192.168.10.198","server_ip":"4.4.4.4","client_port":42751,"server_port":80,"in_link_id":65535,"out_link_id":65535,"start_timestamp_ms":1703646546127,"end_timestamp_ms":1703646551702,"duration_ms":2575,"sent_pkts":197,"sent_bytes":5892,"received_pkts":350,"received_bytes":533931}]'
+ format: json
+ json.ignore.parse.errors: false
+
+filters:
+ filter_operator:
+ type: com.geedgenetworks.core.filter.AviatorFilter
+ properties:
+ expression: event.server_ip != '12.12.12.12'
+
+processing_pipelines:
+ projection_processor:
+ type: com.geedgenetworks.core.processor.projection.ProjectionProcessorImpl
+ remove_fields: [http_request_line, http_response_line, http_response_content_type]
+ functions:
+ - function: DROP
+ filter: event.server_ip == '4.4.4.4'
+ - function: SNOWFLAKE_ID
+ lookup_fields: [ ]
+ output_fields: [ log_id ]
+ parameters:
+ data_center_id_num: 1
+ - function: CURRENT_UNIX_TIMESTAMP
+ output_fields: [ recv_time ]
+ parameters:
+ precision: seconds
+ - function: DOMAIN
+ lookup_fields: [ http_host,ssl_sni,dtls_sni,quic_sni ]
+ output_fields: [ server_domain ]
+ parameters:
+ option: FIRST_SIGNIFICANT_SUBDOMAIN
+ - function: ASN_LOOKUP
+ lookup_fields: [ server_ip ]
+ output_fields: [ server_asn ]
+ parameters:
+ option: IP_TO_ASN
+ kb_name: tsg_ip_asn
+
+ - function: GEOIP_LOOKUP
+ lookup_fields: [ client_ip ]
+ output_fields: [ client_geolocation ]
+ parameters:
+ kb_name: tsg_ip_location
+ option: IP_TO_COUNTRY
+
+sinks:
+ print_sink:
+ type: print
+ schema:
+ local_file: /Users/darnell/IdeaProjects/groot-stream/groot-examples/end-to-end-example/src/main/resources/examples/schema/sink_schema.json
+ properties:
+ format: json
+ mode: log_warn
+
+application:
+ env:
+ name: example-inline-to-print
+ parallelism: 3
+ pipeline:
+ object-reuse: true
+ topology:
+ - name: inline_source
+ downstream: [filter_operator]
+ - name: filter_operator
+ downstream: [ projection_processor ]
+ - name: projection_processor
+ downstream: [ print_sink ]
+ - name: print_sink
+ downstream: []
\ No newline at end of file
diff --git a/groot-examples/end-to-end-example/src/main/resources/examples/schema/sink_schema.json b/groot-examples/end-to-end-example/src/main/resources/examples/schema/sink_schema.json
new file mode 100644
index 0000000..2730873
--- /dev/null
+++ b/groot-examples/end-to-end-example/src/main/resources/examples/schema/sink_schema.json
@@ -0,0 +1,302 @@
+{
+ "type": "record",
+ "name": "test_source",
+ "namespace": "abc",
+ "fields": [
+ {
+ "name": "recv_time",
+ "label": "Receive Time",
+ "doc": {
+ "constraints": {
+ "type": "unix_timestamp"
+ },
+ "visibility": "enabled",
+ "ttl": null
+ },
+ "type": {
+ "type": "long",
+ "logicalType": "unix_timestamp"
+ }
+ },
+ {
+ "name": "log_id",
+ "label": "Log ID",
+ "doc": {
+ "visibility": "enabled",
+ "ttl": null
+ },
+ "type": "long"
+ },
+ {
+ "name": "http_url",
+ "label": "HTTP.URL",
+ "doc": {
+ "visibility": "enabled",
+ "ttl": null
+ },
+ "type": "string"
+ },
+ {
+ "name": "http_host",
+ "label": "HTTP.Host",
+ "doc": {
+ "visibility": "enabled",
+ "ttl": null
+ },
+ "type": "string"
+ },
+ {
+ "name": "http_request_line",
+ "label": "HTTP.Request Line",
+ "doc": {
+ "visibility": "enabled",
+ "ttl": null
+ },
+ "type": "string"
+ },
+ {
+ "name": "http_response_line",
+ "label": "HTTP.Response Line",
+ "doc": {
+ "visibility": "enabled",
+ "ttl": null
+ },
+ "type": "string"
+ },
+ {
+ "name": "decoded_as",
+ "label": "Decoded AS",
+ "doc": {
+ "constraints": {
+ "operator_functions": "=,!=,in,not in"
+ },
+ "data": [
+ {
+ "code": "BASE",
+ "value": "BASE"
+ },
+ {
+ "code": "MAIL",
+ "value": "MAIL"
+ },
+ {
+ "code": "DNS",
+ "value": "DNS"
+ },
+ {
+ "code": "HTTP",
+ "value": "HTTP"
+ },
+ {
+ "code": "SSL",
+ "value": "SSL"
+ },
+ {
+ "code": "DTLS",
+ "value": "DTLS"
+ },
+ {
+ "code": "QUIC",
+ "value": "QUIC"
+ },
+ {
+ "code": "FTP",
+ "value": "FTP"
+ },
+ {
+ "code": "SSH",
+ "value": "SSH"
+ },
+ {
+ "code": "Stratum",
+ "value": "Stratum"
+ },
+ {
+ "code": "RDP",
+ "value": "RDP"
+ },
+ {
+ "code": "SIP",
+ "value": "SIP"
+ },
+ {
+ "code": "RTP",
+ "value": "RTP"
+ }
+ ],
+ "visibility": "enabled",
+ "ttl": null
+ },
+ "type": "string"
+ },
+ {
+ "name": "tcp_rtt_ms",
+ "label": "Round-trip Time (ms)",
+ "doc": {
+ "constraints": {
+ "type": "decimal",
+ "aggregation_functions": "AVG, MAX, MIN, MEDIAN, P95_PERCENTILE, P99_PERCENTILE"
+ },
+ "visibility": "enabled",
+ "ttl": null
+ },
+ "type": "int"
+ },
+ {
+ "name": "sent_bytes",
+ "label": "Bytes Sent",
+ "doc": {
+ "constraints": {
+ "type": "bytes"
+ },
+ "visibility": "enabled",
+ "ttl": null
+ },
+ "type": "long"
+ },
+ {
+ "name": "received_bytes",
+ "label": "Bytes Received",
+ "doc": {
+ "constraints": {
+ "type": "bytes"
+ },
+ "visibility": "enabled",
+ "ttl": null
+ },
+ "type": "long"
+ },{
+ "name": "server_fqdn",
+ "label": "Server FQDN",
+ "doc": {
+ "visibility": "enabled",
+ "ttl": null
+ },
+ "type": "string"
+ },
+ {
+ "name": "server_domain",
+ "label": "Server Domain",
+ "doc": {
+ "visibility": "enabled",
+ "ttl": null
+ },
+ "type": "string"
+ },{
+ "name": "client_ip",
+ "label": "Client IP",
+ "doc": {
+ "constraints": {
+ "type": "ip"
+ },
+ "visibility": "enabled",
+ "ttl": null
+ },
+ "type": "string"
+ },
+ {
+ "name": "client_port",
+ "label": "Client Port",
+ "doc": {
+ "constraints": {
+ "aggregation_functions": "COUNT, COUNT_DISTINCT"
+ },
+ "visibility": "enabled",
+ "ttl": null
+ },
+ "type": "int"
+ },
+ {
+ "name": "client_os_desc",
+ "type": "string",
+ "doc": {
+ "visibility": "enabled",
+ "ttl": null
+ },
+ "label": "Client OS Description"
+ },
+ {
+ "name": "client_geolocation",
+ "label": "Client Geolocation",
+ "doc": {
+ "visibility": "enabled",
+ "ttl": null
+ },
+ "type": "string"
+ },
+ {
+ "name": "client_asn",
+ "label": "Client ASN",
+ "doc": {
+ "constraints": {
+ "aggregation_functions": "COUNT, COUNT_DISTINCT"
+ },
+ "visibility": "enabled",
+ "ttl": null
+ },
+ "type": "long"
+ },
+ {
+ "name": "subscriber_id",
+ "label": "Subscriber ID",
+ "doc": {
+ "visibility": "enabled",
+ "ttl": null
+ },
+ "type": "string"
+ },{
+ "name": "server_ip",
+ "label": "Server IP",
+ "doc": {
+ "constraints": {
+ "type": "ip"
+ },
+ "visibility": "enabled",
+ "ttl": null
+ },
+ "type": "string"
+ },
+ {
+ "name": "server_port",
+ "label": "Server Port",
+ "doc": {
+ "constraints": {
+ "aggregation_functions": "COUNT, COUNT_DISTINCT"
+ },
+ "visibility": "enabled",
+ "ttl": null
+ },
+ "type": "int"
+ },
+ {
+ "name": "server_os_desc",
+ "type": "string",
+ "doc": {
+ "visibility": "enabled",
+ "ttl": null
+ },
+ "label": "Server OS Description"
+ },
+ {
+ "name": "server_geolocation",
+ "label": "Server Geolocation",
+ "doc": {
+ "visibility": "enabled",
+ "ttl": null
+ },
+ "type": "string"
+ },
+ {
+ "name": "server_asn",
+ "label": "Server ASN",
+ "doc": {
+ "constraints": {
+ "aggregation_functions": "COUNT, COUNT_DISTINCT"
+ },
+ "visibility": "enabled",
+ "ttl": null
+ },
+ "type": "long"
+ }
+ ]
+}
\ No newline at end of file
diff --git a/groot-examples/end-to-end-example/src/main/resources/grootstream.yaml b/groot-examples/end-to-end-example/src/main/resources/grootstream.yaml
new file mode 100644
index 0000000..1ffda9f
--- /dev/null
+++ b/groot-examples/end-to-end-example/src/main/resources/grootstream.yaml
@@ -0,0 +1,17 @@
+grootstream:
+ knowledge_base:
+ - name: tsg_ip_asn
+ fs_type: local
+ fs_path: /Users/darnell/IdeaProjects/groot-stream/groot-core/src/test/resources/
+ files:
+ - asn.mmdb
+ - name: tsg_ip_location
+ fs_type: local
+ fs_path: /Users/darnell/IdeaProjects/groot-stream/groot-core/src/test/resources/
+ files:
+ - ip_builtin.mmdb
+ properties:
+ hos.path: http://192.168.44.12:9098/hos
+ hos.bucket.name.traffic_file: traffic_file_bucket
+ hos.bucket.name.troubleshooting_file: troubleshooting_file_bucket
+ scheduler.knowledge_base.update.interval.minutes: 5