diff options
| author | doufenghu <[email protected]> | 2024-03-16 19:32:42 +0800 |
|---|---|---|
| committer | doufenghu <[email protected]> | 2024-03-16 19:32:42 +0800 |
| commit | 25994fade7720a43021b25004ade13c71f941e88 (patch) | |
| tree | 9c91a57d526f0579f241add718d8cb114ff04468 /groot-examples | |
| parent | 9ff68b2c631606cf06a7001036ff16475c52371c (diff) | |
[Improve][Docs] Add some help information for connector schema and knowledge base files.
Diffstat (limited to 'groot-examples')
4 files changed, 412 insertions, 1 deletions
diff --git a/groot-examples/end-to-end-example/src/main/resources/examples/inline_to_clickhouse.yaml b/groot-examples/end-to-end-example/src/main/resources/examples/inline_to_clickhouse.yaml index 829741d..c7072a4 100644 --- a/groot-examples/end-to-end-example/src/main/resources/examples/inline_to_clickhouse.yaml +++ b/groot-examples/end-to-end-example/src/main/resources/examples/inline_to_clickhouse.yaml @@ -27,7 +27,7 @@ sources: # [object] Define connector source # # [string] Event Data, it will be parsed to Map<String, Object> by the specified format. # - data: '{"recv_time": 1705565615, "tcp_rtt_ms":128,"decoded_as":"HTTP", "http_version":"http1","http_request_line":"GET / HTTP/1.1","http_host":"www.ct.cn","http_url":"www.ct.cn/","http_user_agent":"curl/8.0.1","http_status_code":200,"http_response_line":"HTTP/1.1 200 OK","http_response_content_type":"text/html; charset=UTF-8","http_response_latency_ms":31,"http_session_duration_ms":5451,"in_src_mac":"ba:bb:a7:3c:67:1c","in_dest_mac":"86:dd:7a:8f:ae:e2","out_src_mac":"86:dd:7a:8f:ae:e2","out_dest_mac":"ba:bb:a7:3c:67:1c","tcp_client_isn":678677906,"tcp_server_isn":1006700307,"address_type":4,"client_ip":"192.11.22.22","server_ip":"8.8.8.8","client_port":42751,"server_port":80,"in_link_id":65535,"out_link_id":65535,"start_timestamp_ms":1703646546127,"end_timestamp_ms":1703646551702,"duration_ms":5575,"sent_pkts":97,"sent_bytes":5892,"received_pkts":250,"received_bytes":333931,"tcp_c2s_ip_fragments":0,"tcp_s2c_ip_fragments":0,"tcp_c2s_rtx_pkts":0,"tcp_c2s_rtx_bytes":0,"tcp_s2c_rtx_pkts":0,"tcp_s2c_rtx_bytes":0,"tcp_c2s_o3_pkts":0,"tcp_s2c_o3_pkts":0,"tcp_c2s_lost_bytes":0,"tcp_s2c_lost_bytes":0,"flags":26418,"flags_identify_info":[100,1,100,60,150,100,1,2],"app_transition":"http.1111.test_1_1","decoded_as":"HTTP","server_fqdn":"www.ct.cn","app":"test_1_1","decoded_path":"ETHERNET.IPv4.TCP.http","fqdn_category_list":[1767],"t_vsys_id":1,"vsys_id":1,"session_id":290538039798223400,"tcp_handshake_latency_ms":41,"client_os_desc":"Windows","server_os_desc":"Linux","data_center":"center-xxg-tsgx","device_group":"group-xxg-tsgx","device_tag":"{\"tags\":[{\"tag\":\"data_center\",\"value\":\"center-xxg-tsgx\"},{\"tag\":\"device_group\",\"value\":\"group-xxg-tsgx\"}]}","device_id":"9800165603247024","sled_ip":"192.168.40.39","dup_traffic_flag":0}' + data: '{"recv_time": 1705565615, "tcp_rtt_ms":128,"decoded_as":"HTTP", "http_version":"http1","http_request_line":"GET / HTTP/1.1","http_host":"www.ct.cn","http_url":"www.ct.cn/","http_user_agent":"curl/8.0.1","http_status_code":200,"http_response_line":"HTTP/1.1 200 OK","http_response_content_type":"text/html; charset=UTF-8","http_response_latency_ms":31,"http_session_duration_ms":5451,"in_src_mac":"ba:bb:a7:3c:67:1c","in_dest_mac":"86:dd:7a:8f:ae:e2","out_src_mac":"86:dd:7a:8f:ae:e2","out_dest_mac":"ba:bb:a7:3c:67:1c","tcp_client_isn":678677906,"tcp_server_isn":1006700307,"address_type":4,"client_ip":"86.111.175.128","server_ip":"12.16.98.1","client_port":42751,"server_port":80,"in_link_id":65535,"out_link_id":65535,"start_timestamp_ms":1703646546127,"end_timestamp_ms":1703646551702,"duration_ms":5575,"sent_pkts":97,"sent_bytes":5892,"received_pkts":250,"received_bytes":333931,"tcp_c2s_ip_fragments":0,"tcp_s2c_ip_fragments":0,"tcp_c2s_rtx_pkts":0,"tcp_c2s_rtx_bytes":0,"tcp_s2c_rtx_pkts":0,"tcp_s2c_rtx_bytes":0,"tcp_c2s_o3_pkts":0,"tcp_s2c_o3_pkts":0,"tcp_c2s_lost_bytes":0,"tcp_s2c_lost_bytes":0,"flags":26418,"flags_identify_info":[100,1,100,60,150,100,1,2],"app_transition":"http.1111.test_1_1","decoded_as":"HTTP","server_fqdn":"www.ct.cn","app":"test_1_1","decoded_path":"ETHERNET.IPv4.TCP.http","fqdn_category_list":[1767],"t_vsys_id":1,"vsys_id":1,"session_id":290538039798223400,"tcp_handshake_latency_ms":41,"client_os_desc":"Windows","server_os_desc":"Linux","data_center":"center-xxg-tsgx","device_group":"group-xxg-tsgx","device_tag":"{\"tags\":[{\"tag\":\"data_center\",\"value\":\"center-xxg-tsgx\"},{\"tag\":\"device_group\",\"value\":\"group-xxg-tsgx\"}]}","device_id":"9800165603247024","sled_ip":"192.168.40.39","dup_traffic_flag":0}' format: json json.ignore.parse.errors: false @@ -44,10 +44,29 @@ processing_pipelines: output_fields: [ recv_time ] parameters: precision: seconds + - function: DOMAIN + lookup_fields: [ http_host,ssl_sni,dtls_sni,quic_sni ] + output_fields: [ server_domain ] + parameters: + option: FIRST_SIGNIFICANT_SUBDOMAIN + - function: ASN_LOOKUP + lookup_fields: [ server_ip ] + output_fields: [ server_asn ] + parameters: + option: IP_TO_ASN + kb_name: tsg_ip_asn + - function: GEOIP_LOOKUP + lookup_fields: [ client_ip ] + output_fields: [ client_geolocation ] + parameters: + kb_name: tsg_ip_location + option: IP_TO_COUNTRY sinks: clickhouse_sink: type: clickhouse + schema: + local_file: /Users/darnell/IdeaProjects/groot-stream/groot-examples/end-to-end-example/src/main/resources/examples/schema/sink_schema.json properties: host: 192.168.44.12:9001 table: tsg_galaxy_v3.inline_source_test_local diff --git a/groot-examples/end-to-end-example/src/main/resources/examples/inline_to_print_avro_schema.yaml b/groot-examples/end-to-end-example/src/main/resources/examples/inline_to_print_avro_schema.yaml new file mode 100644 index 0000000..a88e36c --- /dev/null +++ b/groot-examples/end-to-end-example/src/main/resources/examples/inline_to_print_avro_schema.yaml @@ -0,0 +1,73 @@ +sources: + inline_source: + type: inline + properties: + data: '[{"tcp_rtt_ms":128,"decoded_as":"HTTP","http_version":"http1","http_request_line":"GET / HTTP/1.1","http_host":"www.ct.cn","http_url":"www.ct.cn/","http_user_agent":"curl/8.0.1","http_status_code":200,"http_response_line":"HTTP/1.1 200 OK","http_response_content_type":"text/html; charset=UTF-8","http_response_latency_ms":31,"http_session_duration_ms":5451,"in_src_mac":"ba:bb:a7:3c:67:1c","in_dest_mac":"86:dd:7a:8f:ae:e2","out_src_mac":"86:dd:7a:8f:ae:e2","out_dest_mac":"ba:bb:a7:3c:67:1c","tcp_client_isn":678677906,"tcp_server_isn":1006700307,"address_type":4,"client_ip":"86.111.175.128","server_ip":"12.16.98.1","client_port":42751,"server_port":80,"in_link_id":65535,"out_link_id":65535,"start_timestamp_ms":1703646546127,"end_timestamp_ms":1703646551702,"duration_ms":5575,"sent_pkts":97,"sent_bytes":5892,"received_pkts":250,"received_bytes":333931},{"tcp_rtt_ms":256,"decoded_as":"HTTP","http_version":"http1","http_request_line":"GET / HTTP/1.1","http_host":"www.abc.cn","http_url":"www.cabc.cn/","http_user_agent":"curl/8.0.1","http_status_code":200,"http_response_line":"HTTP/1.1 200 OK","http_response_content_type":"text/html; charset=UTF-8","http_response_latency_ms":31,"http_session_duration_ms":5451,"in_src_mac":"ba:bb:a7:3c:67:1c","in_dest_mac":"86:dd:7a:8f:ae:e2","out_src_mac":"86:dd:7a:8f:ae:e2","out_dest_mac":"ba:bb:a7:3c:67:1c","tcp_client_isn":678677906,"tcp_server_isn":1006700307,"address_type":4,"client_ip":"192.168.10.198","server_ip":"4.4.4.4","client_port":42751,"server_port":80,"in_link_id":65535,"out_link_id":65535,"start_timestamp_ms":1703646546127,"end_timestamp_ms":1703646551702,"duration_ms":2575,"sent_pkts":197,"sent_bytes":5892,"received_pkts":350,"received_bytes":533931}]' + format: json + json.ignore.parse.errors: false + +filters: + filter_operator: + type: com.geedgenetworks.core.filter.AviatorFilter + properties: + expression: event.server_ip != '12.12.12.12' + +processing_pipelines: + projection_processor: + type: com.geedgenetworks.core.processor.projection.ProjectionProcessorImpl + remove_fields: [http_request_line, http_response_line, http_response_content_type] + functions: + - function: DROP + filter: event.server_ip == '4.4.4.4' + - function: SNOWFLAKE_ID + lookup_fields: [ ] + output_fields: [ log_id ] + parameters: + data_center_id_num: 1 + - function: CURRENT_UNIX_TIMESTAMP + output_fields: [ recv_time ] + parameters: + precision: seconds + - function: DOMAIN + lookup_fields: [ http_host,ssl_sni,dtls_sni,quic_sni ] + output_fields: [ server_domain ] + parameters: + option: FIRST_SIGNIFICANT_SUBDOMAIN + - function: ASN_LOOKUP + lookup_fields: [ server_ip ] + output_fields: [ server_asn ] + parameters: + option: IP_TO_ASN + kb_name: tsg_ip_asn + + - function: GEOIP_LOOKUP + lookup_fields: [ client_ip ] + output_fields: [ client_geolocation ] + parameters: + kb_name: tsg_ip_location + option: IP_TO_COUNTRY + +sinks: + print_sink: + type: print + schema: + local_file: /Users/darnell/IdeaProjects/groot-stream/groot-examples/end-to-end-example/src/main/resources/examples/schema/sink_schema.json + properties: + format: json + mode: log_warn + +application: + env: + name: example-inline-to-print + parallelism: 3 + pipeline: + object-reuse: true + topology: + - name: inline_source + downstream: [filter_operator] + - name: filter_operator + downstream: [ projection_processor ] + - name: projection_processor + downstream: [ print_sink ] + - name: print_sink + downstream: []
\ No newline at end of file diff --git a/groot-examples/end-to-end-example/src/main/resources/examples/schema/sink_schema.json b/groot-examples/end-to-end-example/src/main/resources/examples/schema/sink_schema.json new file mode 100644 index 0000000..2730873 --- /dev/null +++ b/groot-examples/end-to-end-example/src/main/resources/examples/schema/sink_schema.json @@ -0,0 +1,302 @@ +{ + "type": "record", + "name": "test_source", + "namespace": "abc", + "fields": [ + { + "name": "recv_time", + "label": "Receive Time", + "doc": { + "constraints": { + "type": "unix_timestamp" + }, + "visibility": "enabled", + "ttl": null + }, + "type": { + "type": "long", + "logicalType": "unix_timestamp" + } + }, + { + "name": "log_id", + "label": "Log ID", + "doc": { + "visibility": "enabled", + "ttl": null + }, + "type": "long" + }, + { + "name": "http_url", + "label": "HTTP.URL", + "doc": { + "visibility": "enabled", + "ttl": null + }, + "type": "string" + }, + { + "name": "http_host", + "label": "HTTP.Host", + "doc": { + "visibility": "enabled", + "ttl": null + }, + "type": "string" + }, + { + "name": "http_request_line", + "label": "HTTP.Request Line", + "doc": { + "visibility": "enabled", + "ttl": null + }, + "type": "string" + }, + { + "name": "http_response_line", + "label": "HTTP.Response Line", + "doc": { + "visibility": "enabled", + "ttl": null + }, + "type": "string" + }, + { + "name": "decoded_as", + "label": "Decoded AS", + "doc": { + "constraints": { + "operator_functions": "=,!=,in,not in" + }, + "data": [ + { + "code": "BASE", + "value": "BASE" + }, + { + "code": "MAIL", + "value": "MAIL" + }, + { + "code": "DNS", + "value": "DNS" + }, + { + "code": "HTTP", + "value": "HTTP" + }, + { + "code": "SSL", + "value": "SSL" + }, + { + "code": "DTLS", + "value": "DTLS" + }, + { + "code": "QUIC", + "value": "QUIC" + }, + { + "code": "FTP", + "value": "FTP" + }, + { + "code": "SSH", + "value": "SSH" + }, + { + "code": "Stratum", + "value": "Stratum" + }, + { + "code": "RDP", + "value": "RDP" + }, + { + "code": "SIP", + "value": "SIP" + }, + { + "code": "RTP", + "value": "RTP" + } + ], + "visibility": "enabled", + "ttl": null + }, + "type": "string" + }, + { + "name": "tcp_rtt_ms", + "label": "Round-trip Time (ms)", + "doc": { + "constraints": { + "type": "decimal", + "aggregation_functions": "AVG, MAX, MIN, MEDIAN, P95_PERCENTILE, P99_PERCENTILE" + }, + "visibility": "enabled", + "ttl": null + }, + "type": "int" + }, + { + "name": "sent_bytes", + "label": "Bytes Sent", + "doc": { + "constraints": { + "type": "bytes" + }, + "visibility": "enabled", + "ttl": null + }, + "type": "long" + }, + { + "name": "received_bytes", + "label": "Bytes Received", + "doc": { + "constraints": { + "type": "bytes" + }, + "visibility": "enabled", + "ttl": null + }, + "type": "long" + },{ + "name": "server_fqdn", + "label": "Server FQDN", + "doc": { + "visibility": "enabled", + "ttl": null + }, + "type": "string" + }, + { + "name": "server_domain", + "label": "Server Domain", + "doc": { + "visibility": "enabled", + "ttl": null + }, + "type": "string" + },{ + "name": "client_ip", + "label": "Client IP", + "doc": { + "constraints": { + "type": "ip" + }, + "visibility": "enabled", + "ttl": null + }, + "type": "string" + }, + { + "name": "client_port", + "label": "Client Port", + "doc": { + "constraints": { + "aggregation_functions": "COUNT, COUNT_DISTINCT" + }, + "visibility": "enabled", + "ttl": null + }, + "type": "int" + }, + { + "name": "client_os_desc", + "type": "string", + "doc": { + "visibility": "enabled", + "ttl": null + }, + "label": "Client OS Description" + }, + { + "name": "client_geolocation", + "label": "Client Geolocation", + "doc": { + "visibility": "enabled", + "ttl": null + }, + "type": "string" + }, + { + "name": "client_asn", + "label": "Client ASN", + "doc": { + "constraints": { + "aggregation_functions": "COUNT, COUNT_DISTINCT" + }, + "visibility": "enabled", + "ttl": null + }, + "type": "long" + }, + { + "name": "subscriber_id", + "label": "Subscriber ID", + "doc": { + "visibility": "enabled", + "ttl": null + }, + "type": "string" + },{ + "name": "server_ip", + "label": "Server IP", + "doc": { + "constraints": { + "type": "ip" + }, + "visibility": "enabled", + "ttl": null + }, + "type": "string" + }, + { + "name": "server_port", + "label": "Server Port", + "doc": { + "constraints": { + "aggregation_functions": "COUNT, COUNT_DISTINCT" + }, + "visibility": "enabled", + "ttl": null + }, + "type": "int" + }, + { + "name": "server_os_desc", + "type": "string", + "doc": { + "visibility": "enabled", + "ttl": null + }, + "label": "Server OS Description" + }, + { + "name": "server_geolocation", + "label": "Server Geolocation", + "doc": { + "visibility": "enabled", + "ttl": null + }, + "type": "string" + }, + { + "name": "server_asn", + "label": "Server ASN", + "doc": { + "constraints": { + "aggregation_functions": "COUNT, COUNT_DISTINCT" + }, + "visibility": "enabled", + "ttl": null + }, + "type": "long" + } + ] +}
\ No newline at end of file diff --git a/groot-examples/end-to-end-example/src/main/resources/grootstream.yaml b/groot-examples/end-to-end-example/src/main/resources/grootstream.yaml new file mode 100644 index 0000000..1ffda9f --- /dev/null +++ b/groot-examples/end-to-end-example/src/main/resources/grootstream.yaml @@ -0,0 +1,17 @@ +grootstream: + knowledge_base: + - name: tsg_ip_asn + fs_type: local + fs_path: /Users/darnell/IdeaProjects/groot-stream/groot-core/src/test/resources/ + files: + - asn.mmdb + - name: tsg_ip_location + fs_type: local + fs_path: /Users/darnell/IdeaProjects/groot-stream/groot-core/src/test/resources/ + files: + - ip_builtin.mmdb + properties: + hos.path: http://192.168.44.12:9098/hos + hos.bucket.name.traffic_file: traffic_file_bucket + hos.bucket.name.troubleshooting_file: troubleshooting_file_bucket + scheduler.knowledge_base.update.interval.minutes: 5 |
