summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authordoufenghu <[email protected]>2024-03-16 19:32:42 +0800
committerdoufenghu <[email protected]>2024-03-16 19:32:42 +0800
commit25994fade7720a43021b25004ade13c71f941e88 (patch)
tree9c91a57d526f0579f241add718d8cb114ff04468
parent9ff68b2c631606cf06a7001036ff16475c52371c (diff)
[Improve][Docs] Add some help information for connector schema and knowledge base files.
-rw-r--r--config/template/grootstream_job_template.yaml8
-rw-r--r--docs/connector/connector.md38
-rw-r--r--docs/grootstream-config.md19
-rw-r--r--docs/user-guide.md55
-rw-r--r--groot-examples/end-to-end-example/src/main/resources/examples/inline_to_clickhouse.yaml21
-rw-r--r--groot-examples/end-to-end-example/src/main/resources/examples/inline_to_print_avro_schema.yaml73
-rw-r--r--groot-examples/end-to-end-example/src/main/resources/examples/schema/sink_schema.json302
-rw-r--r--groot-examples/end-to-end-example/src/main/resources/grootstream.yaml17
8 files changed, 480 insertions, 53 deletions
diff --git a/config/template/grootstream_job_template.yaml b/config/template/grootstream_job_template.yaml
index 2606d56..d700777 100644
--- a/config/template/grootstream_job_template.yaml
+++ b/config/template/grootstream_job_template.yaml
@@ -8,6 +8,10 @@
sources: # [object] Define connector source
kafka_source: # [object] Kafka source connector name, must be unique. It used to define the source node of the job topology.
type: kafka # [string] Source Type
+ schema: # [object] Source schema, configured through fields, local_file, or url. If schema is not set, all fields (Map<String, Object>) will be output.
+ #fields: "struct<log_id:bigint, recv_time:bigint,client_ip: string>"
+ local_file: /../schema/kafka_source_schema.json # [string] Local File Path for Schema
+ #url: http:// # [string] URL for Schema
properties: # [object] Kafka source properties
topic: SESSION-RECORD # [string] Topic Name, consumer will subscribe this topic.
kafka.bootstrap.servers: 127.0.0.1:9092 # [string] Kafka Bootstrap Servers, if you have multiple servers, use comma to separate them.
@@ -118,7 +122,7 @@ processing_pipelines: # [object] Define Processors for processing pipelines.
functions: # [array of object] Function List
- function: GEOIP_LOOKUP
lookup_fields: [ server_ip ]
- output_fields: [ server_asn ]
+ output_fields: [ server_geolocation ]
parameters:
kb_name: tsg_ip_location
option: IP_TO_OBJECT
@@ -292,6 +296,8 @@ sinks: # [object] Define connector sink
clickhouse_sink: # [object] ClickHouse sink connector name, must be unique. It used to define the sink node of the job topology.
type: clickhouse
+ schema:
+ local_file: /../schema/clickhouse_sink_schema.json
properties:
host: 127.0.0.1:9001
table: inline_source_test_local
diff --git a/docs/connector/connector.md b/docs/connector/connector.md
index 6bcc878..e36214c 100644
--- a/docs/connector/connector.md
+++ b/docs/connector/connector.md
@@ -7,13 +7,13 @@ Source Connector contains some common core features, and each source connector s
sources:
${source_name}:
type: ${source_connector_type}
- # source table schema, config through fields or local_file or url
- schema:
- fields:
+ # Source schema, configured through fields, local_file, or url. If schema is not set, all fields (Map<String, Object>) will be output.
+ schema:
+ fields:
- name: ${field_name}
type: ${field_type}
- # local_file: "schema path"
- # url: "schema http url"
+ # local_file: "/path/to/schema.json"
+ # url: "https://localhost:8080/schema.json"
properties:
${prop_key}: ${prop_value}
```
@@ -29,12 +29,13 @@ The source connector supports reading only specified fields from the data source
The Schema Structure refer to [Schema Structure](../user-guide.md#schema-structure).
## Schema Config
-Schema can config through fields or local_file or url.
+Schema can be configured through fields, local_file, or url. If schema is not set, all fields (Map<String, Object>) will be output. Note that local_file and url only support the Avro schema format. For more details, see the [Avro Schema specification](https://avro.apache.org/docs/1.11.1/specification/).
-### fields
+### Fields
+Fields can be configured in array style or SQL style. The array style is recommended because it is more readable.
```yaml
schema:
- # by array
+ # array style
fields:
- name: ${field_name}
type: ${field_type}
@@ -42,31 +43,28 @@ schema:
```yaml
schema:
- # by sql
+ # sql style
fields: "struct<field_name:field_type, ...>"
# can also without outer struct<>
# fields: "field_name:field_type, ..."
```
-### local_file
-
+### Local File
+Retrieve the schema from a local file, specified by its absolute path.
```yaml
schema:
# by array
fields:
- local_file: "schema path"
+ local_file: "/path/to/schema.json"
```
-### url
-Retrieve updated schema from URL for cycle, support dynamic schema. Not all connector support dynamic schema.
-
-The connectors that currently support dynamic schema include: clickHouse sink.
-
+### URL
+Some connectors, such as the `ClickHouse Sink`, support periodically fetching and updating the schema from a URL.
```yaml
schema:
# by array
fields:
- url: "schema http url"
+ url: "https://localhost:8080/schema.json"
```
# Sink Connector
@@ -81,8 +79,8 @@ sinks:
# Sink table schema, configured through fields, local_file, or url. If schema is not set, all fields (Map<String, Object>) will be output.
schema:
fields: "struct<field_name:field_type, ...>"
- # local_file: "schema path"
- # url: "schema url"
+ # local_file: "/path/to/schema.json"
+ # url: "https://localhost:8080/schema.json"
properties:
${prop_key}: ${prop_value}
```
diff --git a/docs/grootstream-config.md b/docs/grootstream-config.md
index a359c39..479f4a7 100644
--- a/docs/grootstream-config.md
+++ b/docs/grootstream-config.md
@@ -5,16 +5,25 @@ The purpose of this file is to provide a global configuration for the groot-stre
```yaml
grootstream:
- knowledge_base: # Define the knowledge base list.
- - name: ${knowledge_base_name} # Define the name of the knowledge base, used to kb function.
- fs_type: ${file_system_type} # Define the type of the file system.Support: local,hdfs,http.
- fs_path: ${file_system_path} # Define the path of the file system.
+ knowledge_base: # Define the libraries
+ - name: ${knowledge_base_name}
+ fs_type: ${file_system_type}
+ fs_path: ${file_system_path}
files:
- ${file_name} # Define the file name of the knowledge base.
properties: # Custom parameters.
hos.path: ${hos_path}
hos.bucket.name.traffic_file: ${traffic_file_bucket}
hos.bucket.name.troubleshooting_file: ${troubleshooting_file_bucket}
- scheduler.knowledge_base.update.interval.minutes: ${knowledge_base_update_interval_minutes}
+ scheduler.knowledge_base.update.interval.minutes: ${knowledge_base_update_interval_minutes} # Define the interval of the knowledge base file update.
```
+### Knowledge Base
+The knowledge base is a collection of libraries that can be used in the groot-stream job's UDFs. The file system type can be set to `local` or `http`. If it is `http`, `fs_path` must be a `KB Repository` URL. The libraries are dynamically updated at the interval defined by the `scheduler.knowledge_base.update.interval.minutes` configuration.
+
+| Name | Type | Required | Default | Description |
+|:---------|:--------|:---------|:--------|:---------------------------------------------------------------------------|
+| name | String | Yes | - | The name of the knowledge base, used by [UDF](processor/udf.md) |
+| fs_type | String | Yes | - | The type of the file system. Enum: local and http. |
+| fs_path | String | Yes | - | The path of the file system. It can be file directory or http restful api. |
+| files | Array | No | - | The file list of the knowledge base object. |
diff --git a/docs/user-guide.md b/docs/user-guide.md
index fa05547..a8f5067 100644
--- a/docs/user-guide.md
+++ b/docs/user-guide.md
@@ -8,19 +8,20 @@ The main format of the config template file is `yaml`, for more details of this
sources:
inline_source:
type: inline
- fields:
- - name: log_id
- type: bigint
- - name: recv_time
- type: bigint
- - name: fqdn_string
- type: string
- - name: client_ip
- type: string
- - name: server_ip
- type: string
- - name: decoded_as
- type: string
+ schema:
+ fields:
+ - name: log_id
+ type: bigint
+ - name: recv_time
+ type: bigint
+ - name: fqdn_string
+ type: string
+ - name: client_ip
+ type: string
+ - name: server_ip
+ type: string
+ - name: decoded_as
+ type: string
properties:
data: '{"log_id": 1, "recv_time":"111","fqdn_string":"baidu.com", "client_ip":"192.168.0.1","server_ip":"120.233.20.242","decoded_as":"BASE", "dup_traffic_flag":1}'
format: json
@@ -92,19 +93,20 @@ application:
## Schema Structure
Some sources do not enforce a strict schema, so you need to use `fields` to define the field names and types. Such sources, for example `Kafka` and `Inline`, let you customize the schema.
```yaml
-fields:
- - name: log_id
- type: bigint
- - name: recv_time
- type: bigint
- - name: fqdn_string
- type: string
- - name: client_ip
- type: string
- - name: server_ip
- type: string
- - name: decoded_as
- type: string
+schema:
+ fields:
+ - name: log_id
+ type: bigint
+ - name: recv_time
+ type: bigint
+ - name: fqdn_string
+ type: string
+ - name: client_ip
+ type: string
+ - name: server_ip
+ type: string
+ - name: decoded_as
+ type: string
```
`name` The name of the field. `type` The data type of the field.
@@ -136,6 +138,7 @@ Sink is used to define where GrootStream needs to output data. Multiple sinks ca
## Application
Used to define some common parameters of the job and the topology of the job, such as the name of the job, the parallelism of the job, etc. The following configuration parameters are supported.
+
### ENV
Used to define job environment configuration information. For more details, you can refer to the documentation [JobEnvConfig](./env-config.md).
diff --git a/groot-examples/end-to-end-example/src/main/resources/examples/inline_to_clickhouse.yaml b/groot-examples/end-to-end-example/src/main/resources/examples/inline_to_clickhouse.yaml
index 829741d..c7072a4 100644
--- a/groot-examples/end-to-end-example/src/main/resources/examples/inline_to_clickhouse.yaml
+++ b/groot-examples/end-to-end-example/src/main/resources/examples/inline_to_clickhouse.yaml
@@ -27,7 +27,7 @@ sources: # [object] Define connector source
#
# [string] Event Data, it will be parsed to Map<String, Object> by the specified format.
#
- data: '{"recv_time": 1705565615, "tcp_rtt_ms":128,"decoded_as":"HTTP", "http_version":"http1","http_request_line":"GET / HTTP/1.1","http_host":"www.ct.cn","http_url":"www.ct.cn/","http_user_agent":"curl/8.0.1","http_status_code":200,"http_response_line":"HTTP/1.1 200 OK","http_response_content_type":"text/html; charset=UTF-8","http_response_latency_ms":31,"http_session_duration_ms":5451,"in_src_mac":"ba:bb:a7:3c:67:1c","in_dest_mac":"86:dd:7a:8f:ae:e2","out_src_mac":"86:dd:7a:8f:ae:e2","out_dest_mac":"ba:bb:a7:3c:67:1c","tcp_client_isn":678677906,"tcp_server_isn":1006700307,"address_type":4,"client_ip":"192.11.22.22","server_ip":"8.8.8.8","client_port":42751,"server_port":80,"in_link_id":65535,"out_link_id":65535,"start_timestamp_ms":1703646546127,"end_timestamp_ms":1703646551702,"duration_ms":5575,"sent_pkts":97,"sent_bytes":5892,"received_pkts":250,"received_bytes":333931,"tcp_c2s_ip_fragments":0,"tcp_s2c_ip_fragments":0,"tcp_c2s_rtx_pkts":0,"tcp_c2s_rtx_bytes":0,"tcp_s2c_rtx_pkts":0,"tcp_s2c_rtx_bytes":0,"tcp_c2s_o3_pkts":0,"tcp_s2c_o3_pkts":0,"tcp_c2s_lost_bytes":0,"tcp_s2c_lost_bytes":0,"flags":26418,"flags_identify_info":[100,1,100,60,150,100,1,2],"app_transition":"http.1111.test_1_1","decoded_as":"HTTP","server_fqdn":"www.ct.cn","app":"test_1_1","decoded_path":"ETHERNET.IPv4.TCP.http","fqdn_category_list":[1767],"t_vsys_id":1,"vsys_id":1,"session_id":290538039798223400,"tcp_handshake_latency_ms":41,"client_os_desc":"Windows","server_os_desc":"Linux","data_center":"center-xxg-tsgx","device_group":"group-xxg-tsgx","device_tag":"{\"tags\":[{\"tag\":\"data_center\",\"value\":\"center-xxg-tsgx\"},{\"tag\":\"device_group\",\"value\":\"group-xxg-tsgx\"}]}","device_id":"9800165603247024","sled_ip":"192.168.40.39","dup_traffic_flag":0}'
+ data: '{"recv_time": 1705565615, "tcp_rtt_ms":128,"decoded_as":"HTTP", "http_version":"http1","http_request_line":"GET / HTTP/1.1","http_host":"www.ct.cn","http_url":"www.ct.cn/","http_user_agent":"curl/8.0.1","http_status_code":200,"http_response_line":"HTTP/1.1 200 OK","http_response_content_type":"text/html; charset=UTF-8","http_response_latency_ms":31,"http_session_duration_ms":5451,"in_src_mac":"ba:bb:a7:3c:67:1c","in_dest_mac":"86:dd:7a:8f:ae:e2","out_src_mac":"86:dd:7a:8f:ae:e2","out_dest_mac":"ba:bb:a7:3c:67:1c","tcp_client_isn":678677906,"tcp_server_isn":1006700307,"address_type":4,"client_ip":"86.111.175.128","server_ip":"12.16.98.1","client_port":42751,"server_port":80,"in_link_id":65535,"out_link_id":65535,"start_timestamp_ms":1703646546127,"end_timestamp_ms":1703646551702,"duration_ms":5575,"sent_pkts":97,"sent_bytes":5892,"received_pkts":250,"received_bytes":333931,"tcp_c2s_ip_fragments":0,"tcp_s2c_ip_fragments":0,"tcp_c2s_rtx_pkts":0,"tcp_c2s_rtx_bytes":0,"tcp_s2c_rtx_pkts":0,"tcp_s2c_rtx_bytes":0,"tcp_c2s_o3_pkts":0,"tcp_s2c_o3_pkts":0,"tcp_c2s_lost_bytes":0,"tcp_s2c_lost_bytes":0,"flags":26418,"flags_identify_info":[100,1,100,60,150,100,1,2],"app_transition":"http.1111.test_1_1","decoded_as":"HTTP","server_fqdn":"www.ct.cn","app":"test_1_1","decoded_path":"ETHERNET.IPv4.TCP.http","fqdn_category_list":[1767],"t_vsys_id":1,"vsys_id":1,"session_id":290538039798223400,"tcp_handshake_latency_ms":41,"client_os_desc":"Windows","server_os_desc":"Linux","data_center":"center-xxg-tsgx","device_group":"group-xxg-tsgx","device_tag":"{\"tags\":[{\"tag\":\"data_center\",\"value\":\"center-xxg-tsgx\"},{\"tag\":\"device_group\",\"value\":\"group-xxg-tsgx\"}]}","device_id":"9800165603247024","sled_ip":"192.168.40.39","dup_traffic_flag":0}'
format: json
json.ignore.parse.errors: false
@@ -44,10 +44,29 @@ processing_pipelines:
output_fields: [ recv_time ]
parameters:
precision: seconds
+ - function: DOMAIN
+ lookup_fields: [ http_host,ssl_sni,dtls_sni,quic_sni ]
+ output_fields: [ server_domain ]
+ parameters:
+ option: FIRST_SIGNIFICANT_SUBDOMAIN
+ - function: ASN_LOOKUP
+ lookup_fields: [ server_ip ]
+ output_fields: [ server_asn ]
+ parameters:
+ option: IP_TO_ASN
+ kb_name: tsg_ip_asn
+ - function: GEOIP_LOOKUP
+ lookup_fields: [ client_ip ]
+ output_fields: [ client_geolocation ]
+ parameters:
+ kb_name: tsg_ip_location
+ option: IP_TO_COUNTRY
sinks:
clickhouse_sink:
type: clickhouse
+ schema:
+ local_file: /Users/darnell/IdeaProjects/groot-stream/groot-examples/end-to-end-example/src/main/resources/examples/schema/sink_schema.json
properties:
host: 192.168.44.12:9001
table: tsg_galaxy_v3.inline_source_test_local
diff --git a/groot-examples/end-to-end-example/src/main/resources/examples/inline_to_print_avro_schema.yaml b/groot-examples/end-to-end-example/src/main/resources/examples/inline_to_print_avro_schema.yaml
new file mode 100644
index 0000000..a88e36c
--- /dev/null
+++ b/groot-examples/end-to-end-example/src/main/resources/examples/inline_to_print_avro_schema.yaml
@@ -0,0 +1,73 @@
+sources:
+ inline_source:
+ type: inline
+ properties:
+ data: '[{"tcp_rtt_ms":128,"decoded_as":"HTTP","http_version":"http1","http_request_line":"GET / HTTP/1.1","http_host":"www.ct.cn","http_url":"www.ct.cn/","http_user_agent":"curl/8.0.1","http_status_code":200,"http_response_line":"HTTP/1.1 200 OK","http_response_content_type":"text/html; charset=UTF-8","http_response_latency_ms":31,"http_session_duration_ms":5451,"in_src_mac":"ba:bb:a7:3c:67:1c","in_dest_mac":"86:dd:7a:8f:ae:e2","out_src_mac":"86:dd:7a:8f:ae:e2","out_dest_mac":"ba:bb:a7:3c:67:1c","tcp_client_isn":678677906,"tcp_server_isn":1006700307,"address_type":4,"client_ip":"86.111.175.128","server_ip":"12.16.98.1","client_port":42751,"server_port":80,"in_link_id":65535,"out_link_id":65535,"start_timestamp_ms":1703646546127,"end_timestamp_ms":1703646551702,"duration_ms":5575,"sent_pkts":97,"sent_bytes":5892,"received_pkts":250,"received_bytes":333931},{"tcp_rtt_ms":256,"decoded_as":"HTTP","http_version":"http1","http_request_line":"GET / HTTP/1.1","http_host":"www.abc.cn","http_url":"www.cabc.cn/","http_user_agent":"curl/8.0.1","http_status_code":200,"http_response_line":"HTTP/1.1 200 OK","http_response_content_type":"text/html; charset=UTF-8","http_response_latency_ms":31,"http_session_duration_ms":5451,"in_src_mac":"ba:bb:a7:3c:67:1c","in_dest_mac":"86:dd:7a:8f:ae:e2","out_src_mac":"86:dd:7a:8f:ae:e2","out_dest_mac":"ba:bb:a7:3c:67:1c","tcp_client_isn":678677906,"tcp_server_isn":1006700307,"address_type":4,"client_ip":"192.168.10.198","server_ip":"4.4.4.4","client_port":42751,"server_port":80,"in_link_id":65535,"out_link_id":65535,"start_timestamp_ms":1703646546127,"end_timestamp_ms":1703646551702,"duration_ms":2575,"sent_pkts":197,"sent_bytes":5892,"received_pkts":350,"received_bytes":533931}]'
+ format: json
+ json.ignore.parse.errors: false
+
+filters:
+ filter_operator:
+ type: com.geedgenetworks.core.filter.AviatorFilter
+ properties:
+ expression: event.server_ip != '12.12.12.12'
+
+processing_pipelines:
+ projection_processor:
+ type: com.geedgenetworks.core.processor.projection.ProjectionProcessorImpl
+ remove_fields: [http_request_line, http_response_line, http_response_content_type]
+ functions:
+ - function: DROP
+ filter: event.server_ip == '4.4.4.4'
+ - function: SNOWFLAKE_ID
+ lookup_fields: [ ]
+ output_fields: [ log_id ]
+ parameters:
+ data_center_id_num: 1
+ - function: CURRENT_UNIX_TIMESTAMP
+ output_fields: [ recv_time ]
+ parameters:
+ precision: seconds
+ - function: DOMAIN
+ lookup_fields: [ http_host,ssl_sni,dtls_sni,quic_sni ]
+ output_fields: [ server_domain ]
+ parameters:
+ option: FIRST_SIGNIFICANT_SUBDOMAIN
+ - function: ASN_LOOKUP
+ lookup_fields: [ server_ip ]
+ output_fields: [ server_asn ]
+ parameters:
+ option: IP_TO_ASN
+ kb_name: tsg_ip_asn
+
+ - function: GEOIP_LOOKUP
+ lookup_fields: [ client_ip ]
+ output_fields: [ client_geolocation ]
+ parameters:
+ kb_name: tsg_ip_location
+ option: IP_TO_COUNTRY
+
+sinks:
+ print_sink:
+ type: print
+ schema:
+ local_file: /Users/darnell/IdeaProjects/groot-stream/groot-examples/end-to-end-example/src/main/resources/examples/schema/sink_schema.json
+ properties:
+ format: json
+ mode: log_warn
+
+application:
+ env:
+ name: example-inline-to-print
+ parallelism: 3
+ pipeline:
+ object-reuse: true
+ topology:
+ - name: inline_source
+ downstream: [filter_operator]
+ - name: filter_operator
+ downstream: [ projection_processor ]
+ - name: projection_processor
+ downstream: [ print_sink ]
+ - name: print_sink
+ downstream: [] \ No newline at end of file
diff --git a/groot-examples/end-to-end-example/src/main/resources/examples/schema/sink_schema.json b/groot-examples/end-to-end-example/src/main/resources/examples/schema/sink_schema.json
new file mode 100644
index 0000000..2730873
--- /dev/null
+++ b/groot-examples/end-to-end-example/src/main/resources/examples/schema/sink_schema.json
@@ -0,0 +1,302 @@
+{
+ "type": "record",
+ "name": "test_source",
+ "namespace": "abc",
+ "fields": [
+ {
+ "name": "recv_time",
+ "label": "Receive Time",
+ "doc": {
+ "constraints": {
+ "type": "unix_timestamp"
+ },
+ "visibility": "enabled",
+ "ttl": null
+ },
+ "type": {
+ "type": "long",
+ "logicalType": "unix_timestamp"
+ }
+ },
+ {
+ "name": "log_id",
+ "label": "Log ID",
+ "doc": {
+ "visibility": "enabled",
+ "ttl": null
+ },
+ "type": "long"
+ },
+ {
+ "name": "http_url",
+ "label": "HTTP.URL",
+ "doc": {
+ "visibility": "enabled",
+ "ttl": null
+ },
+ "type": "string"
+ },
+ {
+ "name": "http_host",
+ "label": "HTTP.Host",
+ "doc": {
+ "visibility": "enabled",
+ "ttl": null
+ },
+ "type": "string"
+ },
+ {
+ "name": "http_request_line",
+ "label": "HTTP.Request Line",
+ "doc": {
+ "visibility": "enabled",
+ "ttl": null
+ },
+ "type": "string"
+ },
+ {
+ "name": "http_response_line",
+ "label": "HTTP.Response Line",
+ "doc": {
+ "visibility": "enabled",
+ "ttl": null
+ },
+ "type": "string"
+ },
+ {
+ "name": "decoded_as",
+ "label": "Decoded AS",
+ "doc": {
+ "constraints": {
+ "operator_functions": "=,!=,in,not in"
+ },
+ "data": [
+ {
+ "code": "BASE",
+ "value": "BASE"
+ },
+ {
+ "code": "MAIL",
+ "value": "MAIL"
+ },
+ {
+ "code": "DNS",
+ "value": "DNS"
+ },
+ {
+ "code": "HTTP",
+ "value": "HTTP"
+ },
+ {
+ "code": "SSL",
+ "value": "SSL"
+ },
+ {
+ "code": "DTLS",
+ "value": "DTLS"
+ },
+ {
+ "code": "QUIC",
+ "value": "QUIC"
+ },
+ {
+ "code": "FTP",
+ "value": "FTP"
+ },
+ {
+ "code": "SSH",
+ "value": "SSH"
+ },
+ {
+ "code": "Stratum",
+ "value": "Stratum"
+ },
+ {
+ "code": "RDP",
+ "value": "RDP"
+ },
+ {
+ "code": "SIP",
+ "value": "SIP"
+ },
+ {
+ "code": "RTP",
+ "value": "RTP"
+ }
+ ],
+ "visibility": "enabled",
+ "ttl": null
+ },
+ "type": "string"
+ },
+ {
+ "name": "tcp_rtt_ms",
+ "label": "Round-trip Time (ms)",
+ "doc": {
+ "constraints": {
+ "type": "decimal",
+ "aggregation_functions": "AVG, MAX, MIN, MEDIAN, P95_PERCENTILE, P99_PERCENTILE"
+ },
+ "visibility": "enabled",
+ "ttl": null
+ },
+ "type": "int"
+ },
+ {
+ "name": "sent_bytes",
+ "label": "Bytes Sent",
+ "doc": {
+ "constraints": {
+ "type": "bytes"
+ },
+ "visibility": "enabled",
+ "ttl": null
+ },
+ "type": "long"
+ },
+ {
+ "name": "received_bytes",
+ "label": "Bytes Received",
+ "doc": {
+ "constraints": {
+ "type": "bytes"
+ },
+ "visibility": "enabled",
+ "ttl": null
+ },
+ "type": "long"
+ },{
+ "name": "server_fqdn",
+ "label": "Server FQDN",
+ "doc": {
+ "visibility": "enabled",
+ "ttl": null
+ },
+ "type": "string"
+ },
+ {
+ "name": "server_domain",
+ "label": "Server Domain",
+ "doc": {
+ "visibility": "enabled",
+ "ttl": null
+ },
+ "type": "string"
+ },{
+ "name": "client_ip",
+ "label": "Client IP",
+ "doc": {
+ "constraints": {
+ "type": "ip"
+ },
+ "visibility": "enabled",
+ "ttl": null
+ },
+ "type": "string"
+ },
+ {
+ "name": "client_port",
+ "label": "Client Port",
+ "doc": {
+ "constraints": {
+ "aggregation_functions": "COUNT, COUNT_DISTINCT"
+ },
+ "visibility": "enabled",
+ "ttl": null
+ },
+ "type": "int"
+ },
+ {
+ "name": "client_os_desc",
+ "type": "string",
+ "doc": {
+ "visibility": "enabled",
+ "ttl": null
+ },
+ "label": "Client OS Description"
+ },
+ {
+ "name": "client_geolocation",
+ "label": "Client Geolocation",
+ "doc": {
+ "visibility": "enabled",
+ "ttl": null
+ },
+ "type": "string"
+ },
+ {
+ "name": "client_asn",
+ "label": "Client ASN",
+ "doc": {
+ "constraints": {
+ "aggregation_functions": "COUNT, COUNT_DISTINCT"
+ },
+ "visibility": "enabled",
+ "ttl": null
+ },
+ "type": "long"
+ },
+ {
+ "name": "subscriber_id",
+ "label": "Subscriber ID",
+ "doc": {
+ "visibility": "enabled",
+ "ttl": null
+ },
+ "type": "string"
+ },{
+ "name": "server_ip",
+ "label": "Server IP",
+ "doc": {
+ "constraints": {
+ "type": "ip"
+ },
+ "visibility": "enabled",
+ "ttl": null
+ },
+ "type": "string"
+ },
+ {
+ "name": "server_port",
+ "label": "Server Port",
+ "doc": {
+ "constraints": {
+ "aggregation_functions": "COUNT, COUNT_DISTINCT"
+ },
+ "visibility": "enabled",
+ "ttl": null
+ },
+ "type": "int"
+ },
+ {
+ "name": "server_os_desc",
+ "type": "string",
+ "doc": {
+ "visibility": "enabled",
+ "ttl": null
+ },
+ "label": "Server OS Description"
+ },
+ {
+ "name": "server_geolocation",
+ "label": "Server Geolocation",
+ "doc": {
+ "visibility": "enabled",
+ "ttl": null
+ },
+ "type": "string"
+ },
+ {
+ "name": "server_asn",
+ "label": "Server ASN",
+ "doc": {
+ "constraints": {
+ "aggregation_functions": "COUNT, COUNT_DISTINCT"
+ },
+ "visibility": "enabled",
+ "ttl": null
+ },
+ "type": "long"
+ }
+ ]
+} \ No newline at end of file
diff --git a/groot-examples/end-to-end-example/src/main/resources/grootstream.yaml b/groot-examples/end-to-end-example/src/main/resources/grootstream.yaml
new file mode 100644
index 0000000..1ffda9f
--- /dev/null
+++ b/groot-examples/end-to-end-example/src/main/resources/grootstream.yaml
@@ -0,0 +1,17 @@
+grootstream:
+ knowledge_base:
+ - name: tsg_ip_asn
+ fs_type: local
+ fs_path: /Users/darnell/IdeaProjects/groot-stream/groot-core/src/test/resources/
+ files:
+ - asn.mmdb
+ - name: tsg_ip_location
+ fs_type: local
+ fs_path: /Users/darnell/IdeaProjects/groot-stream/groot-core/src/test/resources/
+ files:
+ - ip_builtin.mmdb
+ properties:
+ hos.path: http://192.168.44.12:9098/hos
+ hos.bucket.name.traffic_file: traffic_file_bucket
+ hos.bucket.name.troubleshooting_file: troubleshooting_file_bucket
+ scheduler.knowledge_base.update.interval.minutes: 5