diff options
| author | doufenghu <[email protected]> | 2024-03-16 19:32:42 +0800 |
|---|---|---|
| committer | doufenghu <[email protected]> | 2024-03-16 19:32:42 +0800 |
| commit | 25994fade7720a43021b25004ade13c71f941e88 (patch) | |
| tree | 9c91a57d526f0579f241add718d8cb114ff04468 | |
| parent | 9ff68b2c631606cf06a7001036ff16475c52371c (diff) | |
[Improve][Docs] Add some help information for connector schema and knowledge base files.
8 files changed, 480 insertions, 53 deletions
diff --git a/config/template/grootstream_job_template.yaml b/config/template/grootstream_job_template.yaml index 2606d56..d700777 100644 --- a/config/template/grootstream_job_template.yaml +++ b/config/template/grootstream_job_template.yaml @@ -8,6 +8,10 @@ sources: # [object] Define connector source kafka_source: # [object] Kafka source connector name, must be unique. It used to define the source node of the job topology. type: kafka # [string] Source Type + schema: # [object] Source Schema, config through fields or local_file or url. if not set schema, all fields(Map<String, Object>) will be output. + #fields: "struct<log_id:bigint, recv_time:bigint,client_ip: string>" + local_file: /../schema/kafka_source_schema.json # [string] Local File Path for Schema + #url: http:// # [string] URL for Schema properties: # [object] Kafka source properties topic: SESSION-RECORD # [string] Topic Name, consumer will subscribe this topic. kafka.bootstrap.servers: 127.0.0.1:9092 # [string] Kafka Bootstrap Servers, if you have multiple servers, use comma to separate them. @@ -118,7 +122,7 @@ processing_pipelines: # [object] Define Processors for processing pipelines. functions: # [array of object] Function List - function: GEOIP_LOOKUP lookup_fields: [ server_ip ] - output_fields: [ server_asn ] + output_fields: [ server_geolocation ] parameters: kb_name: tsg_ip_location option: IP_TO_OBJECT @@ -292,6 +296,8 @@ sinks: # [object] Define connector sink clickhouse_sink: # [object] ClickHouse sink connector name, must be unique. It used to define the sink node of the job topology. 
type: clickhouse + schema: + local_file: /../schema/clickhouse_sink_schema.json properties: host: 127.0.0.1:9001 table: inline_source_test_local diff --git a/docs/connector/connector.md b/docs/connector/connector.md index 6bcc878..e36214c 100644 --- a/docs/connector/connector.md +++ b/docs/connector/connector.md @@ -7,13 +7,13 @@ Source Connector contains some common core features, and each source connector s sources: ${source_name}: type: ${source_connector_type} - # source table schema, config through fields or local_file or url - schema: - fields: + # Source schema, config through fields or local_file or url. if not set schema, all fields(Map<String, Object>) will be output. + schema: + fields: - name: ${field_name} type: ${field_type} - # local_file: "schema path" - # url: "schema http url" + # local_file: "/path/to/schema.json" + # url: "https://localhost:8080/schema.json" properties: ${prop_key}: ${prop_value} ``` @@ -29,12 +29,13 @@ The source connector supports reading only specified fields from the data source The Schema Structure refer to [Schema Structure](../user-guide.md#schema-structure). ## Schema Config -Schema can config through fields or local_file or url. +Schema can be configured through fields or local_file or url. If not set schema, all fields(Map<String, Object>) will be output. And local_file and url only support Avro schema format. More details see the [Avro Schema](https://avro.apache.org/docs/1.11.1/specification/). -### fields +### Fields +It can be configured through array or sql style. It is recommended to use array style, which is more readable. ```yaml schema: - # by array + # array style fields: - name: ${field_name} type: ${field_type} @@ -42,31 +43,28 @@ schema: ```yaml schema: - # by sql + # sql style fields: "struct<field_name:field_type, ...>" # can also without outer struct<> # fields: "field_name:field_type, ..." ``` -### local_file - +### Local File +To retrieve the schema from a local file using its absolute path. 
```yaml schema: # by array fields: - local_file: "schema path" + local_file: "/path/to/schema.json" ``` -### url -Retrieve updated schema from URL for cycle, support dynamic schema. Not all connector support dynamic schema. - -The connectors that currently support dynamic schema include: clickHouse sink. - +### URL +Some connectors support periodically fetching and updating the schema from a URL, such as the `ClickHouse Sink`. ```yaml schema: # by array fields: - url: "schema http url" + url: "https://localhost:8080/schema.json" ``` # Sink Connector @@ -81,8 +79,8 @@ sinks: # sink table schema, config through fields or local_file or url. if not set schema, all fields(Map<String, Object>) will be output. schema: fields: "struct<field_name:field_type, ...>" - # local_file: "schema path" - # url: "schema url" + # local_file: "/path/to/schema.json" + # url: "https://localhost:8080/schema.json" properties: ${prop_key}: ${prop_value} ``` diff --git a/docs/grootstream-config.md b/docs/grootstream-config.md index a359c39..479f4a7 100644 --- a/docs/grootstream-config.md +++ b/docs/grootstream-config.md @@ -5,16 +5,25 @@ The purpose of this file is to provide a global configuration for the groot-stre ```yaml grootstream: - knowledge_base: # Define the knowledge base list. - - name: ${knowledge_base_name} # Define the name of the knowledge base, used to kb function. - fs_type: ${file_system_type} # Define the type of the file system.Support: local,hdfs,http. - fs_path: ${file_system_path} # Define the path of the file system. + knowledge_base: # Define the libraries + - name: ${knowledge_base_name} + fs_type: ${file_system_type} + fs_path: ${file_system_path} files: - ${file_name} # Define the file name of the knowledge base. properties: # Custom parameters. 
hos.path: ${hos_path} hos.bucket.name.traffic_file: ${traffic_file_bucket} hos.bucket.name.troubleshooting_file: ${troubleshooting_file_bucket} - scheduler.knowledge_base.update.interval.minutes: ${knowledge_base_update_interval_minutes} + scheduler.knowledge_base.update.interval.minutes: ${knowledge_base_update_interval_minutes} # Define the interval of the knowledge base file update. ``` +### Knowledge Base +The knowledge base is a collection of libraries that can be used in the groot-stream job's UDFs. The file system type can be either `local` or `http`. If the value is `http`, the path must be a `KB Repository` URL. The library will be dynamically updated according to the `scheduler.knowledge_base.update.interval.minutes` configuration. + +| Name | Type | Required | Default | Description | +|:---------|:--------|:---------|:--------|:---------------------------------------------------------------------------| +| name | String | Yes | - | The name of the knowledge base, used by [UDF](processor/udf.md) | +| fs_type | String | Yes | - | The type of the file system. Enum: local and http. | +| fs_path | String | Yes | - | The path of the file system. It can be a file directory or an HTTP RESTful API. | +| files | Array | No | - | The file list of the knowledge base object. 
| diff --git a/docs/user-guide.md b/docs/user-guide.md index fa05547..a8f5067 100644 --- a/docs/user-guide.md +++ b/docs/user-guide.md @@ -8,19 +8,20 @@ The main format of the config template file is `yaml`, for more details of this sources: inline_source: type: inline - fields: - - name: log_id - type: bigint - - name: recv_time - type: bigint - - name: fqdn_string - type: string - - name: client_ip - type: string - - name: server_ip - type: string - - name: decoded_as - type: string + schema: + fields: + - name: log_id + type: bigint + - name: recv_time + type: bigint + - name: fqdn_string + type: string + - name: client_ip + type: string + - name: server_ip + type: string + - name: decoded_as + type: string properties: data: '{"log_id": 1, "recv_time":"111","fqdn_string":"baidu.com", "client_ip":"192.168.0.1","server_ip":"120.233.20.242","decoded_as":"BASE", "dup_traffic_flag":1}' format: json @@ -92,19 +93,20 @@ application: ## Schema Structure Some sources are not strongly limited schema, so you need use `fields` to define the field name and type. The source can customize the schema. Like `Kafka` `Inline` source etc. ```yaml -fields: - - name: log_id - type: bigint - - name: recv_time - type: bigint - - name: fqdn_string - type: string - - name: client_ip - type: string - - name: server_ip - type: string - - name: decoded_as - type: string +schema: + fields: + - name: log_id + type: bigint + - name: recv_time + type: bigint + - name: fqdn_string + type: string + - name: client_ip + type: string + - name: server_ip + type: string + - name: decoded_as + type: string ``` `name` The name of the field. `type` The data type of the field. @@ -136,6 +138,7 @@ Sink is used to define where GrootStream needs to output data. Multiple sinks ca ## Application Used to define some common parameters of the job and the topology of the job. such as the name of the job, the parallelism of the job, etc. The following configuration parameters are supported. 
+ ### ENV Used to define job environment configuration information. For more details, you can refer to the documentation [JobEnvConfig](./env-config.md). diff --git a/groot-examples/end-to-end-example/src/main/resources/examples/inline_to_clickhouse.yaml b/groot-examples/end-to-end-example/src/main/resources/examples/inline_to_clickhouse.yaml index 829741d..c7072a4 100644 --- a/groot-examples/end-to-end-example/src/main/resources/examples/inline_to_clickhouse.yaml +++ b/groot-examples/end-to-end-example/src/main/resources/examples/inline_to_clickhouse.yaml @@ -27,7 +27,7 @@ sources: # [object] Define connector source # # [string] Event Data, it will be parsed to Map<String, Object> by the specified format. # - data: '{"recv_time": 1705565615, "tcp_rtt_ms":128,"decoded_as":"HTTP", "http_version":"http1","http_request_line":"GET / HTTP/1.1","http_host":"www.ct.cn","http_url":"www.ct.cn/","http_user_agent":"curl/8.0.1","http_status_code":200,"http_response_line":"HTTP/1.1 200 OK","http_response_content_type":"text/html; 
charset=UTF-8","http_response_latency_ms":31,"http_session_duration_ms":5451,"in_src_mac":"ba:bb:a7:3c:67:1c","in_dest_mac":"86:dd:7a:8f:ae:e2","out_src_mac":"86:dd:7a:8f:ae:e2","out_dest_mac":"ba:bb:a7:3c:67:1c","tcp_client_isn":678677906,"tcp_server_isn":1006700307,"address_type":4,"client_ip":"192.11.22.22","server_ip":"8.8.8.8","client_port":42751,"server_port":80,"in_link_id":65535,"out_link_id":65535,"start_timestamp_ms":1703646546127,"end_timestamp_ms":1703646551702,"duration_ms":5575,"sent_pkts":97,"sent_bytes":5892,"received_pkts":250,"received_bytes":333931,"tcp_c2s_ip_fragments":0,"tcp_s2c_ip_fragments":0,"tcp_c2s_rtx_pkts":0,"tcp_c2s_rtx_bytes":0,"tcp_s2c_rtx_pkts":0,"tcp_s2c_rtx_bytes":0,"tcp_c2s_o3_pkts":0,"tcp_s2c_o3_pkts":0,"tcp_c2s_lost_bytes":0,"tcp_s2c_lost_bytes":0,"flags":26418,"flags_identify_info":[100,1,100,60,150,100,1,2],"app_transition":"http.1111.test_1_1","decoded_as":"HTTP","server_fqdn":"www.ct.cn","app":"test_1_1","decoded_path":"ETHERNET.IPv4.TCP.http","fqdn_category_list":[1767],"t_vsys_id":1,"vsys_id":1,"session_id":290538039798223400,"tcp_handshake_latency_ms":41,"client_os_desc":"Windows","server_os_desc":"Linux","data_center":"center-xxg-tsgx","device_group":"group-xxg-tsgx","device_tag":"{\"tags\":[{\"tag\":\"data_center\",\"value\":\"center-xxg-tsgx\"},{\"tag\":\"device_group\",\"value\":\"group-xxg-tsgx\"}]}","device_id":"9800165603247024","sled_ip":"192.168.40.39","dup_traffic_flag":0}' + data: '{"recv_time": 1705565615, "tcp_rtt_ms":128,"decoded_as":"HTTP", "http_version":"http1","http_request_line":"GET / HTTP/1.1","http_host":"www.ct.cn","http_url":"www.ct.cn/","http_user_agent":"curl/8.0.1","http_status_code":200,"http_response_line":"HTTP/1.1 200 OK","http_response_content_type":"text/html; 
charset=UTF-8","http_response_latency_ms":31,"http_session_duration_ms":5451,"in_src_mac":"ba:bb:a7:3c:67:1c","in_dest_mac":"86:dd:7a:8f:ae:e2","out_src_mac":"86:dd:7a:8f:ae:e2","out_dest_mac":"ba:bb:a7:3c:67:1c","tcp_client_isn":678677906,"tcp_server_isn":1006700307,"address_type":4,"client_ip":"86.111.175.128","server_ip":"12.16.98.1","client_port":42751,"server_port":80,"in_link_id":65535,"out_link_id":65535,"start_timestamp_ms":1703646546127,"end_timestamp_ms":1703646551702,"duration_ms":5575,"sent_pkts":97,"sent_bytes":5892,"received_pkts":250,"received_bytes":333931,"tcp_c2s_ip_fragments":0,"tcp_s2c_ip_fragments":0,"tcp_c2s_rtx_pkts":0,"tcp_c2s_rtx_bytes":0,"tcp_s2c_rtx_pkts":0,"tcp_s2c_rtx_bytes":0,"tcp_c2s_o3_pkts":0,"tcp_s2c_o3_pkts":0,"tcp_c2s_lost_bytes":0,"tcp_s2c_lost_bytes":0,"flags":26418,"flags_identify_info":[100,1,100,60,150,100,1,2],"app_transition":"http.1111.test_1_1","decoded_as":"HTTP","server_fqdn":"www.ct.cn","app":"test_1_1","decoded_path":"ETHERNET.IPv4.TCP.http","fqdn_category_list":[1767],"t_vsys_id":1,"vsys_id":1,"session_id":290538039798223400,"tcp_handshake_latency_ms":41,"client_os_desc":"Windows","server_os_desc":"Linux","data_center":"center-xxg-tsgx","device_group":"group-xxg-tsgx","device_tag":"{\"tags\":[{\"tag\":\"data_center\",\"value\":\"center-xxg-tsgx\"},{\"tag\":\"device_group\",\"value\":\"group-xxg-tsgx\"}]}","device_id":"9800165603247024","sled_ip":"192.168.40.39","dup_traffic_flag":0}' format: json json.ignore.parse.errors: false @@ -44,10 +44,29 @@ processing_pipelines: output_fields: [ recv_time ] parameters: precision: seconds + - function: DOMAIN + lookup_fields: [ http_host,ssl_sni,dtls_sni,quic_sni ] + output_fields: [ server_domain ] + parameters: + option: FIRST_SIGNIFICANT_SUBDOMAIN + - function: ASN_LOOKUP + lookup_fields: [ server_ip ] + output_fields: [ server_asn ] + parameters: + option: IP_TO_ASN + kb_name: tsg_ip_asn + - function: GEOIP_LOOKUP + lookup_fields: [ client_ip ] + output_fields: [ 
client_geolocation ] + parameters: + kb_name: tsg_ip_location + option: IP_TO_COUNTRY sinks: clickhouse_sink: type: clickhouse + schema: + local_file: /Users/darnell/IdeaProjects/groot-stream/groot-examples/end-to-end-example/src/main/resources/examples/schema/sink_schema.json properties: host: 192.168.44.12:9001 table: tsg_galaxy_v3.inline_source_test_local diff --git a/groot-examples/end-to-end-example/src/main/resources/examples/inline_to_print_avro_schema.yaml b/groot-examples/end-to-end-example/src/main/resources/examples/inline_to_print_avro_schema.yaml new file mode 100644 index 0000000..a88e36c --- /dev/null +++ b/groot-examples/end-to-end-example/src/main/resources/examples/inline_to_print_avro_schema.yaml @@ -0,0 +1,73 @@ +sources: + inline_source: + type: inline + properties: + data: '[{"tcp_rtt_ms":128,"decoded_as":"HTTP","http_version":"http1","http_request_line":"GET / HTTP/1.1","http_host":"www.ct.cn","http_url":"www.ct.cn/","http_user_agent":"curl/8.0.1","http_status_code":200,"http_response_line":"HTTP/1.1 200 OK","http_response_content_type":"text/html; charset=UTF-8","http_response_latency_ms":31,"http_session_duration_ms":5451,"in_src_mac":"ba:bb:a7:3c:67:1c","in_dest_mac":"86:dd:7a:8f:ae:e2","out_src_mac":"86:dd:7a:8f:ae:e2","out_dest_mac":"ba:bb:a7:3c:67:1c","tcp_client_isn":678677906,"tcp_server_isn":1006700307,"address_type":4,"client_ip":"86.111.175.128","server_ip":"12.16.98.1","client_port":42751,"server_port":80,"in_link_id":65535,"out_link_id":65535,"start_timestamp_ms":1703646546127,"end_timestamp_ms":1703646551702,"duration_ms":5575,"sent_pkts":97,"sent_bytes":5892,"received_pkts":250,"received_bytes":333931},{"tcp_rtt_ms":256,"decoded_as":"HTTP","http_version":"http1","http_request_line":"GET / HTTP/1.1","http_host":"www.abc.cn","http_url":"www.cabc.cn/","http_user_agent":"curl/8.0.1","http_status_code":200,"http_response_line":"HTTP/1.1 200 OK","http_response_content_type":"text/html; 
charset=UTF-8","http_response_latency_ms":31,"http_session_duration_ms":5451,"in_src_mac":"ba:bb:a7:3c:67:1c","in_dest_mac":"86:dd:7a:8f:ae:e2","out_src_mac":"86:dd:7a:8f:ae:e2","out_dest_mac":"ba:bb:a7:3c:67:1c","tcp_client_isn":678677906,"tcp_server_isn":1006700307,"address_type":4,"client_ip":"192.168.10.198","server_ip":"4.4.4.4","client_port":42751,"server_port":80,"in_link_id":65535,"out_link_id":65535,"start_timestamp_ms":1703646546127,"end_timestamp_ms":1703646551702,"duration_ms":2575,"sent_pkts":197,"sent_bytes":5892,"received_pkts":350,"received_bytes":533931}]' + format: json + json.ignore.parse.errors: false + +filters: + filter_operator: + type: com.geedgenetworks.core.filter.AviatorFilter + properties: + expression: event.server_ip != '12.12.12.12' + +processing_pipelines: + projection_processor: + type: com.geedgenetworks.core.processor.projection.ProjectionProcessorImpl + remove_fields: [http_request_line, http_response_line, http_response_content_type] + functions: + - function: DROP + filter: event.server_ip == '4.4.4.4' + - function: SNOWFLAKE_ID + lookup_fields: [ ] + output_fields: [ log_id ] + parameters: + data_center_id_num: 1 + - function: CURRENT_UNIX_TIMESTAMP + output_fields: [ recv_time ] + parameters: + precision: seconds + - function: DOMAIN + lookup_fields: [ http_host,ssl_sni,dtls_sni,quic_sni ] + output_fields: [ server_domain ] + parameters: + option: FIRST_SIGNIFICANT_SUBDOMAIN + - function: ASN_LOOKUP + lookup_fields: [ server_ip ] + output_fields: [ server_asn ] + parameters: + option: IP_TO_ASN + kb_name: tsg_ip_asn + + - function: GEOIP_LOOKUP + lookup_fields: [ client_ip ] + output_fields: [ client_geolocation ] + parameters: + kb_name: tsg_ip_location + option: IP_TO_COUNTRY + +sinks: + print_sink: + type: print + schema: + local_file: /Users/darnell/IdeaProjects/groot-stream/groot-examples/end-to-end-example/src/main/resources/examples/schema/sink_schema.json + properties: + format: json + mode: log_warn + +application: + 
env: + name: example-inline-to-print + parallelism: 3 + pipeline: + object-reuse: true + topology: + - name: inline_source + downstream: [filter_operator] + - name: filter_operator + downstream: [ projection_processor ] + - name: projection_processor + downstream: [ print_sink ] + - name: print_sink + downstream: []
\ No newline at end of file diff --git a/groot-examples/end-to-end-example/src/main/resources/examples/schema/sink_schema.json b/groot-examples/end-to-end-example/src/main/resources/examples/schema/sink_schema.json new file mode 100644 index 0000000..2730873 --- /dev/null +++ b/groot-examples/end-to-end-example/src/main/resources/examples/schema/sink_schema.json @@ -0,0 +1,302 @@ +{ + "type": "record", + "name": "test_source", + "namespace": "abc", + "fields": [ + { + "name": "recv_time", + "label": "Receive Time", + "doc": { + "constraints": { + "type": "unix_timestamp" + }, + "visibility": "enabled", + "ttl": null + }, + "type": { + "type": "long", + "logicalType": "unix_timestamp" + } + }, + { + "name": "log_id", + "label": "Log ID", + "doc": { + "visibility": "enabled", + "ttl": null + }, + "type": "long" + }, + { + "name": "http_url", + "label": "HTTP.URL", + "doc": { + "visibility": "enabled", + "ttl": null + }, + "type": "string" + }, + { + "name": "http_host", + "label": "HTTP.Host", + "doc": { + "visibility": "enabled", + "ttl": null + }, + "type": "string" + }, + { + "name": "http_request_line", + "label": "HTTP.Request Line", + "doc": { + "visibility": "enabled", + "ttl": null + }, + "type": "string" + }, + { + "name": "http_response_line", + "label": "HTTP.Response Line", + "doc": { + "visibility": "enabled", + "ttl": null + }, + "type": "string" + }, + { + "name": "decoded_as", + "label": "Decoded AS", + "doc": { + "constraints": { + "operator_functions": "=,!=,in,not in" + }, + "data": [ + { + "code": "BASE", + "value": "BASE" + }, + { + "code": "MAIL", + "value": "MAIL" + }, + { + "code": "DNS", + "value": "DNS" + }, + { + "code": "HTTP", + "value": "HTTP" + }, + { + "code": "SSL", + "value": "SSL" + }, + { + "code": "DTLS", + "value": "DTLS" + }, + { + "code": "QUIC", + "value": "QUIC" + }, + { + "code": "FTP", + "value": "FTP" + }, + { + "code": "SSH", + "value": "SSH" + }, + { + "code": "Stratum", + "value": "Stratum" + }, + { + "code": "RDP", + 
"value": "RDP" + }, + { + "code": "SIP", + "value": "SIP" + }, + { + "code": "RTP", + "value": "RTP" + } + ], + "visibility": "enabled", + "ttl": null + }, + "type": "string" + }, + { + "name": "tcp_rtt_ms", + "label": "Round-trip Time (ms)", + "doc": { + "constraints": { + "type": "decimal", + "aggregation_functions": "AVG, MAX, MIN, MEDIAN, P95_PERCENTILE, P99_PERCENTILE" + }, + "visibility": "enabled", + "ttl": null + }, + "type": "int" + }, + { + "name": "sent_bytes", + "label": "Bytes Sent", + "doc": { + "constraints": { + "type": "bytes" + }, + "visibility": "enabled", + "ttl": null + }, + "type": "long" + }, + { + "name": "received_bytes", + "label": "Bytes Received", + "doc": { + "constraints": { + "type": "bytes" + }, + "visibility": "enabled", + "ttl": null + }, + "type": "long" + },{ + "name": "server_fqdn", + "label": "Server FQDN", + "doc": { + "visibility": "enabled", + "ttl": null + }, + "type": "string" + }, + { + "name": "server_domain", + "label": "Server Domain", + "doc": { + "visibility": "enabled", + "ttl": null + }, + "type": "string" + },{ + "name": "client_ip", + "label": "Client IP", + "doc": { + "constraints": { + "type": "ip" + }, + "visibility": "enabled", + "ttl": null + }, + "type": "string" + }, + { + "name": "client_port", + "label": "Client Port", + "doc": { + "constraints": { + "aggregation_functions": "COUNT, COUNT_DISTINCT" + }, + "visibility": "enabled", + "ttl": null + }, + "type": "int" + }, + { + "name": "client_os_desc", + "type": "string", + "doc": { + "visibility": "enabled", + "ttl": null + }, + "label": "Client OS Description" + }, + { + "name": "client_geolocation", + "label": "Client Geolocation", + "doc": { + "visibility": "enabled", + "ttl": null + }, + "type": "string" + }, + { + "name": "client_asn", + "label": "Client ASN", + "doc": { + "constraints": { + "aggregation_functions": "COUNT, COUNT_DISTINCT" + }, + "visibility": "enabled", + "ttl": null + }, + "type": "long" + }, + { + "name": "subscriber_id", + 
"label": "Subscriber ID", + "doc": { + "visibility": "enabled", + "ttl": null + }, + "type": "string" + },{ + "name": "server_ip", + "label": "Server IP", + "doc": { + "constraints": { + "type": "ip" + }, + "visibility": "enabled", + "ttl": null + }, + "type": "string" + }, + { + "name": "server_port", + "label": "Server Port", + "doc": { + "constraints": { + "aggregation_functions": "COUNT, COUNT_DISTINCT" + }, + "visibility": "enabled", + "ttl": null + }, + "type": "int" + }, + { + "name": "server_os_desc", + "type": "string", + "doc": { + "visibility": "enabled", + "ttl": null + }, + "label": "Server OS Description" + }, + { + "name": "server_geolocation", + "label": "Server Geolocation", + "doc": { + "visibility": "enabled", + "ttl": null + }, + "type": "string" + }, + { + "name": "server_asn", + "label": "Server ASN", + "doc": { + "constraints": { + "aggregation_functions": "COUNT, COUNT_DISTINCT" + }, + "visibility": "enabled", + "ttl": null + }, + "type": "long" + } + ] +}
\ No newline at end of file diff --git a/groot-examples/end-to-end-example/src/main/resources/grootstream.yaml b/groot-examples/end-to-end-example/src/main/resources/grootstream.yaml new file mode 100644 index 0000000..1ffda9f --- /dev/null +++ b/groot-examples/end-to-end-example/src/main/resources/grootstream.yaml @@ -0,0 +1,17 @@ +grootstream: + knowledge_base: + - name: tsg_ip_asn + fs_type: local + fs_path: /Users/darnell/IdeaProjects/groot-stream/groot-core/src/test/resources/ + files: + - asn.mmdb + - name: tsg_ip_location + fs_type: local + fs_path: /Users/darnell/IdeaProjects/groot-stream/groot-core/src/test/resources/ + files: + - ip_builtin.mmdb + properties: + hos.path: http://192.168.44.12:9098/hos + hos.bucket.name.traffic_file: traffic_file_bucket + hos.bucket.name.troubleshooting_file: troubleshooting_file_bucket + scheduler.knowledge_base.update.interval.minutes: 5 |
