author     doufenghu <[email protected]>    2024-11-08 16:49:43 +0800
committer  doufenghu <[email protected]>    2024-11-08 16:49:43 +0800
commit     446662f03d5af5ca4a849944b95ebc55c61c2e2c (patch)
tree       eee3a05b4c7031e6839382e527e5a8e51021323e
parent     f20d93b792c0e814ed7c00062ac908d6cc68436a (diff)
Add the 24.09-related initialization SQL and template configuration files
-rw-r--r--  README.md | 6
-rw-r--r--  clickhouse/001_create_tsg_olap_clickhouse_table.sql (renamed from clickhouse/tsg_olap_clickhouse_ddl.sql) | 0
-rw-r--r--  clickhouse/002_check_tsg_olap_clickhouse_table.sql (renamed from clickhouse/tsg_olap_clickhouse_ddl_check.sql) | 0
-rw-r--r--  config-templates/README.md | 1
-rw-r--r--  druid/README.md | 1
-rw-r--r--  file-chunk-combiner/agg_traffic_file_chunk_combiner (renamed from file-chunk-combiner/templates/agg_traffic_file_chunk_combiner) | 0
-rw-r--r--  file-chunk-combiner/cluster/config/agg_traffic_eml_file_chunk_combiner (renamed from file-chunk-combiner/集群/config/agg_traffic_eml_file_chunk_combiner) | 0
-rw-r--r--  file-chunk-combiner/cluster/config/agg_traffic_http_file_chunk_combiner (renamed from file-chunk-combiner/集群/config/agg_traffic_http_file_chunk_combiner) | 0
-rw-r--r--  file-chunk-combiner/cluster/config/agg_traffic_policy_capture_file_chunk_combiner (renamed from file-chunk-combiner/集群/config/agg_traffic_policy_capture_file_chunk_combiner) | 0
-rw-r--r--  file-chunk-combiner/cluster/config/agg_traffic_rtp_file_chunk_combiner (renamed from file-chunk-combiner/集群/config/agg_traffic_rtp_file_chunk_combiner) | 0
-rw-r--r--  file-chunk-combiner/cluster/env/agg_traffic_eml_file_chunk_combiner.sh (renamed from file-chunk-combiner/集群/env/agg_traffic_eml_file_chunk_combiner.sh) | 0
-rw-r--r--  file-chunk-combiner/cluster/env/agg_traffic_http_file_chunk_combiner.sh (renamed from file-chunk-combiner/集群/env/agg_traffic_http_file_chunk_combiner.sh) | 0
-rw-r--r--  file-chunk-combiner/cluster/env/agg_traffic_policy_capture_file_chunk_combiner.sh (renamed from file-chunk-combiner/集群/env/agg_traffic_policy_capture_file_chunk_combiner.sh) | 0
-rw-r--r--  file-chunk-combiner/cluster/env/agg_traffic_rtp_file_chunk_combiner.sh (renamed from file-chunk-combiner/集群/env/agg_traffic_rtp_file_chunk_combiner.sh) | 0
-rw-r--r--  file-chunk-combiner/standalone/config/agg_traffic_eml_file_chunk_combiner (renamed from file-chunk-combiner/单机/config/agg_traffic_eml_file_chunk_combiner) | 0
-rw-r--r--  file-chunk-combiner/standalone/config/agg_traffic_http_file_chunk_combiner (renamed from file-chunk-combiner/单机/config/agg_traffic_http_file_chunk_combiner) | 0
-rw-r--r--  file-chunk-combiner/standalone/config/agg_traffic_policy_capture_file_chunk_combiner (renamed from file-chunk-combiner/单机/config/agg_traffic_policy_capture_file_chunk_combiner) | 0
-rw-r--r--  file-chunk-combiner/standalone/config/agg_traffic_rtp_file_chunk_combiner (renamed from file-chunk-combiner/单机/config/agg_traffic_rtp_file_chunk_combiner) | 0
-rw-r--r--  file-chunk-combiner/standalone/env/agg_traffic_eml_file_chunk_combiner.sh (renamed from file-chunk-combiner/单机/env/agg_traffic_eml_file_chunk_combiner.sh) | 0
-rw-r--r--  file-chunk-combiner/standalone/env/agg_traffic_http_file_chunk_combiner.sh (renamed from file-chunk-combiner/单机/env/agg_traffic_http_file_chunk_combiner.sh) | 0
-rw-r--r--  file-chunk-combiner/standalone/env/agg_traffic_policy_capture_file_chunk_combiner.sh (renamed from file-chunk-combiner/单机/env/agg_traffic_policy_capture_file_chunk_combiner.sh) | 0
-rw-r--r--  file-chunk-combiner/standalone/env/agg_traffic_rtp_file_chunk_combiner.sh (renamed from file-chunk-combiner/单机/env/agg_traffic_rtp_file_chunk_combiner.sh) | 0
-rw-r--r--  groot-stream/README.md | 31
-rw-r--r--  groot-stream/multi-datacenter-examples/datacenter_dt/dos_sketch_kafka_to_ndc_kafka | 49
-rw-r--r--  groot-stream/multi-datacenter-examples/datacenter_dt/etl_proxy_event_kafka_to_ndc_kafka | 154
-rw-r--r--  groot-stream/multi-datacenter-examples/datacenter_dt/etl_session_record_kafka_to_ndc_kafka | 154
-rw-r--r--  groot-stream/multi-datacenter-examples/datacenter_dt/etl_transaction_record_kafka_to_ndc_kafka | 157
-rw-r--r--  groot-stream/multi-datacenter-examples/datacenter_dt/network_traffic_metrics_kafka_to_ndc_kafka | 48
-rw-r--r--  groot-stream/multi-datacenter-examples/datacenter_dt/object_statistics_metric_kafka_to_ndc_kafka | 50
-rw-r--r--  groot-stream/multi-datacenter-examples/datacenter_dt/policy_rule_metrics_kafka_to_ndc_kafka | 50
-rw-r--r--  groot-stream/multi-datacenter-examples/datacenter_dt/pxy_exch_intermedia_cert_kafka_to_ndc_kafka | 58
-rw-r--r--  groot-stream/multi-datacenter-examples/datacenter_dt/statistics_rule_metric_kafka_to_ndc_kafka | 50
-rw-r--r--  groot-stream/multi-datacenter-examples/datacenter_dt/troubleshooting_file_stream_record_kafka_to_ndc_kafka | 50
-rw-r--r--  groot-stream/multi-datacenter-examples/datacenter_dt/voip_record_kafka_to_ndc_kafka | 89
-rw-r--r--  groot-stream/multi-datacenter-examples/national_datacenter/dos_event_kafka_to_clickhouse | 43
-rw-r--r--  groot-stream/multi-datacenter-examples/national_datacenter/etl_session_record_processed_kafka_to_cn_kafka | 399
-rw-r--r--  groot-stream/multi-datacenter-examples/national_datacenter/etl_voip_record_kafka_to_clickhouse | 119
-rw-r--r--  groot-stream/multi-datacenter-examples/national_datacenter/proxy_event_processed_kafka_to_clickhouse | 42
-rw-r--r--  groot-stream/multi-datacenter-examples/national_datacenter/session_record_processed_kafka_to_clickhouse | 42
-rw-r--r--  groot-stream/multi-datacenter-examples/national_datacenter/transaction_record_processed_kafka_to_clickhouse | 42
-rw-r--r--  groot-stream/single-cluster-examples/dos_event_kafka_to_clickhouse | 50
-rw-r--r--  groot-stream/single-cluster-examples/etl_datapath_telemetry_record_kafka_to_clickhouse | 72
-rw-r--r--  groot-stream/single-cluster-examples/etl_proxy_event_kafka_to_clickhouse | 143
-rw-r--r--  groot-stream/single-cluster-examples/etl_session_record_kafka_to_clickhouse | 141
-rw-r--r--  groot-stream/single-cluster-examples/etl_traffic_sketch_metric_kafka_to_clickhouse | 93
-rw-r--r--  groot-stream/single-cluster-examples/etl_transaction_record_kafka_to_clickhouse | 141
-rw-r--r--  groot-stream/single-cluster-examples/etl_voip_record_kafka_to_clickhouse | 143
-rw-r--r--  groot-stream/templates/realtime_log_streaming_cn_session_record.yaml.template | 387
-rw-r--r--  hbase/001_create_tsg_olap_hbase_table.sql (renamed from hbase/tsg_olap_hbase_ddl.sql) | 0
-rw-r--r--  hbase/002_create_tsg_olap_hbase_phoenix_table.sql (renamed from hbase/tsg_olap_phoenix_ddl.sql) | 0
-rw-r--r--  hbase/101_upgrade_v2408_to_v2409_tsg_olap_hbase_table.sh (renamed from hbase/update_hbase.sh) | 0
-rw-r--r--  hos/001_create_tsg_olap_hos_bucket.sh (renamed from hos/create_bucket.sh) | 0
-rw-r--r--  hos/002_upgrade_tsg_olap_hos_bucket.sh (renamed from hos/bucket_upgrade.sh) | 0
-rw-r--r--  hos/galaxy-hos-service-24.09.yml | 97
-rw-r--r--  hos/hosutil/config.properties | 21
-rw-r--r--  hos/hosutil/galaxy-hos-util-1.4.jar | bin 21460135 -> 0 bytes
-rw-r--r--  hos/hosutil/hosutil.sh | 138
-rw-r--r--  mariadb/README.md | 1
-rw-r--r--  mariadb/galaxy-qgw-service/V23.12__init_saved_query_job.sql (renamed from mariadb/galaxy-qgw-service/init_saved_query_job.sql) | 0
-rw-r--r--  mariadb/galaxy-qgw-service/V24.07__init_sys_storage_event.sql (renamed from mariadb/galaxy-qgw-service/init_sys_storage_event.sql) | 0
-rw-r--r--  shell-scripts/README.md | 1
61 files changed, 2807 insertions, 256 deletions
diff --git a/README.md b/README.md
index a6800ff..9a263e1 100644
--- a/README.md
+++ b/README.md
@@ -6,3 +6,9 @@
|:-----------------|:--------------|
| shell-scripts | Stores installation and initialization scripts. |
| config-templates | Stores configuration file templates. |
+
+## File Naming
+
+- `[index]_[create]_[project]_[feature]`: initializes a database or component. Example: `001_create_tsg_olap_clickhouse_table.sql`.
+- `[index]_[upgrade]_[from version]_to_[current version]_[project]_[feature]`: upgrade file; upgrade records for several versions may be kept in a single file. Example for an LTS release: `101_upgrade_v2402_to_v2409_tsg_olap_clickhouse_table.sql`.
+
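To illustrate the second pattern, a single upgrade file can carry several consecutive version steps. The sketch below is hypothetical; the database, table, and columns are placeholders and are not taken from this repository:

-- 101_upgrade_v2402_to_v2409_tsg_olap_clickhouse_table.sql (hypothetical sketch)
-- v24.02 -> v24.07
ALTER TABLE example_db.example_local ADD COLUMN IF NOT EXISTS new_metric UInt32 DEFAULT 0;
-- v24.07 -> v24.09
ALTER TABLE example_db.example_local ADD COLUMN IF NOT EXISTS extra_tag String DEFAULT '';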
diff --git a/clickhouse/tsg_olap_clickhouse_ddl.sql b/clickhouse/001_create_tsg_olap_clickhouse_table.sql
index 0840c6c..0840c6c 100644
--- a/clickhouse/tsg_olap_clickhouse_ddl.sql
+++ b/clickhouse/001_create_tsg_olap_clickhouse_table.sql
diff --git a/clickhouse/tsg_olap_clickhouse_ddl_check.sql b/clickhouse/002_check_tsg_olap_clickhouse_table.sql
index 2bf242c..2bf242c 100644
--- a/clickhouse/tsg_olap_clickhouse_ddl_check.sql
+++ b/clickhouse/002_check_tsg_olap_clickhouse_table.sql
diff --git a/config-templates/README.md b/config-templates/README.md
index e69de29..2c45905 100644
--- a/config-templates/README.md
+++ b/config-templates/README.md
@@ -0,0 +1 @@
+Global installation configuration files \ No newline at end of file
diff --git a/druid/README.md b/druid/README.md
index e69de29..d0aea9c 100644
--- a/druid/README.md
+++ b/druid/README.md
@@ -0,0 +1 @@
+Druid ingestion tasks \ No newline at end of file
diff --git a/file-chunk-combiner/templates/agg_traffic_file_chunk_combiner b/file-chunk-combiner/agg_traffic_file_chunk_combiner
index 0b2e5ef..0b2e5ef 100644
--- a/file-chunk-combiner/templates/agg_traffic_file_chunk_combiner
+++ b/file-chunk-combiner/agg_traffic_file_chunk_combiner
diff --git a/file-chunk-combiner/集群/config/agg_traffic_eml_file_chunk_combiner b/file-chunk-combiner/cluster/config/agg_traffic_eml_file_chunk_combiner
index 05053c4..05053c4 100644
--- a/file-chunk-combiner/集群/config/agg_traffic_eml_file_chunk_combiner
+++ b/file-chunk-combiner/cluster/config/agg_traffic_eml_file_chunk_combiner
diff --git a/file-chunk-combiner/集群/config/agg_traffic_http_file_chunk_combiner b/file-chunk-combiner/cluster/config/agg_traffic_http_file_chunk_combiner
index fdd496a..fdd496a 100644
--- a/file-chunk-combiner/集群/config/agg_traffic_http_file_chunk_combiner
+++ b/file-chunk-combiner/cluster/config/agg_traffic_http_file_chunk_combiner
diff --git a/file-chunk-combiner/集群/config/agg_traffic_policy_capture_file_chunk_combiner b/file-chunk-combiner/cluster/config/agg_traffic_policy_capture_file_chunk_combiner
index 98cfc68..98cfc68 100644
--- a/file-chunk-combiner/集群/config/agg_traffic_policy_capture_file_chunk_combiner
+++ b/file-chunk-combiner/cluster/config/agg_traffic_policy_capture_file_chunk_combiner
diff --git a/file-chunk-combiner/集群/config/agg_traffic_rtp_file_chunk_combiner b/file-chunk-combiner/cluster/config/agg_traffic_rtp_file_chunk_combiner
index b63382c..b63382c 100644
--- a/file-chunk-combiner/集群/config/agg_traffic_rtp_file_chunk_combiner
+++ b/file-chunk-combiner/cluster/config/agg_traffic_rtp_file_chunk_combiner
diff --git a/file-chunk-combiner/集群/env/agg_traffic_eml_file_chunk_combiner.sh b/file-chunk-combiner/cluster/env/agg_traffic_eml_file_chunk_combiner.sh
index d4913fc..d4913fc 100644
--- a/file-chunk-combiner/集群/env/agg_traffic_eml_file_chunk_combiner.sh
+++ b/file-chunk-combiner/cluster/env/agg_traffic_eml_file_chunk_combiner.sh
diff --git a/file-chunk-combiner/集群/env/agg_traffic_http_file_chunk_combiner.sh b/file-chunk-combiner/cluster/env/agg_traffic_http_file_chunk_combiner.sh
index c01287f..c01287f 100644
--- a/file-chunk-combiner/集群/env/agg_traffic_http_file_chunk_combiner.sh
+++ b/file-chunk-combiner/cluster/env/agg_traffic_http_file_chunk_combiner.sh
diff --git a/file-chunk-combiner/集群/env/agg_traffic_policy_capture_file_chunk_combiner.sh b/file-chunk-combiner/cluster/env/agg_traffic_policy_capture_file_chunk_combiner.sh
index c01287f..c01287f 100644
--- a/file-chunk-combiner/集群/env/agg_traffic_policy_capture_file_chunk_combiner.sh
+++ b/file-chunk-combiner/cluster/env/agg_traffic_policy_capture_file_chunk_combiner.sh
diff --git a/file-chunk-combiner/集群/env/agg_traffic_rtp_file_chunk_combiner.sh b/file-chunk-combiner/cluster/env/agg_traffic_rtp_file_chunk_combiner.sh
index c01287f..c01287f 100644
--- a/file-chunk-combiner/集群/env/agg_traffic_rtp_file_chunk_combiner.sh
+++ b/file-chunk-combiner/cluster/env/agg_traffic_rtp_file_chunk_combiner.sh
diff --git a/file-chunk-combiner/单机/config/agg_traffic_eml_file_chunk_combiner b/file-chunk-combiner/standalone/config/agg_traffic_eml_file_chunk_combiner
index 7dcd279..7dcd279 100644
--- a/file-chunk-combiner/单机/config/agg_traffic_eml_file_chunk_combiner
+++ b/file-chunk-combiner/standalone/config/agg_traffic_eml_file_chunk_combiner
diff --git a/file-chunk-combiner/单机/config/agg_traffic_http_file_chunk_combiner b/file-chunk-combiner/standalone/config/agg_traffic_http_file_chunk_combiner
index b1a4daf..b1a4daf 100644
--- a/file-chunk-combiner/单机/config/agg_traffic_http_file_chunk_combiner
+++ b/file-chunk-combiner/standalone/config/agg_traffic_http_file_chunk_combiner
diff --git a/file-chunk-combiner/单机/config/agg_traffic_policy_capture_file_chunk_combiner b/file-chunk-combiner/standalone/config/agg_traffic_policy_capture_file_chunk_combiner
index 11c9aef..11c9aef 100644
--- a/file-chunk-combiner/单机/config/agg_traffic_policy_capture_file_chunk_combiner
+++ b/file-chunk-combiner/standalone/config/agg_traffic_policy_capture_file_chunk_combiner
diff --git a/file-chunk-combiner/单机/config/agg_traffic_rtp_file_chunk_combiner b/file-chunk-combiner/standalone/config/agg_traffic_rtp_file_chunk_combiner
index 58e7774..58e7774 100644
--- a/file-chunk-combiner/单机/config/agg_traffic_rtp_file_chunk_combiner
+++ b/file-chunk-combiner/standalone/config/agg_traffic_rtp_file_chunk_combiner
diff --git a/file-chunk-combiner/单机/env/agg_traffic_eml_file_chunk_combiner.sh b/file-chunk-combiner/standalone/env/agg_traffic_eml_file_chunk_combiner.sh
index a60f9fc..a60f9fc 100644
--- a/file-chunk-combiner/单机/env/agg_traffic_eml_file_chunk_combiner.sh
+++ b/file-chunk-combiner/standalone/env/agg_traffic_eml_file_chunk_combiner.sh
diff --git a/file-chunk-combiner/单机/env/agg_traffic_http_file_chunk_combiner.sh b/file-chunk-combiner/standalone/env/agg_traffic_http_file_chunk_combiner.sh
index ff68f57..ff68f57 100644
--- a/file-chunk-combiner/单机/env/agg_traffic_http_file_chunk_combiner.sh
+++ b/file-chunk-combiner/standalone/env/agg_traffic_http_file_chunk_combiner.sh
diff --git a/file-chunk-combiner/单机/env/agg_traffic_policy_capture_file_chunk_combiner.sh b/file-chunk-combiner/standalone/env/agg_traffic_policy_capture_file_chunk_combiner.sh
index ff68f57..ff68f57 100644
--- a/file-chunk-combiner/单机/env/agg_traffic_policy_capture_file_chunk_combiner.sh
+++ b/file-chunk-combiner/standalone/env/agg_traffic_policy_capture_file_chunk_combiner.sh
diff --git a/file-chunk-combiner/单机/env/agg_traffic_rtp_file_chunk_combiner.sh b/file-chunk-combiner/standalone/env/agg_traffic_rtp_file_chunk_combiner.sh
index ff68f57..ff68f57 100644
--- a/file-chunk-combiner/单机/env/agg_traffic_rtp_file_chunk_combiner.sh
+++ b/file-chunk-combiner/standalone/env/agg_traffic_rtp_file_chunk_combiner.sh
diff --git a/groot-stream/README.md b/groot-stream/README.md
new file mode 100644
index 0000000..c2ef994
--- /dev/null
+++ b/groot-stream/README.md
@@ -0,0 +1,31 @@
+# Configuration Template Examples
+
+## session_record.yaml.j2 (session record ETL scenario)
+
+- Multi-datacenter deployment: each datacenter's Data Transporter pre-processes the records, which are then aggregated centrally at the national datacenter (NDC)
+ - etl_session_record_kafka_to_ndc_kafka (A-DT)
+ - Topology: kafka_source -> etl_processor -> kafka_sink
+ - Data Flow: SESSION-RECORD -> SESSION-RECORD-PROCESSED
+- Multi-datacenter deployment: the national datacenter loads the session records and writes them to ClickHouse
+ - session_record_processed_kafka_to_clickhouse(A-NDC)
+ - Topology: kafka_source -> clickhouse_sink
+ - Data Flow: SESSION-RECORD-PROCESSED -> session_record_local
+- Single-cluster (centralized) deployment: ingest session records, pre-process them, and write them to ClickHouse
+ - etl_session_record_kafka_to_clickhouse (B)
+ - Topology: kafka_source -> etl_processor -> clickhouse_sink
+ - Data Flow: SESSION-RECORD -> session_record_local
+
+## data_transporter.yaml.j2 (data backhaul scenario)
+
+- troubleshooting_file_stream_kafka_to_ndc_kafka
+ - Topology: kafka_source -> kafka_sink (format:raw)
+ - Data Flow: TROUBLESHOOTING-FILE-STREAM-RECORD -> TROUBLESHOOTING-FILE-STREAM-RECORD
+
+## realtime_log_streaming_cn_session_record.yaml.template (pushing to other vendors / third parties)
+
+`install_cn_udf.sh` installs the CN UDFs; `grootstream.yaml` defines the CN knowledge bases.
+
+- etl_session_record_kafka_to_cn_kafka
+ - Topology: kafka_source -> etl_processor -> post_output_field_processor -> kafka_sink
+ - Data Flow: SESSION-RECORD(SESSION-RECORD-PROCESSED) -> SESSION-RECORD-CN
+
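For orientation, the job definitions added below all follow the same groot-stream layout. The following minimal skeleton is abridged from the configurations in this commit, with most property values omitted; it only shows how the sections and the topology wiring fit together:

# Minimal groot-stream job skeleton (abridged from the examples in this commit)
sources:
  kafka_source:
    type: kafka
    properties:
      topic: SESSION-RECORD              # source topic
      format: json                       # relay-only jobs use format: raw

processing_pipelines:                    # omitted entirely in pure kafka -> kafka relay jobs
  etl_processor:
    type: projection
    functions: []                        # ASN_LOOKUP, JSON_EXTRACT, PATH_COMBINE, ... as needed

sinks:
  kafka_sink:                            # clickhouse_sink in the *_kafka_to_clickhouse examples
    type: kafka
    properties:
      topic: SESSION-RECORD-PROCESSED

application:
  env:
    name: etl_session_record_kafka_to_ndc_kafka
    shade.identifier: aes
    pipeline:
      object-reuse: true
  topology:
    - name: kafka_source
      downstream: [etl_processor]
    - name: etl_processor
      downstream: [kafka_sink]
    - name: kafka_sink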
diff --git a/groot-stream/multi-datacenter-examples/datacenter_dt/dos_sketch_kafka_to_ndc_kafka b/groot-stream/multi-datacenter-examples/datacenter_dt/dos_sketch_kafka_to_ndc_kafka
new file mode 100644
index 0000000..b131c5c
--- /dev/null
+++ b/groot-stream/multi-datacenter-examples/datacenter_dt/dos_sketch_kafka_to_ndc_kafka
@@ -0,0 +1,49 @@
+sources:
+ kafka_source:
+ type: kafka
+ properties:
+ topic: DOS-SKETCH-RECORD
+ kafka.bootstrap.servers: "{{ kafka_source_servers }}"
+ kafka.client.id: DOS-SKETCH-RECORD
+ kafka.session.timeout.ms: 60000
+ kafka.max.poll.records: 3000
+ kafka.max.partition.fetch.bytes: 31457280
+ kafka.security.protocol: SASL_PLAINTEXT
+ kafka.sasl.mechanism: PLAIN
+ kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
+ kafka.group.id: dos_sketch_record_kafka_to_kafka
+ kafka.auto.offset.reset: latest
+ format: raw
+
+
+sinks:
+ kafka_sink:
+ type: kafka
+ properties:
+ topic: DOS-SKETCH-RECORD
+ kafka.bootstrap.servers: "{{ kafka_sink_servers }}"
+ kafka.client.id: DOS-SKETCH-RECORD
+ kafka.retries: 0
+ kafka.linger.ms: 10
+ kafka.request.timeout.ms: 30000
+ kafka.batch.size: 262144
+ kafka.buffer.memory: 134217728
+ kafka.max.request.size: 10485760
+ kafka.compression.type: snappy
+ kafka.security.protocol: SASL_PLAINTEXT
+ kafka.sasl.mechanism: PLAIN
+ kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
+ format: raw
+
+
+application:
+ env:
+ name: dos_sketch_record_kafka_to_kafka
+ shade.identifier: aes
+ pipeline:
+ object-reuse: true # [boolean] Object Reuse, default is false
+ topology:
+ - name: kafka_source
+ downstream: [kafka_sink]
+ - name: kafka_sink
+
diff --git a/groot-stream/multi-datacenter-examples/datacenter_dt/etl_proxy_event_kafka_to_ndc_kafka b/groot-stream/multi-datacenter-examples/datacenter_dt/etl_proxy_event_kafka_to_ndc_kafka
new file mode 100644
index 0000000..512ec94
--- /dev/null
+++ b/groot-stream/multi-datacenter-examples/datacenter_dt/etl_proxy_event_kafka_to_ndc_kafka
@@ -0,0 +1,154 @@
+sources:
+ kafka_source:
+ type: kafka
+ properties:
+ topic: PROXY-EVENT
+ kafka.bootstrap.servers: "{{ kafka_source_servers }}"
+ kafka.client.id: PROXY-EVENT
+ kafka.session.timeout.ms: 60000
+ kafka.max.poll.records: 3000
+ kafka.max.partition.fetch.bytes: 31457280
+ kafka.security.protocol: SASL_PLAINTEXT
+ kafka.sasl.mechanism: PLAIN
+ kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
+ kafka.group.id: etl_proxy_event_kafka_to_ndc_kafka
+ kafka.auto.offset.reset: latest
+ format: json
+
+processing_pipelines:
+ etl_processor: # [object] Processing Pipeline
+ type: com.geedgenetworks.core.processor.projection.ProjectionProcessorImpl
+ remove_fields:
+ output_fields:
+ properties:
+ key: value
+ functions: # [array of object] Function List
+
+ - function: ASN_LOOKUP
+ lookup_fields: [server_ip]
+ output_fields: [server_asn]
+ parameters:
+ option: IP_TO_ASN
+ kb_name: tsg_ip_asn
+
+ - function: ASN_LOOKUP
+ lookup_fields: [client_ip]
+ output_fields: [client_asn]
+ parameters:
+ option: IP_TO_ASN
+ kb_name: tsg_ip_asn
+
+ - function: SNOWFLAKE_ID
+ lookup_fields: ['']
+ output_fields: [log_id]
+ parameters:
+ data_center_id_num: 1
+
+ - function: JSON_EXTRACT
+ lookup_fields: [device_tag]
+ output_fields: [data_center]
+ filter:
+ parameters:
+ value_expression: $.tags[?(@.tag=='data_center')][0].value
+
+ - function: JSON_EXTRACT
+ lookup_fields: [device_tag]
+ output_fields: [device_group]
+ filter:
+ parameters:
+ value_expression: $.tags[?(@.tag=='device_group')][0].value
+
+ - function: CURRENT_UNIX_TIMESTAMP
+ output_fields: [processing_time]
+ parameters:
+ precision: seconds
+
+ - function: UNIX_TIMESTAMP_CONVERTER
+ lookup_fields: [__timestamp]
+ output_fields: [recv_time]
+ parameters:
+ precision: seconds
+
+ - function: EVAL
+ output_fields: [ingestion_time]
+ parameters:
+ value_expression: recv_time
+
+
+ - function: BASE64_DECODE_TO_STRING
+ output_fields: [mail_subject]
+ parameters:
+ value_field: mail_subject
+ charset_field: mail_subject_charset
+
+ - function: BASE64_DECODE_TO_STRING
+ output_fields: [mail_attachment_name]
+ parameters:
+ value_field: mail_attachment_name
+ charset_field: mail_attachment_name_charset
+
+ - function: PATH_COMBINE
+ lookup_fields: [rtp_pcap_path]
+ output_fields: [rtp_pcap_path]
+ parameters:
+ path: [props.hos.path, props.hos.bucket.name.traffic_file, rtp_pcap_path]
+
+ - function: PATH_COMBINE
+ lookup_fields: [http_request_body]
+ output_fields: [http_request_body]
+ parameters:
+ path: [props.hos.path, props.hos.bucket.name.traffic_file, http_request_body]
+
+ - function: PATH_COMBINE
+ lookup_fields: [http_response_body]
+ output_fields: [http_response_body]
+ parameters:
+ path: [props.hos.path, props.hos.bucket.name.traffic_file, http_response_body]
+
+ - function: PATH_COMBINE
+ lookup_fields: [mail_eml_file]
+ output_fields: [mail_eml_file]
+ parameters:
+ path: [props.hos.path, props.hos.bucket.name.traffic_file, mail_eml_file]
+
+ - function: PATH_COMBINE
+ lookup_fields: [packet_capture_file]
+ output_fields: [packet_capture_file]
+ parameters:
+ path: [props.hos.path, props.hos.bucket.name.traffic_file, packet_capture_file]
+
+sinks:
+ kafka_sink:
+ type: kafka
+ properties:
+ topic: PROXY-EVENT-PROCESSED
+ kafka.bootstrap.servers: "{{ kafka_sink_servers }}"
+ kafka.client.id: PROXY-EVENT-PROCESSED
+ kafka.retries: 0
+ kafka.linger.ms: 10
+ kafka.request.timeout.ms: 30000
+ kafka.batch.size: 262144
+ kafka.buffer.memory: 134217728
+ kafka.max.request.size: 10485760
+ kafka.compression.type: snappy
+ kafka.security.protocol: SASL_PLAINTEXT
+ kafka.sasl.mechanism: PLAIN
+ kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
+ format: json
+
+
+application:
+
+ env: # [object] Environment Variables
+ name: etl_proxy_event_kafka_to_ndc_kafka # [string] Job Name
+ shade.identifier: aes
+ pipeline:
+ object-reuse: true # [boolean] Object Reuse, default is false
+ topology:
+ - name: kafka_source
+ downstream: [etl_processor]
+ - name: etl_processor
+ downstream: [kafka_sink]
+ - name: kafka_sink
+
+
diff --git a/groot-stream/multi-datacenter-examples/datacenter_dt/etl_session_record_kafka_to_ndc_kafka b/groot-stream/multi-datacenter-examples/datacenter_dt/etl_session_record_kafka_to_ndc_kafka
new file mode 100644
index 0000000..1aa840f
--- /dev/null
+++ b/groot-stream/multi-datacenter-examples/datacenter_dt/etl_session_record_kafka_to_ndc_kafka
@@ -0,0 +1,154 @@
+sources:
+ kafka_source:
+ type: kafka
+ properties:
+ topic: SESSION-RECORD
+ kafka.bootstrap.servers: "{{ kafka_source_servers }}"
+ kafka.client.id: SESSION-RECORD
+ kafka.session.timeout.ms: 60000
+ kafka.max.poll.records: 3000
+ kafka.max.partition.fetch.bytes: 31457280
+ kafka.security.protocol: SASL_PLAINTEXT
+ kafka.sasl.mechanism: PLAIN
+ kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
+ kafka.group.id: etl_session_record_kafka_to_ndc_kafka
+ kafka.auto.offset.reset: latest
+ format: json
+
+processing_pipelines:
+ etl_processor:
+ type: projection
+ functions:
+
+ - function: ASN_LOOKUP
+ lookup_fields: [server_ip]
+ output_fields: [server_asn]
+ parameters:
+ option: IP_TO_ASN
+ kb_name: tsg_ip_asn
+
+ - function: ASN_LOOKUP
+ lookup_fields: [client_ip]
+ output_fields: [client_asn]
+ parameters:
+ option: IP_TO_ASN
+ kb_name: tsg_ip_asn
+
+ - function: SNOWFLAKE_ID
+ lookup_fields: ['']
+ output_fields: [log_id]
+ parameters:
+ data_center_id_num: 1
+
+ - function: JSON_EXTRACT
+ lookup_fields: [device_tag]
+ output_fields: [data_center]
+ filter:
+ parameters:
+ value_expression: $.tags[?(@.tag=='data_center')][0].value
+
+ - function: JSON_EXTRACT
+ lookup_fields: [device_tag]
+ output_fields: [device_group]
+ filter:
+ parameters:
+ value_expression: $.tags[?(@.tag=='device_group')][0].value
+
+ - function: CURRENT_UNIX_TIMESTAMP
+ output_fields: [processing_time]
+ parameters:
+ precision: seconds
+
+ - function: UNIX_TIMESTAMP_CONVERTER
+ lookup_fields: [__timestamp]
+ output_fields: [recv_time]
+ parameters:
+ precision: seconds
+
+ - function: EVAL
+ output_fields: [ingestion_time]
+ parameters:
+ value_expression: recv_time
+
+ - function: BASE64_DECODE_TO_STRING
+ output_fields: [mail_subject]
+ parameters:
+ value_field: mail_subject
+ charset_field: mail_subject_charset
+
+ - function: BASE64_DECODE_TO_STRING
+ output_fields: [mail_attachment_name]
+ parameters:
+ value_field: mail_attachment_name
+ charset_field: mail_attachment_name_charset
+
+ - function: PATH_COMBINE
+ lookup_fields: [rtp_pcap_path]
+ output_fields: [rtp_pcap_path]
+ parameters:
+ path: [props.hos.path, props.hos.bucket.name.traffic_file, rtp_pcap_path]
+
+ - function: PATH_COMBINE
+ lookup_fields: [http_request_body]
+ output_fields: [http_request_body]
+ parameters:
+ path: [props.hos.path, props.hos.bucket.name.traffic_file, http_request_body]
+
+ - function: PATH_COMBINE
+ lookup_fields: [http_response_body]
+ output_fields: [http_response_body]
+ parameters:
+ path: [props.hos.path, props.hos.bucket.name.traffic_file, http_response_body]
+
+ - function: PATH_COMBINE
+ lookup_fields: [mail_eml_file]
+ output_fields: [mail_eml_file]
+ parameters:
+ path: [props.hos.path, props.hos.bucket.name.traffic_file, mail_eml_file]
+
+ - function: PATH_COMBINE
+ lookup_fields: [packet_capture_file]
+ output_fields: [packet_capture_file]
+ parameters:
+ path: [props.hos.path, props.hos.bucket.name.traffic_file, packet_capture_file]
+
+sinks:
+ kafka_sink:
+ type: kafka
+ properties:
+ topic: SESSION-RECORD-PROCESSED
+ kafka.bootstrap.servers: "{{ kafka_sink_servers }}"
+ kafka.client.id: SESSION-RECORD-PROCESSED
+ kafka.retries: 0
+ kafka.linger.ms: 10
+ kafka.request.timeout.ms: 30000
+ kafka.batch.size: 262144
+ kafka.buffer.memory: 134217728
+ kafka.max.request.size: 10485760
+ kafka.compression.type: snappy
+ kafka.security.protocol: SASL_PLAINTEXT
+ kafka.sasl.mechanism: PLAIN
+ kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
+ format: json
+
+
+application:
+
+ env:
+ name: etl_session_record_kafka_to_ndc_kafka
+ shade.identifier: aes
+ pipeline:
+ object-reuse: true
+ properties:
+ hos.bucket.name.rtp_file: traffic_rtp_file_bucket
+ hos.bucket.name.http_file: traffic_http_file_bucket
+ hos.bucket.name.eml_file: traffic_eml_file_bucket
+ hos.bucket.name.policy_capture_file: traffic_policy_capture_file_bucket
+ topology:
+ - name: kafka_source
+ downstream: [etl_processor]
+ - name: etl_processor
+ downstream: [kafka_sink]
+ - name: kafka_sink
+
+
diff --git a/groot-stream/multi-datacenter-examples/datacenter_dt/etl_transaction_record_kafka_to_ndc_kafka b/groot-stream/multi-datacenter-examples/datacenter_dt/etl_transaction_record_kafka_to_ndc_kafka
new file mode 100644
index 0000000..5f9c317
--- /dev/null
+++ b/groot-stream/multi-datacenter-examples/datacenter_dt/etl_transaction_record_kafka_to_ndc_kafka
@@ -0,0 +1,157 @@
+sources:
+ kafka_source:
+ type: kafka
+ # fields: # [array of object] Field List, if not set, all fields(Map<String, Object>) will be output.
+ # watermark_timestamp: common_recv_time # [string] Watermark Field Name
+ # watermark_timestamp_unit: ms # [string] Watermark Unit, default is ms
+ # watermark_lag: 60 # [number] Watermark Lag, default is 60
+ properties:
+ topic: TRANSACTION-RECORD
+ kafka.bootstrap.servers: "{{ kafka_source_servers }}"
+ kafka.client.id: TRANSACTION-RECORD
+ kafka.session.timeout.ms: 60000
+ kafka.max.poll.records: 3000
+ kafka.max.partition.fetch.bytes: 31457280
+ kafka.security.protocol: SASL_PLAINTEXT
+ kafka.sasl.mechanism: PLAIN
+ kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
+ kafka.group.id: etl_transaction_record_kafka_to_ndc_kafka
+ kafka.auto.offset.reset: latest
+ format: json
+
+processing_pipelines:
+ etl_processor: # [object] Processing Pipeline
+ type: com.geedgenetworks.core.processor.projection.ProjectionProcessorImpl
+ remove_fields:
+ output_fields:
+ properties:
+ key: value
+ functions: # [array of object] Function List
+
+ - function: ASN_LOOKUP
+ lookup_fields: [server_ip]
+ output_fields: [server_asn]
+ parameters:
+ option: IP_TO_ASN
+ kb_name: tsg_ip_asn
+
+ - function: ASN_LOOKUP
+ lookup_fields: [client_ip]
+ output_fields: [client_asn]
+ parameters:
+ option: IP_TO_ASN
+ kb_name: tsg_ip_asn
+
+ - function: SNOWFLAKE_ID
+ lookup_fields: ['']
+ output_fields: [log_id]
+ parameters:
+ data_center_id_num: 1
+
+ - function: JSON_EXTRACT
+ lookup_fields: [device_tag]
+ output_fields: [data_center]
+ filter:
+ parameters:
+ value_expression: $.tags[?(@.tag=='data_center')][0].value
+
+ - function: JSON_EXTRACT
+ lookup_fields: [device_tag]
+ output_fields: [device_group]
+ filter:
+ parameters:
+ value_expression: $.tags[?(@.tag=='device_group')][0].value
+
+ - function: CURRENT_UNIX_TIMESTAMP
+ output_fields: [processing_time]
+ parameters:
+ precision: seconds
+
+ - function: UNIX_TIMESTAMP_CONVERTER
+ lookup_fields: [__timestamp]
+ output_fields: [recv_time]
+ parameters:
+ precision: seconds
+
+ - function: EVAL
+ output_fields: [ingestion_time]
+ parameters:
+ value_expression: recv_time
+
+ - function: BASE64_DECODE_TO_STRING
+ output_fields: [mail_subject]
+ parameters:
+ value_field: mail_subject
+ charset_field: mail_subject_charset
+
+ - function: BASE64_DECODE_TO_STRING
+ output_fields: [mail_attachment_name]
+ parameters:
+ value_field: mail_attachment_name
+ charset_field: mail_attachment_name_charset
+
+ - function: PATH_COMBINE
+ lookup_fields: [rtp_pcap_path]
+ output_fields: [rtp_pcap_path]
+ parameters:
+ path: [props.hos.path, props.hos.bucket.name.traffic_file, rtp_pcap_path]
+
+ - function: PATH_COMBINE
+ lookup_fields: [http_request_body]
+ output_fields: [http_request_body]
+ parameters:
+ path: [props.hos.path, props.hos.bucket.name.traffic_file, http_request_body]
+
+ - function: PATH_COMBINE
+ lookup_fields: [http_response_body]
+ output_fields: [http_response_body]
+ parameters:
+ path: [props.hos.path, props.hos.bucket.name.traffic_file, http_response_body]
+
+ - function: PATH_COMBINE
+ lookup_fields: [mail_eml_file]
+ output_fields: [mail_eml_file]
+ parameters:
+ path: [props.hos.path, props.hos.bucket.name.traffic_file, mail_eml_file]
+
+ - function: PATH_COMBINE
+ lookup_fields: [packet_capture_file]
+ output_fields: [packet_capture_file]
+ parameters:
+ path: [props.hos.path, props.hos.bucket.name.traffic_file, packet_capture_file]
+
+sinks:
+ kafka_sink:
+ type: kafka
+ properties:
+ topic: TRANSACTION-RECORD-PROCESSED
+ kafka.bootstrap.servers: "{{ kafka_sink_servers }}"
+ kafka.client.id: TRANSACTION-RECORD-PROCESSED
+ kafka.retries: 0
+ kafka.linger.ms: 10
+ kafka.request.timeout.ms: 30000
+ kafka.batch.size: 262144
+ kafka.buffer.memory: 134217728
+ kafka.max.request.size: 10485760
+ kafka.compression.type: snappy
+ kafka.security.protocol: SASL_PLAINTEXT
+ kafka.sasl.mechanism: PLAIN
+ kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
+ format: json
+
+
+application:
+
+ env: # [object] Environment Variables
+ name: etl_transaction_record_kafka_to_ndc_kafka # [string] Job Name
+ shade.identifier: aes
+ pipeline:
+ object-reuse: true # [boolean] Object Reuse, default is false
+ topology:
+ - name: kafka_source
+ downstream: [etl_processor]
+ - name: etl_processor
+ downstream: [kafka_sink]
+ - name: kafka_sink
+
+
diff --git a/groot-stream/multi-datacenter-examples/datacenter_dt/network_traffic_metrics_kafka_to_ndc_kafka b/groot-stream/multi-datacenter-examples/datacenter_dt/network_traffic_metrics_kafka_to_ndc_kafka
new file mode 100644
index 0000000..a419fbd
--- /dev/null
+++ b/groot-stream/multi-datacenter-examples/datacenter_dt/network_traffic_metrics_kafka_to_ndc_kafka
@@ -0,0 +1,48 @@
+sources:
+ kafka_source:
+ type: kafka
+ properties:
+ topic: NETWORK-TRAFFIC-METRIC
+ kafka.bootstrap.servers: "{{ kafka_source_servers }}"
+ kafka.client.id: NETWORK-TRAFFIC-METRIC
+ kafka.session.timeout.ms: 60000
+ kafka.max.poll.records: 3000
+ kafka.max.partition.fetch.bytes: 31457280
+ kafka.security.protocol: SASL_PLAINTEXT
+ kafka.sasl.mechanism: PLAIN
+ kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
+ kafka.group.id: network_traffic_metrics_kafka_to_ndc_kafka
+ kafka.auto.offset.reset: latest
+ format: raw
+
+
+sinks:
+ kafka_sink:
+ type: kafka
+ properties:
+ topic: NETWORK-TRAFFIC-METRIC
+ kafka.bootstrap.servers: "{{ kafka_sink_servers }}"
+ kafka.client.id: NETWORK-TRAFFIC-METRIC
+ kafka.retries: 0
+ kafka.linger.ms: 10
+ kafka.request.timeout.ms: 30000
+ kafka.batch.size: 262144
+ kafka.buffer.memory: 134217728
+ kafka.max.request.size: 10485760
+ kafka.compression.type: snappy
+ kafka.security.protocol: SASL_PLAINTEXT
+ kafka.sasl.mechanism: PLAIN
+ kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
+ format: raw
+
+
+application:
+ env:
+ name: network_traffic_metrics_kafka_to_ndc_kafka
+ shade.identifier: aes
+ pipeline:
+ object-reuse: true
+ topology:
+ - name: kafka_source
+ downstream: [kafka_sink]
+ - name: kafka_sink
diff --git a/groot-stream/multi-datacenter-examples/datacenter_dt/object_statistics_metric_kafka_to_ndc_kafka b/groot-stream/multi-datacenter-examples/datacenter_dt/object_statistics_metric_kafka_to_ndc_kafka
new file mode 100644
index 0000000..2a698ad
--- /dev/null
+++ b/groot-stream/multi-datacenter-examples/datacenter_dt/object_statistics_metric_kafka_to_ndc_kafka
@@ -0,0 +1,50 @@
+sources:
+ kafka_source:
+ type: kafka
+ properties:
+ topic: OBJECT-STATISTICS-METRIC
+ kafka.bootstrap.servers: "{{ kafka_source_servers }}"
+ kafka.client.id: OBJECT-STATISTICS-METRIC
+ kafka.session.timeout.ms: 60000
+ kafka.max.poll.records: 3000
+ kafka.max.partition.fetch.bytes: 31457280
+ kafka.security.protocol: SASL_PLAINTEXT
+ kafka.sasl.mechanism: PLAIN
+ kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
+ kafka.group.id: object_statistics_metric_kafka_to_ndc_kafka
+ kafka.auto.offset.reset: latest
+ format: raw
+
+
+sinks:
+ kafka_sink:
+ type: kafka
+ properties:
+ topic: OBJECT-STATISTICS-METRIC
+ kafka.bootstrap.servers: "{{ kafka_sink_servers }}"
+ kafka.client.id: OBJECT-STATISTICS-METRIC
+ kafka.retries: 0
+ kafka.linger.ms: 10
+ kafka.request.timeout.ms: 30000
+ kafka.batch.size: 262144
+ kafka.buffer.memory: 134217728
+ kafka.max.request.size: 10485760
+ kafka.compression.type: snappy
+ kafka.security.protocol: SASL_PLAINTEXT
+ kafka.sasl.mechanism: PLAIN
+ kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
+ format: raw
+
+
+application:
+ env:
+ name: object_statistics_metric_kafka_to_ndc_kafka
+ shade.identifier: aes
+ pipeline:
+ object-reuse: true # [boolean] Object Reuse, default is false
+ topology:
+ - name: kafka_source
+ downstream: [kafka_sink]
+ - name: kafka_sink
+
+
diff --git a/groot-stream/multi-datacenter-examples/datacenter_dt/policy_rule_metrics_kafka_to_ndc_kafka b/groot-stream/multi-datacenter-examples/datacenter_dt/policy_rule_metrics_kafka_to_ndc_kafka
new file mode 100644
index 0000000..07f2a68
--- /dev/null
+++ b/groot-stream/multi-datacenter-examples/datacenter_dt/policy_rule_metrics_kafka_to_ndc_kafka
@@ -0,0 +1,50 @@
+sources:
+ kafka_source:
+ type: kafka
+ properties:
+ topic: POLICY-RULE-METRIC
+ kafka.bootstrap.servers: "{{ kafka_source_servers }}"
+ kafka.client.id: POLICY-RULE-METRIC
+ kafka.session.timeout.ms: 60000
+ kafka.max.poll.records: 3000
+ kafka.max.partition.fetch.bytes: 31457280
+ kafka.security.protocol: SASL_PLAINTEXT
+ kafka.sasl.mechanism: PLAIN
+ kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
+ kafka.group.id: policy_rule_metrics_kafka_to_ndc_kafka
+ kafka.auto.offset.reset: latest
+ format: raw
+
+
+sinks:
+ kafka_sink:
+ type: kafka
+ properties:
+ topic: POLICY-RULE-METRIC
+ kafka.bootstrap.servers: "{{ kafka_sink_servers }}"
+ kafka.client.id: POLICY-RULE-METRIC
+ kafka.retries: 0
+ kafka.linger.ms: 10
+ kafka.request.timeout.ms: 30000
+ kafka.batch.size: 262144
+ kafka.buffer.memory: 134217728
+ kafka.max.request.size: 10485760
+ kafka.compression.type: snappy
+ kafka.security.protocol: SASL_PLAINTEXT
+ kafka.sasl.mechanism: PLAIN
+ kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
+ format: raw
+
+
+application:
+ env:
+ name: policy_rule_metrics_kafka_to_ndc_kafka
+ shade.identifier: aes
+ pipeline:
+ object-reuse: true # [boolean] Object Reuse, default is false
+ topology:
+ - name: kafka_source
+ downstream: [kafka_sink]
+ - name: kafka_sink
+
+
diff --git a/groot-stream/multi-datacenter-examples/datacenter_dt/pxy_exch_intermedia_cert_kafka_to_ndc_kafka b/groot-stream/multi-datacenter-examples/datacenter_dt/pxy_exch_intermedia_cert_kafka_to_ndc_kafka
new file mode 100644
index 0000000..8cca8b2
--- /dev/null
+++ b/groot-stream/multi-datacenter-examples/datacenter_dt/pxy_exch_intermedia_cert_kafka_to_ndc_kafka
@@ -0,0 +1,58 @@
+sources:
+ kafka_source:
+ type: kafka
+ properties:
+ topic: PXY-EXCH-INTERMEDIA-CERT
+ kafka.bootstrap.servers: "{{ kafka_source_servers }}"
+ kafka.client.id: PXY-EXCH-INTERMEDIA-CERT
+ kafka.session.timeout.ms: 60000
+ kafka.max.poll.records: 3000
+ kafka.max.partition.fetch.bytes: 31457280
+ kafka.security.protocol: SASL_PLAINTEXT
+ kafka.ssl.keystore.location:
+ kafka.ssl.keystore.password:
+ kafka.ssl.truststore.location:
+ kafka.ssl.truststore.password:
+ kafka.ssl.key.password:
+ kafka.sasl.mechanism: PLAIN
+ kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
+ kafka.buffer.memory:
+ kafka.group.id: pxy_exch_intermedia_cert_kafka_to_ndc_kafka
+ kafka.auto.offset.reset: latest
+ kafka.max.request.size:
+ kafka.compression.type: none
+ format: raw
+
+
+sinks:
+ kafka_sink:
+ type: kafka
+ properties:
+ topic: PXY-EXCH-INTERMEDIA-CERT
+ kafka.bootstrap.servers: "{{ kafka_sink_servers }}"
+ kafka.client.id: PXY-EXCH-INTERMEDIA-CERT
+ kafka.retries: 0
+ kafka.linger.ms: 10
+ kafka.request.timeout.ms: 30000
+ kafka.batch.size: 262144
+ kafka.buffer.memory: 134217728
+ kafka.max.request.size: 10485760
+ kafka.compression.type: snappy
+ kafka.security.protocol: SASL_PLAINTEXT
+ kafka.sasl.mechanism: PLAIN
+ kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
+ format: raw
+
+
+application:
+ env:
+ name: pxy_exch_intermedia_cert_kafka_to_ndc_kafka
+ shade.identifier: aes
+ pipeline:
+ object-reuse: true # [boolean] Object Reuse, default is false
+ topology:
+ - name: kafka_source
+ downstream: [kafka_sink]
+ - name: kafka_sink
+
+
diff --git a/groot-stream/multi-datacenter-examples/datacenter_dt/statistics_rule_metric_kafka_to_ndc_kafka b/groot-stream/multi-datacenter-examples/datacenter_dt/statistics_rule_metric_kafka_to_ndc_kafka
new file mode 100644
index 0000000..ff83e39
--- /dev/null
+++ b/groot-stream/multi-datacenter-examples/datacenter_dt/statistics_rule_metric_kafka_to_ndc_kafka
@@ -0,0 +1,50 @@
+sources:
+ kafka_source:
+ type: kafka
+ properties:
+ topic: STATISTICS-RULE-METRIC
+ kafka.bootstrap.servers: "{{ kafka_source_servers }}"
+ kafka.client.id: STATISTICS-RULE-METRIC
+ kafka.session.timeout.ms: 60000
+ kafka.max.poll.records: 3000
+ kafka.max.partition.fetch.bytes: 31457280
+ kafka.security.protocol: SASL_PLAINTEXT
+ kafka.sasl.mechanism: PLAIN
+ kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
+ kafka.group.id: statistics_rule_metric_kafka_to_ndc_kafka
+ kafka.auto.offset.reset: latest
+ format: raw
+
+
+sinks:
+ kafka_sink:
+ type: kafka
+ properties:
+ topic: STATISTICS-RULE-METRIC
+ kafka.bootstrap.servers: "{{ kafka_sink_servers }}"
+ kafka.client.id: STATISTICS-RULE-METRIC
+ kafka.retries: 0
+ kafka.linger.ms: 10
+ kafka.request.timeout.ms: 30000
+ kafka.batch.size: 262144
+ kafka.buffer.memory: 134217728
+ kafka.max.request.size: 10485760
+ kafka.compression.type: snappy
+ kafka.security.protocol: SASL_PLAINTEXT
+ kafka.sasl.mechanism: PLAIN
+ kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
+ format: raw
+
+
+application:
+ env:
+ name: statistics_rule_metric_kafka_to_ndc_kafka
+ shade.identifier: aes
+ pipeline:
+ object-reuse: true # [boolean] Object Reuse, default is false
+ topology:
+ - name: kafka_source
+ downstream: [kafka_sink]
+ - name: kafka_sink
+
+
diff --git a/groot-stream/multi-datacenter-examples/datacenter_dt/troubleshooting_file_stream_record_kafka_to_ndc_kafka b/groot-stream/multi-datacenter-examples/datacenter_dt/troubleshooting_file_stream_record_kafka_to_ndc_kafka
new file mode 100644
index 0000000..b880e8a
--- /dev/null
+++ b/groot-stream/multi-datacenter-examples/datacenter_dt/troubleshooting_file_stream_record_kafka_to_ndc_kafka
@@ -0,0 +1,50 @@
+sources:
+ kafka_source:
+ type: kafka
+ properties:
+ topic: TROUBLESHOOTING-FILE-STREAM-RECORD
+ kafka.bootstrap.servers: "{{ kafka_source_servers }}"
+ kafka.client.id: TROUBLESHOOTING-FILE-STREAM-RECORD
+ kafka.session.timeout.ms: 60000
+ kafka.max.poll.records: 3000
+ kafka.max.partition.fetch.bytes: 31457280
+ kafka.security.protocol: SASL_PLAINTEXT
+ kafka.sasl.mechanism: PLAIN
+ kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
+ kafka.group.id: troubleshooting_file_stream_record_kafka_to_ndc_kafka
+ kafka.auto.offset.reset: latest
+ format: raw
+
+
+sinks:
+ kafka_sink:
+ type: kafka
+ properties:
+ topic: TROUBLESHOOTING-FILE-STREAM-RECORD
+ kafka.bootstrap.servers: "{{ kafka_sink_servers }}"
+ kafka.client.id: TROUBLESHOOTING-FILE-STREAM-RECORD
+ kafka.retries: 0
+ kafka.linger.ms: 10
+ kafka.request.timeout.ms: 30000
+ kafka.batch.size: 262144
+ kafka.buffer.memory: 134217728
+ kafka.max.request.size: 10485760
+ kafka.compression.type: snappy
+ kafka.security.protocol: SASL_PLAINTEXT
+ kafka.sasl.mechanism: PLAIN
+ kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
+ format: raw
+
+
+application:
+ env:
+ name: troubleshooting_file_stream_record_kafka_to_ndc_kafka
+ shade.identifier: aes
+ pipeline:
+ object-reuse: true # [boolean] Object Reuse, default is false
+ topology:
+ - name: kafka_source
+ downstream: [kafka_sink]
+ - name: kafka_sink
+
+
diff --git a/groot-stream/multi-datacenter-examples/datacenter_dt/voip_record_kafka_to_ndc_kafka b/groot-stream/multi-datacenter-examples/datacenter_dt/voip_record_kafka_to_ndc_kafka
new file mode 100644
index 0000000..0339db1
--- /dev/null
+++ b/groot-stream/multi-datacenter-examples/datacenter_dt/voip_record_kafka_to_ndc_kafka
@@ -0,0 +1,89 @@
+sources:
+ kafka_source:
+ type: kafka
+ properties:
+ topic: VOIP-RECORD
+ kafka.bootstrap.servers: "{{ kafka_source_servers }}"
+ kafka.client.id: VOIP-RECORD
+ kafka.session.timeout.ms: 60000
+ kafka.max.poll.records: 3000
+ kafka.max.partition.fetch.bytes: 31457280
+ kafka.security.protocol: SASL_PLAINTEXT
+ kafka.sasl.mechanism: PLAIN
+ kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
+      kafka.group.id: voip_record_kafka_to_ndc_kafka
+ kafka.auto.offset.reset: latest
+ format: json
+
+
+processing_pipelines:
+ etl_processor: # [object] Processing Pipeline
+ type: com.geedgenetworks.core.processor.projection.ProjectionProcessorImpl
+ remove_fields:
+ output_fields:
+ functions: # [array of object] Function List
+
+ - function: PATH_COMBINE
+ lookup_fields: [rtp_pcap_path]
+ output_fields: [rtp_pcap_path]
+ parameters:
+ path: [props.hos.path, props.hos.bucket.name.traffic_file, rtp_pcap_path]
+
+ - function: PATH_COMBINE
+ lookup_fields: [http_request_body]
+ output_fields: [http_request_body]
+ parameters:
+ path: [props.hos.path, props.hos.bucket.name.traffic_file, http_request_body]
+
+ - function: PATH_COMBINE
+ lookup_fields: [http_response_body]
+ output_fields: [http_response_body]
+ parameters:
+ path: [props.hos.path, props.hos.bucket.name.traffic_file, http_response_body]
+
+ - function: PATH_COMBINE
+ lookup_fields: [mail_eml_file]
+ output_fields: [mail_eml_file]
+ parameters:
+ path: [props.hos.path, props.hos.bucket.name.traffic_file, mail_eml_file]
+
+ - function: PATH_COMBINE
+ lookup_fields: [packet_capture_file]
+ output_fields: [packet_capture_file]
+ parameters:
+ path: [props.hos.path, props.hos.bucket.name.traffic_file, packet_capture_file]
+
+
+sinks:
+ kafka_sink:
+ type: kafka
+ properties:
+ topic: VOIP-RECORD
+ kafka.bootstrap.servers: "{{ kafka_sink_servers }}"
+ kafka.client.id: VOIP-RECORD
+ kafka.retries: 0
+ kafka.linger.ms: 10
+ kafka.request.timeout.ms: 30000
+ kafka.batch.size: 262144
+ kafka.buffer.memory: 134217728
+ kafka.max.request.size: 10485760
+ kafka.compression.type: snappy
+ kafka.security.protocol: SASL_PLAINTEXT
+ kafka.sasl.mechanism: PLAIN
+ kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
+ format: json
+
+
+
+application:
+ env:
+ name: voip_record_kafka_to_ndc_kafka
+ shade.identifier: aes
+ pipeline:
+ object-reuse: true # [boolean] Object Reuse, default is false
+ topology:
+ - name: kafka_source
+ downstream: [etl_processor]
+ - name: etl_processor
+ downstream: [kafka_sink]
+ - name: kafka_sink
diff --git a/groot-stream/multi-datacenter-examples/national_datacenter/dos_event_kafka_to_clickhouse b/groot-stream/multi-datacenter-examples/national_datacenter/dos_event_kafka_to_clickhouse
new file mode 100644
index 0000000..2800bf2
--- /dev/null
+++ b/groot-stream/multi-datacenter-examples/national_datacenter/dos_event_kafka_to_clickhouse
@@ -0,0 +1,43 @@
+sources:
+ kafka_source:
+ type: kafka
+ properties:
+ topic: DOS-EVENT
+ kafka.bootstrap.servers: "{{ kafka_source_servers }}"
+ kafka.client.id: DOS-EVENT
+ kafka.session.timeout.ms: 60000
+ kafka.max.poll.records: 3000
+ kafka.max.partition.fetch.bytes: 31457280
+ kafka.security.protocol: SASL_PLAINTEXT
+ kafka.sasl.mechanism: PLAIN
+ kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
+ kafka.group.id: dos_event_kafka_to_clickhouse
+ kafka.auto.offset.reset: latest
+ format: json
+
+
+sinks:
+ clickhouse_sink:
+ type: clickhouse
+ properties:
+ host: "{{ clickhouse_servers }}"
+ table: tsg_galaxy_v3.dos_event_local
+ batch.size: 100000
+ batch.interval: 30s
+ connection.user: e54c9568586180eede1506eecf3574e9
+ connection.password: 86cf0e2ffba3f541a6c6761313e5cc7e
+ connection.connect_timeout: 30
+ connection.query_timeout: 300
+
+application:
+ env:
+ name: dos_event_kafka_to_clickhouse
+ shade.identifier: aes
+ pipeline:
+ object-reuse: true # [boolean] Object Reuse, default is false
+ topology:
+ - name: kafka_source
+ downstream: [clickhouse_sink]
+ - name: clickhouse_sink
+
+
diff --git a/groot-stream/multi-datacenter-examples/national_datacenter/etl_session_record_processed_kafka_to_cn_kafka b/groot-stream/multi-datacenter-examples/national_datacenter/etl_session_record_processed_kafka_to_cn_kafka
new file mode 100644
index 0000000..69bd6e8
--- /dev/null
+++ b/groot-stream/multi-datacenter-examples/national_datacenter/etl_session_record_processed_kafka_to_cn_kafka
@@ -0,0 +1,399 @@
+sources:
+ kafka_source:
+ type: kafka
+ properties:
+ topic: SESSION-RECORD-PROCESSED
+ kafka.bootstrap.servers: "{{ kafka_source_servers }}"
+ kafka.client.id: SESSION-RECORD-PROCESSED
+ kafka.session.timeout.ms: 60000
+ kafka.max.poll.records: 3000
+ kafka.max.partition.fetch.bytes: 31457280
+ kafka.security.protocol: SASL_PLAINTEXT
+ kafka.sasl.mechanism: PLAIN
+ kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
+ kafka.group.id: etl_processed_session_record_kafka_to_cn_kafka
+ kafka.auto.offset.reset: latest
+ format: json
+
+processing_pipelines:
+ session_record_processor:
+ type: projection
+ remove_fields:
+ output_fields:
+ functions: # [array of object] Function List
+ - function: EVAL
+ output_fields: [ domain ]
+ parameters:
+ value_expression: server_fqdn
+
+ - function: EVAL
+ output_fields: [ domain_sld ]
+ parameters:
+ value_expression: server_domain
+
+ - function: CN_L7_PROTOCOL_AND_APP_EXTRACT
+ parameters:
+ decoded_path_field_name: decoded_path
+ app_transition_field_name: app_transition
+ l7_protocol_field_name: l7_protocol
+ app_field_name: app
+ l7_protocol: DHCP,DNS,FTP,GRE,GTP,HTTP,HTTPS,ICMP,IMAP,IMAPS,IPSEC,ISAKMP,XMPP,L2TP,LDAP,MMS,NETBIOS,NETFLOW,NTP,POP3,POP3S,RDP,PPTP,RADIUS,RTCP,RTP,RTSP,SIP,SMB,SMTP,SMTPS,SNMP,SSDP,SSH,SSL,STUN,TELNET,TFTP,OPENVPN,RTMP,TEREDO,FTPS,DTLS,SPDY,BJNP,QUIC,MDNS,Unknown TCP,Unknown UDP,Unknown Other,IKE,MAIL,SOCKS,DoH,SLP,SSL with ESNI,ISATAP,Stratum,SSL with ECH
+
+ - function: GEOIP_LOOKUP
+ lookup_fields: [ client_ip ]
+ output_fields: [ ]
+ parameters:
+ kb_name: cn_ip_location
+ option: IP_TO_OBJECT
+ geolocation_field_mapping:
+ COUNTRY: client_country_region
+ PROVINCE: client_super_admin_area
+ CITY: client_admin_area
+ LONGITUDE: client_longitude
+ LATITUDE: client_latitude
+ ISP: client_isp
+
+ - function: GEOIP_LOOKUP
+ lookup_fields: [ server_ip ]
+ output_fields: [ ]
+ parameters:
+ kb_name: cn_ip_location
+ option: IP_TO_OBJECT
+ geolocation_field_mapping:
+ COUNTRY: server_country_region
+ PROVINCE: server_super_admin_area
+ CITY: server_admin_area
+ LONGITUDE: server_longitude
+ LATITUDE: server_latitude
+ ISP: server_isp
+
+ - function: ASN_LOOKUP
+ lookup_fields: [ client_ip ]
+ output_fields: [ client_asn ]
+ parameters:
+ option: IP_TO_ASN
+ kb_name: cn_ip_asn
+
+ - function: ASN_LOOKUP
+ lookup_fields: [ server_ip ]
+ output_fields: [ server_asn ]
+ parameters:
+ option: IP_TO_ASN
+ kb_name: cn_ip_asn
+
+ - function: CN_IDC_RENTER_LOOKUP
+ lookup_fields: [ client_ip ]
+ output_fields: [ client_idc_renter ]
+ parameters:
+ kb_name: cn_idc_renter
+
+ - function: CN_IDC_RENTER_LOOKUP
+ lookup_fields: [ server_ip ]
+ output_fields: [ server_idc_renter ]
+ parameters:
+ kb_name: cn_idc_renter
+
+ - function: CN_LINK_DIRECTION_LOOKUP
+ lookup_fields: [ in_link_id ]
+ output_fields: [ in_link_direction ]
+ parameters:
+ kb_name: cn_link_direction
+
+ - function: CN_LINK_DIRECTION_LOOKUP
+ lookup_fields: [ out_link_id ]
+ output_fields: [ out_link_direction ]
+ parameters:
+ kb_name: cn_link_direction
+
+ - function: CN_FQDN_CATEGORY_LOOKUP
+ lookup_fields: [ domain ]
+ parameters:
+ kb_name: cn_fqdn_category
+ field_mapping:
+ NAME: domain_category_name
+ GROUP: domain_category_group
+ REPUTATION_LEVEL: domain_reputation_level
+
+ - function: CN_ICP_LOOKUP
+ lookup_fields: [ domain ]
+ output_fields: [ domain_icp_company_name ]
+ parameters:
+ kb_name: cn_fqdn_icp
+
+ - function: CN_FQDN_WHOIS_LOOKUP
+ lookup_fields: [ domain ]
+ output_fields: [ domain_whois_org ]
+ parameters:
+ kb_name: cn_fqdn_whois
+
+ - function: CN_DNS_SERVER_LOOKUP
+ lookup_fields: [ server_ip ]
+ output_fields: [ server_dns_server ]
+ parameters:
+ kb_name: cn_dns_server
+
+ - function: CN_APP_CATEGORY_LOOKUP
+ lookup_fields: [ app ]
+ parameters:
+ kb_name: cn_app_category
+ field_mapping:
+ CATEGORY: app_category
+ SUBCATEGORY: app_subcategory
+ COMPANY: app_company
+ COMPANY_CATEGORY: app_company_category
+
+ - function: EVAL
+ output_fields: [ client_zone ]
+ parameters:
+ value_expression: "flags & 8 == 8 ? 'internal' : 'external'"
+
+ - function: EVAL
+ output_fields: [ server_zone ]
+ parameters:
+ value_expression: "flags & 16 == 16 ? 'internal' : 'external'"
+
+ - function: CN_IP_ZONE_LOOKUP
+ lookup_fields: [ client_ip ]
+ output_fields: [ client_zone ]
+ parameters:
+ kb_name: none
+ #kb_name: cn_internal_ip
+
+ - function: CN_IP_ZONE_LOOKUP
+ lookup_fields: [ server_ip ]
+ output_fields: [ server_zone ]
+ parameters:
+ kb_name: none
+ #kb_name: cn_internal_ip
+
+ - function: EVAL
+ output_fields: [ sent_bytes ]
+ parameters:
+ value_expression: "sent_bytes == null ? 0 : sent_bytes"
+
+ - function: EVAL
+ output_fields: [ sent_pkts ]
+ parameters:
+ value_expression: "sent_pkts == null ? 0 : sent_pkts"
+
+ - function: EVAL
+ output_fields: [ received_bytes ]
+ parameters:
+ value_expression: "received_bytes == null ? 0 : received_bytes"
+
+ - function: EVAL
+ output_fields: [ received_pkts ]
+ parameters:
+ value_expression: "received_pkts == null ? 0 : received_pkts"
+
+ - function: EVAL
+ output_fields: [ traffic_inbound_byte ]
+ parameters:
+ value_expression: "client_zone == 'internal' && server_zone == 'external' ? received_bytes : traffic_inbound_byte"
+
+ - function: EVAL
+ output_fields: [ traffic_outbound_byte ]
+ parameters:
+ value_expression: "client_zone == 'external' && server_zone == 'internal' ? received_bytes : traffic_outbound_byte"
+
+ - function: EVAL
+ output_fields: [ traffic_inbound_pkt ]
+ parameters:
+ value_expression: "client_zone == 'internal' && server_zone == 'external' ? received_pkts : traffic_inbound_pkt"
+
+ - function: EVAL
+ output_fields: [ traffic_outbound_pkt ]
+ parameters:
+ value_expression: "client_zone == 'external' && server_zone == 'internal' ? received_pkts : traffic_outbound_pkt"
+
+ - function: EVAL
+ output_fields: [ traffic_outbound_byte ]
+ parameters:
+ value_expression: "client_zone == 'internal' && server_zone == 'external' ? sent_bytes : traffic_outbound_byte"
+
+ - function: EVAL
+ output_fields: [ traffic_inbound_byte ]
+ parameters:
+ value_expression: "client_zone == 'external' && server_zone == 'internal' ? sent_bytes : traffic_inbound_byte"
+
+ - function: EVAL
+ output_fields: [ traffic_outbound_pkt ]
+ parameters:
+ value_expression: "client_zone == 'internal' && server_zone == 'external' ? sent_pkts : traffic_outbound_pkt"
+
+ - function: EVAL
+ output_fields: [ traffic_inbound_pkt ]
+ parameters:
+ value_expression: "client_zone == 'external' && server_zone == 'internal' ? sent_pkts : traffic_inbound_pkt"
+
+ - function: EVAL
+ output_fields: [ traffic_internal_byte ]
+ parameters:
+ value_expression: "client_zone == 'internal' && server_zone == 'internal' ? sent_bytes + received_bytes : traffic_internal_byte"
+
+ - function: EVAL
+ output_fields: [ traffic_internal_pkt ]
+ parameters:
+ value_expression: "client_zone == 'internal' && server_zone == 'internal' ? sent_pkts + received_pkts : traffic_internal_pkt"
+
+ - function: EVAL
+ output_fields: [ traffic_through_byte ]
+ parameters:
+ value_expression: "client_zone == 'external' && server_zone == 'external' ? sent_bytes + received_bytes : traffic_through_byte"
+
+ - function: EVAL
+ output_fields: [ traffic_through_pkt ]
+ parameters:
+ value_expression: "client_zone == 'external' && server_zone == 'external' ? sent_pkts + received_pkts : traffic_through_pkt"
+
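+ # sessions is a constant 1 per record, presumably so downstream aggregations can sum it;
+ # the two query counters below split that count by client zone.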
+ - function: EVAL
+ output_fields: [ sessions ]
+ parameters:
+ value_expression: "1"
+
+ - function: EVAL
+ output_fields: [ internal_query_num ]
+ parameters:
+ value_expression: "client_zone == 'internal' ? sessions : internal_query_num"
+
+ - function: EVAL
+ output_fields: [ external_query_num ]
+ parameters:
+ value_expression: "client_zone == 'external' ? sessions : external_query_num"
+
+ - function: CN_VPN_LOOKUP
+ lookup_fields: [ server_ip ]
+ output_fields: [ server_vpn_service_name ]
+ parameters:
+ kb_name: cn_vpn_learning_ip
+ option: IP_TO_VPN
+
+ - function: CN_VPN_LOOKUP
+ lookup_fields: [ domain ]
+ output_fields: [ domain_vpn_service_name ]
+ parameters:
+ kb_name: cn_vpn_learning_domain
+ option: DOMAIN_TO_VPN
+
+ - function: CN_IOC_LOOKUP
+ lookup_fields: [ server_ip ]
+ output_fields: [ server_malware ]
+ parameters:
+ kb_name: cn_ioc_malware
+ option: IP_TO_MALWARE
+
+ - function: CN_IOC_LOOKUP
+ lookup_fields: [ domain ]
+ output_fields: [ domain_malware ]
+ parameters:
+ kb_name: cn_ioc_malware
+ option: DOMAIN_TO_MALWARE
+
+ - function: CN_USER_DEFINE_TAG_LOOKUP
+ lookup_fields: [ client_ip ]
+ output_fields: [ client_ip_tags ]
+ parameters:
+ kb_name: cn_ip_tag_user_define
+ option: IP_TO_TAG
+
+ - function: CN_USER_DEFINE_TAG_LOOKUP
+ lookup_fields: [ server_ip ]
+ output_fields: [ server_ip_tags ]
+ parameters:
+ kb_name: cn_ip_tag_user_define
+ option: IP_TO_TAG
+
+ - function: CN_USER_DEFINE_TAG_LOOKUP
+ lookup_fields: [ domain ]
+ output_fields: [ domain_tags ]
+ parameters:
+ kb_name: cn_domain_tag_user_define
+ option: DOMAIN_TO_TAG
+
+ - function: CN_USER_DEFINE_TAG_LOOKUP
+ lookup_fields: [ app ]
+ output_fields: [ app_tags ]
+ parameters:
+ kb_name: cn_app_tag_user_define
+ option: APP_TO_TAG
+
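+ # The GENERATE_STRING_ARRAY functions merge the individual lookup results into one tag
+ # array per entity, and CN_ARRAY_ELEMENTS_PREPEND then namespaces every element with an
+ # "ip.", "domain." or "app." prefix.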
+ - function: GENERATE_STRING_ARRAY
+ lookup_fields: [ client_idc_renter,client_ip_tags ]
+ output_fields: [ client_ip_tags ]
+
+ - function: GENERATE_STRING_ARRAY
+ lookup_fields: [ server_idc_renter,server_dns_server,server_node_type,server_malware,server_vpn_service_name,server_ip_tags ]
+ output_fields: [ server_ip_tags ]
+
+ - function: GENERATE_STRING_ARRAY
+ lookup_fields: [ domain_node_type,domain_malware,domain_vpn_service_name,domain_tags ]
+ output_fields: [ domain_tags ]
+
+ - function: CN_ARRAY_ELEMENTS_PREPEND
+ lookup_fields: [ client_ip_tags ]
+ output_fields: [ client_ip_tags ]
+ parameters:
+ prefix: ip.
+
+ - function: CN_ARRAY_ELEMENTS_PREPEND
+ lookup_fields: [ server_ip_tags ]
+ output_fields: [ server_ip_tags ]
+ parameters:
+ prefix: ip.
+
+ - function: CN_ARRAY_ELEMENTS_PREPEND
+ lookup_fields: [ domain_tags ]
+ output_fields: [ domain_tags ]
+ parameters:
+ prefix: domain.
+
+ - function: CN_ARRAY_ELEMENTS_PREPEND
+ lookup_fields: [ app_tags ]
+ output_fields: [ app_tags ]
+ parameters:
+ prefix: app.
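+
+# remove_field_processor keeps only the fields listed in output_fields before the records
+# are handed to the Kafka sink.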
+postprocessing_pipelines:
+ remove_field_processor: # [object] Processing Pipeline
+ type: com.geedgenetworks.core.processor.projection.ProjectionProcessorImpl
+ output_fields: [ recv_time,log_id,flags,start_timestamp_ms,end_timestamp_ms,duration_ms,decoded_as,client_ip,server_ip,client_port,server_port,app,app_transition,decoded_path,ip_protocol,l7_protocol,out_link_id,in_link_id,subscriber_id,imei,imsi,phone_number,apn,http_url,dns_rcode,dns_qname,dns_qtype,dns_rr,out_link_direction,in_link_direction,server_fqdn,server_domain,domain,domain_sld,domain_category_name,domain_category_group,domain_reputation_level,domain_icp_company_name,domain_whois_org,domain_tags,client_zone,client_country_region,client_super_admin_area,client_admin_area,client_longitude,client_latitude,client_isp,client_asn,client_ip_tags,server_zone,server_country_region,server_super_admin_area,server_admin_area,server_longitude,server_latitude,server_isp,server_asn,server_ip_tags,app_category,app_subcategory,app_company,app_company_category,app_tags,sent_pkts,sent_bytes,received_pkts,received_bytes,sessions,tcp_c2s_lost_bytes,tcp_s2c_lost_bytes,tcp_c2s_o3_pkts,tcp_s2c_o3_pkts,tcp_c2s_rtx_bytes,tcp_s2c_rtx_bytes,tcp_c2s_rtx_pkts,tcp_s2c_rtx_pkts,tcp_rtt_ms,http_response_latency_ms,ssl_handshake_latency_ms,dns_response_latency_ms,cn_internal_rule_id_list,cn_internal_ioc_type_list,traffic_inbound_byte,traffic_inbound_pkt,traffic_outbound_byte,traffic_outbound_pkt,traffic_internal_byte,traffic_internal_pkt,traffic_through_byte,traffic_through_pkt,internal_query_num,external_query_num ]
+
+sinks:
+ cn_kafka_sink:
+ type: kafka
+ properties:
+ topic: SESSION-RECORD-CN
+ kafka.bootstrap.servers: {{ national_center_cn_kafka_servers }}
+ kafka.client.id: SESSION-RECORD-CN
+ kafka.retries: 0
+ kafka.linger.ms: 10
+ kafka.request.timeout.ms: 30000
+ kafka.batch.size: 262144
+ kafka.buffer.memory: 134217728
+ kafka.max.request.size: 10485760
+ kafka.compression.type: snappy
+ kafka.security.protocol: SASL_PLAINTEXT
+ kafka.sasl.mechanism: PLAIN
+ kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
+ format: json
+
+
+application:
+ env:
+ name: etl_session_record_processed_kafka_to_cn_kafka
+ shade.identifier: aes
+ pipeline:
+ object-reuse: true
+ properties:
+ hos.bucket.name.rtp_file: traffic_rtp_file_bucket
+ hos.bucket.name.http_file: traffic_http_file_bucket
+ hos.bucket.name.eml_file: traffic_eml_file_bucket
+ hos.bucket.name.policy_capture_file: traffic_policy_capture_file_bucket
+ topology:
+ - name: kafka_source
+ downstream: [ session_record_processor ]
+ - name: session_record_processor
+ downstream: [ remove_field_processor ]
+ - name: remove_field_processor
+ downstream: [ cn_kafka_sink ]
+ - name: cn_kafka_sink
+ downstream: [ ]
diff --git a/groot-stream/multi-datacenter-examples/national_datacenter/etl_voip_record_kafka_to_clickhouse b/groot-stream/multi-datacenter-examples/national_datacenter/etl_voip_record_kafka_to_clickhouse
new file mode 100644
index 0000000..7b46dc8
--- /dev/null
+++ b/groot-stream/multi-datacenter-examples/national_datacenter/etl_voip_record_kafka_to_clickhouse
@@ -0,0 +1,119 @@
+sources:
+ kafka_source:
+ type: kafka
+ properties:
+ topic: VOIP-CONVERSATION-RECORD
+ kafka.bootstrap.servers: "{{ kafka_source_servers }}"
+ kafka.client.id: VOIP-CONVERSATION-RECORD
+ kafka.session.timeout.ms: 60000
+ kafka.max.poll.records: 3000
+ kafka.max.partition.fetch.bytes: 31457280
+ kafka.security.protocol: SASL_PLAINTEXT
+ kafka.sasl.mechanism: PLAIN
+ kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
+ kafka.group.id: etl_voip_record_kafka_to_clickhouse
+ kafka.auto.offset.reset: latest
+ format: json
+
+processing_pipelines:
+ etl_processor: # [object] Processing Pipeline
+ type: com.geedgenetworks.core.processor.projection.ProjectionProcessorImpl
+ remove_fields:
+ output_fields:
+ properties:
+ key: value
+ functions: # [array of object] Function List
+
+ - function: ASN_LOOKUP
+ lookup_fields: [server_ip]
+ output_fields: [server_asn]
+ parameters:
+ option: IP_TO_ASN
+ kb_name: tsg_ip_asn
+
+ - function: ASN_LOOKUP
+ lookup_fields: [client_ip]
+ output_fields: [client_asn]
+ parameters:
+ option: IP_TO_ASN
+ kb_name: tsg_ip_asn
+
+ - function: SNOWFLAKE_ID
+ lookup_fields: ['']
+ output_fields: [log_id]
+ parameters:
+ data_center_id_num: 1
+
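+ # The two JSON_EXTRACT functions below pull the data_center and device_group values out
+ # of the device_tag JSON using JSONPath filter expressions.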
+ - function: JSON_EXTRACT
+ lookup_fields: [device_tag]
+ output_fields: [data_center]
+ filter:
+ parameters:
+ value_expression: $.tags[?(@.tag=='data_center')][0].value
+
+ - function: JSON_EXTRACT
+ lookup_fields: [device_tag]
+ output_fields: [device_group]
+ filter:
+ parameters:
+ value_expression: $.tags[?(@.tag=='device_group')][0].value
+
+ - function: CURRENT_UNIX_TIMESTAMP
+ output_fields: [processing_time]
+ parameters:
+ precision: seconds
+
+ - function: UNIX_TIMESTAMP_CONVERTER
+ lookup_fields: [__timestamp]
+ output_fields: [recv_time]
+ parameters:
+ precision: seconds
+
+ - function: EVAL
+ output_fields: [ingestion_time]
+ parameters:
+ value_expression: recv_time
+
+
+ - function: BASE64_DECODE_TO_STRING
+ output_fields: [mail_subject]
+ parameters:
+ value_field: mail_subject
+ charset_field: mail_subject_charset
+
+ - function: BASE64_DECODE_TO_STRING
+ output_fields: [mail_attachment_name]
+ parameters:
+ value_field: mail_attachment_name
+ charset_field: mail_attachment_name_charset
+
+
+sinks:
+ clickhouse_sink:
+ type: clickhouse
+ properties:
+ host: "{{ clickhouse_servers }}"
+ table: tsg_galaxy_v3.voip_record_local
+ batch.size: 100000
+ batch.interval: 30s
+ connection.user: e54c9568586180eede1506eecf3574e9
+ connection.password: 86cf0e2ffba3f541a6c6761313e5cc7e
+ connection.connect_timeout: 30
+ connection.query_timeout: 300
+
+
+application:
+
+ env: # [object] Environment Variables
+ name: etl_voip_record_kafka_to_clickhouse # [string] Job Name
+ shade.identifier: aes
+ pipeline:
+ object-reuse: true # [boolean] Object Reuse, default is false
+ topology:
+ - name: kafka_source
+ downstream: [etl_processor]
+ - name: etl_processor
+ downstream: [clickhouse_sink]
+ - name: clickhouse_sink
+
+
diff --git a/groot-stream/multi-datacenter-examples/national_datacenter/proxy_event_processed_kafka_to_clickhouse b/groot-stream/multi-datacenter-examples/national_datacenter/proxy_event_processed_kafka_to_clickhouse
new file mode 100644
index 0000000..8cdfae5
--- /dev/null
+++ b/groot-stream/multi-datacenter-examples/national_datacenter/proxy_event_processed_kafka_to_clickhouse
@@ -0,0 +1,42 @@
+sources:
+ kafka_source:
+ type: kafka
+ properties:
+ topic: PROXY-EVENT-PROCESSED
+ kafka.bootstrap.servers: "{{ kafka_source_servers }}"
+ kafka.client.id: PROXY-EVENT-PROCESSED
+ kafka.session.timeout.ms: 60000
+ kafka.max.poll.records: 3000
+ kafka.max.partition.fetch.bytes: 31457280
+ kafka.security.protocol: SASL_PLAINTEXT
+ kafka.sasl.mechanism: PLAIN
+ kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
+ kafka.group.id: proxy_event_processed_kafka_to_clickhouse
+ kafka.auto.offset.reset: latest
+ format: json
+
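+# No processing pipeline is defined: the topology below wires kafka_source directly to
+# clickhouse_sink, so records are written to ClickHouse exactly as they arrive.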
+sinks:
+ clickhouse_sink:
+ type: clickhouse
+ properties:
+ host: "{{ clickhouse_servers }}"
+ table: tsg_galaxy_v3.proxy_event_local
+ batch.size: 100000
+ batch.interval: 30s
+ connection.user: e54c9568586180eede1506eecf3574e9
+ connection.password: 86cf0e2ffba3f541a6c6761313e5cc7e
+ connection.connect_timeout: 30
+ connection.query_timeout: 300
+
+
+application:
+
+ env: # [object] Environment Variables
+ name: proxy_event_processed_kafka_to_clickhouse # [string] Job Name
+ shade.identifier: aes
+ pipeline:
+ object-reuse: true # [boolean] Object Reuse, default is false
+ topology:
+ - name: kafka_source
+ downstream: [clickhouse_sink]
+ - name: clickhouse_sink
diff --git a/groot-stream/multi-datacenter-examples/national_datacenter/session_record_processed_kafka_to_clickhouse b/groot-stream/multi-datacenter-examples/national_datacenter/session_record_processed_kafka_to_clickhouse
new file mode 100644
index 0000000..7d0f68b
--- /dev/null
+++ b/groot-stream/multi-datacenter-examples/national_datacenter/session_record_processed_kafka_to_clickhouse
@@ -0,0 +1,42 @@
+sources:
+ kafka_source:
+ type: kafka
+ properties:
+ topic: SESSION-RECORD-PROCESSED
+ kafka.bootstrap.servers: "{{ kafka_source_servers }}"
+ kafka.client.id: SESSION-RECORD-PROCESSED
+ kafka.session.timeout.ms: 60000
+ kafka.max.poll.records: 3000
+ kafka.max.partition.fetch.bytes: 31457280
+ kafka.security.protocol: SASL_PLAINTEXT
+ kafka.sasl.mechanism: PLAIN
+ kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
+ kafka.group.id: session_record_processed_kafka_to_clickhouse
+ kafka.auto.offset.reset: latest
+ format: json
+
+sinks:
+ clickhouse_sink:
+ type: clickhouse
+ properties:
+ host: "{{ clickhouse_servers }}"
+ table: tsg_galaxy_v3.session_record_local
+ batch.size: 100000
+ batch.interval: 30s
+ connection.user: e54c9568586180eede1506eecf3574e9
+ connection.password: 86cf0e2ffba3f541a6c6761313e5cc7e
+ connection.connect_timeout: 30
+ connection.query_timeout: 300
+
+
+application:
+
+ env: # [object] Environment Variables
+ name: session_record_processed_kafka_to_clickhouse # [string] Job Name
+ shade.identifier: aes
+ pipeline:
+ object-reuse: true # [boolean] Object Reuse, default is false
+ topology:
+ - name: kafka_source
+ downstream: [clickhouse_sink]
+ - name: clickhouse_sink
diff --git a/groot-stream/multi-datacenter-examples/national_datacenter/transaction_record_processed_kafka_to_clickhouse b/groot-stream/multi-datacenter-examples/national_datacenter/transaction_record_processed_kafka_to_clickhouse
new file mode 100644
index 0000000..83ce33d
--- /dev/null
+++ b/groot-stream/multi-datacenter-examples/national_datacenter/transaction_record_processed_kafka_to_clickhouse
@@ -0,0 +1,42 @@
+sources:
+ kafka_source:
+ type: kafka
+ properties:
+ topic: TRANSACTION-RECORD-PROCESSED
+ kafka.bootstrap.servers: "{{ kafka_source_servers }}"
+ kafka.client.id: TRANSACTION-RECORD-PROCESSED
+ kafka.session.timeout.ms: 60000
+ kafka.max.poll.records: 3000
+ kafka.max.partition.fetch.bytes: 31457280
+ kafka.security.protocol: SASL_PLAINTEXT
+ kafka.sasl.mechanism: PLAIN
+ kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
+ kafka.group.id: transaction_record_processed_kafka_to_clickhouse
+ kafka.auto.offset.reset: latest
+ format: json
+
+sinks:
+ clickhouse_sink:
+ type: clickhouse
+ properties:
+ host: "{{ clickhouse_servers }}"
+ table: tsg_galaxy_v3.transaction_record_local
+ batch.size: 100000
+ batch.interval: 30s
+ connection.user: e54c9568586180eede1506eecf3574e9
+ connection.password: 86cf0e2ffba3f541a6c6761313e5cc7e
+ connection.connect_timeout: 30
+ connection.query_timeout: 300
+
+
+application:
+
+ env: # [object] Environment Variables
+ name: transaction_record_processed_kafka_to_clickhouse # [string] Job Name
+ shade.identifier: aes
+ pipeline:
+ object-reuse: true # [boolean] Object Reuse, default is false
+ topology:
+ - name: kafka_source
+ downstream: [clickhouse_sink]
+ - name: clickhouse_sink
diff --git a/groot-stream/single-cluster-examples/dos_event_kafka_to_clickhouse b/groot-stream/single-cluster-examples/dos_event_kafka_to_clickhouse
new file mode 100644
index 0000000..5152734
--- /dev/null
+++ b/groot-stream/single-cluster-examples/dos_event_kafka_to_clickhouse
@@ -0,0 +1,50 @@
+sources:
+ kafka_source:
+ type: kafka
+ properties:
+ topic: DOS-EVENT
+ kafka.bootstrap.servers: "{{ kafka_source_servers }}"
+ kafka.client.id: DOS-EVENT
+ kafka.session.timeout.ms: 60000
+ kafka.max.poll.records: 3000
+ kafka.max.partition.fetch.bytes: 31457280
+ kafka.security.protocol: SASL_PLAINTEXT
+ kafka.ssl.keystore.location:
+ kafka.ssl.keystore.password:
+ kafka.ssl.truststore.location:
+ kafka.ssl.truststore.password:
+ kafka.ssl.key.password:
+ kafka.sasl.mechanism: PLAIN
+ kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
+ kafka.buffer.memory:
+ kafka.group.id: dos_event_kafka_to_clickhouse-20231221
+ kafka.auto.offset.reset: latest
+ kafka.max.request.size:
+ kafka.compression.type: none
+ format: json
+
+
+sinks:
+ clickhouse_sink:
+ type: clickhouse
+ properties:
+ host: "{{ clickhouse_servers }}"
+ table: tsg_galaxy_v3.dos_event_local
+ batch.size: 100000
+ batch.interval: 30s
+ connection.user: e54c9568586180eede1506eecf3574e9
+ connection.password: 86cf0e2ffba3f541a6c6761313e5cc7e
+
+
+application:
+ env:
+ name: dos_event_kafka_to_clickhouse
+ shade.identifier: aes
+ pipeline:
+ object-reuse: true # [boolean] Object Reuse, default is false
+ topology:
+ - name: kafka_source
+ downstream: [clickhouse_sink]
+ - name: clickhouse_sink
+
+
diff --git a/groot-stream/single-cluster-examples/etl_datapath_telemetry_record_kafka_to_clickhouse b/groot-stream/single-cluster-examples/etl_datapath_telemetry_record_kafka_to_clickhouse
new file mode 100644
index 0000000..9ae5b54
--- /dev/null
+++ b/groot-stream/single-cluster-examples/etl_datapath_telemetry_record_kafka_to_clickhouse
@@ -0,0 +1,72 @@
+sources:
+ kafka_source:
+ type: kafka
+ properties:
+ topic: DATAPATH-TELEMETRY-RECORD
+ kafka.bootstrap.servers: "{{ kafka_source_servers }}"
+ kafka.client.id: DATAPATH-TELEMETRY-RECORD
+ kafka.session.timeout.ms: 60000
+ kafka.max.poll.records: 3000
+ kafka.max.partition.fetch.bytes: 31457280
+ #kafka.security.protocol: SSL
+ #kafka.ssl.endpoint.identification.algorithm: ""
+ #kafka.ssl.keystore.location: /data/tsg/olap/flink/topology/data/keystore.jks
+ #kafka.ssl.keystore.password: 86cf0e2ffba3f541a6c6761313e5cc7e
+ #kafka.ssl.truststore.location: /data/tsg/olap/flink/topology/data/truststore.jks
+ #kafka.ssl.truststore.password: 86cf0e2ffba3f541a6c6761313e5cc7e
+ #kafka.ssl.key.password: 86cf0e2ffba3f541a6c6761313e5cc7e
+ kafka.security.protocol: SASL_PLAINTEXT
+ kafka.sasl.mechanism: PLAIN
+ kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
+ kafka.group.id: etl_datapath_telemetry_record_kafka_to_clickhouse-20230125
+ kafka.auto.offset.reset: latest
+ format: msgpack
+
+processing_pipelines:
+ etl_processor: # [object] Processing Pipeline
+ type: com.geedgenetworks.core.processor.projection.ProjectionProcessorImpl
+ remove_fields:
+ output_fields:
+ functions:
+ - function: SNOWFLAKE_ID
+ lookup_fields: [ '' ]
+ output_fields: [ log_id ]
+ parameters:
+ data_center_id_num: 1
+ - function: UNIX_TIMESTAMP_CONVERTER
+ lookup_fields: [ __timestamp ]
+ output_fields: [ recv_time ]
+ parameters:
+ precision: seconds
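+ # The raw packet payload is Base64-encoded in place, presumably so the binary bytes can
+ # be stored as a plain string column in ClickHouse.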
+ - function: BASE64_ENCODE_TO_STRING
+ output_fields: [ packet ]
+ parameters:
+ value_field: packet
+
+sinks:
+ clickhouse_sink:
+ type: clickhouse
+ properties:
+ host: "{{ clickhouse_servers }}"
+ table: tsg_galaxy_v3.datapath_telemetry_record_local
+ batch.size: 5000
+ batch.interval: 30s
+ connection.user: e54c9568586180eede1506eecf3574e9
+ connection.password: 86cf0e2ffba3f541a6c6761313e5cc7e
+
+
+application:
+
+ env: # [object] Environment Variables
+ name: etl_datapath_telemetry_record_kafka_to_clickhouse # [string] Job Name
+ shade.identifier: aes
+ pipeline:
+ object-reuse: true # [boolean] Object Reuse, default is false
+ topology:
+ - name: kafka_source
+ downstream: [etl_processor]
+ - name: etl_processor
+ downstream: [clickhouse_sink]
+ - name: clickhouse_sink
+
+
diff --git a/groot-stream/single-cluster-examples/etl_proxy_event_kafka_to_clickhouse b/groot-stream/single-cluster-examples/etl_proxy_event_kafka_to_clickhouse
new file mode 100644
index 0000000..011eabb
--- /dev/null
+++ b/groot-stream/single-cluster-examples/etl_proxy_event_kafka_to_clickhouse
@@ -0,0 +1,143 @@
+sources:
+ kafka_source:
+ type: kafka
+ # fields: # [array of object] Field List, if not set, all fields(Map<String, Object>) will be output.
+ # watermark_timestamp: common_recv_time # [string] Watermark Field Name
+ # watermark_timestamp_unit: ms # [string] Watermark Unit, default is ms
+ # watermark_lag: 60 # [number] Watermark Lag, default is 60
+ properties:
+ topic: PROXY-EVENT
+ kafka.bootstrap.servers: "{{ kafka_source_servers }}"
+ kafka.client.id: PROXY-EVENT
+ kafka.session.timeout.ms: 60000
+ kafka.max.poll.records: 3000
+ kafka.max.partition.fetch.bytes: 31457280
+ kafka.security.protocol: SASL_PLAINTEXT
+ kafka.ssl.keystore.location:
+ kafka.ssl.keystore.password:
+ kafka.ssl.truststore.location:
+ kafka.ssl.truststore.password:
+ kafka.ssl.key.password:
+ kafka.sasl.mechanism: PLAIN
+ kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
+ kafka.buffer.memory:
+ kafka.group.id: etl_proxy_event_kafka_to_clickhouse-20231221
+ kafka.auto.offset.reset: latest
+ kafka.max.request.size:
+ kafka.compression.type: none
+ format: json
+
+processing_pipelines:
+ etl_processor: # [object] Processing Pipeline
+ type: com.geedgenetworks.core.processor.projection.ProjectionProcessorImpl
+ remove_fields:
+ output_fields:
+ properties:
+ key: value
+ functions: # [array of object] Function List
+
+ - function: SNOWFLAKE_ID
+ lookup_fields: ['']
+ output_fields: [log_id]
+ parameters:
+ data_center_id_num: 1
+
+ - function: JSON_EXTRACT
+ lookup_fields: [device_tag]
+ output_fields: [data_center]
+ filter:
+ parameters:
+ value_expression: $.tags[?(@.tag=='data_center')][0].value
+
+ - function: JSON_EXTRACT
+ lookup_fields: [device_tag]
+ output_fields: [device_group]
+ filter:
+ parameters:
+ value_expression: $.tags[?(@.tag=='device_group')][0].value
+
+ - function: CURRENT_UNIX_TIMESTAMP
+ output_fields: [processing_time]
+ parameters:
+ precision: seconds
+
+ - function: UNIX_TIMESTAMP_CONVERTER
+ lookup_fields: [__timestamp]
+ output_fields: [recv_time]
+ parameters:
+ precision: seconds
+
+ - function: EVAL
+ output_fields: [ingestion_time]
+ parameters:
+ value_expression: recv_time
+
+ - function: BASE64_DECODE_TO_STRING
+ output_fields: [mail_subject]
+ parameters:
+ value_field: mail_subject
+ charset_field: mail_subject_charset
+
+ - function: BASE64_DECODE_TO_STRING
+ output_fields: [mail_attachment_name]
+ parameters:
+ value_field: mail_attachment_name
+ charset_field: mail_attachment_name_charset
+
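+ # The PATH_COMBINE functions below join the HOS base path and traffic-file bucket name
+ # (taken from the job properties) with the per-record relative path to build a full
+ # object path for each captured artifact.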
+ - function: PATH_COMBINE
+ lookup_fields: [rtp_pcap_path]
+ output_fields: [rtp_pcap_path]
+ parameters:
+ path: [props.hos.path, props.hos.bucket.name.traffic_file, rtp_pcap_path]
+
+ - function: PATH_COMBINE
+ lookup_fields: [http_request_body]
+ output_fields: [http_request_body]
+ parameters:
+ path: [props.hos.path, props.hos.bucket.name.traffic_file, http_request_body]
+
+ - function: PATH_COMBINE
+ lookup_fields: [http_response_body]
+ output_fields: [http_response_body]
+ parameters:
+ path: [props.hos.path, props.hos.bucket.name.traffic_file, http_response_body]
+
+ - function: PATH_COMBINE
+ lookup_fields: [mail_eml_file]
+ output_fields: [mail_eml_file]
+ parameters:
+ path: [props.hos.path, props.hos.bucket.name.traffic_file, mail_eml_file]
+
+ - function: PATH_COMBINE
+ lookup_fields: [packet_capture_file]
+ output_fields: [packet_capture_file]
+ parameters:
+ path: [props.hos.path, props.hos.bucket.name.traffic_file, packet_capture_file]
+
+sinks:
+ clickhouse_sink:
+ type: clickhouse
+ properties:
+ host: "{{ clickhouse_servers }}"
+ table: tsg_galaxy_v3.proxy_event_local
+ batch.size: 100000
+ batch.interval: 30s
+ connection.user: e54c9568586180eede1506eecf3574e9
+ connection.password: 86cf0e2ffba3f541a6c6761313e5cc7e
+
+
+application:
+
+ env: # [object] Environment Variables
+ name: etl_proxy_event_kafka_to_clickhouse # [string] Job Name
+ shade.identifier: aes
+ pipeline:
+ object-reuse: true # [boolean] Object Reuse, default is false
+ topology:
+ - name: kafka_source
+ downstream: [etl_processor]
+ - name: etl_processor
+ downstream: [clickhouse_sink]
+ - name: clickhouse_sink
+
+
diff --git a/groot-stream/single-cluster-examples/etl_session_record_kafka_to_clickhouse b/groot-stream/single-cluster-examples/etl_session_record_kafka_to_clickhouse
new file mode 100644
index 0000000..960c10e
--- /dev/null
+++ b/groot-stream/single-cluster-examples/etl_session_record_kafka_to_clickhouse
@@ -0,0 +1,141 @@
+sources:
+ kafka_source:
+ type: kafka
+ properties:
+ topic: SESSION-RECORD
+ kafka.bootstrap.servers: "{{ kafka_source_servers }}"
+ kafka.client.id: SESSION-RECORD
+ kafka.session.timeout.ms: 60000
+ kafka.max.poll.records: 3000
+ kafka.max.partition.fetch.bytes: 31457280
+
+ # kafka.security.protocol: SSL
+ # kafka.ssl.endpoint.identification.algorithm: ""
+ # kafka.ssl.keystore.location: $GROOT_HOME/config/dat/keystore.jks
+ # kafka.ssl.keystore.password: 86cf0e2ffba3f541a6c6761313e5cc7e
+ # kafka.ssl.truststore.location: $GROOT_HOME/config/dat/truststore.jks
+ # kafka.ssl.truststore.password: 86cf0e2ffba3f541a6c6761313e5cc7e
+ # kafka.ssl.key.password: 86cf0e2ffba3f541a6c6761313e5cc7e
+
+ kafka.security.protocol: SASL_PLAINTEXT
+ kafka.sasl.mechanism: PLAIN
+ kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
+ kafka.group.id: etl_session_record_kafka_to_clickhouse-20230125
+ kafka.auto.offset.reset: latest
+ format: json
+
+processing_pipelines:
+ etl_processor:
+ type: projection
+ properties:
+ key: value
+ functions:
+ - function: SNOWFLAKE_ID
+ lookup_fields: ['']
+ output_fields: [log_id]
+ parameters:
+ data_center_id_num: 1
+
+ - function: JSON_EXTRACT
+ lookup_fields: [device_tag]
+ output_fields: [data_center]
+ filter:
+ parameters:
+ value_expression: $.tags[?(@.tag=='data_center')][0].value
+
+ - function: JSON_EXTRACT
+ lookup_fields: [device_tag]
+ output_fields: [device_group]
+ filter:
+ parameters:
+ value_expression: $.tags[?(@.tag=='device_group')][0].value
+
+ - function: CURRENT_UNIX_TIMESTAMP
+ output_fields: [processing_time]
+ parameters:
+ precision: seconds
+
+ - function: UNIX_TIMESTAMP_CONVERTER
+ lookup_fields: [__timestamp]
+ output_fields: [recv_time]
+ parameters:
+ precision: seconds
+
+ - function: EVAL
+ output_fields: [ingestion_time]
+ parameters:
+ value_expression: recv_time
+
+ - function: BASE64_DECODE_TO_STRING
+ output_fields: [mail_subject]
+ parameters:
+ value_field: mail_subject
+ charset_field: mail_subject_charset
+
+ - function: BASE64_DECODE_TO_STRING
+ output_fields: [mail_attachment_name]
+ parameters:
+ value_field: mail_attachment_name
+ charset_field: mail_attachment_name_charset
+
+ - function: PATH_COMBINE
+ lookup_fields: [rtp_pcap_path]
+ output_fields: [rtp_pcap_path]
+ parameters:
+ path: [props.hos.path, props.hos.bucket.name.traffic_file, rtp_pcap_path]
+
+ - function: PATH_COMBINE
+ lookup_fields: [http_request_body]
+ output_fields: [http_request_body]
+ parameters:
+ path: [props.hos.path, props.hos.bucket.name.traffic_file, http_request_body]
+
+ - function: PATH_COMBINE
+ lookup_fields: [http_response_body]
+ output_fields: [http_response_body]
+ parameters:
+ path: [props.hos.path, props.hos.bucket.name.traffic_file, http_response_body]
+
+ - function: PATH_COMBINE
+ lookup_fields: [mail_eml_file]
+ output_fields: [mail_eml_file]
+ parameters:
+ path: [props.hos.path, props.hos.bucket.name.traffic_file, mail_eml_file]
+
+ - function: PATH_COMBINE
+ lookup_fields: [packet_capture_file]
+ output_fields: [packet_capture_file]
+ parameters:
+ path: [props.hos.path, props.hos.bucket.name.traffic_file, packet_capture_file]
+
+sinks:
+ clickhouse_sink:
+ type: clickhouse
+ properties:
+ host: "{{ clickhouse_servers }}"
+ table: tsg_galaxy_v3.session_record_local
+ batch.size: 100000
+ batch.interval: 30s
+ connection.user: e54c9568586180eede1506eecf3574e9
+ connection.password: 86cf0e2ffba3f541a6c6761313e5cc7e
+
+
+application:
+ env:
+ name: etl_session_record_kafka_to_clickhouse
+ shade.identifier: aes
+ pipeline:
+ object-reuse: true
+ properties:
+ hos.bucket.name.rtp_file: traffic_rtp_file_bucket
+ hos.bucket.name.http_file: traffic_http_file_bucket
+ hos.bucket.name.eml_file: traffic_eml_file_bucket
+ hos.bucket.name.policy_capture_file: traffic_policy_capture_file_bucket
+ topology:
+ - name: kafka_source
+ downstream: [etl_processor]
+ - name: etl_processor
+ downstream: [clickhouse_sink]
+ - name: clickhouse_sink
+
+
diff --git a/groot-stream/single-cluster-examples/etl_traffic_sketch_metric_kafka_to_clickhouse b/groot-stream/single-cluster-examples/etl_traffic_sketch_metric_kafka_to_clickhouse
new file mode 100644
index 0000000..9eee8c4
--- /dev/null
+++ b/groot-stream/single-cluster-examples/etl_traffic_sketch_metric_kafka_to_clickhouse
@@ -0,0 +1,93 @@
+sources:
+ kafka_source:
+ type: kafka
+ properties:
+ topic: TRAFFIC-SKETCH-METRIC
+ kafka.bootstrap.servers: "{{ kafka_source_servers }}"
+ kafka.client.id: TRAFFIC-SKETCH-METRIC
+ kafka.session.timeout.ms: 60000
+ kafka.max.poll.records: 3000
+ kafka.max.partition.fetch.bytes: 31457280
+ kafka.security.protocol: SASL_PLAINTEXT
+ kafka.sasl.mechanism: PLAIN
+ kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
+ kafka.group.id: etl_traffic_sketch_metric
+ kafka.auto.offset.reset: latest
+ kafka.compression.type: none
+ format: json
+
+processing_pipelines:
+ etl_processor: # [object] Processing Pipeline
+ type: com.geedgenetworks.core.processor.projection.ProjectionProcessorImpl
+ remove_fields:
+ output_fields:
+ functions: # [array of object] Function List
+
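+ # FLATTEN unnests the fields/tags maps (up to three levels), and RENAME then strips the
+ # 'tags.' and 'fields.' prefixes, presumably so the flattened keys line up with the
+ # ClickHouse column names.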
+ - function: FLATTEN
+ lookup_fields: [ fields,tags ]
+ output_fields: [ ]
+ parameters:
+ #prefix: ""
+ depth: 3
+ # delimiter: "."
+
+ - function: RENAME
+ lookup_fields: [ '' ]
+ output_fields: [ '' ]
+ filter:
+ parameters:
+ # parent_fields: [tags]
+ #rename_fields:
+ # tags: tags
+ rename_expression: key =string.replace_all(key,'tags.','');key =string.replace_all(key,'fields.','');return key;
+
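+ # internal_ip / external_ip are picked from client_ip / server_ip based on the sketch
+ # direction: for Outbound flows the client is treated as the internal side.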
+ - function: EVAL
+ output_fields: [ internal_ip ]
+ parameters:
+ value_expression: "direction == 'Outbound' ? client_ip : server_ip"
+ - function: EVAL
+ output_fields: [ external_ip ]
+ parameters:
+ value_expression: "direction == 'Outbound' ? server_ip : client_ip"
+
+ - function: UNIX_TIMESTAMP_CONVERTER
+ lookup_fields: [ timestamp_ms ]
+ output_fields: [ recv_time ]
+ parameters:
+ precision: seconds
+
+ - function: SNOWFLAKE_ID
+ lookup_fields: [ '' ]
+ output_fields: [ log_id ]
+ filter:
+ parameters:
+ data_center_id_num: 1
+
+
+sinks:
+ clickhouse_sink:
+ type: clickhouse
+ properties:
+ host: "{{ clickhouse_servers }}"
+ table: tsg_galaxy_v3.traffic_sketch_metric_local
+ batch.size: 100000
+ batch.interval: 30s
+ connection.user: e54c9568586180eede1506eecf3574e9
+ connection.password: 86cf0e2ffba3f541a6c6761313e5cc7e
+
+
+application:
+
+ env: # [object] Environment Variables
+ name: etl_traffic_sketch_metric # [string] Job Name
+ shade.identifier: aes
+ pipeline:
+ object-reuse: true # [boolean] Object Reuse, default is false
+ topology:
+ - name: kafka_source
+ downstream: [etl_processor]
+ - name: etl_processor
+ downstream: [clickhouse_sink]
+ - name: clickhouse_sink
+
+
diff --git a/groot-stream/single-cluster-examples/etl_transaction_record_kafka_to_clickhouse b/groot-stream/single-cluster-examples/etl_transaction_record_kafka_to_clickhouse
new file mode 100644
index 0000000..03bd6d7
--- /dev/null
+++ b/groot-stream/single-cluster-examples/etl_transaction_record_kafka_to_clickhouse
@@ -0,0 +1,141 @@
+sources:
+ kafka_source:
+ type: kafka
+ # fields: # [array of object] Field List, if not set, all fields(Map<String, Object>) will be output.
+ # watermark_timestamp: common_recv_time # [string] Watermark Field Name
+ # watermark_timestamp_unit: ms # [string] Watermark Unit, default is ms
+ # watermark_lag: 60 # [number] Watermark Lag, default is 60
+ properties:
+ topic: TRANSACTION-RECORD
+ kafka.bootstrap.servers: "{{ kafka_source_servers }}"
+ kafka.client.id: TRANSACTION-RECORD
+ kafka.session.timeout.ms: 60000
+ kafka.max.poll.records: 3000
+ kafka.max.partition.fetch.bytes: 31457280
+ kafka.security.protocol: SASL_PLAINTEXT
+ kafka.ssl.keystore.location:
+ kafka.ssl.keystore.password:
+ kafka.ssl.truststore.location:
+ kafka.ssl.truststore.password:
+ kafka.ssl.key.password:
+ kafka.sasl.mechanism: PLAIN
+ kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
+ kafka.buffer.memory:
+ kafka.group.id: etl_transaction_record_kafka_to_clickhouse-20240308
+ kafka.auto.offset.reset: latest
+ kafka.max.request.size:
+ kafka.compression.type: none
+ format: json
+
+processing_pipelines:
+ etl_processor: # [object] Processing Pipeline
+ type: com.geedgenetworks.core.processor.projection.ProjectionProcessorImpl
+ remove_fields:
+ output_fields:
+ properties:
+ key: value
+ functions: # [array of object] Function List
+
+ - function: SNOWFLAKE_ID
+ lookup_fields: ['']
+ output_fields: [log_id]
+ parameters:
+ data_center_id_num: 1
+
+ - function: JSON_EXTRACT
+ lookup_fields: [device_tag]
+ output_fields: [data_center]
+ filter:
+ parameters:
+ value_expression: $.tags[?(@.tag=='data_center')][0].value
+
+ - function: JSON_EXTRACT
+ lookup_fields: [device_tag]
+ output_fields: [device_group]
+ filter:
+ parameters:
+ value_expression: $.tags[?(@.tag=='device_group')][0].value
+
+ - function: CURRENT_UNIX_TIMESTAMP
+ output_fields: [processing_time]
+ parameters:
+ precision: seconds
+
+ - function: UNIX_TIMESTAMP_CONVERTER
+ lookup_fields: [__timestamp]
+ output_fields: [recv_time]
+ parameters:
+ precision: seconds
+
+ - function: EVAL
+ output_fields: [ingestion_time]
+ parameters:
+ value_expression: recv_time
+
+ - function: BASE64_DECODE_TO_STRING
+ output_fields: [mail_subject]
+ parameters:
+ value_field: mail_subject
+ charset_field: mail_subject_charset
+
+ - function: BASE64_DECODE_TO_STRING
+ output_fields: [mail_attachment_name]
+ parameters:
+ value_field: mail_attachment_name
+ charset_field: mail_attachment_name_charset
+
+ - function: PATH_COMBINE
+ lookup_fields: [rtp_pcap_path]
+ output_fields: [rtp_pcap_path]
+ parameters:
+ path: [props.hos.path, props.hos.bucket.name.traffic_file, rtp_pcap_path]
+
+ - function: PATH_COMBINE
+ lookup_fields: [http_request_body]
+ output_fields: [http_request_body]
+ parameters:
+ path: [props.hos.path, props.hos.bucket.name.traffic_file, http_request_body]
+
+ - function: PATH_COMBINE
+ lookup_fields: [http_response_body]
+ output_fields: [http_response_body]
+ parameters:
+ path: [props.hos.path, props.hos.bucket.name.traffic_file, http_response_body]
+
+ - function: PATH_COMBINE
+ lookup_fields: [mail_eml_file]
+ output_fields: [mail_eml_file]
+ parameters:
+ path: [props.hos.path, props.hos.bucket.name.traffic_file, mail_eml_file]
+
+ - function: PATH_COMBINE
+ lookup_fields: [packet_capture_file]
+ output_fields: [packet_capture_file]
+ parameters:
+ path: [props.hos.path, props.hos.bucket.name.traffic_file, packet_capture_file]
+
+sinks:
+ clickhouse_sink:
+ type: clickhouse
+ properties:
+ host: "{{ clickhouse_servers }}"
+ table: tsg_galaxy_v3.transaction_record_local
+ batch.size: 100000
+ batch.interval: 30s
+ connection.user: e54c9568586180eede1506eecf3574e9
+ connection.password: 86cf0e2ffba3f541a6c6761313e5cc7e
+
+
+application:
+
+ env: # [object] Environment Variables
+ name: etl_transaction_record_kafka_to_clickhouse # [string] Job Name
+ shade.identifier: aes
+ pipeline:
+ object-reuse: true # [boolean] Object Reuse, default is false
+ topology:
+ - name: kafka_source
+ downstream: [etl_processor]
+ - name: etl_processor
+ downstream: [clickhouse_sink]
+ - name: clickhouse_sink
diff --git a/groot-stream/single-cluster-examples/etl_voip_record_kafka_to_clickhouse b/groot-stream/single-cluster-examples/etl_voip_record_kafka_to_clickhouse
new file mode 100644
index 0000000..6fde822
--- /dev/null
+++ b/groot-stream/single-cluster-examples/etl_voip_record_kafka_to_clickhouse
@@ -0,0 +1,143 @@
+sources:
+ kafka_source:
+ type: kafka
+ # fields: # [array of object] Field List, if not set, all fields(Map<String, Object>) will be output.
+ # watermark_timestamp: common_recv_time # [string] Watermark Field Name
+ # watermark_timestamp_unit: ms # [string] Watermark Unit, default is ms
+ # watermark_lag: 60 # [number] Watermark Lag, default is 60
+ properties:
+ topic: VOIP-CONVERSATION-RECORD
+ kafka.bootstrap.servers: "{{ kafka_source_servers }}"
+ kafka.client.id: VOIP-CONVERSATION-RECORD
+ kafka.session.timeout.ms: 60000
+ kafka.max.poll.records: 3000
+ kafka.max.partition.fetch.bytes: 31457280
+ kafka.security.protocol: SASL_PLAINTEXT
+ kafka.ssl.keystore.location:
+ kafka.ssl.keystore.password:
+ kafka.ssl.truststore.location:
+ kafka.ssl.truststore.password:
+ kafka.ssl.key.password:
+ kafka.sasl.mechanism: PLAIN
+ kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
+ kafka.buffer.memory:
+ kafka.group.id: etl_voip_record_kafka_to_clickhouse-20231221
+ kafka.auto.offset.reset: latest
+ kafka.max.request.size:
+ kafka.compression.type: none
+ format: json
+
+processing_pipelines:
+ etl_processor: # [object] Processing Pipeline
+ type: com.geedgenetworks.core.processor.projection.ProjectionProcessorImpl
+ remove_fields:
+ output_fields:
+ properties:
+ key: value
+ functions: # [array of object] Function List
+
+ - function: SNOWFLAKE_ID
+ lookup_fields: ['']
+ output_fields: [log_id]
+ parameters:
+ data_center_id_num: 1
+
+ - function: JSON_EXTRACT
+ lookup_fields: [device_tag]
+ output_fields: [data_center]
+ filter:
+ parameters:
+ value_expression: $.tags[?(@.tag=='data_center')][0].value
+
+ - function: JSON_EXTRACT
+ lookup_fields: [device_tag]
+ output_fields: [device_group]
+ filter:
+ parameters:
+ value_expression: $.tags[?(@.tag=='device_group')][0].value
+
+ - function: CURRENT_UNIX_TIMESTAMP
+ output_fields: [processing_time]
+ parameters:
+ precision: seconds
+
+ - function: UNIX_TIMESTAMP_CONVERTER
+ lookup_fields: [__timestamp]
+ output_fields: [recv_time]
+ parameters:
+ precision: seconds
+
+ - function: EVAL
+ output_fields: [ingestion_time]
+ parameters:
+ value_expression: recv_time
+
+ - function: BASE64_DECODE_TO_STRING
+ output_fields: [mail_subject]
+ parameters:
+ value_field: mail_subject
+ charset_field: mail_subject_charset
+
+ - function: BASE64_DECODE_TO_STRING
+ output_fields: [mail_attachment_name]
+ parameters:
+ value_field: mail_attachment_name
+ charset_field: mail_attachment_name_charset
+
+ - function: PATH_COMBINE
+ lookup_fields: [rtp_pcap_path]
+ output_fields: [rtp_pcap_path]
+ parameters:
+ path: [props.hos.path, props.hos.bucket.name.traffic_file, rtp_pcap_path]
+
+ - function: PATH_COMBINE
+ lookup_fields: [http_request_body]
+ output_fields: [http_request_body]
+ parameters:
+ path: [props.hos.path, props.hos.bucket.name.traffic_file, http_request_body]
+
+ - function: PATH_COMBINE
+ lookup_fields: [http_response_body]
+ output_fields: [http_response_body]
+ parameters:
+ path: [props.hos.path, props.hos.bucket.name.traffic_file, http_response_body]
+
+ - function: PATH_COMBINE
+ lookup_fields: [mail_eml_file]
+ output_fields: [mail_eml_file]
+ parameters:
+ path: [props.hos.path, props.hos.bucket.name.traffic_file, mail_eml_file]
+
+ - function: PATH_COMBINE
+ lookup_fields: [packet_capture_file]
+ output_fields: [packet_capture_file]
+ parameters:
+ path: [props.hos.path, props.hos.bucket.name.traffic_file, packet_capture_file]
+
+sinks:
+ clickhouse_sink:
+ type: clickhouse
+ properties:
+ host: "{{ clickhouse_servers }}"
+ table: tsg_galaxy_v3.voip_record_local
+ batch.size: 100000
+ batch.interval: 30s
+ connection.user: e54c9568586180eede1506eecf3574e9
+ connection.password: 86cf0e2ffba3f541a6c6761313e5cc7e
+
+
+application:
+
+ env: # [object] Environment Variables
+ name: etl_voip_record_kafka_to_clickhouse # [string] Job Name
+ shade.identifier: aes
+ pipeline:
+ object-reuse: true # [boolean] Object Reuse, default is false
+ topology:
+ - name: kafka_source
+ downstream: [etl_processor]
+ - name: etl_processor
+ downstream: [clickhouse_sink]
+ - name: clickhouse_sink
+
+
diff --git a/groot-stream/templates/realtime_log_streaming_cn_session_record.yaml.template b/groot-stream/templates/realtime_log_streaming_cn_session_record.yaml.template
new file mode 100644
index 0000000..3e9db4e
--- /dev/null
+++ b/groot-stream/templates/realtime_log_streaming_cn_session_record.yaml.template
@@ -0,0 +1,387 @@
+sources:
+ kafka_source:
+ type: kafka
+ properties:
+ topic: {{ kafka_source_topic }}
+ kafka.bootstrap.servers: {{ kafka_source_bootstrap_servers }}
+ kafka.client.id: {{ kafka_source_topic }}
+ kafka.session.timeout.ms: 60000
+ kafka.max.poll.records: 3000
+ kafka.max.partition.fetch.bytes: 31457280
+ kafka.security.protocol: SASL_PLAINTEXT
+ kafka.sasl.mechanism: PLAIN
+ kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
+ kafka.group.id: {{ kafka_source_group_id }}
+ kafka.auto.offset.reset: latest
+ format: json
+ json.ignore.parse.errors: false
+
+
+processing_pipelines:
+ etl_processor:
+ type: com.geedgenetworks.core.processor.projection.ProjectionProcessorImpl
+ functions:
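+ # A datacenter-local snowflake ID and the Kafka receive timestamp are generated first,
+ # but only used as fallbacks when the incoming record carries no log_id or recv_time.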
+ - function: SNOWFLAKE_ID
+ lookup_fields: [ '' ]
+ output_fields: [ cn_log_id ]
+ parameters:
+ data_center_id_num: 1
+
+ - function: EVAL
+ output_fields: [ log_id ]
+ parameters:
+ value_expression: "log_id == null ? cn_log_id : log_id"
+
+ - function: UNIX_TIMESTAMP_CONVERTER
+ lookup_fields: [ __timestamp ]
+ output_fields: [ kafka_recv_time ]
+ parameters:
+ precision: seconds
+
+ - function: EVAL
+ output_fields: [ recv_time ]
+ parameters:
+ value_expression: "recv_time == null ? kafka_recv_time : recv_time"
+
+ - function: EVAL
+ output_fields: [ domain ]
+ parameters:
+ value_expression: server_fqdn
+
+ - function: EVAL
+ output_fields: [ domain_sld ]
+ parameters:
+ value_expression: server_domain
+
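+ # Derives l7_protocol, app and app_transition from the decoded_path; the l7_protocol
+ # parameter lists the protocol names recognised as layer-7 protocols (behaviour
+ # inferred from the parameter names).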
+ - function: CN_L7_PROTOCOL_AND_APP_EXTRACT
+ parameters:
+ decoded_path_field_name: decoded_path
+ app_transition_field_name: app_transition
+ l7_protocol_field_name: l7_protocol
+ app_field_name: app
+ l7_protocol: DHCP,DNS,FTP,GRE,GTP,HTTP,HTTPS,ICMP,IMAP,IMAPS,IPSEC,ISAKMP,XMPP,L2TP,LDAP,MMS,NETBIOS,NETFLOW,NTP,POP3,POP3S,RDP,PPTP,RADIUS,RTCP,RTP,RTSP,SIP,SMB,SMTP,SMTPS,SNMP,SSDP,SSH,SSL,STUN,TELNET,TFTP,OPENVPN,RTMP,TEREDO,FTPS,DTLS,SPDY,BJNP,QUIC,MDNS,Unknown TCP,Unknown UDP,Unknown Other,IKE,MAIL,SOCKS,DoH,SLP,SSL with ESNI,ISATAP,Stratum,SSL with ECH
+
+ - function: GEOIP_LOOKUP
+ lookup_fields: [ client_ip ]
+ output_fields: [ ]
+ parameters:
+ kb_name: cn_ip_location
+ option: IP_TO_OBJECT
+ geolocation_field_mapping:
+ COUNTRY: client_country_region
+ PROVINCE: client_super_admin_area
+ CITY: client_admin_area
+ LONGITUDE: client_longitude
+ LATITUDE: client_latitude
+ ISP: client_isp
+
+ - function: GEOIP_LOOKUP
+ lookup_fields: [ server_ip ]
+ output_fields: [ ]
+ parameters:
+ kb_name: cn_ip_location
+ option: IP_TO_OBJECT
+ geolocation_field_mapping:
+ COUNTRY: server_country_region
+ PROVINCE: server_super_admin_area
+ CITY: server_admin_area
+ LONGITUDE: server_longitude
+ LATITUDE: server_latitude
+ ISP: server_isp
+
+ - function: ASN_LOOKUP
+ lookup_fields: [ client_ip ]
+ output_fields: [ client_asn ]
+ parameters:
+ option: IP_TO_ASN
+ kb_name: cn_ip_asn
+
+ - function: ASN_LOOKUP
+ lookup_fields: [ server_ip ]
+ output_fields: [ server_asn ]
+ parameters:
+ option: IP_TO_ASN
+ kb_name: cn_ip_asn
+
+ - function: CN_IDC_RENTER_LOOKUP
+ lookup_fields: [ client_ip ]
+ output_fields: [ client_idc_renter ]
+ parameters:
+ kb_name: cn_idc_renter
+
+ - function: CN_IDC_RENTER_LOOKUP
+ lookup_fields: [ server_ip ]
+ output_fields: [ server_idc_renter ]
+ parameters:
+ kb_name: cn_idc_renter
+
+ - function: CN_LINK_DIRECTION_LOOKUP
+ lookup_fields: [ in_link_id ]
+ output_fields: [ in_link_direction ]
+ parameters:
+ kb_name: cn_link_direction
+
+ - function: CN_LINK_DIRECTION_LOOKUP
+ lookup_fields: [ out_link_id ]
+ output_fields: [ out_link_direction ]
+ parameters:
+ kb_name: cn_link_direction
+
+ - function: CN_FQDN_CATEGORY_LOOKUP
+ lookup_fields: [ domain ]
+ parameters:
+ kb_name: cn_fqdn_category
+ field_mapping:
+ NAME: domain_category_name
+ GROUP: domain_category_group
+ REPUTATION_LEVEL: domain_reputation_level
+
+ - function: CN_ICP_LOOKUP
+ lookup_fields: [ domain ]
+ output_fields: [ domain_icp_company_name ]
+ parameters:
+ kb_name: cn_fqdn_icp
+
+ - function: CN_FQDN_WHOIS_LOOKUP
+ lookup_fields: [ domain ]
+ output_fields: [ domain_whois_org ]
+ parameters:
+ kb_name: cn_fqdn_whois
+
+ - function: CN_DNS_SERVER_LOOKUP
+ lookup_fields: [ server_ip ]
+ output_fields: [ server_dns_server ]
+ parameters:
+ kb_name: cn_dns_server
+
+ - function: CN_APP_CATEGORY_LOOKUP
+ lookup_fields: [ app ]
+ parameters:
+ kb_name: cn_app_category
+ field_mapping:
+ CATEGORY: app_category
+ SUBCATEGORY: app_subcategory
+ COMPANY: app_company
+ COMPANY_CATEGORY: app_company_category
+
+ - function: EVAL
+ output_fields: [ client_zone ]
+ parameters:
+ value_expression: "flags & 8 == 8 ? 'internal' : 'external'"
+
+ - function: EVAL
+ output_fields: [ server_zone ]
+ parameters:
+ value_expression: "flags & 16 == 16 ? 'internal' : 'external'"
+
+ - function: CN_IP_ZONE_LOOKUP
+ lookup_fields: [ client_ip ]
+ output_fields: [ client_zone ]
+ parameters:
+ kb_name: none
+ #kb_name: cn_internal_ip
+
+ - function: CN_IP_ZONE_LOOKUP
+ lookup_fields: [ server_ip ]
+ output_fields: [ server_zone ]
+ parameters:
+ kb_name: none
+ #kb_name: cn_internal_ip
+
+ - function: EVAL
+ output_fields: [ sent_bytes ]
+ parameters:
+ value_expression: "sent_bytes == null ? 0 : sent_bytes"
+
+ - function: EVAL
+ output_fields: [ sent_pkts ]
+ parameters:
+ value_expression: "sent_pkts == null ? 0 : sent_pkts"
+
+ - function: EVAL
+ output_fields: [ received_bytes ]
+ parameters:
+ value_expression: "received_bytes == null ? 0 : received_bytes"
+
+ - function: EVAL
+ output_fields: [ received_pkts ]
+ parameters:
+ value_expression: "received_pkts == null ? 0 : received_pkts"
+
+ - function: EVAL
+ output_fields: [ traffic_inbound_byte ]
+ parameters:
+ value_expression: "client_zone == 'internal' && server_zone == 'external' ? received_bytes : traffic_inbound_byte"
+
+ - function: EVAL
+ output_fields: [ traffic_outbound_byte ]
+ parameters:
+ value_expression: "client_zone == 'external' && server_zone == 'internal' ? received_bytes : traffic_outbound_byte"
+
+ - function: EVAL
+ output_fields: [ traffic_inbound_pkt ]
+ parameters:
+ value_expression: "client_zone == 'internal' && server_zone == 'external' ? received_pkts : traffic_inbound_pkt"
+
+ - function: EVAL
+ output_fields: [ traffic_outbound_pkt ]
+ parameters:
+ value_expression: "client_zone == 'external' && server_zone == 'internal' ? received_pkts : traffic_outbound_pkt"
+
+ - function: EVAL
+ output_fields: [ traffic_outbound_byte ]
+ parameters:
+ value_expression: "client_zone == 'internal' && server_zone == 'external' ? sent_bytes : traffic_outbound_byte"
+
+ - function: EVAL
+ output_fields: [ traffic_inbound_byte ]
+ parameters:
+ value_expression: "client_zone == 'external' && server_zone == 'internal' ? sent_bytes : traffic_inbound_byte"
+
+ - function: EVAL
+ output_fields: [ traffic_outbound_pkt ]
+ parameters:
+ value_expression: "client_zone == 'internal' && server_zone == 'external' ? sent_pkts : traffic_outbound_pkt"
+
+ - function: EVAL
+ output_fields: [ traffic_inbound_pkt ]
+ parameters:
+ value_expression: "client_zone == 'external' && server_zone == 'internal' ? sent_pkts : traffic_inbound_pkt"
+
+ - function: EVAL
+ output_fields: [ traffic_internal_byte ]
+ parameters:
+ value_expression: "client_zone == 'internal' && server_zone == 'internal' ? sent_bytes + received_bytes : traffic_internal_byte"
+
+ - function: EVAL
+ output_fields: [ traffic_internal_pkt ]
+ parameters:
+ value_expression: "client_zone == 'internal' && server_zone == 'internal' ? sent_pkts + received_pkts : traffic_internal_pkt"
+
+ - function: EVAL
+ output_fields: [ traffic_through_byte ]
+ parameters:
+ value_expression: "client_zone == 'external' && server_zone == 'external' ? sent_bytes + received_bytes : traffic_through_byte"
+
+ - function: EVAL
+ output_fields: [ traffic_through_pkt ]
+ parameters:
+ value_expression: "client_zone == 'external' && server_zone == 'external' ? sent_pkts + received_pkts : traffic_through_pkt"
+
+ - function: EVAL
+ output_fields: [ sessions ]
+ parameters:
+ value_expression: "1"
+
+ - function: EVAL
+ output_fields: [ internal_query_num ]
+ parameters:
+ value_expression: "client_zone == 'internal' ? sessions : internal_query_num"
+
+ - function: EVAL
+ output_fields: [ external_query_num ]
+ parameters:
+ value_expression: "client_zone == 'external' ? sessions : external_query_num"
+
+ - function: CN_ANONYMITY_LOOKUP
+ lookup_fields: [ server_ip ]
+ output_fields: [ server_node_type ]
+ parameters:
+ kb_name: cn_ioc_darkweb
+ option: IP_TO_NODE_TYPE
+
+ - function: CN_ANONYMITY_LOOKUP
+ lookup_fields: [ domain ]
+ output_fields: [ domain_node_type ]
+ parameters:
+ kb_name: cn_ioc_darkweb
+ option: DOMAIN_TO_NODE_TYPE
+
+ - function: CN_IOC_LOOKUP
+ lookup_fields: [ server_ip ]
+ output_fields: [ server_malware ]
+ parameters:
+ kb_name: cn_ioc_malware
+ option: IP_TO_MALWARE
+
+ - function: CN_IOC_LOOKUP
+ lookup_fields: [ domain ]
+ output_fields: [ domain_malware ]
+ parameters:
+ kb_name: cn_ioc_malware
+ option: DOMAIN_TO_MALWARE
+
+ - function: CN_INTELLIGENCE_INDICATOR_LOOKUP
+ lookup_fields: [ client_ip ]
+ output_fields: [ client_ip_tags ]
+ parameters:
+ kb_name: cn_intelligence_indicator
+ option: IP_TO_TAG
+
+ - function: CN_INTELLIGENCE_INDICATOR_LOOKUP
+ lookup_fields: [ server_ip ]
+ output_fields: [ server_ip_tags ]
+ parameters:
+ kb_name: cn_intelligence_indicator
+ option: IP_TO_TAG
+
+ - function: CN_INTELLIGENCE_INDICATOR_LOOKUP
+ lookup_fields: [ domain ]
+ output_fields: [ domain_tags ]
+ parameters:
+ kb_name: cn_intelligence_indicator
+ option: DOMAIN_TO_TAG
+
+ - function: GENERATE_STRING_ARRAY
+ lookup_fields: [ client_idc_renter,client_ip_tags ]
+ output_fields: [ client_ip_tags ]
+
+ - function: GENERATE_STRING_ARRAY
+ lookup_fields: [ server_idc_renter,server_dns_server,server_node_type,server_malware,server_ip_tags ]
+ output_fields: [ server_ip_tags ]
+
+ - function: GENERATE_STRING_ARRAY
+ lookup_fields: [ domain_node_type,domain_malware,domain_tags ]
+ output_fields: [ domain_tags ]
+
+postprocessing_pipelines:
+ post_output_field_processor:
+ type: com.geedgenetworks.core.processor.projection.ProjectionProcessorImpl
+ output_fields: [ recv_time,log_id,flags,start_timestamp_ms,end_timestamp_ms,duration_ms,decoded_as,client_ip,server_ip,client_port,server_port,app,app_transition,decoded_path,ip_protocol,l7_protocol,out_link_id,in_link_id,subscriber_id,imei,imsi,phone_number,apn,http_url,dns_rcode,dns_qname,dns_qtype,dns_rr,out_link_direction,in_link_direction,server_fqdn,server_domain,domain,domain_sld,domain_category_name,domain_category_group,domain_reputation_level,domain_icp_company_name,domain_whois_org,domain_tags,client_zone,client_country_region,client_super_admin_area,client_admin_area,client_longitude,client_latitude,client_isp,client_asn,client_ip_tags,server_zone,server_country_region,server_super_admin_area,server_admin_area,server_longitude,server_latitude,server_isp,server_asn,server_ip_tags,app_category,app_subcategory,app_company,app_company_category,app_tags,sent_pkts,sent_bytes,received_pkts,received_bytes,sessions,tcp_c2s_lost_bytes,tcp_s2c_lost_bytes,tcp_c2s_o3_pkts,tcp_s2c_o3_pkts,tcp_c2s_rtx_bytes,tcp_s2c_rtx_bytes,tcp_c2s_rtx_pkts,tcp_s2c_rtx_pkts,tcp_rtt_ms,http_response_latency_ms,ssl_handshake_latency_ms,dns_response_latency_ms,cn_internal_rule_id_list,cn_internal_ioc_type_list,traffic_inbound_byte,traffic_inbound_pkt,traffic_outbound_byte,traffic_outbound_pkt,traffic_internal_byte,traffic_internal_pkt,traffic_through_byte,traffic_through_pkt,internal_query_num,external_query_num ]
+
+sinks:
+ kafka_sink:
+ type: kafka
+ properties:
+ topic: {{ kafka_sink_topic }}
+ kafka.bootstrap.servers: {{ kafka_sink_bootstrap_servers }}
+ kafka.client.id: {{ kafka_sink_topic }}
+ kafka.retries: 0
+ kafka.linger.ms: 10
+ kafka.request.timeout.ms: 30000
+ kafka.batch.size: 262144
+ kafka.buffer.memory: 134217728
+ kafka.max.request.size: 10485760
+ kafka.compression.type: snappy
+ kafka.security.protocol: SASL_PLAINTEXT
+ kafka.sasl.mechanism: PLAIN
+ kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
+ format: json
+ json.ignore.parse.errors: false
+ log.failures.only: true
+
+application:
+ env:
+ name: {{ job_name }}
+ shade.identifier: aes
+ pipeline:
+ object-reuse: true
+ topology:
+ - name: kafka_source
+ downstream: [ etl_processor ]
+ - name: etl_processor
+ downstream: [ post_output_field_processor ]
+ - name: post_output_field_processor
+ downstream: [ kafka_sink ]
+ - name: kafka_sink
+ downstream: [ ]
diff --git a/hbase/tsg_olap_hbase_ddl.sql b/hbase/001_create_tsg_olap_hbase_table.sql
index fed3ece..fed3ece 100644
--- a/hbase/tsg_olap_hbase_ddl.sql
+++ b/hbase/001_create_tsg_olap_hbase_table.sql
diff --git a/hbase/tsg_olap_phoenix_ddl.sql b/hbase/002_create_tsg_olap_hbase_phoenix_table.sql
index 41006e4..41006e4 100644
--- a/hbase/tsg_olap_phoenix_ddl.sql
+++ b/hbase/002_create_tsg_olap_hbase_phoenix_table.sql
diff --git a/hbase/update_hbase.sh b/hbase/101_upgrade_v2408_to_v2409_tsg_olap_hbase_table.sh
index 54bfaae..54bfaae 100644
--- a/hbase/update_hbase.sh
+++ b/hbase/101_upgrade_v2408_to_v2409_tsg_olap_hbase_table.sh
diff --git a/hos/create_bucket.sh b/hos/001_create_tsg_olap_hos_bucket.sh
index 0473632..0473632 100644
--- a/hos/create_bucket.sh
+++ b/hos/001_create_tsg_olap_hos_bucket.sh
diff --git a/hos/bucket_upgrade.sh b/hos/002_upgrade_tsg_olap_hos_bucket.sh
index 740b4da..740b4da 100644
--- a/hos/bucket_upgrade.sh
+++ b/hos/002_upgrade_tsg_olap_hos_bucket.sh
diff --git a/hos/galaxy-hos-service-24.09.yml b/hos/galaxy-hos-service-24.09.yml
deleted file mode 100644
index b4ce78f..0000000
--- a/hos/galaxy-hos-service-24.09.yml
+++ /dev/null
@@ -1,97 +0,0 @@
-# Server port
-server:
- port: 8186
- max-http-header-size: 20MB
- tomcat:
- max-threads: 400
-# Tomcat cache size in KB; system default is 10M, configured here as 10G
-tomcat:
- cacheMaxSize: 1000000
-# HBase parameters
-hbase:
- zookeeperQuorum: 192.168.44.11:2181,192.168.44.14:2181,192.168.44.15:2181
- zookeeperPort: 2181
- zookeeperNodeParent: /hbase
- clientRetriesNumber: 9
- rpcTimeout: 100000
- connectPool: 10
- clientWriteBuffer: 10485760
- clientKeyValueMaxsize: 1073741824
- mobThreshold: 10485760
- # Maximum number of parts
- maxParts: 100000
- # Number of parts fetched per request
- getPartBatch: 10
- #HBase index table prefixes; any table with one of the prefixes below is an index table
- timeIndexTablePrefix: index_time_
- filenameIndexTablePrefix: index_filename_
- partFileIndexTablePrefix: index_partfile_
- systemBucketMeta: system:bucket_meta
- #Number of regions when creating a table
- regionCount: 16
- filenameHead: 0,1,2,3,4,5,6,7,8,9,a,b,c,d,e,f
- partHead: 0,1,2,3,4,5,6,7,8,9,a,b,c,d,e,f
- #Directory used when retrieving file sizes
- dataPath: /hbase
- #Hadoop cluster NameNode hosts; a single IP for standalone, ip1,ip2 for a cluster
- hadoopNameNodes: 192.168.44.10,192.168.44.11
- #Replication factor; 1 for standalone, 2 for a cluster
- hadoopReplication: 2
- #Hadoop port
- hadoopPort: 9000
- hadoopUser: root
- hadoopNameServices: ns1
- hadoopNameNodesNs1: nn1,nn2
- asyncPut: 0
-#Whether authentication is enabled; 0 enables it, and when enabled the service must be accessed with S3 credentials or a token
-auth:
- open: 0
- #Token used for HTTP access
- token: ENC(vknRT6U4I739rLIha9CvojM+4uFyXZLEYpO2HZayLnRak1HPW0K2yZ3vnQBA2foo)
- #S3 credentials
- s3:
- accesskey: ENC(FUQDvVP+zqCiwHQhXcRvbw==)
- secretkey: ENC(FUQDvVP+zqCiwHQhXcRvbw==)
-hos:
- #File size threshold
- maxFileSize: 5073741800
- #Large-file threshold
- uploadThreshold: 104857600
- #Keep-alive connection timeout
- keepAliveTimeout: 60000
- #Maximum number of objects in a batch delete
- deleteMultipleNumber: 1000
- #Maximum result count for list-objects and similar operations
- maxResultLimit: 100000
- #Maximum number of parts in a multipart upload
- maxPartNumber: 10000
- #Maximum number of append uploads
- maxAppendNumber: 100000
- #Whether quick upload is enabled
- isQuickUpload: 0
- #Whether quick file download is enabled; 1 enables it, set to 0 on clusters with less than 20G of HBase memory
- isQuickDownloadFile: 0
- #User whitelist (HBase namespaces) used when fetching storage quotas
- users: default
- #Whether rate limiting is enabled; 0: off, 1: on
- openRateLimiter: 0
- #Rate limit in requests per second
- rateLimiterQps: 20000
-#Maximum upload file size
-spring:
- servlet:
- multipart:
- max-file-size: 5GB
- max-request-size: 5GB
-#Prometheus parameters
- application:
- name: HosServiceApplication
-#Prometheus parameters
-management:
- endpoints:
- web:
- exposure:
- include: '*'
- metrics:
- tags:
- application: ${spring.application.name} \ No newline at end of file
diff --git a/hos/hosutil/config.properties b/hos/hosutil/config.properties
deleted file mode 100644
index fc486bf..0000000
--- a/hos/hosutil/config.properties
+++ /dev/null
@@ -1,21 +0,0 @@
-qgw.serverAddr=http://{{ vrrp_instance.default.virtual_ipaddress }}:9999
-hos.serverAddr=http://{{ vrrp_instance.oss.virtual_ipaddress }}:9098
-hos.token={{ hos_token }}
-kafka.server={{ groups.kafka[0] }}:9092
-#Delay in seconds; only files older than this many seconds are checked
-check.time.delay=180
-hos.traffic.buckets=traffic_policy_capture_file_bucket,traffic_rtp_file_bucket,traffic_http_file_bucket,traffic_eml_file_bucket
-kafka.traffic.topics=TRAFFIC-POLICY-CAPTURE-FILE-STREAM-RECORD,TRAFFIC-RTP-FILE-STREAM-RECORD,TRAFFIC-HTTP-FILE-STREAM-RECORD,TRAFFIC-EML-FILE-STREAM-RECORD
-kafka.troubleshooting.topic=TROUBLESHOOTING-FILE-STREAM-RECORD
-file.chunk.combiner.window.time=15000
-traffic.file.count=10
-threads=1
-max.threads=10
-print.out.interval=1000
-http.max.total=100
-http.default.max.per.route=100
-http.connect.timeout=5000
-http.connection.request.timeout=10000
-http.socket.timeout=-1
-hos.log.types=security_event,monitor_event,proxy_event,session_record,voip_record,assessment_event,transaction_record,troubleshooting
-hos.log.types.file.types.url.fields=security_event:http-http_response_body&http_request_body,pcap-packet_capture_file&rtp_pcap_path,eml-mail_eml_file;proxy_event:http-http_response_body&http_request_body;session_record:http-http_response_body&http_request_body,pcap-packet_capture_file&rtp_pcap_path,eml-mail_eml_file;voip_record:pcap-rtp_pcap_path;assessment_event:other-assessment_file;transaction_record:http-http_response_body&http_request_body,eml-mail_eml_file;monitor_event:http-http_response_body&http_request_body,pcap-packet_capture_file&rtp_pcap_path,eml-mail_eml_file \ No newline at end of file
diff --git a/hos/hosutil/galaxy-hos-util-1.4.jar b/hos/hosutil/galaxy-hos-util-1.4.jar
deleted file mode 100644
index 9b05a71..0000000
--- a/hos/hosutil/galaxy-hos-util-1.4.jar
+++ /dev/null
Binary files differ
diff --git a/hos/hosutil/hosutil.sh b/hos/hosutil/hosutil.sh
deleted file mode 100644
index e74c7ff..0000000
--- a/hos/hosutil/hosutil.sh
+++ /dev/null
@@ -1,138 +0,0 @@
-#!/bin/bash
-
-version="1.4"
-jar="galaxy-hos-util-$version.jar"
-
-usage() {
- cat <<EOF
-
-Usage: ./hosutil.sh [command] [-h] [options...]
-
-Available commands:
- download Download individual or batch files
- upload Upload individual or batch files
- check Check file availability
- combiner Verify if the file-chunk-combiner data stream is correct
- version Print the version
-
-Options for 'download' command:
- -b, --bucket The bucket to access.
- -d, --directory Directory to save files. If not exists, will be created. Default is ./download/.
- -k, --keys Files to download. Can be a single or multiple files separated by commas.
- -p, --prefix Prefix for batch downloading files based on file name.
- -s, --start_time Start time in UTC format (yyyyMMdd, yyyy-MM-dd, yyyyMMddHHmmss). Default is the previous day's time.
- -e, --end_time End time in UTC format (yyyyMMdd, yyyy-MM-dd, yyyyMMddHHmmss). Default is current time.
- -c, --count Number of files to download. Default is 1000, maximum is 100000.
- -t, --threads Number of threads. Default is 1, maximum is 10.
-
-Options for 'upload' command:
- -b, --bucket The bucket to access.
- -d, --directory Directory where files to upload are located. Default is ./upload/.
- -t, --threads Number of threads. Default is 1, maximum is 10.
-
-Options for 'check' command:
- -s, --start_time Start time in UTC format (yyyyMMdd, yyyy-MM-dd, yyyyMMddHHmmss). Default is the previous day's time.
- -e, --end_time End time in UTC format (yyyyMMdd, yyyy-MM-dd, yyyyMMddHHmmss). Default is current time.
- -c, --count Number of logs to evaluate. Default is 1000, maximum is 100000.
- -d, --data_center Specify the data centers to evaluate, separated by commas. If not specified, all data centers are evaluated.
- -l, --log_type Specify the logs to evaluate, separated by commas. If not specified, all logs are evaluated.
- Supported logs: security_event, monitor_event, proxy_event, session_record, voip_record, assessment_event, transaction_record, troubleshooting.
- -f, --file_type Specify file types. If not specified, all types are evaluated. Supported types: eml, http, pcap, other.
- Only session_record, security_event, monitor_event, transaction_record support multiple types.
- -t, --threads Number of threads. Default is 1, maximum is 10.
-
-Options for 'combiner' command:
- -j, --job Job to verify. Options: traffic, troubleshooting. Default is traffic. (The troubleshooting job was removed in version 24.05.)
-
-EOF
-}
-
-# Initialize default values
-bucket=""
-directory=""
-keys=""
-prefix=""
-start_time=""
-end_time=""
-count=1000
-threads=1
-log_type=""
-file_type=""
-data_center=""
-job_name="traffic"
-
-# Check required parameters
-check_required() {
- case "$operation" in
- download|upload)
- if [ -z "$bucket" ]; then
- echo "Error: bucket is required for $operation."
- exit 1
- fi
- ;;
- *)
- # Other operations do not require specific parameter checks
- ;;
- esac
-}
-
-# Download function
-download() {
- directory=${directory:-"./download/"}
- check_required
- java -jar $jar download $bucket $directory keys=$keys prefix=$prefix max_keys=$count time_range=$start_time/$end_time thread_num=$threads
-}
-
-# Upload function
-upload() {
- directory=${directory:-"./upload/"}
- check_required
- java -jar $jar upload $bucket $directory thread_num=$threads
-}
-
-# Check function
-check() {
- java -jar $jar check data_center=$data_center log_type=$log_type file_type=$file_type max_logs=$count time_range=$start_time/$end_time thread_num=$threads
-}
-
-# Combiner function
-combiner() {
- java -jar $jar combiner $job_name
-}
-
-# Main flow
-if [ $# -eq 0 ];then
- usage
- exit 0
-fi
-
-operation=$1
-shift
-while getopts ":hb:d:k:p:s:e:c:t:l:f:j:" opt; do
- case $opt in
- h) usage; exit 0 ;;
- b) bucket=$OPTARG ;;
- d) if [ "$operation" == "check" ]; then data_center=$OPTARG; else directory=$OPTARG; fi ;;
- k) keys=$OPTARG ;;
- p) prefix=$OPTARG ;;
- s) start_time=$OPTARG ;;
- e) end_time=$OPTARG ;;
- c) count=$OPTARG ;;
- t) threads=$OPTARG ;;
- l) log_type=$OPTARG ;;
- f) file_type=$OPTARG ;;
- j) job_name=$OPTARG ;;
- \?) echo "Invalid option: -$OPTARG" >&2; usage; exit 1 ;;
- :) echo "Option -$OPTARG requires an argument" >&2; usage; exit 1 ;;
- esac
-done
-
-case "$operation" in
- download) download ;;
- upload) upload ;;
- check) check ;;
- combiner) combiner ;;
- version) echo $version ;;
- *) usage; exit 1 ;;
-esac
-
diff --git a/mariadb/README.md b/mariadb/README.md
index e69de29..4dfe1c6 100644
--- a/mariadb/README.md
+++ b/mariadb/README.md
@@ -0,0 +1 @@
+MariaDB data initialization scripts \ No newline at end of file
diff --git a/mariadb/galaxy-qgw-service/init_saved_query_job.sql b/mariadb/galaxy-qgw-service/V23.12__init_saved_query_job.sql
index b0c7ca1..b0c7ca1 100644
--- a/mariadb/galaxy-qgw-service/init_saved_query_job.sql
+++ b/mariadb/galaxy-qgw-service/V23.12__init_saved_query_job.sql
diff --git a/mariadb/galaxy-qgw-service/init_sys_storage_event.sql b/mariadb/galaxy-qgw-service/V24.07__init_sys_storage_event.sql
index af20799..af20799 100644
--- a/mariadb/galaxy-qgw-service/init_sys_storage_event.sql
+++ b/mariadb/galaxy-qgw-service/V24.07__init_sys_storage_event.sql
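
The new V<version>__<description>.sql names match the Flyway versioned-migration convention; whether these files are actually applied through Flyway is an assumption here. A minimal bash sketch of applying them with the Flyway CLI, using placeholder connection details:

# Hedged sketch: apply the renamed SQL files as Flyway versioned migrations.
# URL, credentials and database name are placeholders, not repository values.
flyway -url="jdbc:mariadb://127.0.0.1:3306/galaxy_qgw" \
       -user=root \
       -password='change_me' \
       -locations="filesystem:mariadb/galaxy-qgw-service" \
       migrate
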
diff --git a/shell-scripts/README.md b/shell-scripts/README.md
index e69de29..15f8338 100644
--- a/shell-scripts/README.md
+++ b/shell-scripts/README.md
@@ -0,0 +1 @@
+Management of global installation scripts \ No newline at end of file