summaryrefslogtreecommitdiff
path: root/docs
diff options
context:
space:
mode:
authordoufenghu <[email protected]>2024-01-23 21:37:12 +0800
committerdoufenghu <[email protected]>2024-01-23 21:37:12 +0800
commit85ed9c9c8a2cba8268ab7facf169fa2eae9cdea1 (patch)
tree0a7d868bf03e4f920e236f837c62aafef7ed9f56 /docs
parent9c87c6d19b2faaa043a3906062db46eedc082ce8 (diff)
[Improve][docs] Add data formats: json, protobuf.
Diffstat (limited to 'docs')
-rw-r--r--docs/connector/formats/json.md141
-rw-r--r--docs/connector/formats/protobuf.md301
2 files changed, 442 insertions, 0 deletions
diff --git a/docs/connector/formats/json.md b/docs/connector/formats/json.md
new file mode 100644
index 0000000..8be2959
--- /dev/null
+++ b/docs/connector/formats/json.md
@@ -0,0 +1,141 @@
+# JSON
+> Format JSON
+## Description
+Event serialization and deserialization format.
+
+| Name | Supported Versions | Maven |
+|-------------|--------------------|----------------------------------------------------------------------------------------------------------------------------|
+| Format JSON | Universal | [Download](http://192.168.40.153:8099/service/local/repositories/platform-release/content/com/geedgenetworks/format-json/) |
+
+## Format Options
+
+| Name | Type | Required | Default | Description |
+|---------------------------|----------|----------|---------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| format | String | Yes | - | The format to use; must be 'json' here. |
+| json.ignore.parse.errors | Boolean | No | true | Skip fields and rows with parse errors instead of failing. Fields are set to null in case of errors. |
+
+# How to use
+## Inline usage example
+```json
+{
+ "recv_time": 1705565615,
+ "log_id": 206211012872372220,
+ "tcp_rtt_ms": 128,
+ "decoded_as": "HTTP",
+ "http_version": "http1",
+ "http_request_line": "GET / HTTP/1.1",
+ "http_host": "www.ct.cn",
+ "http_url": "www.ct.cn/",
+ "http_user_agent": "curl/8.0.1",
+ "http_status_code": 200,
+ "http_response_line": "HTTP/1.1 200 OK",
+ "http_response_content_type": "text/html; charset=UTF-8",
+ "http_response_latency_ms": 31,
+ "http_session_duration_ms": 5451,
+ "in_src_mac": "ba:bb:a7:3c:67:1c",
+ "in_dest_mac": "86:dd:7a:8f:ae:e2",
+ "out_src_mac": "86:dd:7a:8f:ae:e2",
+ "out_dest_mac": "ba:bb:a7:3c:67:1c",
+ "tcp_client_isn": 678677906,
+ "tcp_server_isn": 1006700307,
+ "address_type": 4,
+ "client_ip": "192.11.22.22",
+ "server_ip": "8.8.8.8",
+ "client_port": 42751,
+ "server_port": 80,
+ "in_link_id": 65535,
+ "out_link_id": 65535,
+ "start_timestamp_ms": 1703646546127,
+ "end_timestamp_ms": 1703646551702,
+ "duration_ms": 5575,
+ "sent_pkts": 97,
+ "sent_bytes": 5892,
+ "received_pkts": 250,
+ "received_bytes": 333931,
+ "tcp_c2s_ip_fragments": 0,
+ "tcp_s2c_ip_fragments": 0,
+ "tcp_c2s_rtx_pkts": 0,
+ "tcp_c2s_rtx_bytes": 0,
+ "tcp_s2c_rtx_pkts": 0,
+ "tcp_s2c_rtx_bytes": 0,
+ "tcp_c2s_o3_pkts": 0,
+ "tcp_s2c_o3_pkts": 0,
+ "tcp_c2s_lost_bytes": 0,
+ "tcp_s2c_lost_bytes": 0,
+ "flags": 26418,
+ "flags_identify_info": [100, 1, 100, 60, 150, 100, 1, 2],
+ "app_transition": "http.1111.test_1_1",
+ "server_fqdn": "www.ct.cn",
+ "app": "test_1_1",
+ "decoded_path": "ETHERNET.IPv4.TCP.http",
+ "fqdn_category_list": [1767],
+ "t_vsys_id": 1,
+ "vsys_id": 1,
+ "session_id": 290538039798223400,
+ "tcp_handshake_latency_ms": 41,
+ "client_os_desc": "Windows",
+ "server_os_desc": "Linux",
+ "data_center": "center-xxg-tsgx",
+ "device_group": "group-xxg-tsgx",
+ "device_tag": "{\"tags\":[{\"tag\":\"data_center\",\"value\":\"center-xxg-tsgx\"},{\"tag\":\"device_group\",\"value\":\"group-xxg-tsgx\"}]}",
+ "device_id": "9800165603247024",
+ "sled_ip": "192.168.40.39",
+ "dup_traffic_flag": 0
+}
+```
+
+```yaml
+sources:
+ inline_source:
+ type: inline
+ fields:
+ - name: log_id
+ type: bigint
+ - name: recv_time
+ type: bigint
+ - name: server_fqdn
+ type: string
+ - name: server_domain
+ type: string
+ - name: client_ip
+ type: string
+ - name: server_ip
+ type: string
+ - name: server_asn
+ type: string
+ - name: decoded_as
+ type: string
+ - name: device_group
+ type: string
+ - name: device_tag
+ type: string
+ properties:
+ data: '{"tcp_rtt_ms":128,"decoded_as":"HTTP", "http_version":"http1","http_request_line":"GET / HTTP/1.1","http_host":"www.ct.cn","http_url":"www.ct.cn/","http_user_agent":"curl/8.0.1","http_status_code":200,"http_response_line":"HTTP/1.1 200 OK","http_response_content_type":"text/html; charset=UTF-8","http_response_latency_ms":31,"http_session_duration_ms":5451,"in_src_mac":"ba:bb:a7:3c:67:1c","in_dest_mac":"86:dd:7a:8f:ae:e2","out_src_mac":"86:dd:7a:8f:ae:e2","out_dest_mac":"ba:bb:a7:3c:67:1c","tcp_client_isn":678677906,"tcp_server_isn":1006700307,"address_type":4,"client_ip":"192.11.22.22","server_ip":"8.8.8.8","client_port":42751,"server_port":80,"in_link_id":65535,"out_link_id":65535,"start_timestamp_ms":1703646546127,"end_timestamp_ms":1703646551702,"duration_ms":5575,"sent_pkts":97,"sent_bytes":5892,"received_pkts":250,"received_bytes":333931,"tcp_c2s_ip_fragments":0,"tcp_s2c_ip_fragments":0,"tcp_c2s_rtx_pkts":0,"tcp_c2s_rtx_bytes":0,"tcp_s2c_rtx_pkts":0,"tcp_s2c_rtx_bytes":0,"tcp_c2s_o3_pkts":0,"tcp_s2c_o3_pkts":0,"tcp_c2s_lost_bytes":0,"tcp_s2c_lost_bytes":0,"flags":26418,"flags_identify_info":[100,1,100,60,150,100,1,2],"app_transition":"http.1111.test_1_1","server_fqdn":"www.ct.cn","app":"test_1_1","decoded_path":"ETHERNET.IPv4.TCP.http","fqdn_category_list":[1767],"t_vsys_id":1,"vsys_id":1,"session_id":290538039798223400,"tcp_handshake_latency_ms":41,"client_os_desc":"Windows","server_os_desc":"Linux","data_center":"center-xxg-tsgx","device_group":"group-xxg-tsgx","device_tag":"{\"tags\":[{\"tag\":\"data_center\",\"value\":\"center-xxg-tsgx\"},{\"tag\":\"device_group\",\"value\":\"group-xxg-tsgx\"}]}","device_id":"9800165603247024","sled_ip":"192.168.40.39","dup_traffic_flag":0}'
+ format: json
+ json.ignore.parse.errors: false
+
+sinks:
+ print_sink:
+ type: print
+ properties:
+ format: json
+
+application:
+ env:
+ name: example-inline-to-print
+ parallelism: 3
+ pipeline:
+ object-reuse: true
+ topology:
+ - name: inline_source
+ downstream: [print_sink]
+ - name: print_sink
+ downstream: []
+
+```
+
+
+
+
+
+
diff --git a/docs/connector/formats/protobuf.md b/docs/connector/formats/protobuf.md
new file mode 100644
index 0000000..c798447
--- /dev/null
+++ b/docs/connector/formats/protobuf.md
@@ -0,0 +1,301 @@
+# Protobuf
+> Format protobuf
+## Description
+Protocol buffers are Google’s language-neutral, platform-neutral, extensible mechanism for serializing structured data – think XML, but smaller, faster, and simpler.
+It is widely used in streaming data pipelines. The protobuf format is currently supported in source connectors.
+
+| Name | Supported Versions | Maven |
+|-----------------|--------------------|--------------------------------------------------------------------------------------------------------------------------------|
+| Format Protobuf | Universal | [Download](http://192.168.40.153:8099/service/local/repositories/platform-release/content/com/geedgenetworks/format-protobuf/) |
+
+## Format Options
+
+| Name | Type | Required | Default | Description |
+|-------------------------------|----------|----------|---------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| format | String | Yes | - | The format to use; must be 'protobuf' here. |
+| protobuf.descriptor.file.path | String | Yes | - | The descriptor file path. |
+| protobuf.message.name | String | Yes | - | The protobuf messageName to look for in descriptor file. |
+| protobuf.ignore.parse.errors | Boolean | No | true | If true, protobuf parse errors are ignored; otherwise an exception is thrown. |
+| protobuf.emit.default.values | Boolean | No | false | If true, default values are emitted for missing fields. This is not recommended because it degrades performance. For basic data types, it is suggested to declare fields as `optional` instead of `required`. |
+
+## Data Type Mapping
+
+| Data Type | Protobuf Data Type | Description |
+|--------------|---------------------------------------------------------------------------------------------|-------------|
+| int / bigint | int32 / int64 / uint32 / uint64 / sint32 / sint64 / fixed32 / fixed64 / sfixed32 / sfixed64 | |
+| string | string | |
+
+# How to use
+## Protobuf usage example
+```protobuf
+syntax = "proto3";
+
+// [START java_declaration]
+// option java_multiple_files = true;
+option java_package = "com.geedgenetworks.proto";
+option java_outer_classname = "SessionRecordProtos";
+// [END java_declaration]
+
+message SessionRecord {
+ optional int64 recv_time = 1;
+ optional int64 log_id = 2;
+ string decoded_as = 3;
+ optional int64 session_id = 4;
+ optional int64 start_timestamp_ms = 5;
+ optional int64 end_timestamp_ms = 6;
+ optional int32 duration_ms = 7;
+ optional int32 tcp_handshake_latency_ms = 8;
+ optional int64 ingestion_time = 9;
+ optional int64 processing_time = 10;
+ string device_id = 11;
+ optional int32 out_link_id = 12;
+ optional int32 in_link_id = 13;
+ string device_tag = 14;
+ string data_center = 15;
+ string device_group = 16;
+ string sled_ip = 17;
+ optional int32 address_type = 18;
+ optional int32 vsys_id = 19;
+ optional int32 t_vsys_id = 20;
+ optional int64 flags = 21;
+ string flags_identify_info = 22;
+ repeated int64 security_rule_list = 23;
+ string security_action = 24;
+ repeated int64 monitor_rule_list = 25;
+ repeated int64 shaping_rule_list = 26;
+ repeated int64 proxy_rule_list = 27;
+ repeated int64 statistics_rule_list = 28;
+ repeated int64 sc_rule_list = 29;
+ repeated int64 sc_rsp_raw = 30;
+ repeated int64 sc_rsp_decrypted = 31;
+ string proxy_action = 32;
+ optional int32 proxy_pinning_status = 33;
+ optional int32 proxy_intercept_status = 34;
+ string proxy_passthrough_reason = 35;
+ optional int32 proxy_client_side_latency_ms = 36;
+ optional int32 proxy_server_side_latency_ms = 37;
+ string proxy_client_side_version = 38;
+ string proxy_server_side_version = 39;
+ optional int32 proxy_cert_verify = 40;
+ string proxy_intercept_error = 41;
+ optional int32 monitor_mirrored_pkts = 42;
+ optional int32 monitor_mirrored_bytes = 43;
+ string client_ip = 44;
+ optional int32 client_port = 45;
+ string client_os_desc = 46;
+ string client_geolocation = 47;
+ optional int64 client_asn = 48;
+ string subscriber_id = 49;
+ string imei = 50;
+ string imsi = 51;
+ string phone_number = 52;
+ string apn = 53;
+ string server_ip = 54;
+ optional int32 server_port = 55;
+ string server_os_desc = 56;
+ string server_geolocation = 57;
+ optional int64 server_asn = 58;
+ string server_fqdn = 59;
+ string server_domain = 60;
+ string app_transition = 61;
+ string app = 62;
+ string app_debug_info = 63;
+ string app_content = 64;
+ repeated int64 fqdn_category_list = 65;
+ string ip_protocol = 66;
+ string decoded_path = 67;
+ optional int32 dns_message_id = 68;
+ optional int32 dns_qr = 69;
+ optional int32 dns_opcode = 70;
+ optional int32 dns_aa = 71;
+ optional int32 dns_tc = 72;
+ optional int32 dns_rd = 73;
+ optional int32 dns_ra = 74;
+ optional int32 dns_rcode = 75;
+ optional int32 dns_qdcount = 76;
+ optional int32 dns_ancount = 77;
+ optional int32 dns_nscount = 78;
+ optional int32 dns_arcount = 79;
+ string dns_qname = 80;
+ optional int32 dns_qtype = 81;
+ optional int32 dns_qclass = 82;
+ string dns_cname = 83;
+ optional int32 dns_sub = 84;
+ string dns_rr = 85;
+ optional int32 dns_response_latency_ms = 86;
+ string http_url = 87;
+ string http_host = 88;
+ string http_request_line = 89;
+ string http_response_line = 90;
+ string http_request_body = 91;
+ string http_response_body = 92;
+ optional int32 http_proxy_flag = 93;
+ optional int32 http_sequence = 94;
+ string http_cookie = 95;
+ string http_referer = 96;
+ string http_user_agent = 97;
+ optional int64 http_request_content_length = 98;
+ string http_request_content_type = 99;
+ optional int64 http_response_content_length = 100;
+ string http_response_content_type = 101;
+ string http_set_cookie = 102;
+ string http_version = 103;
+ optional int32 http_status_code = 104;
+ optional int32 http_response_latency_ms = 105;
+ optional int32 http_session_duration_ms = 106;
+ optional int64 http_action_file_size = 107;
+ string ssl_version = 108;
+ string ssl_sni = 109;
+ string ssl_san = 110;
+ string ssl_cn = 111;
+ optional int32 ssl_handshake_latency_ms = 112;
+ string ssl_ja3_hash = 113;
+ string ssl_ja3s_hash = 114;
+ string ssl_cert_issuer = 115;
+ string ssl_cert_subject = 116;
+ optional int32 ssl_esni_flag = 117;
+ optional int32 ssl_ech_flag = 118;
+ string dtls_cookie = 119;
+ string dtls_version = 120;
+ string dtls_sni = 121;
+ string dtls_san = 122;
+ string dtls_cn = 123;
+ optional int32 dtls_handshake_latency_ms = 124;
+ string dtls_ja3_fingerprint = 125;
+ string dtls_ja3_hash = 126;
+ string dtls_cert_issuer = 127;
+ string dtls_cert_subject = 128;
+ string mail_protocol_type = 129;
+ string mail_account = 130;
+ string mail_from_cmd = 131;
+ string mail_to_cmd = 132;
+ string mail_from = 133;
+ string mail_password = 134;
+ string mail_to = 135;
+ string mail_cc = 136;
+ string mail_bcc = 137;
+ string mail_subject = 138;
+ string mail_subject_charset = 139;
+ string mail_attachment_name = 140;
+ string mail_attachment_name_charset = 141;
+ string mail_eml_file = 142;
+ string ftp_account = 143;
+ string ftp_url = 144;
+ string ftp_link_type = 145;
+ string quic_version = 146;
+ string quic_sni = 147;
+ string quic_user_agent = 148;
+ string rdp_cookie = 149;
+ string rdp_security_protocol = 150;
+ string rdp_client_channels = 151;
+ string rdp_keyboard_layout = 152;
+ string rdp_client_version = 153;
+ string rdp_client_name = 154;
+ string rdp_client_product_id = 155;
+ string rdp_desktop_width = 156;
+ string rdp_desktop_height = 157;
+ string rdp_requested_color_depth = 158;
+ string rdp_certificate_type = 159;
+ optional int32 rdp_certificate_count = 160;
+ optional int32 rdp_certificate_permanent = 161;
+ string rdp_encryption_level = 162;
+ string rdp_encryption_method = 163;
+ string ssh_version = 164;
+ string ssh_auth_success = 165;
+ string ssh_client_version = 166;
+ string ssh_server_version = 167;
+ string ssh_cipher_alg = 168;
+ string ssh_mac_alg = 169;
+ string ssh_compression_alg = 170;
+ string ssh_kex_alg = 171;
+ string ssh_host_key_alg = 172;
+ string ssh_host_key = 173;
+ string ssh_hassh = 174;
+ string sip_call_id = 175;
+ string sip_originator_description = 176;
+ string sip_responder_description = 177;
+ string sip_user_agent = 178;
+ string sip_server = 179;
+ string sip_originator_sdp_connect_ip = 180;
+ optional int32 sip_originator_sdp_media_port = 181;
+ string sip_originator_sdp_media_type = 182;
+ string sip_originator_sdp_content = 183;
+ string sip_responder_sdp_connect_ip = 184;
+ optional int32 sip_responder_sdp_media_port = 185;
+ string sip_responder_sdp_media_type = 186;
+ string sip_responder_sdp_content = 187;
+ optional int32 sip_duration_s = 188;
+ string sip_bye = 189;
+ optional int32 rtp_payload_type_c2s = 190;
+ optional int32 rtp_payload_type_s2c = 191;
+ string rtp_pcap_path = 192;
+ optional int32 rtp_originator_dir = 193;
+ string stratum_cryptocurrency = 194;
+ string stratum_mining_pools = 195;
+ string stratum_mining_program = 196;
+ string stratum_mining_subscribe = 197;
+ optional int64 sent_pkts = 198;
+ optional int64 received_pkts = 199;
+ optional int64 sent_bytes = 200;
+ optional int64 received_bytes = 201;
+ optional int64 tcp_c2s_ip_fragments = 202;
+ optional int64 tcp_s2c_ip_fragments = 203;
+ optional int64 tcp_c2s_lost_bytes = 204;
+ optional int64 tcp_s2c_lost_bytes = 205;
+ optional int64 tcp_c2s_o3_pkts = 206;
+ optional int64 tcp_s2c_o3_pkts = 207;
+ optional int64 tcp_c2s_rtx_pkts = 208;
+ optional int64 tcp_s2c_rtx_pkts = 209;
+ optional int64 tcp_c2s_rtx_bytes = 210;
+ optional int64 tcp_s2c_rtx_bytes = 211;
+ optional int32 tcp_rtt_ms = 212;
+ optional int64 tcp_client_isn = 213;
+ optional int64 tcp_server_isn = 214;
+ string packet_capture_file = 215;
+ string in_src_mac = 216;
+ string out_src_mac = 217;
+ string in_dest_mac = 218;
+ string out_dest_mac = 219;
+ string tunnels = 220;
+ optional int32 dup_traffic_flag = 221;
+ string tunnel_endpoint_a_desc = 222;
+ string tunnel_endpoint_b_desc = 223;
+}
+```
+Compile the protobuf file into a binary descriptor file:
+```shell
+protoc --descriptor_set_out=session_record_test.desc session_record_test.proto
+```
+
+```yaml
+sources:
+ inline_source:
+ type: inline
+ properties:
+ data: CIin2awGEICAoLC/hYzKAhoEQkFTRSCch8z3wtqEhAQo6o/Xmc0xMMCy15nNMTjWIkDRCEiIp9msBlCIp9msBloIMjE0MjYwMDNg//8DaP//A3JqeyJ0YWdzIjpbeyJ0YWciOiJkYXRhX2NlbnRlciIsInZhbHVlIjoiY2VudGVyLXh4Zy05MTQwIn0seyJ0YWciOiJkZXZpY2VfZ3JvdXAiLCJ2YWx1ZSI6Imdyb3VwLXh4Zy05MTQwIn1dfXoPY2VudGVyLXh4Zy05MTQwggEOZ3JvdXAteHhnLTkxNDCKAQ0xOTIuMTY4LjQwLjgxkAEEmAEBoAEBqAGQwAGyAQdbMSwxLDJd4gEDt+gY4gINMTkyLjU2LjE1MS44MOgCoeYD8gIHV2luZG93c/oCGOe+juWbvS5Vbmtub3duLlVua25vd24uLrIDDTE5Mi41Ni4yMjIuOTO4A/ZwwgMFTGludXjKAxjnvo7lm70uVW5rbm93bi5Vbmtub3duLi6SBAN0Y3CaBBFFVEhFUk5FVC5JUHY0LlRDULAMBLgMBcAM9gHIDJEOoA2AAagN8cr+jgKwDezksIAPwg0RYTI6ZmE6ZGM6NTY6Yzc6YjPKDRE0ODo3Mzo5Nzo5NjozODoyMNINETQ4OjczOjk3Ojk2OjM4OjIw2g0RYTI6ZmE6ZGM6NTY6Yzc6YjM=
+ type: base64
+ format: protobuf
+ protobuf.descriptor.file.path: ./session_record_test.desc
+ protobuf.message.name: SessionRecord
+
+sinks:
+ print_sink:
+ type: print
+ properties:
+ format: json
+
+application:
+ env:
+ name: example-inline-to-print
+ parallelism: 3
+ pipeline:
+ object-reuse: true
+ topology:
+ - name: inline_source
+ downstream: [print_sink]
+ - name: print_sink
+ downstream: []
+```
+
+
+