1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
|
## convert clickhouse's sql into openlookeng's sql
import json
def txt_to_json():
with open("poc-ch.txt",'r') as file:
content = file.readlines()
i = 1
all = {}
for line in content:
all['Q'+ str(i)] = line
i += 1
with open("poc-ch.json",'w') as file:
content = json.dumps(all)
file.write(content)
def change_sql_ckdb(origin_sql):
changed_sql = origin_sql.replace('toUnixTimestamp($time1)','ckdb.function.toUnixTimestamp($time1)')
changed_sql = changed_sql.replace('toUnixTimestamp($time2)','ckdb.function.toUnixTimestamp($time2)')
changed_sql = changed_sql.replace('toDateTime(common_recv_time) AS common_recv_time',"ckdb.function.toDateTime(common_recv_time)")
changed_sql = changed_sql.replace("notEmpty(http_url)","length(http_url)!= 0")
changed_sql = changed_sql.replace("notEmpty(http_domain)","length(http_domain)!= 0")
changed_sql = changed_sql.replace("notEmpty(common_l7_protocol)","length(common_l7_protocol)!= 0")
changed_sql = changed_sql.replace("notEmpty(common_server_ip)","length(common_server_ip)!= 0")
changed_sql = changed_sql.replace("notEmpty(http_domain)","length(http_domain)!= 0")
changed_sql = changed_sql.replace("notEmpty(common_subscriber_id)","length(common_subscriber_id)!= 0")
#changed_sql = changed_sql.replace("toUnixTimestamp(toDateTime(toStartOfInterval(toDateTime(common_recv_time),INTERVAL 5 MINUTE)))","floor(common_recv_time/300) * 300")
changed_sql = changed_sql.replace("toUnixTimestamp(toDateTime(toStartOfInterval(toDateTime(common_recv_time),INTERVAL 5 MINUTE)))","cast(common_recv_time/300 as int) * 300")
changed_sql = changed_sql.replace("`","\"")
changed_sql = changed_sql.replace("toDateTime(intDiv(toUInt32(toDateTime(toDateTime(common_recv_time))),300)*300)","ckdb.function.toDateTime(cast(common_recv_time/300 as int) * 300)")
changed_sql = changed_sql.replace("toDateTime(intDiv(toUInt32(toDateTime(toDateTime(common_recv_time))),3600)*3600)","ckdb.function.toDateTime(cast(common_recv_time/3600 as int) * 3600)")
changed_sql = changed_sql.replace("ORDER BY toDateTime(common_recv_time)","ORDER BY common_recv_time")
changed_sql = changed_sql.replace("common_recv_time >= toDateTime($time1) AND common_recv_time < toDateTime($time2)","common_recv_time >= ckdb.function.toUnixTimestamp($time1) AND common_recv_time < ckdb.function.toUnixTimestamp($time2)")
changed_sql = changed_sql.replace("toDateTime(toUnixTimestamp(toDateTime(toStartOfInterval(toDateTime(common_recv_time),INTERVAL 30 SECOND))))","ckdb.function.toDateTime(cast(common_recv_time/30 as int) * 30)")
changed_sql = changed_sql.replace("toDateTime(cast(common_recv_time/300 as int) * 300)","ckdb.function.toDateTime(cast(common_recv_time/300 as int) * 300)")
changed_sql = changed_sql.replace("toDateTime(intDiv(toUInt32(toDateTime(toDateTime(common_recv_time))), 3600)*3600)","ckdb.function.toDateTime(cast(common_recv_time/3600 as int) * 3600)")
changed_sql = changed_sql.replace("toDateTime(common_recv_time)","ckdb.function.toDateTime(common_recv_time)")
changed_sql = changed_sql.replace("toDateTime($time2)","ckdb.function.toUnixTimestamp($time2)")
changed_sql = changed_sql.replace("toDateTime($time1)","ckdb.function.toUnixTimestamp($time1)")
changed_sql = changed_sql.replace("uniq(","approx_distinct(")
changed_sql = changed_sql.replace("group by \"URI\"","group by http_url")
changed_sql = changed_sql.replace("GROUP BY \"URL\" LIMIT 1048576","GROUP BY http_url LIMIT 1048576")
changed_sql = changed_sql.replace("GROUP BY \"Http URL\" LIMIT 1048576","GROUP BY http_url LIMIT 1048576")
changed_sql = changed_sql.replace("GROUP BY \"Domain\" LIMIT 1048576","GROUP BY http_domain LIMIT 1048576")
changed_sql = changed_sql.replace("GROUP BY \"Http.Domain\" LIMIT 1048576","GROUP BY http_domain LIMIT 1048576")
changed_sql = changed_sql.replace("GROUP BY stat_time ORDER BY","GROUP BY ckdb.function.toDateTime(cast(common_recv_time/30 as int) * 30) ORDER BY")
changed_sql = changed_sql.replace("GROUP BY \"Server Port\" LIMIT 1048576","GROUP BY common_server_port LIMIT 1048576")
changed_sql = changed_sql.replace("GROUP BY stat_time","GROUP BY ckdb.function.toDateTime(cast(common_recv_time/300 as int) * 300)")
changed_sql = changed_sql.replace("GROUP BY \"Client IP\", \"SSL.SNI\" LIMIT 1048576","GROUP BY common_client_ip, ssl_sni LIMIT 1048576")
#changed_sql = changed_sql.replace("GROUP BY stat_time ORDER BY stat_time ASC LIMIT 10000","GROUP BY from_unixtime(cast(common_recv_time/30 as int) * 30,'UTC+8') ORDER BY stat_time ASC LIMIT 10000")
changed_sql = changed_sql.replace("GROUP BY \"Sled IP\", \"Internal IP\" LIMIT 1048576","GROUP BY common_sled_ip, common_internal_ip LIMIT 1048576")
changed_sql = changed_sql.replace("GROUP BY \"Server ASN\", \"Client ASN\" LIMIT 1048576","GROUP BY common_server_asn, common_client_asn LIMIT 1048576")
changed_sql = changed_sql.replace("GROUP BY \"device_id\" ORDER BY \"Bytes\" DESC LIMIT 1048576","GROUP BY common_device_id ORDER BY \"Bytes\" DESC LIMIT 1048576")
changed_sql = changed_sql.replace("GROUP BY \"Http.Domain\" ORDER BY \"Client IP\" DESC LIMIT 1048576","GROUP BY http_domain ORDER BY \"Client IP\" DESC LIMIT 1048576")
changed_sql = changed_sql.replace("GROUP BY \"Sled IP\", \"External IP\", \"Internal IP\" LIMIT 1048576","GROUP BY common_sled_ip, common_external_ip ,common_internal_ip LIMIT 1048576")
changed_sql = changed_sql.replace("GROUP BY \"Http.Domain\", \"Subscriber ID\" ORDER BY \"Sessions\" DESC LIMIT 1048576","GROUP BY http_domain, common_subscriber_id ORDER BY \"Sessions\" DESC LIMIT 1048576")
changed_sql = changed_sql.replace("GROUP BY \"source\" ORDER BY \"Sessions\" DESC LIMIT 1048576)","GROUP BY coalesce(nullif(common_subscriber_id, ''), nullif(common_client_ip, '')) ORDER BY \"Sessions\" DESC LIMIT 1048576)")
changed_sql = changed_sql.replace("GROUP BY \"destination\" ORDER BY \"Sessions\" DESC LIMIT 1048576)","GROUP BY coalesce(nullif(http_domain, ''), nullif(common_server_ip, '')) ORDER BY \"Sessions\" DESC LIMIT 1048576)")
changed_sql = changed_sql.replace("GROUP BY \"server_ip\" ORDER BY \"Bytes\" desc )","GROUP BY common_server_ip ORDER BY \"Bytes\" desc )")
changed_sql = changed_sql.replace("LIMIT 0,30","LIMIT 30")
changed_sql = changed_sql.replace("GROUP BY \"domain\" ORDER BY \"Bytes\" desc","GROUP BY http_domain ORDER BY \"Bytes\" desc")
changed_sql = changed_sql.replace("GROUP BY \"Server IP\" , \"Http.Domain\" ORDER BY \"Bytes\" desc LIMIT 1048576","GROUP BY common_server_ip , http_domain ORDER BY \"Bytes\" desc LIMIT 1048576")
changed_sql = changed_sql.replace("GROUP BY \"Client IP\", \"Http.Domain\" ORDER BY \"Sessions\" DESC LIMIT 1048576)","GROUP BY common_client_ip, http_domain ORDER BY \"Sessions\" DESC LIMIT 1048576)")
changed_sql = changed_sql.replace("GROUP BY \"Receive Time\", \"Device ID\" LIMIT 10000","GROUP BY cast(common_recv_time/300 as int) * 300,common_device_id LIMIT 10000")
changed_sql = changed_sql.replace("GROUP BY \"Receive Time\"","GROUP BY cast(common_recv_time/300 as int) * 300")
#changed_sql = changed_sql.replace("","")
return changed_sql
return changed_sql
def change_sql(origin_sql):
changed_sql = origin_sql.replace('toUnixTimestamp($time1)','to_unixtime(timestamp $time1)')
changed_sql = changed_sql.replace('toUnixTimestamp($time2)','to_unixtime(timestamp $time2)')
changed_sql = changed_sql.replace('toDateTime(common_recv_time) AS common_recv_time',"from_unixtime(common_recv_time,'UTC+8')")
changed_sql = changed_sql.replace("notEmpty(http_url)","length(http_url)!= 0")
changed_sql = changed_sql.replace("notEmpty(http_domain)","length(http_domain)!= 0")
changed_sql = changed_sql.replace("notEmpty(common_l7_protocol)","length(common_l7_protocol)!= 0")
changed_sql = changed_sql.replace("notEmpty(common_server_ip)","length(common_server_ip)!= 0")
changed_sql = changed_sql.replace("notEmpty(http_domain)","length(http_domain)!= 0")
changed_sql = changed_sql.replace("notEmpty(common_subscriber_id)","length(common_subscriber_id)!= 0")
#changed_sql = changed_sql.replace("toUnixTimestamp(toDateTime(toStartOfInterval(toDateTime(common_recv_time),INTERVAL 5 MINUTE)))","floor(common_recv_time/300) * 300")
changed_sql = changed_sql.replace("toUnixTimestamp(toDateTime(toStartOfInterval(toDateTime(common_recv_time),INTERVAL 5 MINUTE)))","cast(common_recv_time/300 as int) * 300")
changed_sql = changed_sql.replace("`","\"")
changed_sql = changed_sql.replace("toDateTime(intDiv(toUInt32(toDateTime(toDateTime(common_recv_time))),300)*300)","from_unixtime(cast(common_recv_time/300 as int) * 300,'UTC+8')")
changed_sql = changed_sql.replace("toDateTime(intDiv(toUInt32(toDateTime(toDateTime(common_recv_time))),3600)*3600)","from_unixtime(cast(common_recv_time/3600 as int) * 3600,'UTC+8')")
changed_sql = changed_sql.replace("ORDER BY toDateTime(common_recv_time)","ORDER BY common_recv_time")
changed_sql = changed_sql.replace("common_recv_time >= toDateTime($time1) AND common_recv_time < toDateTime($time2)","common_recv_time >= to_unixtime(timestamp $time1) AND common_recv_time < to_unixtime(timestamp $time2)")
changed_sql = changed_sql.replace("toDateTime(toUnixTimestamp(toDateTime(toStartOfInterval(toDateTime(common_recv_time),INTERVAL 30 SECOND))))","from_unixtime(cast(common_recv_time/30 as int) * 30,'UTC+8')")
changed_sql = changed_sql.replace("toDateTime(cast(common_recv_time/300 as int) * 300)","from_unixtime(cast(common_recv_time/300 as int) * 300,'UTC+8')")
changed_sql = changed_sql.replace("toDateTime(intDiv(toUInt32(toDateTime(toDateTime(common_recv_time))), 3600)*3600)","from_unixtime(cast(common_recv_time/3600 as int) * 3600,'UTC+8')")
changed_sql = changed_sql.replace("toDateTime(common_recv_time)","from_unixtime(common_recv_time,'UTC+8')")
changed_sql = changed_sql.replace("toDateTime($time2)","to_unixtime(timestamp $time2)")
changed_sql = changed_sql.replace("toDateTime($time1)","to_unixtime(timestamp $time1)")
changed_sql = changed_sql.replace("uniq(","approx_distinct(")
changed_sql = changed_sql.replace("group by \"URI\"","group by http_url")
changed_sql = changed_sql.replace("GROUP BY \"URL\" LIMIT 1048576","GROUP BY http_url LIMIT 1048576")
changed_sql = changed_sql.replace("GROUP BY \"Http URL\" LIMIT 1048576","GROUP BY http_url LIMIT 1048576")
changed_sql = changed_sql.replace("GROUP BY \"Domain\" LIMIT 1048576","GROUP BY http_domain LIMIT 1048576")
changed_sql = changed_sql.replace("GROUP BY \"Http.Domain\" LIMIT 1048576","GROUP BY http_domain LIMIT 1048576")
changed_sql = changed_sql.replace("GROUP BY stat_time ORDER BY","GROUP BY from_unixtime(cast(common_recv_time/30 as int) * 30,'UTC+8') ORDER BY")
changed_sql = changed_sql.replace("GROUP BY \"Server Port\" LIMIT 1048576","GROUP BY common_server_port LIMIT 1048576")
changed_sql = changed_sql.replace("GROUP BY stat_time","GROUP BY from_unixtime(cast(common_recv_time/300 as int) * 300,'UTC+8')")
changed_sql = changed_sql.replace("GROUP BY \"Client IP\", \"SSL.SNI\" LIMIT 1048576","GROUP BY common_client_ip, ssl_sni LIMIT 1048576")
#changed_sql = changed_sql.replace("GROUP BY stat_time ORDER BY stat_time ASC LIMIT 10000","GROUP BY from_unixtime(cast(common_recv_time/30 as int) * 30,'UTC+8') ORDER BY stat_time ASC LIMIT 10000")
changed_sql = changed_sql.replace("GROUP BY \"Sled IP\", \"Internal IP\" LIMIT 1048576","GROUP BY common_sled_ip, common_internal_ip LIMIT 1048576")
changed_sql = changed_sql.replace("GROUP BY \"Server ASN\", \"Client ASN\" LIMIT 1048576","GROUP BY common_server_asn, common_client_asn LIMIT 1048576")
changed_sql = changed_sql.replace("GROUP BY \"device_id\" ORDER BY \"Bytes\" DESC LIMIT 1048576","GROUP BY common_device_id ORDER BY \"Bytes\" DESC LIMIT 1048576")
changed_sql = changed_sql.replace("GROUP BY \"Http.Domain\" ORDER BY \"Client IP\" DESC LIMIT 1048576","GROUP BY http_domain ORDER BY \"Client IP\" DESC LIMIT 1048576")
changed_sql = changed_sql.replace("GROUP BY \"Sled IP\", \"External IP\", \"Internal IP\" LIMIT 1048576","GROUP BY common_sled_ip, common_external_ip ,common_internal_ip LIMIT 1048576")
changed_sql = changed_sql.replace("GROUP BY \"Http.Domain\", \"Subscriber ID\" ORDER BY \"Sessions\" DESC LIMIT 1048576","GROUP BY http_domain, common_subscriber_id ORDER BY \"Sessions\" DESC LIMIT 1048576")
changed_sql = changed_sql.replace("GROUP BY \"source\" ORDER BY \"Sessions\" DESC LIMIT 1048576)","GROUP BY coalesce(nullif(common_subscriber_id, ''), nullif(common_client_ip, '')) ORDER BY \"Sessions\" DESC LIMIT 1048576)")
changed_sql = changed_sql.replace("GROUP BY \"destination\" ORDER BY \"Sessions\" DESC LIMIT 1048576)","GROUP BY coalesce(nullif(http_domain, ''), nullif(common_server_ip, '')) ORDER BY \"Sessions\" DESC LIMIT 1048576)")
changed_sql = changed_sql.replace("GROUP BY \"server_ip\" ORDER BY \"Bytes\" desc )","GROUP BY common_server_ip ORDER BY \"Bytes\" desc )")
changed_sql = changed_sql.replace("LIMIT 0,30","LIMIT 30")
changed_sql = changed_sql.replace("GROUP BY \"domain\" ORDER BY \"Bytes\" desc","GROUP BY http_domain ORDER BY \"Bytes\" desc")
changed_sql = changed_sql.replace("GROUP BY \"Server IP\" , \"Http.Domain\" ORDER BY \"Bytes\" desc LIMIT 1048576","GROUP BY common_server_ip , http_domain ORDER BY \"Bytes\" desc LIMIT 1048576")
changed_sql = changed_sql.replace("GROUP BY \"Client IP\", \"Http.Domain\" ORDER BY \"Sessions\" DESC LIMIT 1048576)","GROUP BY common_client_ip, http_domain ORDER BY \"Sessions\" DESC LIMIT 1048576)")
changed_sql = changed_sql.replace("GROUP BY \"Receive Time\", \"Device ID\" LIMIT 10000","GROUP BY cast(common_recv_time/300 as int) * 300,common_device_id LIMIT 10000")
changed_sql = changed_sql.replace("GROUP BY \"Receive Time\"","GROUP BY cast(common_recv_time/300 as int) * 300")
#changed_sql = changed_sql.replace("","")
return changed_sql
def ch_to_ol():
with open("./poc-ch.json","r") as file:
content = file.read()
all = json.loads(content)
olsql = {}
for k,v in all.items():
olsql[k] = change_sql_ckdb(v)
with open("./ckdb-ol.json","w") as file:
file.write(json.dumps(olsql))
ch_to_ol()
|