path: root/src/convert_sql.py
author     姜鹏辉 <[email protected]>    2021-07-09 18:26:33 +0800
committer  姜鹏辉 <[email protected]>    2021-07-09 18:26:33 +0800
commit     302d49453c90c859898329ce3f800e0ca08a872f (patch)
tree       81b9282281b34b409711820cd6170a6504c7e9f7 /src/convert_sql.py
parent     446e465c3bf75d7d7c60c8680ffc76143bd091a2 (diff)
Improvements
Diffstat (limited to 'src/convert_sql.py')
-rw-r--r--    src/convert_sql.py    86
1 file changed, 86 insertions(+), 0 deletions(-)
diff --git a/src/convert_sql.py b/src/convert_sql.py
new file mode 100644
index 0000000..e730642
--- /dev/null
+++ b/src/convert_sql.py
@@ -0,0 +1,86 @@
+
+
+## Convert ClickHouse SQL into openLooKeng SQL.
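+# Pipeline: poc-ch.txt (one query per line) -> poc-ch.json -> auto-ol.json (converted queries).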
+
+import json
+
+
+def txt_to_json():
+    # Read one ClickHouse query per line from poc-ch.txt and store them
+    # as {"Q1": ..., "Q2": ...} in poc-ch.json.
+    with open("poc-ch.txt", 'r') as file:
+        content = file.readlines()
+
+    queries = {}
+    for i, line in enumerate(content, start=1):
+        queries['Q' + str(i)] = line
+
+    with open("poc-ch.json", 'w') as file:
+        file.write(json.dumps(queries))
+
+
+
+def change_sql(origin_sql):
+    # Rewrite one ClickHouse query as openLooKeng SQL by applying a fixed list
+    # of literal string substitutions (time functions, string checks,
+    # identifier quoting, and GROUP BY/LIMIT forms).
+    changed_sql = origin_sql.replace('toUnixTimestamp($time1)','to_unixtime(timestamp $time1)')
+    changed_sql = changed_sql.replace('toUnixTimestamp($time2)','to_unixtime(timestamp $time2)')
+    changed_sql = changed_sql.replace('toDateTime(common_recv_time) AS common_recv_time',"from_unixtime(common_recv_time,'UTC+8')")
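+    # ClickHouse's notEmpty(x) is rewritten as an explicit length check.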
+    changed_sql = changed_sql.replace("notEmpty(http_url)","length(http_url)!= 0")
+    changed_sql = changed_sql.replace("notEmpty(http_domain)","length(http_domain)!= 0")
+    changed_sql = changed_sql.replace("notEmpty(common_l7_protocol)","length(common_l7_protocol)!= 0")
+    changed_sql = changed_sql.replace("notEmpty(common_server_ip)","length(common_server_ip)!= 0")
+    changed_sql = changed_sql.replace("notEmpty(common_subscriber_id)","length(common_subscriber_id)!= 0")
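+    # Fixed time buckets built with toStartOfInterval()/intDiv() are expressed
+    # as integer division on the epoch seconds, e.g. 5-minute buckets become
+    # cast(common_recv_time/300 as int) * 300.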
+    #changed_sql = changed_sql.replace("toUnixTimestamp(toDateTime(toStartOfInterval(toDateTime(common_recv_time),INTERVAL 5 MINUTE)))","floor(common_recv_time/300) * 300")
+    changed_sql = changed_sql.replace("toUnixTimestamp(toDateTime(toStartOfInterval(toDateTime(common_recv_time),INTERVAL 5 MINUTE)))","cast(common_recv_time/300 as int) * 300")
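+    # ClickHouse backtick-quoted identifiers become ANSI double-quoted identifiers.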
+    changed_sql = changed_sql.replace("`","\"")
+    changed_sql = changed_sql.replace("toDateTime(intDiv(toUInt32(toDateTime(toDateTime(common_recv_time))),300)*300)","from_unixtime(cast(common_recv_time/300 as int) * 300,'UTC+8')")
+    changed_sql = changed_sql.replace("toDateTime(intDiv(toUInt32(toDateTime(toDateTime(common_recv_time))),3600)*3600)","from_unixtime(cast(common_recv_time/3600 as int) * 3600,'UTC+8')")
+    changed_sql = changed_sql.replace("ORDER BY toDateTime(common_recv_time)","ORDER BY common_recv_time")
+    changed_sql = changed_sql.replace("common_recv_time >= toDateTime($time1) AND common_recv_time < toDateTime($time2)","common_recv_time >= to_unixtime(timestamp $time1) AND common_recv_time < to_unixtime(timestamp $time2)")
+    changed_sql = changed_sql.replace("toDateTime(toUnixTimestamp(toDateTime(toStartOfInterval(toDateTime(common_recv_time),INTERVAL 30 SECOND))))","from_unixtime(cast(common_recv_time/30 as int) * 30,'UTC+8')")
+    changed_sql = changed_sql.replace("toDateTime(cast(common_recv_time/300 as int) * 300)","from_unixtime(cast(common_recv_time/300 as int) * 300,'UTC+8')")
+    changed_sql = changed_sql.replace("toDateTime(intDiv(toUInt32(toDateTime(toDateTime(common_recv_time))), 3600)*3600)","from_unixtime(cast(common_recv_time/3600 as int) * 3600,'UTC+8')")
+    changed_sql = changed_sql.replace("toDateTime(common_recv_time)","from_unixtime(common_recv_time,'UTC+8')")
+    changed_sql = changed_sql.replace("toDateTime($time2)","to_unixtime(timestamp $time2)")
+    changed_sql = changed_sql.replace("toDateTime($time1)","to_unixtime(timestamp $time1)")
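+    # uniq() and approx_distinct() are both approximate distinct-count aggregates.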
+    changed_sql = changed_sql.replace("uniq(","approx_distinct(")
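+    # The remaining rules rewrite quoted display aliases used in GROUP BY
+    # (e.g. "Domain", "Server Port", "Receive Time") back to the underlying
+    # columns or expressions, and adjust LIMIT forms.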
+    changed_sql = changed_sql.replace("group by \"URI\"","group by http_url")
+    changed_sql = changed_sql.replace("GROUP BY \"URL\" LIMIT 1048576","GROUP BY http_url LIMIT 1048576")
+    changed_sql = changed_sql.replace("GROUP BY \"Http URL\" LIMIT 1048576","GROUP BY http_url LIMIT 1048576")
+    changed_sql = changed_sql.replace("GROUP BY \"Domain\" LIMIT 1048576","GROUP BY http_domain LIMIT 1048576")
+    changed_sql = changed_sql.replace("GROUP BY \"Http.Domain\" LIMIT 1048576","GROUP BY http_domain LIMIT 1048576")
+    changed_sql = changed_sql.replace("GROUP BY stat_time ORDER BY","GROUP BY from_unixtime(cast(common_recv_time/30 as int) * 30,'UTC+8') ORDER BY")
+    changed_sql = changed_sql.replace("GROUP BY \"Server Port\" LIMIT 1048576","GROUP BY common_server_port LIMIT 1048576")
+    changed_sql = changed_sql.replace("GROUP BY stat_time","GROUP BY from_unixtime(cast(common_recv_time/300 as int) * 300,'UTC+8')")
+    changed_sql = changed_sql.replace("GROUP BY \"Client IP\", \"SSL.SNI\" LIMIT 1048576","GROUP BY common_client_ip, ssl_sni LIMIT 1048576")
+    #changed_sql = changed_sql.replace("GROUP BY stat_time ORDER BY stat_time ASC LIMIT 10000","GROUP BY from_unixtime(cast(common_recv_time/30 as int) * 30,'UTC+8') ORDER BY stat_time ASC LIMIT 10000")
+    changed_sql = changed_sql.replace("GROUP BY \"Sled IP\", \"Internal IP\" LIMIT 1048576","GROUP BY common_sled_ip, common_internal_ip LIMIT 1048576")
+    changed_sql = changed_sql.replace("GROUP BY \"Server ASN\", \"Client ASN\" LIMIT 1048576","GROUP BY common_server_asn, common_client_asn LIMIT 1048576")
+    changed_sql = changed_sql.replace("GROUP BY \"device_id\" ORDER BY \"Bytes\" DESC LIMIT 1048576","GROUP BY common_device_id ORDER BY \"Bytes\" DESC LIMIT 1048576")
+    changed_sql = changed_sql.replace("GROUP BY \"Http.Domain\" ORDER BY \"Client IP\" DESC LIMIT 1048576","GROUP BY http_domain ORDER BY \"Client IP\" DESC LIMIT 1048576")
+    changed_sql = changed_sql.replace("GROUP BY \"Sled IP\", \"External IP\", \"Internal IP\" LIMIT 1048576","GROUP BY common_sled_ip, common_external_ip ,common_internal_ip LIMIT 1048576")
+    changed_sql = changed_sql.replace("GROUP BY \"Http.Domain\", \"Subscriber ID\" ORDER BY \"Sessions\" DESC LIMIT 1048576","GROUP BY http_domain, common_subscriber_id ORDER BY \"Sessions\" DESC LIMIT 1048576")
+    changed_sql = changed_sql.replace("GROUP BY \"source\" ORDER BY \"Sessions\" DESC LIMIT 1048576)","GROUP BY coalesce(nullif(common_subscriber_id, ''), nullif(common_client_ip, '')) ORDER BY \"Sessions\" DESC LIMIT 1048576)")
+    changed_sql = changed_sql.replace("GROUP BY \"destination\" ORDER BY \"Sessions\" DESC LIMIT 1048576)","GROUP BY coalesce(nullif(http_domain, ''), nullif(common_server_ip, '')) ORDER BY \"Sessions\" DESC LIMIT 1048576)")
+    changed_sql = changed_sql.replace("GROUP BY \"server_ip\" ORDER BY \"Bytes\" desc )","GROUP BY common_server_ip ORDER BY \"Bytes\" desc )")
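+    # LIMIT 0,30 (offset,count) becomes a plain LIMIT 30; the offset is zero, so the result set is unchanged.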
+    changed_sql = changed_sql.replace("LIMIT 0,30","LIMIT 30")
+    changed_sql = changed_sql.replace("GROUP BY \"domain\" ORDER BY \"Bytes\" desc","GROUP BY http_domain ORDER BY \"Bytes\" desc")
+    changed_sql = changed_sql.replace("GROUP BY \"Server IP\" , \"Http.Domain\" ORDER BY \"Bytes\" desc LIMIT 1048576","GROUP BY common_server_ip , http_domain ORDER BY \"Bytes\" desc LIMIT 1048576")
+    changed_sql = changed_sql.replace("GROUP BY \"Client IP\", \"Http.Domain\" ORDER BY \"Sessions\" DESC LIMIT 1048576)","GROUP BY common_client_ip, http_domain ORDER BY \"Sessions\" DESC LIMIT 1048576)")
+    changed_sql = changed_sql.replace("GROUP BY \"Receive Time\", \"Device ID\" LIMIT 10000","GROUP BY cast(common_recv_time/300 as int) * 300,common_device_id LIMIT 10000")
+    changed_sql = changed_sql.replace("GROUP BY \"Receive Time\"","GROUP BY cast(common_recv_time/300 as int) * 300")
+
+    return changed_sql
+
+
+def ch_to_ol():
+    # Convert every query in poc-ch.json and write the results to auto-ol.json.
+    with open("./poc-ch.json","r") as file:
+        queries = json.loads(file.read())
+
+    olsql = {}
+    for k,v in queries.items():
+        olsql[k] = change_sql(v)
+
+    with open("./auto-ol.json","w") as file:
+        file.write(json.dumps(olsql))
+
+
+if __name__ == "__main__":
+    ch_to_ol()
\ No newline at end of file