summaryrefslogtreecommitdiff
path: root/python/gen-sql.py
blob: 3126e75c1eb26ba3177dc1c9bcb47a2fe0f15098 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
from datetime import datetime, timedelta


def sort_func(item):
    if item['type'] == 'DAY':
        return 1
    elif item['type'] == 'HOUR':
        return 2
    elif item['type'] == 'HALF-HOUR':
        return 3
    elif item['type'] == 'TEN-MIN':
        return 4
    elif item['type'] == 'FIVE-MIN':
        return 5
    elif item['type'] == 'MIN':
        return 6
    else:
        return 7


if __name__ == '__main__':
    days = [
        datetime(year=2023, month=7, day=10),
        datetime(year=2023, month=7, day=12),
        datetime(year=2023, month=6, day=30),
        datetime(year=2023, month=7, day=2),
    ]
    hours_interval = [
        {'type': 'HOUR', 'title': '0:00-1:00', 'start': timedelta(hours=0), 'end': timedelta(hours=1)},
        {'type': 'HOUR', 'title': '7:00-8:00', 'start': timedelta(hours=7), 'end': timedelta(hours=8)},
        {'type': 'HOUR', 'title': '9:30-10:30', 'start': timedelta(hours=9.5), 'end': timedelta(hours=10.5)},
        {'type': 'HOUR', 'title': '11:00-12:00', 'start': timedelta(hours=11), 'end': timedelta(hours=12)},
        {'type': 'HOUR', 'title': '15:00-16:00', 'start': timedelta(hours=15), 'end': timedelta(hours=16)},

        # half hours
        {'type': 'HALF-HOUR', 'title': '0:00-0:30', 'start': timedelta(hours=0), 'end': timedelta(hours=0.5)},
        {'type': 'HALF-HOUR', 'title': '6:00-6:30', 'start': timedelta(hours=6), 'end': timedelta(hours=6.5)},
        {'type': 'HALF-HOUR', 'title': '11:30-12:00', 'start': timedelta(hours=11.5), 'end': timedelta(hours=12)},
        {'type': 'HALF-HOUR', 'title': '16:30-17:00', 'start': timedelta(hours=16.5), 'end': timedelta(hours=17)},

    ]
    hours = [
        {'title': '07', 'value': timedelta(hours=7)},
        {'title': '10:30', 'value': timedelta(hours=10.5)},
        {'title': '11', 'value': timedelta(hours=11)},
        {'title': '16', 'value': timedelta(hours=16)},
    ]
    minutes_interval = [
        # ten minutes
        {'type': 'TEN-MIN', 'title': '-5:00-5:00', 'start': timedelta(minutes=-5), 'end': timedelta(minutes=5)},
        {'type': 'TEN-MIN', 'title': '15:00-25:00', 'start': timedelta(minutes=15), 'end': timedelta(minutes=25)},
        {'type': 'TEN-MIN', 'title': '30:00-40:00', 'start': timedelta(minutes=30), 'end': timedelta(minutes=40)},
        {'type': 'TEN-MIN', 'title': '45:00-55:00', 'start': timedelta(minutes=45), 'end': timedelta(minutes=55)},

        # five minutes
        {'type': 'FIVE-MIN', 'title': '0:00-5:00', 'start': timedelta(minutes=0), 'end': timedelta(minutes=5)},
        {'type': 'FIVE-MIN', 'title': '27:00-32:00', 'start': timedelta(minutes=27), 'end': timedelta(minutes=32)},
        {'type': 'FIVE-MIN', 'title': '42:00-47:00', 'start': timedelta(minutes=42), 'end': timedelta(minutes=47)},
        {'type': 'FIVE-MIN', 'title': '50:00-55:00', 'start': timedelta(minutes=50), 'end': timedelta(minutes=55)},

        # minute
        {'type': 'MIN', 'title': '0:00-1:00', 'start': timedelta(minutes=0), 'end': timedelta(minutes=1)},
        {'type': 'MIN', 'title': '15:00-16:00', 'start': timedelta(minutes=15), 'end': timedelta(minutes=16)},
        {'type': 'MIN', 'title': '29:00-30:00', 'start': timedelta(minutes=29), 'end': timedelta(minutes=30)},
        {'type': 'MIN', 'title': '40:00-41:00', 'start': timedelta(minutes=40), 'end': timedelta(minutes=41)},
    ]
    list = []
    for day in days:
        list.append({'type': 'DAY', 'title': day.strftime("%Y-%m-%d"), 'start': day.strftime("%Y-%m-%d %H:%M:%S"),
                     'end': (day + timedelta(days=1)).strftime("%Y-%m-%d %H:%M:%S")})
        # print("DAY,{title},'{start},'{end}".format(title=day.strftime("%Y-%m-%d"), start=day,
        #                                            end=day + timedelta(days=1)))

        # hour
        for hi in hours_interval:
            start = day + hi['start']
            end = day + hi['end']
            list.append(
                {'type': hi['type'], 'title': '{} {}'.format(day.strftime("%Y-%m-%d"), hi['title']),
                 'start': start.strftime("%Y-%m-%d %H:%M:%S"),
                 'end': end.strftime("%Y-%m-%d %H:%M:%S")}
            )
            # print("{type},{title},'{start},'{end}".format(type=hi['type'],
            #                                               title='{} {}'.format(day.strftime("%Y-%m-%d"), hi['title']),
            #                                               start=start,
            #                                               end=end))
        for hour in hours:
            now = day + hour['value']
            for mi in minutes_interval:
                start = now + mi['start']
                end = now + mi['end']
                list.append({'type': mi['type'], 'title': '{} [{}] {}'.format(now.strftime("%Y-%m-%d"),
                                                                              hour['title'], mi['title']),
                             'start': start.strftime("%Y-%m-%d %H:%M:%S"),
                             'end': end.strftime("%Y-%m-%d %H:%M:%S")})
                # print("{type},{title},'{start},'{end}".format(type=mi['type'],
                #                                               title='{} [{}] {}'.format(now.strftime("%Y-%m-%d"),
                #                                                                         hour['title'], mi['title']),
                #                                               start=start,
                #                                               end=end))

    result = sorted(list, key=sort_func)

    sql = """
    SELECT
        '"{type}"'                                      AS tpe,
        '"{title}"'                                     AS datetime,
        '"{start}"'                                     AS start_time,
        '"{end}"'                                       AS end_time,
        count(DISTINCT http_domain)                     AS unique_domain_size,
        count(DISTINCT http_host, ssl_sni, quic_sni)    AS unique_fqdn_size
    FROM session_record
    WHERE common_recv_time >= toDateTime('{start}', 'Asia/Shanghai')
      AND common_recv_time < toDateTime('{end}', 'Asia/Shanghai')"""

    sqls = '\n   UNION ALL '.join([sql.format(type=ret['type'], title=ret['title'],
                                              start=ret['start'], end=ret['end']) for ret in result])

    print(sqls)