summaryrefslogtreecommitdiff
path: root/connection_dataset_new.py
blob: 031f3345d0264cd79c310b23099459b1766031ad (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
import os
import numpy as np
def getHSFirstTime(path):
    """Return the leading field of the first 'rly_data' / 'in' line of a log.

    The file is read completely, then scanned top to bottom.  A line matches
    when it contains both the substring 'rly_data' and the substring 'in'.

    Args:
        path: Path of the text log file to scan.

    Returns:
        The text preceding the first tab of the first matching line (as a
        string), or None when no line matches.
    """
    with open(path, 'r') as log:
        rows = log.readlines()
    # Lazily filter so that only the first hit is ever inspected.
    hits = (row for row in rows if 'rly_data' in row and 'in' in row)
    first_hit = next(hits, None)
    if first_hit is None:
        return None
    return first_hit.split('\t')[0]

def getClientFirstTime(path):
    """Return the second field of the first 'rly_data' / 'out' line of a log.

    The file is read completely, then scanned top to bottom.  A line matches
    when it contains both the substring 'rly_data' and the substring 'out'.

    Args:
        path: Path of the text log file to scan.

    Returns:
        The second tab-separated field of the first matching line (as a
        string), or None when no line matches.
    """
    with open(path, 'r') as log:
        rows = log.readlines()
    # Lazily filter so that only the first hit is ever inspected.
    hits = (row for row in rows if 'rly_data' in row and 'out' in row)
    first_hit = next(hits, None)
    if first_hit is None:
        return None
    return first_hit.split('\t')[1]

def getClientFirstTimeDic(path):
    """Map first client timestamps to file names for a directory of logs.

    A file is only considered when the part of its name before the first '-'
    occurs (as a substring) in exactly one name of the directory listing,
    i.e. only in the file's own name.

    Args:
        path: Directory containing the client log files.

    Returns:
        A dict whose keys are the timestamps returned by getClientFirstTime
        and whose values are the corresponding file names.  Files without a
        timestamp are left out; if two files share a timestamp, the one
        listed later wins.
    """
    names = os.listdir(path)
    first_times = {}
    for name in names:
        prefix = name.split('-')[0]
        occurrences = sum(1 for other in names if prefix in other)
        if occurrences != 1:
            continue
        stamp = getClientFirstTime(os.path.join(path, name))
        if stamp is not None:
            first_times[stamp] = name
    return first_times

def getHSFirstTimeDic(path):
    """Map first hs timestamps to file names for a directory of logs.

    Args:
        path: Directory containing the hs log files.

    Returns:
        A dict whose keys are the timestamps returned by getHSFirstTime and
        whose values are the corresponding file names.  Files without a
        timestamp are left out; if two files share a timestamp, the one
        listed later wins.
    """
    first_times = {}
    for name in os.listdir(path):
        stamp = getHSFirstTime(os.path.join(path, name))
        if stamp is None:
            continue
        first_times[stamp] = name
    return first_times

def getCellList(path,t):
    """Extract the direction and timestamp of every relay line in a log file.

    Only lines containing the substring '\\trelay' are considered.  A line
    containing 'in' yields direction -1; otherwise a line containing 'out'
    yields direction 1.  Lines with neither marker are skipped (previously
    they raised UnboundLocalError or silently reused the preceding line's
    direction).

    Args:
        path: Path of the text log file to parse.
        t: Log type.  'client' reads the timestamp from the second
            tab-separated field, 'hs' from the first.  For any other value
            no timestamps are collected and only the direction list is
            filled (kept for backward compatibility).

    Returns:
        A tuple (celllist, timelist): the list of directions (-1 / 1) and
        the list of timestamps converted to float.

    Raises:
        ValueError: If a selected timestamp field is not a valid float.
        IndexError: If a relay line lacks the selected timestamp field.
    """
    # Index of the tab-separated field holding the timestamp for each type.
    if t == 'client':
        time_field = 1
    elif t == 'hs':
        time_field = 0
    else:
        time_field = None

    celllist = []
    timelist = []
    with open(path, 'r') as file:
        # Stream the file; there is no need to hold every line in memory.
        for line in file:
            if '\trelay' not in line:
                continue
            if 'in' in line:
                cell = -1
            elif 'out' in line:
                cell = 1
            else:
                # No direction marker: recording anything would be a guess.
                continue
            if time_field is not None:
                timelist.append(float(line.split('\t')[time_field]))
            celllist.append(cell)
    return celllist, timelist

def process(client_log_path,hs_log_path,save_path):
    """Pair client and hs traces of each website and save them to disk.

    For every entry of ``client_log_path`` that also exists under
    ``hs_log_path``, the first timestamps of the client files and of the hs
    files are compared.  A client file is paired with an hs file when exactly
    one hs timestamp differs from the client timestamp by less than 2 (in the
    logs' own timestamp units).  Client files with no match or with several
    matches are dropped; ambiguous matches are reported on stdout.

    Two files are written into ``save_path``:
      * ``process_relay.log`` - the pairing decisions and per-website counts.
      * ``results_relayx.npz`` - the collected data, stored under the key
        ``data``.  The value is a Python dict, so NumPy stores it as a
        pickled object array and loading it requires ``allow_pickle=True``.

    Args:
        client_log_path: Directory with one sub-directory per website holding
            the client logs.
        hs_log_path: Directory with the same layout holding the hs logs.
        save_path: Directory that receives the log file and the .npz file.

    Returns:
        None.
    """
    total_dic = {}
    # The context manager guarantees the log is closed even if parsing fails.
    with open(os.path.join(save_path, 'process_relay.log'), 'w+') as log_file:
        for website in os.listdir(client_log_path):
            webclient_path = os.path.join(client_log_path, website)
            webhs_path = os.path.join(hs_log_path, website)
            client_exists = os.path.exists(webclient_path)
            hs_exists = os.path.exists(webhs_path)
            if not (client_exists and hs_exists):
                log_file.write("ERROR: client path is {} and hs path is {}\n".format(client_exists, hs_exists))
                continue

            website_dic = {'client': {'cell': [], 'time': []}, 'hs': {'cell': [], 'time': []}}
            client_time_dic = getClientFirstTimeDic(webclient_path)
            hs_time_dic = getHSFirstTimeDic(webhs_path)

            for client_key, conn_client_filename in client_time_dic.items():
                matched_hs_key = None   # first hs timestamp close to client_key
                is_multiple = False     # set when more than one hs timestamp is close
                for hs_key, hs_filename in hs_time_dic.items():
                    if abs(float(client_key) - float(hs_key)) < 2:
                        if matched_hs_key is None:
                            matched_hs_key = hs_key
                        else:
                            print('multiple similar first key which client file {} with key is {} and hs file {} with key is {},and hs tmp file is {} with key is {}'.format(conn_client_filename, client_key, hs_filename, hs_key, hs_time_dic[matched_hs_key], matched_hs_key))
                            is_multiple = True

                # Keep only unambiguous one-to-one pairings.
                if matched_hs_key is None or is_multiple:
                    continue

                conn_hs_filename = hs_time_dic[matched_hs_key]
                log_file.write('connection relationship : client is {} , hs is {}\n'.format(conn_client_filename, conn_hs_filename))
                conn_client_cell_list, conn_client_time_list = getCellList(os.path.join(webclient_path, conn_client_filename), 'client')
                conn_hs_cell_list, conn_hs_time_list = getCellList(os.path.join(webhs_path, conn_hs_filename), 'hs')
                website_dic['client']['cell'].append(conn_client_cell_list)
                website_dic['hs']['cell'].append(conn_hs_cell_list)
                website_dic['client']['time'].append(conn_client_time_list)
                website_dic['hs']['time'].append(conn_hs_time_list)

            # Report the number of paired connections; len() of the inner
            # dict itself would always be 2 (its 'cell' and 'time' keys).
            log_file.write('website is {} and size of client is {} and size of hs is {}\n'.format(website, len(website_dic['client']['cell']), len(website_dic['hs']['cell'])))
            total_dic[website] = website_dic

        np.savez_compressed(os.path.join(save_path, 'results_relayx.npz'), data=total_dic)
    
def main():
    """Run process() on ./client and ./hs, writing the results to ./ ."""
    process('./client', './hs', './')

def main_test():
    """Run process() on the hard-coded test_616 data set directories."""
    process(
        r'G:\桌面\selfrun1_data1\test_616\client_rp',
        r'G:\桌面\selfrun1_data1\test_616\hs_rp',
        r'G:\桌面\selfrun1_data1\test_616',
    )
    
# Script entry point: run the default pipeline only when executed directly.
if __name__ == "__main__":
    main()