1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
|
import os
import numpy as np
def getHSFirstTime(path):
    """Return the first field of the first inbound ``rly_data`` line of a log.

    The file is scanned from the top. The first line containing both the
    substrings ``'rly_data'`` and ``'in'`` is split on tab characters and
    its first field is returned as-is (a string, not converted to float).

    Args:
        path: Path of the log file to scan (the HS-side logs, judging by
            how ``process`` calls this function).

    Returns:
        The first tab-separated field of the first matching line, or
        ``None`` when no line in the file matches.
    """
    # Iterate the file object instead of calling readlines(): only the first
    # match is needed, so reading stops as soon as it has been found.
    with open(path, 'r') as file:
        for line in file:
            # NOTE(review): these are plain substring tests, so 'in' also
            # matches inside longer words -- presumably the log format makes
            # that safe; confirm against a real log.
            if 'rly_data' in line and 'in' in line:
                return line.split('\t')[0]
    return None
def getClientFirstTime(path):
    """Return the second field of the first outbound ``rly_data`` line of a log.

    The file is scanned from the top. The first line containing both the
    substrings ``'rly_data'`` and ``'out'`` is split on tab characters and
    its second field is returned as-is (a string, not converted to float).

    Args:
        path: Path of the log file to scan (the client-side logs, judging
            by how ``getClientFirstTimeDic`` calls this function).

    Returns:
        The second tab-separated field of the first matching line, or
        ``None`` when no line in the file matches.

    Raises:
        IndexError: If the first matching line contains no tab character.
    """
    # Iterate the file object instead of calling readlines(): only the first
    # match is needed, so reading stops as soon as it has been found.
    with open(path, 'r') as file:
        for line in file:
            # NOTE(review): plain substring tests -- 'out' would also match
            # inside a longer word; presumably the log format rules that out.
            if 'rly_data' in line and 'out' in line:
                return line.split('\t')[1]
    return None
def getClientFirstTimeDic(path):
    """Build a ``{first timestamp: filename}`` map for the logs in a directory.

    A file is considered only when the part of its name before the first
    ``'-'`` occurs (as a substring) in exactly one name of the directory
    listing, i.e. only in the file itself. For each such file the timestamp
    returned by ``getClientFirstTime`` is used as the key; files for which
    that function returns ``None`` are left out.

    Args:
        path: Directory containing the client log files.

    Returns:
        Dict mapping timestamp strings to filenames. When two files yield
        the same timestamp, the one listed later overwrites the earlier.
    """
    names = os.listdir(path)
    first_times = {}
    for name in names:
        prefix = name.split('-')[0]
        # Count every listed name that contains this prefix anywhere in it.
        occurrences = sum(1 for other in names if prefix in other)
        if occurrences != 1:
            continue
        stamp = getClientFirstTime(os.path.join(path, name))
        if stamp is not None:
            first_times[stamp] = name
    return first_times
def getHSFirstTimeDic(path):
    """Build a ``{first timestamp: filename}`` map for the logs in a directory.

    Every entry of the directory listing is passed to ``getHSFirstTime``;
    entries for which it returns ``None`` are left out.

    Args:
        path: Directory containing the HS log files.

    Returns:
        Dict mapping timestamp strings to filenames. When two files yield
        the same timestamp, the one listed later overwrites the earlier.
    """
    first_times = {}
    for name in os.listdir(path):
        stamp = getHSFirstTime(os.path.join(path, name))
        if stamp is None:
            continue
        first_times[stamp] = name
    return first_times
def getCellList(path,t):
    """Extract the direction and timestamp sequences of the relay lines in a log.

    Only lines containing ``'\\trelay'`` are considered. A line containing
    the substring ``'in'`` is recorded as ``-1``; otherwise a line
    containing ``'out'`` is recorded as ``1``. Relay lines carrying neither
    marker are skipped entirely so that the two returned lists stay aligned
    (previously such a line raised ``UnboundLocalError`` when it came first,
    or silently reused the direction of the preceding line).

    Args:
        path: Path of the log file to parse.
        t: Which side the log belongs to. ``'client'`` reads the timestamp
            from the second tab-separated field, ``'hs'`` from the first.
            For any other value directions are still collected but no
            timestamps are recorded.

    Returns:
        A ``(celllist, timelist)`` tuple: the list of ``-1``/``1``
        directions and the list of timestamps converted to ``float``.

    Raises:
        ValueError: If a selected timestamp field is not a valid float.
        IndexError: If ``t == 'client'`` and a relay line has no second field.
    """
    celllist = []
    timelist = []
    with open(path, 'r') as file:
        for line in file:
            if '\trelay' not in line:
                continue
            # NOTE(review): substring tests -- 'in' is checked first, so a
            # line containing both markers counts as inbound.
            if 'in' in line:
                cell = -1
            elif 'out' in line:
                cell = 1
            else:
                # No direction marker: there is nothing meaningful to record.
                continue
            if t == 'client':
                timelist.append(float(line.split('\t')[1]))
            elif t == 'hs':
                timelist.append(float(line.split('\t')[0]))
            celllist.append(cell)
    return celllist, timelist
def process(client_log_path,hs_log_path,save_path):
    """Pair client and HS traces per website and save their cell sequences.

    For every entry of ``client_log_path`` that also exists under
    ``hs_log_path``, the first-timestamp maps of both sides are built with
    ``getClientFirstTimeDic`` / ``getHSFirstTimeDic``. A client trace is
    paired with an HS trace when exactly one HS first-timestamp lies within
    2 of the client's (presumably seconds -- confirm against the log
    format). Client traces with no candidate, or with more than one, are
    dropped; the ambiguous case is also reported on stdout.

    Side effects:
        * ``<save_path>/process_relay.log`` is (re)written with one line per
          paired connection, one summary line per website and one ``ERROR``
          line per website missing on either side.
        * ``<save_path>/results_relayx.npz`` is written with a single entry
          ``data`` holding ``{website: {'client': {'cell': [...],
          'time': [...]}, 'hs': {'cell': [...], 'time': [...]}}}``. The dict
          is stored as a pickled object array, so it has to be read back
          with ``np.load(..., allow_pickle=True)['data'].item()``.

    Args:
        client_log_path: Directory with one sub-directory per website
            holding the client logs.
        hs_log_path: Directory with the same layout holding the HS logs.
        save_path: Directory that receives the log and the ``.npz`` file.
    """
    total_dic = {}
    # The context manager closes the log even when parsing a trace raises.
    with open('{}/process_relay.log'.format(save_path), 'w+') as log_file:
        for website in os.listdir(client_log_path):
            webclient_path = os.path.join(client_log_path, website)
            webhs_path = os.path.join(hs_log_path, website)
            if not (os.path.exists(webclient_path) and os.path.exists(webhs_path)):
                log_file.write("ERROR: client path is {} and hs path is {}\n".format(os.path.exists(webclient_path), os.path.exists(webhs_path)))
                continue

            website_dic = {'client': {'cell': [], 'time': []}, 'hs': {'cell': [], 'time': []}}
            client_time_dic = getClientFirstTimeDic(webclient_path)
            hs_time_dic = getHSFirstTimeDic(webhs_path)
            for client_key, conn_client_filename in client_time_dic.items():
                matched_hs_key = None
                is_multiple = False
                for hs_key, hs_filename in hs_time_dic.items():
                    if abs(float(client_key) - float(hs_key)) < 2:
                        if matched_hs_key is None:
                            matched_hs_key = hs_key
                        else:
                            print('multiple similar first key which client file {} with key is {} and hs file {} with key is {},and hs tmp file is {} with key is {}'.format(conn_client_filename, client_key, hs_filename, hs_key, hs_time_dic[matched_hs_key], matched_hs_key))
                            is_multiple = True
                # Keep the pair only when the match is unambiguous.
                if matched_hs_key is None or is_multiple:
                    continue

                conn_hs_filename = hs_time_dic[matched_hs_key]
                log_file.write('connection relationship : client is {} , hs is {}\n'.format(conn_client_filename, conn_hs_filename))
                conn_client_cell_list, conn_client_time_list = getCellList(os.path.join(webclient_path, conn_client_filename), 'client')
                conn_hs_cell_list, conn_hs_time_list = getCellList(os.path.join(webhs_path, conn_hs_filename), 'hs')
                website_dic['client']['cell'].append(conn_client_cell_list)
                website_dic['hs']['cell'].append(conn_hs_cell_list)
                website_dic['client']['time'].append(conn_client_time_list)
                website_dic['hs']['time'].append(conn_hs_time_list)

            # Report the number of paired connections. len(website_dic['client'])
            # would always be 2, the number of keys ('cell' and 'time').
            log_file.write('website is {} and size of client is {} and size of hs is {}\n'.format(website, len(website_dic['client']['cell']), len(website_dic['hs']['cell'])))
            total_dic[website] = website_dic

        np.savez_compressed('{}/results_relayx.npz'.format(save_path), data=total_dic)
def main():
    """Run ``process`` on ``./client`` and ``./hs``, writing results to ``./``."""
    process('./client', './hs', './')
def main_test():
    """Run ``process`` on the hard-coded ``test_616`` data set.

    Client logs are read from the ``client_rp`` sub-directory, HS logs from
    ``hs_rp``, and the results are written into the data set's root.
    """
    process(
        r'G:\桌面\selfrun1_data1\test_616\client_rp',
        r'G:\桌面\selfrun1_data1\test_616\hs_rp',
        r'G:\桌面\selfrun1_data1\test_616',
    )
# Script entry point: run the default pipeline only when executed directly,
# not when this module is imported.
if __name__=="__main__":
    main()
|