summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authormeiqi wang <[email protected]>2024-06-28 06:11:16 +0000
committermeiqi wang <[email protected]>2024-06-28 06:11:16 +0000
commit4ffed7dc2906789a7169466519c3758e472ea72c (patch)
tree7333ef09db55086421d80f3b2a2976d597bd61c9
parentc244f2489c5c004d4a4ea83346e4709be4af7c9f (diff)
上传新文件
-rw-r--r--connection_dataset_new.py120
1 files changed, 120 insertions, 0 deletions
diff --git a/connection_dataset_new.py b/connection_dataset_new.py
new file mode 100644
index 0000000..031f334
--- /dev/null
+++ b/connection_dataset_new.py
@@ -0,0 +1,120 @@
+import os
+import numpy as np
+def getHSFirstTime(path):
+    """Return the first field of the first ``rly_data``/``in`` line in a log.
+
+    The file is scanned top to bottom for the first line that contains both
+    the substrings ``'rly_data'`` and ``'in'``. The text before that line's
+    first tab character is returned unchanged (as a string).
+
+    NOTE(review): the ``'in'`` test is a plain substring match, so any line
+    that merely contains the letters "in" anywhere qualifies - confirm this
+    is acceptable for the log format in use.
+
+    Args:
+        path: Path of the log file to scan.
+
+    Returns:
+        The first tab-separated field of the first matching line, or ``None``
+        when no line matches.
+    """
+    with open(path, 'r') as file:
+        # Iterate lazily: only the first match matters, so the remainder of a
+        # potentially large log never has to be read into memory.
+        for line in file:
+            if 'rly_data' in line and 'in' in line:
+                return line.split('\t')[0]
+    return None
+
+def getClientFirstTime(path):
+    """Return the second field of the first ``rly_data``/``out`` line in a log.
+
+    The file is scanned top to bottom for the first line that contains both
+    the substrings ``'rly_data'`` and ``'out'``. The second tab-separated
+    field of that line is returned unchanged (as a string).
+
+    NOTE(review): the ``'out'`` test is a plain substring match anywhere in
+    the line - confirm this is acceptable for the log format in use.
+
+    Args:
+        path: Path of the log file to scan.
+
+    Returns:
+        The second tab-separated field of the first matching line, or
+        ``None`` when no line matches.
+
+    Raises:
+        IndexError: If the first matching line contains no tab character.
+    """
+    with open(path, 'r') as file:
+        # Iterate lazily: only the first match matters, so the remainder of a
+        # potentially large log never has to be read into memory.
+        for line in file:
+            if 'rly_data' in line and 'out' in line:
+                return line.split('\t')[1]
+    return None
+
+def getClientFirstTimeDic(path):
+    """Map first-``out`` timestamps to client log filenames for a directory.
+
+    Only files whose trace name (the part of the filename before the first
+    ``'-'``) is used by exactly one file in the directory are considered.
+    For each such file the value returned by ``getClientFirstTime`` becomes
+    the key and the filename the value; files without a match are omitted.
+
+    Trace names are compared for equality. A substring comparison would make
+    unrelated files collide (trace ``1`` versus a file named ``10-...``) and
+    would cost a full directory scan per file.
+
+    NOTE(review): two files sharing the same first timestamp overwrite each
+    other in the result; only the last one listed is kept.
+
+    Args:
+        path: Directory containing the client log files.
+
+    Returns:
+        dict mapping timestamp string -> filename.
+    """
+    from collections import Counter
+
+    filelist = os.listdir(path)
+    # One pass to count how many files belong to each trace name.
+    files_per_trace = Counter(name.split('-')[0] for name in filelist)
+    firstTimeList = {}
+    for filename in filelist:
+        if files_per_trace[filename.split('-')[0]] != 1:
+            continue
+        timestamp = getClientFirstTime(os.path.join(path, filename))
+        if timestamp is not None:
+            firstTimeList[timestamp] = filename
+    return firstTimeList
+
+def getHSFirstTimeDic(path):
+    """Map first-``in`` timestamps to HS log filenames for a directory.
+
+    Every entry of ``path`` is passed to ``getHSFirstTime``; entries for
+    which it returns ``None`` are left out. When two files yield the same
+    timestamp, the one listed later replaces the earlier one.
+
+    Args:
+        path: Directory containing the HS log files.
+
+    Returns:
+        dict mapping timestamp string -> filename.
+    """
+    first_times = {}
+    for entry in os.listdir(path):
+        first_seen = getHSFirstTime(os.path.join(path, entry))
+        if first_seen is None:
+            continue
+        first_times[first_seen] = entry
+    return first_times
+
+def getCellList(path, t):
+    """Extract the direction and timestamp of every relay line in a log.
+
+    Only lines containing ``'\\trelay'`` are used. A line containing ``'in'``
+    is recorded as ``-1``; otherwise a line containing ``'out'`` is recorded
+    as ``1``. Relay lines with neither marker are skipped, so a stale or
+    undefined direction is never recorded. The timestamp is tab-separated
+    field 1 for ``t == 'client'`` and field 0 for ``t == 'hs'``.
+
+    Args:
+        path: Path of the log file to parse.
+        t: Log flavour, either ``'client'`` or ``'hs'``.
+
+    Returns:
+        Tuple ``(celllist, timelist)`` of equal length: directions as ints
+        and timestamps as floats, in file order.
+
+    Raises:
+        ValueError: If ``t`` is not ``'client'`` or ``'hs'``, or if a
+            timestamp field cannot be parsed as a float.
+        IndexError: If a client relay line has no second field.
+    """
+    if t == 'client':
+        time_field = 1
+    elif t == 'hs':
+        time_field = 0
+    else:
+        # An unknown flavour would yield cells without matching timestamps.
+        raise ValueError("t must be 'client' or 'hs', got {!r}".format(t))
+    celllist = []
+    timelist = []
+    with open(path, 'r') as file:
+        for line in file:
+            if '\trelay' not in line:
+                continue
+            # 'in' is tested before 'out', as a substring of the whole line.
+            if 'in' in line:
+                cell = -1
+            elif 'out' in line:
+                cell = 1
+            else:
+                continue
+            timelist.append(float(line.split('\t')[time_field]))
+            celllist.append(cell)
+    return celllist, timelist
+
+def _match_hs_key(client_key, client_time_dic, hs_time_dic):
+    """Return the only HS key within 2 of ``client_key``, else ``None``."""
+    if not hs_time_dic:
+        return None
+    client_time = float(client_key)  # parsed once instead of per comparison
+    matched_key = None
+    is_multiple = False
+    for hs_key in hs_time_dic:
+        if abs(client_time - float(hs_key)) >= 2:
+            continue
+        if matched_key is None:
+            matched_key = hs_key
+        else:
+            # Ambiguous pairing: report every extra candidate and reject.
+            print('multiple similar first key which client file {} with key is {} and hs file {} with key is {},and hs tmp file is {} with key is {}'.format(client_time_dic[client_key], client_key, hs_time_dic[hs_key], hs_key, hs_time_dic[matched_key], matched_key))
+            is_multiple = True
+    return None if is_multiple else matched_key
+
+
+def process(client_log_path, hs_log_path, save_path):
+    """Pair client and HS logs per website and save their cell sequences.
+
+    For every entry of ``client_log_path`` that also exists under
+    ``hs_log_path``, each client log is paired with the single HS log whose
+    first timestamp differs from the client's by less than 2. Client logs
+    with no candidate, or with more than one, are dropped. The direction and
+    time lists of every accepted pair are collected per website.
+
+    Outputs written into ``save_path``:
+        * ``process_relay.log`` - one line per accepted pair, one summary
+          line per website (number of pairs on each side) and one ERROR line
+          per website missing on either side.
+        * ``results_relayx.npz`` - the collected dict under the key ``data``.
+
+    Args:
+        client_log_path: Directory with one sub-directory per website
+            holding client logs.
+        hs_log_path: Directory with matching per-website sub-directories
+            holding HS logs.
+        save_path: Directory receiving the log file and the ``.npz`` file.
+    """
+    total_dic = {}
+    # The context manager closes the log even if parsing or saving fails.
+    with open(os.path.join(save_path, 'process_relay.log'), 'w') as log_file:
+        for website in os.listdir(client_log_path):
+            webclient_path = os.path.join(client_log_path, website)
+            webhs_path = os.path.join(hs_log_path, website)
+            if not (os.path.exists(webclient_path) and os.path.exists(webhs_path)):
+                log_file.write("ERROR: client path is {} and hs path is {}\n".format(os.path.exists(webclient_path), os.path.exists(webhs_path)))
+                continue
+            website_dic = {'client': {'cell': [], 'time': []}, 'hs': {'cell': [], 'time': []}}
+            client_time_dic = getClientFirstTimeDic(webclient_path)
+            hs_time_dic = getHSFirstTimeDic(webhs_path)
+            for client_key, conn_client_filename in client_time_dic.items():
+                hs_key = _match_hs_key(client_key, client_time_dic, hs_time_dic)
+                if hs_key is None:
+                    continue
+                conn_hs_filename = hs_time_dic[hs_key]
+                log_file.write('connection relationship : client is {} , hs is {}\n'.format(conn_client_filename, conn_hs_filename))
+                conn_client_cell_list, conn_client_time_list = getCellList(os.path.join(webclient_path, conn_client_filename), 'client')
+                conn_hs_cell_list, conn_hs_time_list = getCellList(os.path.join(webhs_path, conn_hs_filename), 'hs')
+                website_dic['client']['cell'].append(conn_client_cell_list)
+                website_dic['hs']['cell'].append(conn_hs_cell_list)
+                website_dic['client']['time'].append(conn_client_time_list)
+                website_dic['hs']['time'].append(conn_hs_time_list)
+            # Report the number of paired connections; len() of the side dict
+            # itself would always be 2 (its 'cell' and 'time' keys).
+            log_file.write('website is {} and size of client is {} and size of hs is {}\n'.format(website, len(website_dic['client']['cell']), len(website_dic['hs']['cell'])))
+            total_dic[website] = website_dic
+    # NOTE(review): a dict is stored as an object array, so reading it back
+    # presumably needs np.load(..., allow_pickle=True) - confirm with readers.
+    np.savez_compressed(os.path.join(save_path, 'results_relayx.npz'), data=total_dic)
+
+def main():
+    """Run ``process`` on ``./client`` and ``./hs``, writing results to ``./``."""
+    process(
+        client_log_path='./client',
+        hs_log_path='./hs',
+        save_path='./',
+    )
+
+def main_test():
+    """Run ``process`` on a fixed local test data set (Windows paths)."""
+    base_dir = r'G:\桌面\selfrun1_data1\test_616'
+    # Inputs live in sub-directories of the directory receiving the output.
+    process(base_dir + r'\client_rp', base_dir + r'\hs_rp', base_dir)
+
+# Script entry point: run the default pipeline only when executed directly.
+if "__main__" == __name__:
+    main()