diff options
Diffstat (limited to 'cicflow.py')
| -rw-r--r-- | cicflow.py | 126 |
1 files changed, 126 insertions, 0 deletions
diff --git a/cicflow.py b/cicflow.py new file mode 100644 index 0000000..ae1bdea --- /dev/null +++ b/cicflow.py @@ -0,0 +1,126 @@ +import csv
+import os
+import pandas as pd
+import _pickle as pkl
+import numpy as np
+
+
def merge_csv(input_dir="C:\\Users\\JiaTing\\Desktop\\CICFlowMeter-master\\result\\myData\\OW\\web",
              save_filename="./result/ow_doh_features.csv", truncated_num=5, label=0):
    """Merge every ``.csv`` file in *input_dir* into one labelled feature table.

    All CSV files are concatenated, missing values are replaced with ``1e10``,
    and the last column of every row is dropped.  A row is kept only when:

    * column 5 equals ``6`` (presumably the protocol number for TCP in the
      CICFlowMeter export -- TODO confirm against the CSV header), and
    * with ``features = row[7:]``, ``features[1] + features[2]`` is at least
      *truncated_num* and neither value is below ``1e-5`` (presumably the
      forward/backward packet counts -- TODO confirm).

    For each kept row a fixed selection of 28 feature columns is taken,
    string values are converted to ``float`` and non-finite values are
    replaced with ``1e7``.

    The result, a DataFrame with the columns ``features`` (a list per row) and
    ``label``, is written to *save_filename* as CSV and, next to it, as a
    pickle with the same base name and a ``.pkl`` extension.

    Args:
        input_dir: Directory that is scanned (non-recursively) for CSV files.
        save_filename: Path of the CSV file to write.
        truncated_num: Minimum value of ``features[1] + features[2]`` for a
            row to be kept.
        label: Value stored in the ``label`` column of every kept row.

    Raises:
        ValueError: If *input_dir* contains no ``.csv`` file.
    """
    # Sorted so that the row order of the output does not depend on the
    # arbitrary order in which the OS lists the directory.
    frames = [
        pd.read_csv(os.path.join(input_dir, filename))
        for filename in sorted(os.listdir(input_dir))
        if filename.endswith(".csv")
    ]
    if not frames:
        raise ValueError("no .csv files found in {!r}".format(input_dir))
    merged = pd.concat(frames).fillna(1e10)

    feature_rows = []
    for row in merged.iloc[:, :-1].values.tolist():
        if row[5] != 6:
            continue

        raw = row[7:]
        # The first three selected columns are raw[0:3], so features[1] and
        # features[2] below are the same values as raw[1] and raw[2].
        features = raw[0:3] + raw[5:13] + raw[37:41] + raw[15:23] + raw[24:28] + raw[50:51]
        if features[1] + features[2] < truncated_num or features[1] < 1e-5 or features[2] < 1e-5:
            continue

        for i, feature in enumerate(features):
            if isinstance(feature, str):
                feature = float(feature)
                features[i] = feature
            # ``not isfinite`` already covers both NaN and +/-inf.
            if not np.isfinite(feature):
                print(np.isnan(feature), np.isinf(feature), not np.isfinite(feature), feature)
                print(features)
                features[i] = 1e7
        feature_rows.append(features)

    # Build the table once instead of growing it row by row.
    save_df = pd.DataFrame(
        {"features": feature_rows, "label": [label] * len(feature_rows)},
        columns=["features", "label"],
    )
    save_df.to_csv(save_filename)

    # Swap only the file extension; a plain str.replace("csv", "pkl") would
    # also rewrite any "csv" that appears in a directory or base name.
    pkl_name = os.path.splitext(save_filename)[0] + ".pkl"
    with open(pkl_name, "wb") as f_pkl:
        pkl.dump(save_df, f_pkl)
+
+
def merge_all_pkl(result_dir="./result"):
    """Concatenate the per-category feature pickles into one dataset.

    Loads ``cw_<category>_features.pkl`` from *result_dir* for each of the
    categories doh, web, chat, email, streaming, file and voip (in that
    order), concatenates the loaded DataFrames and writes the result to
    ``all_features.csv`` and ``all_features.pkl`` in the same directory.

    The ``label`` values are taken as stored in the pickles; nothing is
    relabelled here.  The original row indices of the individual frames are
    kept, so index values repeat in the merged table.

    NOTE(review): the ``__main__`` block of this file writes
    ``<category>_features.*`` without the ``cw_`` prefix -- confirm where the
    ``cw_`` files come from.

    Args:
        result_dir: Directory holding the input pickles and receiving the
            merged output.  Defaults to ``./result``.
    """
    categories = ("doh", "web", "chat", "email", "streaming", "file", "voip")

    frames = []
    for category in categories:
        path = os.path.join(result_dir, "cw_{}_features.pkl".format(category))
        # NOTE: unpickling executes arbitrary code; only load files that were
        # produced locally by this pipeline.
        with open(path, "rb") as f_pkl:
            frames.append(pkl.load(f_pkl))

    df = pd.concat(frames)
    save_filename = os.path.join(result_dir, "all_features.csv")
    df.to_csv(save_filename)

    # Swap only the extension so a "csv" elsewhere in the path is untouched.
    pkl_name = os.path.splitext(save_filename)[0] + ".pkl"
    with open(pkl_name, "wb") as f_pkl:
        pkl.dump(df, f_pkl)
+
+
if __name__ == '__main__':
    # (input directory, output CSV) pairs; the position of a pair in the
    # list is used as the label passed to merge_csv for that directory.
    input_and_output_tuple = [
        ("C:\\Users\\JiaTing\\Desktop\\CICFlowMeter-master\\result\\myData\\doh", "./result/doh_features.csv"),
        ("C:\\Users\\JiaTing\\Desktop\\CICFlowMeter-master\\result\\myData\\web", "./result/web_features.csv"),
        ("C:\\Users\\JiaTing\\Desktop\\CICFlowMeter-master\\result\\ISCX\\File", "./result/file_features.csv"),
        ("C:\\Users\\JiaTing\\Desktop\\CICFlowMeter-master\\result\\ISCX\\Email", "./result/email_features.csv"),
        ("C:\\Users\\JiaTing\\Desktop\\CICFlowMeter-master\\result\\ISCX\\VoIP", "./result/voip_features.csv"),
        ("C:\\Users\\JiaTing\\Desktop\\CICFlowMeter-master\\result\\ISCX\\Chat", "./result/chat_features.csv"),
    ]

    truncated_num = 5
    for label, (input_dir, save_filename) in enumerate(input_and_output_tuple):
        print(input_dir)
        print(save_filename)
        merge_csv(input_dir, save_filename, truncated_num, label=label)
    # merge_all_pkl()
|
