summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: yifei cheng <[email protected]> 2023-06-26 12:33:22 +0000
committer: yifei cheng <[email protected]> 2023-06-26 12:33:22 +0000
commit: 74793c2daba349aef2e07895379fa0f134f068a2 (patch)
tree: ba402fe8c481ba894a6b7652eb7e7bf565d97649
parent: f05eb1f44155c25b12297946321f8789fe22f218 (diff)
Upload New File
-rw-r--r-- evaluate/GiniIndex.py 54
1 files changed, 54 insertions, 0 deletions
diff --git a/evaluate/GiniIndex.py b/evaluate/GiniIndex.py
new file mode 100644
index 0000000..01238d6
--- /dev/null
+++ b/evaluate/GiniIndex.py
@@ -0,0 +1,54 @@
+import numpy as np
+import pandas as pd
+import json
+
+from sklearn.model_selection import train_test_split
+from sklearn.ensemble import RandomForestClassifier
+
def CalcGiniIndex(X, y, n_estimators=10000, random_state=None, n_jobs=-1):
    """Return per-feature importances from a random forest fitted on (X, y).

    Args:
        X: Feature matrix, passed unchanged to ``RandomForestClassifier.fit``.
        y: Class labels, one per row of ``X``.
        n_estimators: Number of trees in the forest. The default (10000)
            matches the value that used to be hard-coded here.
        random_state: Seed forwarded to the forest. ``None`` keeps the
            previous unseeded behaviour; pass an int to make the returned
            importances reproducible between runs.
        n_jobs: Number of parallel workers forwarded to the forest. The
            default (-1) matches the value that used to be hard-coded.

    Returns:
        ``forest.feature_importances_`` converted to a plain Python list,
        so the result can be serialized with ``json``.
    """
    forest = RandomForestClassifier(
        n_estimators=n_estimators,
        random_state=random_state,
        n_jobs=n_jobs,
    )
    forest.fit(X, y)
    return forest.feature_importances_.tolist()
+
def _load_flow_npy(path):
    """Load one ``.npy`` table and split it into sanitized features and labels.

    Every column except the last is treated as a feature and cast to
    float32; the last column is cast to ``int`` and returned as the labels.
    NaN features become 0, and features that are infinite or outside the
    float32 range are clamped to the largest finite float32 magnitude of the
    matching sign.
    """
    # NOTE(review): allow_pickle=True will unpickle arbitrary objects, so
    # this must only be pointed at trusted dataset files.
    data = np.load(path, allow_pickle=True)
    f32 = np.finfo(np.float32)
    # Out-of-range values overflow to +/-inf during the cast; the overflow
    # warning is silenced because those entries are clamped right below.
    with np.errstate(over="ignore"):
        X = data[:, :-1].astype(np.float32)
    y = data[:, -1].astype(int)
    # astype() returned a fresh array, so sanitizing in place is safe.
    # Clamping both signs matters: the old ``X >= max`` test left -inf behind.
    np.nan_to_num(X, copy=False, nan=0.0, posinf=f32.max, neginf=f32.min)
    return X, y


def loadCICIDS(name, root="/home/sunhanwu/datasets/cicids2017/npy"):
    """Load the CICIDS2017 table ``<root>/<name>.npy``.

    Args:
        name: File stem of the table to load (without ``.npy``).
        root: Directory holding the tables. Defaults to the location that
            used to be hard-coded.

    Returns:
        ``(X, y)``: float32 feature matrix with NaN/inf removed, and the
        integer label vector taken from the last column.
    """
    return _load_flow_npy("{}/{}.npy".format(root, name))


def loadMTA(name, root="/home/sunhanwu/datasets/MTA/cicflownpy"):
    """Load the MTA table ``<root>/<name>.npy``.

    Args:
        name: File stem of the table to load (without ``.npy``).
        root: Directory holding the tables. Defaults to the location that
            used to be hard-coded.

    Returns:
        ``(X, y)``: float32 feature matrix with NaN/inf removed, and the
        integer label vector taken from the last column.
    """
    return _load_flow_npy("{}/{}.npy".format(root, name))
+
def CalcCICIDS2017(
    classes=('Botnet', 'BruteForce', 'DDoS', 'Fuzzing', 'PortScan'),
    output_path='/home/sunhanwu/work2021/TrafficAdversarial/experiment/src/result/CICIDS2017_GI.json',
):
    """Compute feature importances for each CICIDS2017 class and save them.

    For every name in ``classes`` the matching table is loaded with
    ``loadCICIDS``, importances are computed with ``CalcGiniIndex``, and the
    resulting ``{class name: [importance, ...]}`` mapping is written to
    ``output_path`` as JSON.

    Args:
        classes: Names of the tables to process. Defaults to the five
            classes that used to be hard-coded.
        output_path: Destination JSON file. Defaults to the path that used
            to be hard-coded.
    """
    CICIDS2017_Gini = {}
    for name in classes:
        print("calc gini index of {}".format(name))
        X, y = loadCICIDS(name)
        CICIDS2017_Gini[name] = CalcGiniIndex(X, y)
        print("calc gini index of {} done.".format(name))
    # The file is written only after every class has finished, so a failure
    # part-way through leaves no partial result on disk.
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(CICIDS2017_Gini, f)
+
def CalcMTA(
    classes=('Dridex', 'Gozi', 'Quakbot', 'Tofsee', 'TrickBot'),
    output_path='/home/sunhanwu/work2021/TrafficAdversarial/experiment/src/result/MTA_GI.json',
):
    """Compute feature importances for each MTA class and save them.

    For every name in ``classes`` the matching table is loaded with
    ``loadMTA``, importances are computed with ``CalcGiniIndex``, and the
    resulting ``{class name: [importance, ...]}`` mapping is written to
    ``output_path`` as JSON.

    Args:
        classes: Names of the tables to process. Defaults to the five
            classes that used to be hard-coded.
        output_path: Destination JSON file. Defaults to the path that used
            to be hard-coded.
    """
    MTA_Gini = {}
    for name in classes:
        print("calc gini index of {}".format(name))
        X, y = loadMTA(name)
        MTA_Gini[name] = CalcGiniIndex(X, y)
        print("calc gini index of {} done.".format(name))
    # The file is written only after every class has finished, so a failure
    # part-way through leaves no partial result on disk.
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(MTA_Gini, f)
+
# Script entry point: running this file directly computes only the MTA
# importances; CalcCICIDS2017 is not invoked from here.
if __name__ == "__main__":
    CalcMTA()