diff options
| author | yifei cheng <[email protected]> | 2023-06-26 12:33:22 +0000 |
|---|---|---|
| committer | yifei cheng <[email protected]> | 2023-06-26 12:33:22 +0000 |
| commit | 74793c2daba349aef2e07895379fa0f134f068a2 (patch) | |
| tree | ba402fe8c481ba894a6b7652eb7e7bf565d97649 | |
| parent | f05eb1f44155c25b12297946321f8789fe22f218 (diff) | |
Upload New File
| -rw-r--r-- | evaluate/GiniIndex.py | 54 |
1 files changed, 54 insertions, 0 deletions
"""Gini-importance evaluation of flow features (evaluate/GiniIndex.py).

For each traffic class a random forest is fitted on that class's samples and
the forest's ``feature_importances_`` vector is stored, keyed by class name,
in a JSON file.
"""
import json
import os

import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

# Default locations used by the original experiment setup.
CICIDS_DATA_DIR = "/home/sunhanwu/datasets/cicids2017/npy"
MTA_DATA_DIR = "/home/sunhanwu/datasets/MTA/cicflownpy"
CICIDS_RESULT_PATH = '/home/sunhanwu/work2021/TrafficAdversarial/experiment/src/result/CICIDS2017_GI.json'
MTA_RESULT_PATH = '/home/sunhanwu/work2021/TrafficAdversarial/experiment/src/result/MTA_GI.json'

# Class names double as the ``.npy`` file stems and as the JSON keys.
CICIDS_CLASSES = ('Botnet', 'BruteForce', 'DDoS', 'Fuzzing', 'PortScan')
MTA_CLASSES = ('Dridex', 'Gozi', 'Quakbot', 'Tofsee', 'TrickBot')


def CalcGiniIndex(X, y, n_estimators=10000, random_state=None):
    """Fit a random forest on ``(X, y)`` and return its feature importances.

    Args:
        X: Feature matrix passed unchanged to ``RandomForestClassifier.fit``.
        y: Target labels passed unchanged to ``RandomForestClassifier.fit``.
        n_estimators: Number of trees in the forest (10000 by default, the
            value the script has always used).
        random_state: Optional seed forwarded to the forest; ``None`` keeps
            the previous unseeded behaviour.

    Returns:
        ``forest.feature_importances_`` converted to a plain ``list`` so it
        can be serialised with ``json``.
    """
    forest = RandomForestClassifier(
        n_estimators=n_estimators,
        n_jobs=-1,  # use every available core
        random_state=random_state,
    )
    forest.fit(X, y)
    return forest.feature_importances_.tolist()


def _load_flow_npy(path):
    """Load one ``.npy`` dataset and split it into features and labels.

    The array must be indexable as ``data[:, k]``; the last column is taken
    as the label and every other column as a feature.

    Returns:
        ``(X, y)`` where ``X`` is ``float32`` with non-finite values replaced
        and ``y`` is the last column cast to ``int``.
    """
    # NOTE(security): allow_pickle=True lets numpy unpickle object arrays,
    # which can execute arbitrary code. Only load files from a trusted source.
    data = np.load(path, allow_pickle=True)
    X = data[:, :-1].astype(np.float32)
    y = data[:, -1].astype(int)
    # NaN -> 0, +inf -> float32 max, -inf -> float32 min. The earlier manual
    # masking left -inf untouched, which the forest's input validation
    # rejects, and its ``max - 1`` was a no-op at float32 precision.
    np.nan_to_num(X, copy=False)
    return X, y


def loadCICIDS(name, data_dir=CICIDS_DATA_DIR):
    """Load the CICIDS2017 array ``<data_dir>/<name>.npy`` as ``(X, y)``."""
    return _load_flow_npy(os.path.join(data_dir, "{}.npy".format(name)))


def loadMTA(name, data_dir=MTA_DATA_DIR):
    """Load the MTA array ``<data_dir>/<name>.npy`` as ``(X, y)``."""
    return _load_flow_npy(os.path.join(data_dir, "{}.npy".format(name)))


def _calc_and_dump(class_names, loader, out_path):
    """Compute importances for every class and write them to ``out_path``.

    Args:
        class_names: Iterable of class names handed one by one to ``loader``.
        loader: Callable mapping a class name to an ``(X, y)`` pair.
        out_path: Destination JSON file; overwritten if it exists.

    Returns:
        The ``{class name: importance list}`` dict that was written.
    """
    importances = {}
    for name in class_names:
        print("calc gini index of {}".format(name))
        X, y = loader(name)
        importances[name] = CalcGiniIndex(X, y)
        print("calc gini index of {} done.".format(name))
    # The file is written only after every class has finished.
    with open(out_path, 'w') as f:
        json.dump(importances, f)
    return importances


def CalcCICIDS2017(out_path=CICIDS_RESULT_PATH):
    """Compute and save importances for the CICIDS2017 classes."""
    return _calc_and_dump(CICIDS_CLASSES, loadCICIDS, out_path)


def CalcMTA(out_path=MTA_RESULT_PATH):
    """Compute and save importances for the MTA classes."""
    return _calc_and_dump(MTA_CLASSES, loadMTA, out_path)


if __name__ == '__main__':
    CalcMTA()
\ No newline at end of file |
