diff options
| author | yifei cheng <[email protected]> | 2023-06-26 12:30:00 +0000 |
|---|---|---|
| committer | yifei cheng <[email protected]> | 2023-06-26 12:30:00 +0000 |
| commit | 21c242b31fb855f2f96b851219d38c4d8c1c64fb (patch) | |
| tree | 08e4edde4b5dfbc58e9164dbd95aea71fff065fa | |
| parent | 13dd6c5e5231a23ba590ad81c27d39928563ca64 (diff) | |
Upload New File
| -rw-r--r-- | substitudeModel/DNN.py | 399 |
1 file changed, 399 insertions, 0 deletions
diff --git a/substitudeModel/DNN.py b/substitudeModel/DNN.py
new file mode 100644
index 0000000..a1d47d7
--- /dev/null
+++ b/substitudeModel/DNN.py
@@ -0,0 +1,399 @@
+"""
+Date: 2022-03-07
+Author: [email protected]
+Desc: DNN-based substitute model for FSNet
+"""
+
+import torch
+import ipdb
+from torch import nn
+from TargetModel.FSNet.dataset import C2Data
+from utils.CICIDSData import CICIDS, dataconfig
+from attack.collectionDataset import CollectionDataset
+from torch.utils.data import DataLoader
+from TargetModel.FSNet.train import computeFPR
+from TargetModel.FSNet.utils import save_model
+from sklearn.metrics import confusion_matrix
+import numpy as np
+from tqdm import tqdm
+import torch.nn.functional as F
+
+
+class DNN(nn.Module):
+    """
+    DNN-based substitute model: a four-layer MLP with ReLU activations
+    and dropout that outputs per-class logits.
+    """
+    def __init__(self, param):
+        """
+        :param param: dict with keys 'input_size' (feature dimension)
+                      and 'num_class' (number of output classes)
+        """
+        super(DNN, self).__init__()
+        self.input_size = param['input_size']
+        self.num_class = param['num_class']
+
+        self.linear1 = nn.Linear(self.input_size, 2048)
+        self.linear2 = nn.Linear(2048, 1024)
+        self.linear3 = nn.Linear(1024, 512)
+        self.linear4 = nn.Linear(512, self.num_class)
+        self.dropout1 = nn.Dropout(0.2)
+        self.dropout2 = nn.Dropout(0.2)
+
+    def forward(self, inputs):
+        """
+        :param inputs: tensor of shape (batch_size, input_size)
+        :return: logits of shape (batch_size, num_class)
+        """
+        inputs = inputs.float()
+        inputs = F.relu(self.linear1(inputs))
+        inputs = self.dropout1(inputs)
+        inputs = F.relu(self.linear2(inputs))
+        inputs = self.dropout2(inputs)
+        inputs = F.relu(self.linear3(inputs))
+        inputs = self.linear4(inputs)
+        # inputs.shape = (batch_size, num_class)
+        return inputs
+
+
+def train(name):
+    # hyper params
+    epoch_size = 20
+    batch_size = 128
+    lr = 1e-4
+
+    # model params
+    param = {
+        "input_size": 77,
+        "num_class": 2
+    }
+
+    cicids = CICIDS(name)
+    sample_size = len(cicids)
+    # botname = "Gozi"
+    botname = name
+    # normal = "CTUNone"
+    arch = "dnn"
+
+    # 20% test; 80% of the remainder (64% of the total) train; the rest valid
+    total_size = sample_size
+    test_size = int(total_size * 0.2)
+    train_size = int((total_size - test_size) * 0.8)
+    valid_size = total_size - test_size - train_size
+    print("train data: {}".format(train_size))
+    print("valid data: {}".format(valid_size))
+    print("test data: {}".format(test_size))
+
+    # use GPU if it is available, otherwise use CPU
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+    # prepare the dataloaders
+    # c2data = C2Data(botname)
+    # c2data = CollectionDataset('../adversarialData/collectionData.npy')
+    # train_valid_data, test_data = torch.utils.data.random_split(c2data, [200, 200])
+    # train_data, valid_data = torch.utils.data.random_split(train_valid_data, [100, 100])
+    train_valid_data, test_data = torch.utils.data.random_split(cicids, [train_size + valid_size, test_size])
+    train_data, valid_data = torch.utils.data.random_split(train_valid_data, [train_size, valid_size])
+    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True, drop_last=False)
+    valid_loader = DataLoader(valid_data, batch_size=batch_size, shuffle=True, drop_last=False)
+    test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=True, drop_last=False)
+
+    # model
+    dnn = DNN(param)
+    dnn.to(device)
+
+    # loss func and optimizer
+    crossEntropy = torch.nn.CrossEntropyLoss()
+    adam = torch.optim.Adam(dnn.parameters(), lr=lr)
+    # lossFunc = torch.nn.KLDivLoss()
+
+    # training
+    for i in range(epoch_size):
+        dnn.train()
+        loss_list = []
+        acc_list = []
+        recall_list = []
+        f1_list = []
+        for batch_x, batch_y in tqdm(train_loader):
+            batch_x = batch_x.to(device, dtype=torch.float)
+            batch_y = batch_y.to(device)
+            output = dnn(batch_x)
+            # output.shape = (batch_size, num_class)
+            acc, recall, f1 = computeFPR(y_pred=output, y_target=batch_y)
+            # ipdb.set_trace()
+            batch_y = batch_y.squeeze()
+            # batch_y = F.softmax(batch_y)
+            # output = F.softmax(output)
+            loss = crossEntropy(output, batch_y)
+
+            acc_list.append(acc)
+            recall_list.append(recall)
+            f1_list.append(f1)
+            loss_list.append(loss.item())
+
+            adam.zero_grad()
+            loss.backward()
+            adam.step()
+        print("[Training {:03d}] acc: {:.2%}, recall: {:.2%}, f1: {:.2%}, loss: {:.2f}".format(
+            i + 1, np.mean(acc_list), np.mean(recall_list), np.mean(f1_list), np.mean(loss_list)))
+
+        # validation
+        dnn.eval()
+        loss_list = []
+        acc_list = []
+        recall_list = []
+        f1_list = []
+        with torch.no_grad():  # no gradients needed during evaluation
+            for batch_x, batch_y in valid_loader:
+                batch_x = batch_x.to(device, dtype=torch.float)
+                batch_y = batch_y.to(device)
+                output = dnn(batch_x)
+                acc, recall, f1 = computeFPR(y_pred=output, y_target=batch_y)
+                batch_y = batch_y.squeeze()
+                loss = crossEntropy(output, batch_y)
+
+                acc_list.append(acc)
+                recall_list.append(recall)
+                f1_list.append(f1)
+                loss_list.append(loss.item())
+        print("[Validation {:03d}] acc: {:.2%}, recall: {:.2%}, f1: {:.2%}, loss: {:.2f}".format(
+            i + 1, np.mean(acc_list), np.mean(recall_list), np.mean(f1_list), np.mean(loss_list)))
+
+    # testing
+    dnn.eval()
+    loss_list = []
+    acc_list = []
+    recall_list = []
+    f1_list = []
+    y_true = []
+    y_pred = []
+    with torch.no_grad():
+        for batch_x, batch_y in test_loader:
+            batch_x = batch_x.to(device, dtype=torch.float)
+            batch_y = batch_y.to(device)
+            output = dnn(batch_x)
+            acc, recall, f1 = computeFPR(y_pred=output, y_target=batch_y)
+            batch_y = batch_y.squeeze()
+            loss = crossEntropy(output, batch_y)
+
+            acc_list.append(acc)
+            recall_list.append(recall)
+            f1_list.append(f1)
+            loss_list.append(loss.item())
+            y_true += batch_y.detach().cpu().numpy().tolist()
+            y_pred += torch.argmax(output, dim=1).detach().cpu().numpy().tolist()
+    print("[Testing {:03d}] acc: {:.2%}, recall: {:.2%}, f1: {:.2%}, loss: {:.2f}".format(
+        i + 1, np.mean(acc_list), np.mean(recall_list), np.mean(f1_list), np.mean(loss_list)))
+    print(confusion_matrix(y_true, y_pred))
+    FPR = {
+        'acc': np.mean(acc_list),
+        'recall': np.mean(recall_list),
+        'f1': np.mean(f1_list),
+        'metrix': confusion_matrix(y_true, y_pred)
+    }
+    hyper = {
+        'epoch_size': epoch_size,
+        'lr': lr,
+        'batch_size': batch_size
+    }
+    filename = "../modelFile/proxy_mta_length_{}_{}.pkt".format(arch, botname)
+    save_model(dnn, adam, param, hyper, FPR, filename)
+
+
+def trainMTa(botname, arch, sample_size, feature_type='length'):
+    # hyper params
+    epoch_size = 20
+    batch_size = 128
+    lr = 1e-4
+
+    # model params
+    param = {
+        "input_size": 80,
+        "num_class": 2
+    }
+
+    c2data = C2Data(botname, number=sample_size, sequenceLen=80, feature_type=feature_type)
+    sample_size = len(c2data)  # the dataset may yield fewer samples than requested
+
+    # same 64/16/20 train/valid/test split as train()
+    total_size = sample_size
+    test_size = int(total_size * 0.2)
+    train_size = int((total_size - test_size) * 0.8)
+    valid_size = total_size - test_size - train_size
+    print("train data: {}".format(train_size))
+    print("valid data: {}".format(valid_size))
+    print("test data: {}".format(test_size))
+
+    # use GPU if it is available, otherwise use CPU
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+    # prepare the dataloaders
+    train_valid_data, test_data = torch.utils.data.random_split(c2data, [train_size + valid_size, test_size])
+    train_data, valid_data = torch.utils.data.random_split(train_valid_data, [train_size, valid_size])
+    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True, drop_last=False)
+    valid_loader = DataLoader(valid_data, batch_size=batch_size, shuffle=True, drop_last=False)
+    test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=True, drop_last=False)
+
+    # model
+    dnn = DNN(param)
+    dnn.to(device)
+
+    # loss func and optimizer
+    crossEntropy = torch.nn.CrossEntropyLoss()
+    adam = torch.optim.Adam(dnn.parameters(), lr=lr)
+
+    # training
+    for i in range(epoch_size):
+        dnn.train()
+        loss_list = []
+        acc_list = []
+        recall_list = []
+        f1_list = []
+        for batch_x, batch_y in tqdm(train_loader):
+            batch_x = batch_x.to(device, dtype=torch.float)
+            batch_y = batch_y.to(device)
+            output = dnn(batch_x)
+            # output.shape = (batch_size, num_class)
+            acc, recall, f1 = computeFPR(y_pred=output, y_target=batch_y)
+            # ipdb.set_trace()
+            batch_y = batch_y.squeeze()
+            loss = crossEntropy(output, batch_y)
+
+            acc_list.append(acc)
+            recall_list.append(recall)
+            f1_list.append(f1)
+            loss_list.append(loss.item())
+
+            adam.zero_grad()
+            loss.backward()
+            adam.step()
+        print("[Training {:03d}] acc: {:.2%}, recall: {:.2%}, f1: {:.2%}, loss: {:.2f}".format(
+            i + 1, np.mean(acc_list), np.mean(recall_list), np.mean(f1_list), np.mean(loss_list)))
+
+        # validation
+        dnn.eval()
+        loss_list = []
+        acc_list = []
+        recall_list = []
+        f1_list = []
+        with torch.no_grad():
+            for batch_x, batch_y in valid_loader:
+                batch_x = batch_x.to(device, dtype=torch.float)
+                batch_y = batch_y.to(device)
+                output = dnn(batch_x)
+                acc, recall, f1 = computeFPR(y_pred=output, y_target=batch_y)
+                batch_y = batch_y.squeeze()
+                loss = crossEntropy(output, batch_y)
+
+                acc_list.append(acc)
+                recall_list.append(recall)
+                f1_list.append(f1)
+                loss_list.append(loss.item())
+        print("[Validation {:03d}] acc: {:.2%}, recall: {:.2%}, f1: {:.2%}, loss: {:.2f}".format(
+            i + 1, np.mean(acc_list), np.mean(recall_list), np.mean(f1_list), np.mean(loss_list)))
+
+    # testing
+    dnn.eval()
+    loss_list = []
+    acc_list = []
+    recall_list = []
+    f1_list = []
+    y_true = []
+    y_pred = []
+    with torch.no_grad():
+        for batch_x, batch_y in test_loader:
+            batch_x = batch_x.to(device, dtype=torch.float)
+            batch_y = batch_y.to(device)
+            output = dnn(batch_x)
+            acc, recall, f1 = computeFPR(y_pred=output, y_target=batch_y)
+            batch_y = batch_y.squeeze()
+            loss = crossEntropy(output, batch_y)
+
+            acc_list.append(acc)
+            recall_list.append(recall)
+            f1_list.append(f1)
+            loss_list.append(loss.item())
+            y_true += batch_y.detach().cpu().numpy().tolist()
+            y_pred += torch.argmax(output, dim=1).detach().cpu().numpy().tolist()
+    print("[Testing {:03d}] acc: {:.2%}, recall: {:.2%}, f1: {:.2%}, loss: {:.2f}".format(
+        i + 1, np.mean(acc_list), np.mean(recall_list), np.mean(f1_list), np.mean(loss_list)))
+    print(confusion_matrix(y_true, y_pred))
+    # FPR = {
+    #     'acc': np.mean(acc_list),
+    #     'recall': np.mean(recall_list),
+    #     'f1': np.mean(f1_list),
+    #     'metrix': confusion_matrix(y_true, y_pred)
+    # }
+    # hyper = {
+    #     'epoch_size': epoch_size,
+    #     'lr': lr,
+    #     'batch_size': batch_size
+    # }
+    # filename = "../modelFile/proxy_mta_{}_{}_{}.pkt".format(feature_type, arch, botname)
+    # save_model(dnn, adam, param, hyper, FPR, filename)
+    return dnn
+
+
+if __name__ == '__main__':
+    malwares = [
+        "Botnet",
+        "Fuzzing",
+        "PortScan",
+        "BruteForce",
+        "DDoS"
+    ]
+    Botnets = [
+        "Tofsee",
+        "Dridex",
+        "Quakbot",
+        "TrickBot",
+        "Gozi"
+    ]
+    numbers = [20000, 8000, 700, 650, 580]
+    for i in range(5):
+        print(Botnets[i])
+        trainMTa(Botnets[i], arch='dnn', sample_size=numbers[i], feature_type='length')
+        break  # only the first botnet is trained here
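A quick sanity check of the uploaded class (a minimal sketch: the 77-feature input size mirrors the param dict in train(), the random tensor merely stands in for a CICIDS feature batch, and the import assumes the repository's sibling packages such as TargetModel, utils, and attack are importable, since DNN.py pulls them in at module level):

import torch
from substitudeModel.DNN import DNN   # module path as uploaded in this commit

param = {"input_size": 77, "num_class": 2}
model = DNN(param)
model.eval()                          # disable dropout for a deterministic check
with torch.no_grad():
    dummy = torch.randn(4, param["input_size"])   # fake batch of 4 flow-feature vectors
    logits = model(dummy)
assert logits.shape == (4, param["num_class"])    # one row of class logits per sample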
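For reference, train() and trainMTa() share the same split arithmetic: 20% of the samples are held out for testing, 80% of the remainder (64% of the total) goes to training, and the leftover 16% to validation. Taking the Tofsee setting from __main__ as a worked example (and assuming C2Data actually yields the requested 20000 samples): test_size = int(20000 * 0.2) = 4000, train_size = int(16000 * 0.8) = 12800, and valid_size = 20000 - 4000 - 12800 = 3200.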
