author     yifei cheng <[email protected]>  2023-06-26 12:30:00 +0000
committer  yifei cheng <[email protected]>  2023-06-26 12:30:00 +0000
commit     21c242b31fb855f2f96b851219d38c4d8c1c64fb (patch)
tree       08e4edde4b5dfbc58e9164dbd95aea71fff065fa
parent     13dd6c5e5231a23ba590ad81c27d39928563ca64 (diff)
Upload New File
-rw-r--r--  substitudeModel/DNN.py  399
1 file changed, 399 insertions(+), 0 deletions(-)
diff --git a/substitudeModel/DNN.py b/substitudeModel/DNN.py
new file mode 100644
index 0000000..a1d47d7
--- /dev/null
+++ b/substitudeModel/DNN.py
@@ -0,0 +1,399 @@
+"""
+Date: 2022-03-07
+Desc: DNN-based substitute model for FSNet
+"""
+
+import torch
+import ipdb
+from torch import nn
+from TargetModel.FSNet.dataset import C2Data
+from utils.CICIDSData import CICIDS
+from attack.collectionDataset import CollectionDataset
+from utils.CICIDSData import dataconfig
+from torch.utils.data import DataLoader
+from TargetModel.FSNet.train import computeFPR
+from TargetModel.FSNet.utils import save_model
+from sklearn.metrics import confusion_matrix
+import numpy as np
+from tqdm import tqdm
+import torch.nn.functional as F
+
+
+class DNN(nn.Module):
+ """
+ DNN-based model
+ """
+ def __init__(self, param):
+ """
+
+ :param input_size:
+ :param num_class:
+ """
+ super(DNN, self).__init__()
+ self.input_size = param['input_size']
+ self.num_class = param['num_class']
+
+ self.linear1 = nn.Linear(self.input_size, 2048)
+ self.linear2 = nn.Linear(2048, 1024)
+ self.linear3 = nn.Linear(1024, 512)
+ self.linear4 = nn.Linear(512, self.num_class)
+ self.dropout1 = nn.Dropout(0.2)
+ self.dropout2 = nn.Dropout(0.2)
+
+ def forward(self, inputs):
+ """
+
+ :param inputs:
+ :return:
+ """
+ inputs = inputs.float()
+ inputs = F.relu(self.linear1(inputs))
+ inputs = self.dropout1(inputs)
+ inputs = F.relu(self.linear2(inputs))
+ inputs = self.dropout2(inputs)
+ inputs = F.relu(self.linear3(inputs))
+ inputs = self.linear4(inputs)
+ # inputs.shape = (batch_size, num_class)
+ return inputs
+
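+# Example usage (illustrative sketch): with input_size=77 and num_class=2,
+# a batch of 4 samples yields logits of shape (4, 2).
+#
+#     model = DNN({"input_size": 77, "num_class": 2})
+#     logits = model(torch.randn(4, 77))
+#     assert logits.shape == (4, 2)
+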
+def train(name):
+    # hyperparameters
+    epoch_size = 20
+ batch_size = 128
+ lr = 1e-4
+
+ # model param
+ param = {
+ "input_size": 77,
+ "num_class": 2
+ }
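+    # 77 matches the CICIDS flow-feature dimension; 2 output classes
+    # (presumably benign vs. attack)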
+
+    cicids = CICIDS(name)
+    sample_size = len(cicids)
+    botname = name
+    arch = "dnn"
+
+    total_size = sample_size
+ test_size = int(total_size * 0.2)
+ train_size = int((total_size - test_size) * 0.8)
+ valid_size = total_size - test_size - train_size
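+    # split arithmetic: 20% is held out for testing and the rest is split
+    # 80/20 into train/valid, e.g. 10,000 samples -> test 2,000, train 6,400,
+    # valid 1,600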
+ print("train data: {}".format(train_size))
+ print("valid data: {}".format(valid_size))
+ print("test data: {}".format(test_size))
+
+    # use GPU if available, otherwise CPU
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+    # prepare the dataloaders
+ # c2data = C2Data(botname)
+ # c2data = CollectionDataset('../adversarialData/collectionData.npy')
+ # train_valid_data, test_data = torch.utils.data.random_split(c2data, [200, 200])
+ # train_data, valid_data = torch.utils.data.random_split(train_valid_data, [100, 100])
+ # train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True, drop_last=False)
+ # valid_loader = DataLoader(valid_data, batch_size=batch_size, shuffle=True, drop_last=False)
+ # test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=True, drop_last=False)
+ train_valid_data, test_data = torch.utils.data.random_split(cicids, [train_size + valid_size, test_size])
+ train_data, valid_data = torch.utils.data.random_split(train_valid_data, [train_size, valid_size])
+ train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True, drop_last=False)
+ valid_loader = DataLoader(valid_data, batch_size=batch_size, shuffle=True, drop_last=False)
+ test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=True, drop_last=False)
+
+ # model
+
+ dnn = DNN(param)
+ dnn.to(device)
+
+ # loss func
+ crossEntropy = torch.nn.CrossEntropyLoss()
+ adam = torch.optim.Adam(dnn.parameters(), lr=lr)
+ # lossFunc = torch.nn.KLDivLoss()
+
+    # training
+ for i in range(epoch_size):
+ dnn.train()
+ loss_list = []
+ acc_list = []
+ recall_list = []
+ f1_list = []
+ for batch_x, batch_y in tqdm(train_loader):
+ batch_x = batch_x.to(device, dtype=torch.float)
+ batch_y = batch_y.to(device)
+ output = dnn(batch_x)
+            # output.shape = (batch_size, num_class)
+ acc, recall, f1 = computeFPR(y_pred=output, y_target=batch_y)
+ # ipdb.set_trace()
+ batch_y = batch_y.squeeze()
+ # batch_y = F.softmax(batch_y)
+ # output = F.softmax(output)
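+            # CrossEntropyLoss applies log-softmax internally, so the raw
+            # logits are passed directly (the softmax lines stay disabled)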
+ loss = crossEntropy(output, batch_y)
+
+ acc_list.append(acc)
+ recall_list.append(recall)
+ f1_list.append(f1)
+ loss_list.append(loss.item())
+
+ adam.zero_grad()
+ loss.backward()
+ adam.step()
+ print("[Training {:03d}] acc: {:.2%}, recall: {:.2%}, f1: {:.2%}, loss: {:.2f}".format(i + 1,
+ np.mean(acc_list),
+ np.mean(recall_list),
+ np.mean(f1_list),
+ np.mean(loss_list)))
+
+        # validation
+ dnn.eval()
+ loss_list = []
+ acc_list = []
+ recall_list = []
+ f1_list = []
+ for batch_x, batch_y in valid_loader:
+ batch_x = batch_x.to(device, dtype=torch.float)
+ batch_y = batch_y.to(device)
+ output = dnn(batch_x)
+            # output.shape = (batch_size, num_class)
+ acc, recall, f1 = computeFPR(y_pred=output, y_target=batch_y)
+ batch_y = batch_y.squeeze()
+ # batch_y = F.softmax(batch_y)
+ # output = F.softmax(output)
+ loss = crossEntropy(output, batch_y)
+
+ acc_list.append(acc)
+ recall_list.append(recall)
+ f1_list.append(f1)
+ loss_list.append(loss.item())
+ print("[Validing {:03d}] acc: {:.2%}, recall: {:.2%}, f1: {:.2%}, loss: {:.2f}".format(i + 1,
+ np.mean(acc_list),
+ np.mean(recall_list),
+ np.mean(f1_list),
+ np.mean(loss_list)))
+
+ # testing
+ dnn.eval()
+ loss_list = []
+ acc_list = []
+ recall_list = []
+ f1_list = []
+ y_true = []
+ y_pred = []
+ for batch_x, batch_y in test_loader:
+ batch_x = batch_x.to(device, dtype=torch.float)
+ batch_y = batch_y.to(device)
+ output = dnn(batch_x)
+            # output.shape = (batch_size, num_class)
+ acc, recall, f1 = computeFPR(y_pred=output, y_target=batch_y)
+ batch_y = batch_y.squeeze()
+ # batch_y = F.softmax(batch_y)
+ # output = F.softmax(output)
+ loss = crossEntropy(output, batch_y)
+
+ acc_list.append(acc)
+ recall_list.append(recall)
+ f1_list.append(f1)
+ loss_list.append(loss.item())
+ y_true += batch_y.detach().cpu().numpy().tolist()
+ y_pred += torch.argmax(output, dim=1).detach().cpu().numpy().tolist()
+ print("[Testing {:03d}] acc: {:.2%}, recall: {:.2%}, f1: {:.2%}, loss: {:.2f}".format(i + 1,
+ np.mean(acc_list),
+ np.mean(recall_list),
+ np.mean(f1_list),
+ np.mean(loss_list)))
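+        # sklearn convention: rows are true labels, columns are predictions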
+ print(confusion_matrix(y_true, y_pred))
+ FPR = {
+ 'acc': np.mean(acc_list),
+ 'recall': np.mean(recall_list),
+ 'f1': np.mean(f1_list),
+            'matrix': confusion_matrix(y_true, y_pred)
+ }
+ hyper = {
+ 'epoch_size': epoch_size,
+ 'lr': lr,
+ 'batch_size': batch_size
+ }
+ filename = "../modelFile/proxy_mta_length_{}_{}.pkt".format(arch, botname)
+ save_model(dnn, adam, param, hyper, FPR, filename)
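+
+# Example usage (illustrative): train("Botnet") fits a substitute model on
+# the CICIDS "Botnet" subset and saves it under ../modelFile/.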
+
+def trainMTa(botname, arch, sample_size, feature_type='length'):
+    # hyperparameters
+    epoch_size = 20
+ batch_size = 128
+ lr = 1e-4
+
+ # model param
+ param = {
+ "input_size": 80,
+ "num_class": 2
+ }
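+    # note: input_size is 80 here, matching sequenceLen=80 in the C2Data call
+    # below (cf. the 77 CICIDS flow features used by train() above)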
+
+    cicids = C2Data(botname, number=sample_size, sequenceLen=80, feature_type=feature_type)
+    sample_size = len(cicids)
+    arch = "dnn"  # overrides the arch argument; only used by the disabled save path below
+
+    total_size = sample_size
+ test_size = int(total_size * 0.2)
+ train_size = int((total_size - test_size) * 0.8)
+ valid_size = total_size - test_size - train_size
+ print("train data: {}".format(train_size))
+ print("valid data: {}".format(valid_size))
+ print("test data: {}".format(test_size))
+
+    # use GPU if available, otherwise CPU
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+    # prepare the dataloaders
+ # c2data = C2Data(botname)
+ # c2data = CollectionDataset('../adversarialData/collectionData.npy')
+ # train_valid_data, test_data = torch.utils.data.random_split(c2data, [200, 200])
+ # train_data, valid_data = torch.utils.data.random_split(train_valid_data, [100, 100])
+ # train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True, drop_last=False)
+ # valid_loader = DataLoader(valid_data, batch_size=batch_size, shuffle=True, drop_last=False)
+ # test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=True, drop_last=False)
+ train_valid_data, test_data = torch.utils.data.random_split(cicids, [train_size + valid_size, test_size])
+ train_data, valid_data = torch.utils.data.random_split(train_valid_data, [train_size, valid_size])
+ train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True, drop_last=False)
+ valid_loader = DataLoader(valid_data, batch_size=batch_size, shuffle=True, drop_last=False)
+ test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=True, drop_last=False)
+
+ # model
+
+ dnn = DNN(param)
+ dnn.to(device)
+
+ # loss func
+ crossEntropy = torch.nn.CrossEntropyLoss()
+ adam = torch.optim.Adam(dnn.parameters(), lr=lr)
+ # lossFunc = torch.nn.KLDivLoss()
+
+    # training
+ for i in range(epoch_size):
+ dnn.train()
+ loss_list = []
+ acc_list = []
+ recall_list = []
+ f1_list = []
+ for batch_x, batch_y in tqdm(train_loader):
+ batch_x = batch_x.to(device, dtype=torch.float)
+ batch_y = batch_y.to(device)
+ output = dnn(batch_x)
+            # output.shape = (batch_size, num_class)
+ acc, recall, f1 = computeFPR(y_pred=output, y_target=batch_y)
+ # ipdb.set_trace()
+ batch_y = batch_y.squeeze()
+ # batch_y = F.softmax(batch_y)
+ # output = F.softmax(output)
+ loss = crossEntropy(output, batch_y)
+
+ acc_list.append(acc)
+ recall_list.append(recall)
+ f1_list.append(f1)
+ loss_list.append(loss.item())
+
+ adam.zero_grad()
+ loss.backward()
+ adam.step()
+ print("[Training {:03d}] acc: {:.2%}, recall: {:.2%}, f1: {:.2%}, loss: {:.2f}".format(i + 1,
+ np.mean(acc_list),
+ np.mean(recall_list),
+ np.mean(f1_list),
+ np.mean(loss_list)))
+
+        # validation
+ dnn.eval()
+ loss_list = []
+ acc_list = []
+ recall_list = []
+ f1_list = []
+ for batch_x, batch_y in valid_loader:
+ batch_x = batch_x.to(device, dtype=torch.float)
+ batch_y = batch_y.to(device)
+ output = dnn(batch_x)
+            # output.shape = (batch_size, num_class)
+ acc, recall, f1 = computeFPR(y_pred=output, y_target=batch_y)
+ batch_y = batch_y.squeeze()
+ # batch_y = F.softmax(batch_y)
+ # output = F.softmax(output)
+ loss = crossEntropy(output, batch_y)
+
+ acc_list.append(acc)
+ recall_list.append(recall)
+ f1_list.append(f1)
+ loss_list.append(loss.item())
+ print("[Validing {:03d}] acc: {:.2%}, recall: {:.2%}, f1: {:.2%}, loss: {:.2f}".format(i + 1,
+ np.mean(acc_list),
+ np.mean(recall_list),
+ np.mean(f1_list),
+ np.mean(loss_list)))
+
+ # testing
+ dnn.eval()
+ loss_list = []
+ acc_list = []
+ recall_list = []
+ f1_list = []
+ y_true = []
+ y_pred = []
+ for batch_x, batch_y in test_loader:
+ batch_x = batch_x.to(device, dtype=torch.float)
+ batch_y = batch_y.to(device)
+ output = dnn(batch_x)
+            # output.shape = (batch_size, num_class)
+ acc, recall, f1 = computeFPR(y_pred=output, y_target=batch_y)
+ batch_y = batch_y.squeeze()
+ # batch_y = F.softmax(batch_y)
+ # output = F.softmax(output)
+ loss = crossEntropy(output, batch_y)
+
+ acc_list.append(acc)
+ recall_list.append(recall)
+ f1_list.append(f1)
+ loss_list.append(loss.item())
+ y_true += batch_y.detach().cpu().numpy().tolist()
+ y_pred += torch.argmax(output, dim=1).detach().cpu().numpy().tolist()
+ print("[Testing {:03d}] acc: {:.2%}, recall: {:.2%}, f1: {:.2%}, loss: {:.2f}".format(i + 1,
+ np.mean(acc_list),
+ np.mean(recall_list),
+ np.mean(f1_list),
+ np.mean(loss_list)))
+ print(confusion_matrix(y_true, y_pred))
+ # FPR = {
+ # 'acc': np.mean(acc_list),
+ # 'recall': np.mean(recall_list),
+ # 'f1': np.mean(f1_list),
+ # 'metrix': confusion_matrix(y_true,y_pred)
+ # }
+ # hyper = {
+ # 'epoch_size': epoch_size,
+ # 'lr': lr,
+ # 'batch_size': batch_size
+ # }
+ # filename = "../modelFile/proxy_mta_{}_{}_{}.pkt".format(feature_type, arch, botname)
+ # save_model(dnn, adam, param, hyper, FPR, filename)
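+    # saving is disabled here; the trained substitute model is returned to
+    # the caller instead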
+ return dnn
+
+if __name__ == '__main__':
+ malwares = [
+ "Botnet",
+ "Fuzzing",
+ "PortScan",
+ "BruteForce",
+ "DDoS"
+ ]
+ Botnets = [
+ "Tofsee",
+ "Dridex",
+ "Quakbot",
+ "TrickBot",
+ "Gozi"
+ ]
+ numbers = [20000, 8000, 700, 650, 580]
+ for i in range(5):
+        print(Botnets[i])
+ trainMTa(Botnets[i], arch='dnn', sample_size=numbers[i], feature_type='length')
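+        # the break below limits the run to the first botnet; remove it to
+        # train all five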
+ break