# utils/MTACICFlowMeter.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48

import torch
from sklearn.utils import shuffle
from torch.utils.data import Dataset
import numpy as np
import pandas as pd


# Maps each dataset name accepted by MTACICFlowMeter to the CSV file that is
# loaded for it. All files live in the same directory and are named after
# their key, so the mapping is generated from the list of names.
datapaths = {
    dataset: '/home/sunhanwu/datasets/MTA/cicflowcsv/{}.csv'.format(dataset)
    for dataset in ('Tofsee', 'Quakbot', 'Dridex', 'Gozi', 'TrickBot')
}

class MTACICFlowMeter(Dataset):
    """
    MTA dataset and cicflowmeter features

    Loads the CSV registered for ``name`` in ``datapaths`` together with the
    benign CSV, draws up to ``number`` shuffled rows from each, mixes them and
    serves the rows as ``(features, label)`` tensor pairs. The last CSV column
    is used as the label; every other column is a feature.
    """

    # Path of the benign CSV that is mixed with whichever dataset is selected.
    BENIGN_PATH = "/home/sunhanwu/datasets/MTA/cicflowcsv/Benign.csv"

    def __init__(self, name: str, number: int, norm: bool = True):
        """
        Read, sample, mix and (optionally) normalize the two CSV files.

        :param name: key of ``datapaths`` selecting the CSV to load
        :param number: maximum number of rows taken from each of the two CSVs
            (a file with fewer rows simply contributes all of them)
        :param norm: when true, min-max scale every feature column
        :raises AssertionError: if ``name`` is not a key of ``datapaths``
        """
        print("load data form {}".format(name))
        assert name in datapaths, "unknown dataset name: {!r}".format(name)
        data = pd.read_csv(datapaths[name])
        benign = pd.read_csv(self.BENIGN_PATH)
        X1 = shuffle(data).iloc[:number, :]
        X2 = shuffle(benign).iloc[:number, :]
        # DataFrame.append was removed in pandas 2.0; pd.concat is the
        # supported way to stack rows. ignore_index gives the mixed frame
        # unique row labels (both inputs carry their own, possibly
        # overlapping, labels), which keeps later column-wise alignment safe.
        X = shuffle(pd.concat([X1, X2], ignore_index=True))
        if norm:
            X = self._normalize_features(X)
        # Rows with any missing value are discarded after scaling.
        X = X.dropna()
        self.data = X.to_numpy()

    @staticmethod
    def _normalize_features(frame):
        """
        Min-max scale every column except the last one.

        Each feature column is mapped to ``(x - min) / (max - min + 1e-9)``;
        the small constant keeps constant columns from dividing by zero.
        The last column is carried over unchanged.

        :param frame: DataFrame whose last column is the label
        :return: a new DataFrame with the same columns in the same order;
            ``frame`` itself is not modified
        """
        features = frame.iloc[:, :-1]
        lowest = features.min()
        highest = features.max()
        scaled = (features - lowest) / (highest - lowest + 1e-9)
        # Build a fresh frame instead of assigning the float results back
        # into columns that may have been read as integers.
        return pd.concat([scaled, frame.iloc[:, -1:]], axis=1)

    def __getitem__(self, index):
        """
        Return the sample stored at row ``index``.

        :param index: row position in the prepared data array
        :return: ``(X, y)`` where ``X`` is a tensor of all values but the
            last and ``y`` is a one-element long tensor holding the last value
        """
        item = self.data[index]
        X = torch.tensor(item[:-1].tolist())
        y = torch.tensor([item[-1]]).long()
        return X, y

    def __len__(self):
        """Return the number of rows kept after sampling and ``dropna``."""
        return self.data.shape[0]