1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
|
import torch
from torch.utils.data import Dataset
import numpy as np
import pandas as pd
"""
CICIDS2017s数据集,提取统计特征
"""
class CICIDS2017Statistic():
    """Build per-attack statistical-feature arrays from CICIDS2017 flow CSVs.

    For every class in the configuration the matching CSV is read, up to
    ``num`` rows labelled ``BENIGN`` (relabelled ``0``) and up to ``num`` rows
    carrying one of the configured attack labels (relabelled ``tag``) are
    selected, the first CSV column is dropped, and the two groups are stacked
    benign-first. The result is kept in ``self.data[cls]`` and written to
    ``<output_dir>/<cls>.npy``.

    Args:
        config: Optional mapping ``{class_name: {"path", "label", "tag",
            "num"}}``. When omitted, the built-in five-class configuration
            is used.
        output_dir: Optional directory for the ``.npy`` files. When omitted,
            ``DEFAULT_OUTPUT_DIR`` is used.

    Note:
        Constructing an instance immediately reads every configured CSV and
        writes the ``.npy`` files.
    """

    DEFAULT_OUTPUT_DIR = "/home/sunhanwu/datasets/cicids2017/npy/"
    # The label column is addressed with a leading space, exactly as the
    # loader has always done.
    LABEL_COLUMN = ' Label'
    BENIGN_LABEL = 'BENIGN'

    @staticmethod
    def _default_config():
        """Return a fresh copy of the built-in per-class configuration."""
        # There is no standalone "Benign" entry: benign rows are taken from
        # each attack's own CSV inside ``load_data``.
        return {
            "Botnet": {
                "path": '/home/sunhanwu/datasets/cicids2017/MachineLearningCVE/Friday-WorkingHours-Morning.pcap_ISCX.csv',
                'label': ['Bot'],
                'tag': 1,
                'num': 1966
            },
            # NOTE(review): the key says "Fuzzing" but the selected label is
            # 'DoS Hulk' -- confirm this naming is intentional.
            "Fuzzing": {
                "path": '/home/sunhanwu/datasets/cicids2017/MachineLearningCVE/Wednesday-workingHours.pcap_ISCX.csv',
                'label': ['DoS Hulk'],
                'tag': 2,
                'num': 10000
            },
            "PortScan": {
                "path": '/home/sunhanwu/datasets/cicids2017/MachineLearningCVE/Friday-WorkingHours-Afternoon-PortScan.pcap_ISCX.csv',
                'label': ['PortScan'],
                'tag': 3,
                'num': 10000
            },
            "BruteForce": {
                "path": '/home/sunhanwu/datasets/cicids2017/MachineLearningCVE/Tuesday-WorkingHours.pcap_ISCX.csv',
                'label': ['FTP-Patator', 'SSH-Patator'],
                'tag': 4,
                'num': 10000
            },
            "DDoS": {
                "path": '/home/sunhanwu/datasets/cicids2017/MachineLearningCVE/Friday-WorkingHours-Afternoon-DDos.pcap_ISCX.csv',
                'label': ['DDoS'],
                'tag': 5,
                'num': 10000
            }
        }

    def __init__(self, config=None, output_dir=None):
        print("CICIDS2017 Statistic Init")
        self.config = self._default_config() if config is None else config
        self.output_dir = (
            self.DEFAULT_OUTPUT_DIR if output_dir is None else output_dir
        )
        self.load_data()

    def _select_rows(self, frame, spec):
        """Return the stacked benign + attack rows for one class.

        Args:
            frame: DataFrame read from the class's CSV.
            spec: That class's configuration entry.

        Returns:
            A 2-D array: up to ``spec['num']`` benign rows followed by up to
            ``spec['num']`` attack rows, without the first CSV column.
        """
        labels = frame[self.LABEL_COLUMN]
        limit = spec['num']

        # Work on explicit copies so the relabelling never writes through a
        # filtered view of ``frame`` (pandas chained-assignment pitfall).
        benign = frame[labels == self.BENIGN_LABEL].iloc[:limit].copy()
        benign[self.LABEL_COLUMN] = 0

        malware = frame[labels.isin(spec['label'])].iloc[:limit].copy()
        # Every remaining row is an attack row, so the whole column is
        # replaced; this also gives the column a numeric dtype instead of
        # leaving ints inside the original string column.
        malware[self.LABEL_COLUMN] = spec['tag']

        # ``1:`` drops the first CSV column.
        return np.vstack([benign.iloc[:, 1:], malware.iloc[:, 1:]])

    def load_data(self):
        """Read every configured CSV, fill ``self.data`` and save ``.npy`` files.

        The output directory is created if it does not exist yet.
        """
        # Local import keeps this block self-contained.
        import os

        out_dir = getattr(self, "output_dir", None) or self.DEFAULT_OUTPUT_DIR
        os.makedirs(out_dir, exist_ok=True)

        self.data = {}
        for cls, spec in self.config.items():
            print("load data from {}".format(spec['path']))
            frame = pd.read_csv(spec['path'])
            self.data[cls] = self._select_rows(frame, spec)
            np.save(os.path.join(out_dir, cls + ".npy"), self.data[cls])
            print("{} done: total num {}".format(cls, len(self.data[cls])))

    def __getitem__(self, item):
        """Placeholder: item access is not implemented and returns ``None``."""
        pass

    def __len__(self):
        """Placeholder: length is not implemented and returns ``None``."""
        pass
"""
CICIDS2017s数据集,提取序列特征
"""
class CICIDS2017_Sequence(Dataset):
    """Placeholder dataset for CICIDS2017 sequence features.

    Nothing is loaded yet: construction only prints a marker line, and the
    item/length hooks are unimplemented stubs that return ``None``.
    """

    def __init__(self):
        print("test 2")

    def __getitem__(self, item):
        """Stub: sample lookup is not implemented yet."""
        return None

    def __len__(self):
        """Stub: dataset size is not implemented yet."""
        return None
# Script entry point: constructing the statistic dataset triggers its loading.
if __name__ == "__main__":
    D = CICIDS2017Statistic()
|