# coding:utf-8
"""Build a graph dataset (one JSON file per graph) from CIC-DoS packet CSVs.

Each IP address becomes a trivial one-node graph (label 5) and each sampled
flow becomes a packet-interaction graph (label 0..4).  The links between IP
graphs and flow graphs are appended to ``edges_DoS.csv`` under ``root_addr``.
"""
import csv
import json
import random

import matplotlib.pyplot as plt
import networkx as nx
import numpy as np

# Maps an IP address to the index of the JSON file that holds its node graph.
ip_dict = {}
# Index of the next JSON file to be written (shared by IP and flow graphs).
json_num = 0
pro = ['tcp', 'icmp', 'udp']  # transport-layer protocol types
# ip[k] is the IP address associated with capture addr[k]; tag[k] is the label
# given to flows of that capture which involve ip[k].
ip = ['0', '75.127.97.72', '75.127.97.72', '75.127.97.72', '75.127.97.72', '74.63.40.21', '75.127.97.72',
      '97.74.144.108', '208.113.162.153', '69.84.133.138', '67.220.214.50', '97.74.144.108', '69.192.24.88',
      '97.74.144.108', '203.73.24.75', '97.74.144.108', '74.55.1.4', '97.74.104.201', '74.55.1.4',
      '69.192.24.88', '97.74.144.108', '97.74.144.108', '75.127.97.72', '75.127.97.72', '69.192.24.88',
      '75.127.97.72', '74.55.1.4']
addr = ['1', '2', '3', '4', '5(1)', '5(2)', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17',
        '18', '19', '20', '21', '22', '23', '24', '25', '26']
tag = [0, 2, 4, 1, 1, 3, 2, 1, 2, 1, 3, 1, 2, 2, 2, 2, 4, 3, 1, 1, 3, 3, 3, 3, 1, 1, 2]
# Number of graphs written per label.  Flow graphs use labels 0..4 and IP node
# graphs use label 5, so the counter needs six keys (it used to have five,
# which made save_dataset fail with KeyError on the first IP node).
num_label_dict = {i: 0 for i in range(6)}
root_addr = "/home/liyuzhen/dataset/SEAL_DoS/test/"
# Directory that holds the per-capture packet CSV files read by csv_dataset.
csv_root_addr = '/home/dataset/CIC-DoS/morefeature/'

# Every column index up to 15 is read from a packet row.
_MIN_ROW_LEN = 16


def port_change(row):
    """Collapse a packet's ports into a single port feature.

    Args:
        row: four strings; the first pair is used when ``row[0]`` is non-empty,
            otherwise the second pair when ``row[2]`` is non-empty.  The caller
            passes CSV columns 3..6 (presumably TCP src/dst then UDP src/dst
            ports -- confirm against the CSV export).

    Returns:
        The first port of the chosen pair that is <= 1024 (returned unchanged,
        i.e. as a string), the int ``1025`` when both ports of the pair are
        above 1024, or the int ``-1`` when neither pair is present.
    """
    if row[0] != '':
        first_port, second_port = row[0], row[1]
    elif row[2] != '':
        first_port, second_port = row[2], row[3]
    else:
        return -1

    if int(first_port) <= 1024:
        return first_port
    if int(second_port) <= 1024:
        return second_port
    return 1025


def save_dataset(i):
    """Write the one-node graph for IP address ``i`` unless it already has one.

    A new address is registered in ``ip_dict`` under the current ``json_num``,
    its graph is dumped to ``<root_addr><index>.json`` and the label-5 counter
    in ``num_label_dict`` is incremented.  Known addresses are left untouched.
    """
    global json_num
    if i in ip_dict:
        return

    # Create the IP node graph.
    ip_dict[i] = json_num
    json_num += 1
    dict_flow = {"edge": [[0, 0]], "features": {"0": "node"}, "label": 5}
    with open(root_addr + str(ip_dict[i]) + '.json', "w") as f:
        json.dump(dict_flow, f)
    num_label_dict[5] += 1


def _flow_keys(row, protocol):
    """Return ``(key, reversed_key)`` for a packet row, or ``None``.

    Keys are ``src#proto#dst`` when no port is present and
    ``src#sport#proto#dst#dport`` otherwise; the reversed key swaps the two
    endpoints.  ``None`` means the port columns match none of the handled
    layouts.
    """
    src, dst = row[1], row[2]
    if row[3] == '' and row[5] == '':
        return ('#'.join((src, protocol, dst)),
                '#'.join((dst, protocol, src)))
    if row[3] == '' and row[5].isdigit():
        src_port, dst_port = row[5], row[6]
    elif row[3].isdigit():
        src_port, dst_port = row[3], row[4]
    else:
        return None
    return ('#'.join((src, src_port, protocol, dst, dst_port)),
            '#'.join((dst, dst_port, protocol, src, src_port)))


def csv_dataset(csv_num):
    """Group the packets of capture ``addr[csv_num]`` into bidirectional flows.

    Args:
        csv_num: index into ``addr`` / ``ip`` selecting the CSV file to read
            and the IP address used by the row filter.

    Returns:
        dict mapping a flow key (see ``_flow_keys``) to the list of its
        packets in file order.  Each packet is
        ``[size, protocols, window_size, tcp.flags, ip.ttl, port]`` where
        ``size`` is negated for packets travelling in the direction of the
        key and left positive for packets in the reverse direction.
    """
    flow_dict = {}
    csv_path = csv_root_addr + addr[csv_num] + '.csv'
    # newline='' is what the csv module asks for; the with-block also closes
    # the handle, which the previous bare open() never did.
    with open(csv_path, 'r', newline='') as csv_file:
        for row in csv.reader(csv_file):
            # Blank or truncated rows cannot be indexed below; skip them.
            if len(row) < _MIN_ROW_LEN:
                continue
            # Skip the header row and rows without a source IP.
            if row[1] == 'ip.src' or not row[1]:
                continue
            # NOTE(review): row[3] is treated as a port everywhere else in
            # this function, so comparing it with IP addresses looks like a
            # leftover from an older column layout (row[2] may be intended).
            # Behaviour is kept as-is -- confirm before changing.
            if row[1] != ip[csv_num] and row[3] != ip[csv_num]:
                if row[1] in ip or row[3] in ip:
                    continue
            if row[3] != '' and not row[3].isdigit():
                continue

            # The fourth ':'-separated element of column 8 is the protocol
            # name embedded in the flow key.
            pro_all = row[8].split(":")
            if len(pro_all) < 4:
                print("less than 4", row)
                continue

            keys = _flow_keys(row, pro_all[3])
            if keys is None:
                print("other condition", row)
                continue
            five_tuple, five_tuple_reverse = keys

            # packet size, protocol, window_size, tcp.flags, ip.ttl, port
            tmp_one_packet = [int(row[7]), row[8], row[12], row[13], row[15], port_change(row[3:7])]

            if five_tuple not in flow_dict and five_tuple_reverse in flow_dict:
                # Reverse direction of an already known flow: size stays positive.
                flow_dict[five_tuple_reverse].append(tmp_one_packet)
            else:
                # Same direction as the flow key (or the first packet of a
                # new flow): size is stored negated.
                tmp_one_packet[0] *= -1
                flow_dict.setdefault(five_tuple, []).append(tmp_one_packet)
    return flow_dict


def _flow_graph_edges(flow_contents):
    """Return the edge list of the packet graph of one flow.

    Consecutive packets whose sizes have the same sign (same direction) are
    chained.  When the direction flips, the first packet of the new run is
    linked to the first packet of the previous run, and from the third run on
    the last packets of the two preceding runs are linked as well.  Finally
    the last packet of the flow is linked to the last packet of the preceding
    run.  A graph without edges yields the placeholder ``[[0, 0]]``.
    """
    g = nx.Graph()
    run_first = 0  # first packet of the current run
    prev_run_last = 0  # last packet of the run before the current one
    runs = 1
    for packet_num, packet in enumerate(flow_contents):
        g.add_node(packet_num)
        if packet_num == 0:
            continue
        if packet[0] * flow_contents[packet_num - 1][0] > 0:
            g.add_edge(packet_num - 1, packet_num)
        else:
            g.add_edge(packet_num, run_first)
            runs += 1
            if runs >= 3:
                g.add_edge(prev_run_last, packet_num - 1)
            run_first = packet_num
            prev_run_last = packet_num - 1

    # Computed from the length instead of the leaked loop variable so that an
    # empty flow cannot raise NameError.
    last_packet = len(flow_contents) - 1
    if last_packet > 0:
        g.add_edge(prev_run_last, last_packet)

    edges = [list(pair) for pair in g.edges()]
    return edges or [[0, 0]]


def load_data():
    """Generate the whole dataset under ``root_addr``.

    For every capture in ``addr`` the flows are extracted, 10% of them are
    sampled (shuffle with a fixed seed), and for each sampled flow:

    * both endpoint IPs get a node graph via ``save_dataset``;
    * the flow graph is written to ``<root_addr><json_num>.json``;
    * two rows linking the endpoints to the flow graph are appended to
      ``<root_addr>edges_DoS.csv`` (append mode: re-running adds to an
      existing file).

    ``ip_dict`` is cleared after every 900 flows, so later flows get fresh IP
    node graphs.  The per-label counts and the final ``json_num`` are printed.
    """
    global json_num
    global ip_dict
    json_num = 0
    ip_dict = {}
    flow_nums = 0
    # Alternative configurations kept for reference:
    # addr=['example']
    # tag=[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
    # tag=[0]
    # t=['slowbody','slowread','ddossim','goldeneye','slowheaders','rudy','hulk','slowloris']
    # tag=[t[0],t[1],t[2],t[3],t[4],t[5],t[2],t[5],t[6],t[4],t[3],t[0],t[0],t[0],t[5],t[1],t[4],
    #      t[6],t[6],t[7],t[4],t[7],t[4],t[3],t[3],t[6],t[5]]
    # ip=['75.127.97.72']
    for csv_num in range(len(addr)):
        # Get the flow dict of this capture.
        flows = csv_dataset(csv_num)

        # Randomly keep 10% of the flows (deterministic: seed fixed to 1).
        keys = list(flows.keys())
        random.seed(1)
        random.shuffle(keys)
        sample_size = int(len(flows) * 0.1)
        flows = {key: flows[key] for key in keys[:sample_size]}

        for flow_keys, flow_contents in flows.items():
            five_tuple_list = flow_keys.split('#')
            ip_first = five_tuple_list[0]
            # Keys are either src#proto#dst or src#sport#proto#dst#dport.
            ip_dst = five_tuple_list[2] if len(five_tuple_list) == 3 else five_tuple_list[3]
            save_dataset(ip_first)
            save_dataset(ip_dst)

            # Label: tag of the capture when one endpoint is the capture's IP,
            # otherwise 0.  The endpoints are compared directly; indexing
            # five_tuple_list[2] picked the protocol field for 5-field keys.
            label = tag[csv_num] if ip[csv_num] in (ip_first, ip_dst) else 0
            num_label_dict[label] += 1

            dict_flow = {
                "edge": _flow_graph_edges(flow_contents),
                "features": {str(packet_num): packet
                             for packet_num, packet in enumerate(flow_contents)},
                "label": label,
            }

            # Store the finished graph as JSON.
            json_file_name = root_addr + str(json_num) + '.json'
            with open(json_file_name, 'w') as f:
                json.dump(dict_flow, f)

            # Record the links of this flow graph inside the big graph.
            with open(root_addr + "edges_DoS.csv", "a+", newline='') as f:
                f_writer = csv.writer(f)
                f_writer.writerow([ip_dict[ip_first], json_num])
                f_writer.writerow([json_num, ip_dict[ip_dst]])

            json_num += 1
            flow_nums += 1
            if flow_nums >= 900:
                flow_nums = 0
                ip_dict = {}
    print(num_label_dict)
    print(f"json_num:{json_num}")


if __name__ == '__main__':
    load_data()