# coding:utf-8
"""Convert CIC-DDoS2019-style packet CSV exports into per-flow graph JSON files.

For every random seed the script:

1. parses each CSV listed in ``addr`` into bidirectional flows (``csv_dataset``),
2. samples a subset of the flows,
3. writes one small JSON graph per endpoint IP (``save_dataset``) and one per
   flow (packets are nodes, edges follow the burst structure of the flow),
4. appends the "IP -> flow -> IP" links of the big graph to ``edges_DDoS.csv``.
"""
import csv
import json
import random

import matplotlib.pyplot as plt
import networkx as nx
import numpy as np

# Directory holding the per-attack CSV files and root of the generated dataset.
CSV_DIR = 'E:\\DDoS\\DDoS2019\\0112\\morefeature\\'
OUTPUT_ROOT = "E:/DDoS/DDoS2019/SEAL/"

# Sampling parameters: files with fewer than SAMPLE_FLOOR / SAMPLE_RATIO flows
# keep at most SAMPLE_FLOOR flows, larger files keep SAMPLE_RATIO of them.
SAMPLE_RATIO = 0.01  # ***** tune the dataset proportion here
SAMPLE_FLOOR = 15000

# After this many written flows the IP -> graph-id mapping is reset, so the
# next flows get freshly numbered IP node graphs.
IP_DICT_RESET_INTERVAL = 300

# Label stored in the JSON graph of an IP node.
IP_NODE_LABEL = 13

ip_dict = {}   # IP address -> id of the JSON graph that represents it
json_num = 0   # id of the next JSON graph file to be written
ip = "172.16.0.5"
addr = ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12",
        "DNS", "LDAP", "MSSQL", "NetBIOS", "NTP", "SNMP", "SSDP", "SYN",
        "TFTP", "UDP", "UDP-Lag", "WebDDoS"]
# Label per entry of ``addr`` (0 for the numbered files, 1..12 for attacks).
tag = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
pro = ['tcp', 'icmp', 'udp']  # transport-layer protocol types
num_label_dict = {i: 0 for i in range(14)}  # label -> number of graphs written


def port_change(row):
    """Collapse a ``[src_port, dst_port]`` pair into a single port feature.

    Returns ``-1`` when the source port is the empty string, the first of the
    two ports that is <= 1024 (returned unchanged, i.e. as the string read
    from the CSV), or ``1025`` when both ports are above 1024.

    NOTE(review): the result mixes ``int`` and ``str`` values; kept as-is
    because the generated JSON features depend on it.
    """
    src_port, dst_port = row[0], row[1]
    if src_port == '':
        return -1
    if int(src_port) <= 1024:
        return src_port
    if int(dst_port) <= 1024:
        return dst_port
    return 1025


def save_dataset(i, seed):
    """Write the one-node JSON graph for IP address ``i`` if it is new.

    A new IP is registered in ``ip_dict`` under the current ``json_num``
    (which is then incremented) and its graph is written to
    ``<OUTPUT_ROOT>/<seed>/<id>.json``. Known IPs are left untouched.
    """
    global json_num
    if i in ip_dict:
        return
    # Create the IP node.
    ip_dict[i] = json_num
    json_num += 1
    dict_flow = {"edge": [[0, 0]], "features": {"0": 0}, "label": IP_NODE_LABEL}
    with open(OUTPUT_ROOT + str(seed) + "/" + str(ip_dict[i]) + '.json', "w") as f:
        json.dump(dict_flow, f)
    num_label_dict[IP_NODE_LABEL] += 1


def _flow_keys(row, protocol):
    """Return ``(forward_key, reverse_key)`` for a packet row, or ``None``.

    Keys are ``src#proto#dst`` when the row has neither TCP nor UDP ports and
    ``src#sport#proto#dst#dport`` otherwise (UDP ports from columns 5/6, TCP
    ports from columns 3/4). ``None`` means the row fits none of these cases.
    """
    src, dst = str(row[1]), str(row[2])
    if row[3] == '' and row[5] == '':
        return ('#'.join((src, protocol, dst)),
                '#'.join((dst, protocol, src)))
    if row[3] == '' and row[5].isdigit():
        sport, dport = str(row[5]), str(row[6])
    elif row[3].isdigit():
        sport, dport = str(row[3]), str(row[4])
    else:
        return None
    return ('#'.join((src, sport, protocol, dst, dport)),
            '#'.join((dst, dport, protocol, src, sport)))


def csv_dataset(csv_num):
    """Parse ``addr[csv_num] + '.csv'`` into a dict of bidirectional flows.

    Column layout according to the original author's notes:
    1: ip_src  2: ip_dst  3: tcp_srcport  4: tcp_dstport  5: udp_srcport
    6: udp_dstport  8: frame_len  9: frame_prot (eth:ethernet:ip:tcp:ssh)
    10: frame_time  13: icmp.type  16: tcp_ack  19: tcp_len
    20: tcp_window_size  22: tcp_flags  23: ip_flags  24: ip_ttl

    Returns:
        dict mapping the flow key of the first packet seen for a flow to the
        list of its packets. Each packet is ``[frame_len, row[9], row[20],
        row[18], row[24], port_change(row[5:7])]``; ``frame_len`` is negated
        for packets travelling in the direction of the flow key and left
        positive for packets in the reverse direction.
    """
    flow_dict = {}
    # The file is closed as soon as parsing finishes; newline='' is what the
    # csv module expects for files handed to csv.reader.
    with open(CSV_DIR + addr[csv_num] + '.csv', 'r', newline='') as csv_file:
        for row in csv.reader(csv_file):
            if row[5] == 'Length':
                continue
            if not row[1]:
                continue
            if row[3] != '' and not row[3].isdigit():
                continue

            # Build the src_ip / src_port / dst_ip / dst_port flow keys.
            pro_all = row[9].split(":")
            if len(pro_all) < 4:
                print("less than 4", row)
                continue
            keys = _flow_keys(row, pro_all[3])
            if keys is None:
                print("other condition", row)
                continue
            five_tuple, five_tuple_reverse = keys

            # Packet size, protocol stack, window size, row[18], ip.ttl and
            # the UDP port feature.
            # NOTE(review): the original comment calls row[18] "tcp.flags"
            # while the column notes put tcp_flags at 22 - confirm.
            tmp_one_packet = [int(row[8]), row[9], row[20], row[18], row[24],
                              port_change(row[5:7])]

            if five_tuple not in flow_dict and five_tuple_reverse in flow_dict:
                # Reply direction of a known flow: keep the positive length.
                flow_dict[five_tuple_reverse].append(tmp_one_packet)
            else:
                # Forward direction (known or brand-new flow): negative length.
                tmp_one_packet[0] *= -1
                flow_dict.setdefault(five_tuple, []).append(tmp_one_packet)
    return flow_dict


def _sample_flows(flows, seed):
    """Return a seeded random subset of ``flows`` (order = shuffled order).

    Files with fewer than ``SAMPLE_FLOOR / SAMPLE_RATIO`` flows keep at most
    ``SAMPLE_FLOOR`` flows; larger files keep ``SAMPLE_RATIO`` of their flows.
    """
    keys = list(flows.keys())
    random.seed(seed)
    random.shuffle(keys)
    if len(flows) < (SAMPLE_FLOOR / SAMPLE_RATIO):
        sample_size = min(len(flows), SAMPLE_FLOOR)
    else:
        sample_size = int(len(flows) * SAMPLE_RATIO)
    print(f"file_flow_before:{sample_size}")
    return {key: flows[key] for key in keys[:sample_size]}


def _flow_graph_edges(flow_contents):
    """Build the packet graph of one flow and return its edge list.

    Packets are nodes. Consecutive packets with the same sign of the length
    field (same direction) are chained; at every direction change the first
    packet of the new burst is linked to the first packet of the previous
    burst and, from the third burst on, the last packets of the two preceding
    bursts are linked as well. Finally the last packet of the flow is linked
    to the last packet of the preceding burst. A graph without any edge
    yields ``[[0, 0]]``.
    """
    g = nx.Graph()
    node_first = 0   # first packet of the current burst
    node_last = 0    # last packet of the previous burst
    columns = 1      # number of bursts seen so far
    for packet_num, packet in enumerate(flow_contents):
        g.add_node(packet_num)
        if packet_num == 0:
            continue
        if packet[0] * flow_contents[packet_num - 1][0] > 0:
            g.add_edge(packet_num - 1, packet_num)
        else:
            g.add_edge(packet_num, node_first)
            columns += 1
            if columns >= 3:
                g.add_edge(node_last, packet_num - 1)
            node_first = packet_num
            node_last = packet_num - 1
    final_packet = len(flow_contents) - 1
    if final_packet > 0:
        g.add_edge(node_last, final_packet)
    edges = [list(pair) for pair in g.edges()]
    return edges if edges else [[0, 0]]


def load_data():
    """Generate the JSON graphs and big-graph edge list for every seed.

    For each seed the global ``json_num`` / ``ip_dict`` state is reset, every
    CSV in ``addr`` is parsed and sampled, and for each kept flow the two
    endpoint IP graphs, the flow graph and two rows of ``edges_DDoS.csv``
    are written below ``<OUTPUT_ROOT>/<seed>/``.
    """
    global json_num
    global ip_dict
    random_seeds = [1, 2, 4, 7, 9]
    for seed in random_seeds:
        json_num = 0
        ip_dict = {}
        flow_nums = 0
        seed_dir = OUTPUT_ROOT + str(seed) + "/"
        big_graph_edge = seed_dir + "edges_DDoS.csv"
        for csv_num, csv_name in enumerate(addr):
            print(f"addr: {csv_name}, json_num: {json_num}")
            flows = _sample_flows(csv_dataset(csv_num), seed)

            for flow_keys, flow_contents in flows.items():
                five_tuple_list = flow_keys.split('#')
                ip_first = five_tuple_list[0]
                # 3-field keys are src#proto#dst, 5-field keys carry ports.
                ip_dst = five_tuple_list[2] if len(five_tuple_list) == 3 else five_tuple_list[3]
                if ip_dst == ip:
                    # Flows towards the attacker IP are skipped.
                    continue
                if ip_first == ip and tag[csv_num] == 0:
                    # Traffic from the attacker IP is excluded from the
                    # label-0 files.
                    continue

                # Create the IP nodes.
                save_dataset(ip_first, seed)
                save_dataset(ip_dst, seed)

                # Flows started by the attacker IP get the file's label,
                # everything else is labelled 0.
                label = tag[csv_num] if ip_first == ip else 0
                num_label_dict[label] += 1
                dict_flow = {
                    "edge": _flow_graph_edges(flow_contents),
                    "features": {str(packet_num): packet
                                 for packet_num, packet in enumerate(flow_contents)},
                    "label": label,
                }

                # Store the finished graph as JSON.
                json_file_name = seed_dir + str(json_num) + '.json'
                with open(json_file_name, 'w') as f:
                    json.dump(dict_flow, f)

                # Record how the flow node connects to its two IP nodes.
                with open(big_graph_edge, "a+", newline='') as f:
                    f_writer = csv.writer(f)
                    f_writer.writerow([ip_dict[ip_first], json_num])
                    f_writer.writerow([json_num, ip_dict[ip_dst]])

                json_num += 1
                flow_nums += 1
                if flow_nums >= IP_DICT_RESET_INTERVAL:
                    flow_nums = 0
                    ip_dict = {}
    print(num_label_dict)


if __name__ == '__main__':
    load_data()