diff options
| author | 尹姜谊 <[email protected]> | 2024-01-12 18:12:32 +0800 |
|---|---|---|
| committer | 尹姜谊 <[email protected]> | 2024-01-12 18:12:32 +0800 |
| commit | 4111d1fac7c21e701cb9be1c1365a5ffdaab94e4 (patch) | |
| tree | 8c3f5dfc615da2575450cb67ada254c0f8de17ec /detection | |
Initial commit
Diffstat (limited to 'detection')
26 files changed, 1209 insertions, 0 deletions
diff --git a/detection/__init__.py b/detection/__init__.py
new file mode 100644
index 0000000..0c8fce0
--- /dev/null
+++ b/detection/__init__.py
@@ -0,0 +1,6 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# @Time : 2024/1/8 11:42
+# @author : yinjiangyi
+# @File : __init__.py
+# @Function:
diff --git a/detection/__pycache__/__init__.cpython-39.pyc b/detection/__pycache__/__init__.cpython-39.pyc
Binary files differ
new file mode 100644
index 0000000..a39bcb4
--- /dev/null
+++ b/detection/__pycache__/__init__.cpython-39.pyc
diff --git a/detection/__pycache__/vpn_detector.cpython-39.pyc b/detection/__pycache__/vpn_detector.cpython-39.pyc
Binary files differ
new file mode 100644
index 0000000..c491155
--- /dev/null
+++ b/detection/__pycache__/vpn_detector.cpython-39.pyc
diff --git a/detection/tool/Config.py b/detection/tool/Config.py
new file mode 100644
index 0000000..214c3c9
--- /dev/null
+++ b/detection/tool/Config.py
@@ -0,0 +1,38 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# @Time : 2024/1/8 12:36
+# @author : yinjiangyi
+# @File : Config.py
+# @Function: read the config.yaml file and manage all configuration values
+
+import yaml
+import os
+import sys
+sys.path.append('..')
+from tool.Functions import get_project_path
+
+
+class Config:
+    def __init__(self):
+        # read the config.yaml file at the project root
+        self.config = self.read_yaml(get_project_path() + '/config.yaml')
+
+    # read a yaml file into a dict
+    def read_yaml(self, yaml_file):
+        try:
+            with open(yaml_file, 'r', encoding='utf-8') as f:
+                config = yaml.load(f.read(), Loader=yaml.FullLoader)
+            return config
+        except Exception as e:
+            print(e)
+            sys.exit()
+
+
+if __name__ == '__main__':
+    config = Config().config
+    print(config)
+    print(config['time_zone'])
+    print(config['sql']['cyberghost']['sql_tsg'])
diff --git a/detection/tool/Functions.py b/detection/tool/Functions.py
new file mode 100644
index 0000000..043be6b
--- /dev/null
+++ b/detection/tool/Functions.py
@@ -0,0 +1,495 @@
+'''
+Description:
+Author: chenxu
+Date: 2022-03-31 15:15:09
+LastEditTime: 2022-09-29 10:59:11
+LastEditors: yinjiangyi
+'''
+import datetime
+import os
+import re
+import shutil
+
+import pytz
+import yaml
+import struct
+import socket
+import ipaddress
+import numpy as np
+import pandas as pd
+
+from clickhouse_driver import Client
+
+
+
+'''
+Description: check whether a file exists
+Param : file path
+Return: True if the file exists, False otherwise
+'''
+
+
+def fileExists(readfilePath):
+    return os.path.exists(readfilePath)
+
+
+'''
+Description: read a text file into a list of whitespace-stripped lines
+Param :
+Return:
+param {*} readfilePath
+'''
+
+
+def readTxt(readfilePath):
+ with open(readfilePath) as file:
+ lines = file.readlines()
+ listStr = list()
+ for line in lines:
+ listStr.append("".join(line.split()))
+
+ return listStr
+
+
+'''
+Description: append content to a file, one item per line
+Param :
+Return:
+param {*} writeFilePath
+param {*} content
+'''
+
+
+def write(writeFilePath, content):
+    with open(writeFilePath, 'a') as file:
+        for item in content:
+            file.write("".join(item))
+            file.write('\n')
+
+
+'''
+Description: truncate a file's contents
+Param :
+Return:
+param {*} writeFilePath
+'''
+
+
+def clear(writeFilePath):
+    # opening in 'w' mode truncates the file; the original called the
+    # no-op attribute file.closed instead of file.close()
+    with open(writeFilePath, 'w'):
+        pass
+
+
+def clear_dir(dir_path, git_keep=False):
+ shutil.rmtree(dir_path)
+ os.mkdir(dir_path)
+ if git_keep:
+ open(dir_path + "/.gitkeep", 'a').close()
+
+
+'''
+    Description: check whether a domain name is valid
+    Param : domain name
+    Return: True if valid, False otherwise
+'''
+
+
+def is_valid_domain(domain):
+    pattern = re.compile(
+        r'^(([a-zA-Z]{1})|([a-zA-Z]{1}[a-zA-Z]{1})|'
+        r'([a-zA-Z]{1}[0-9]{1})|([0-9]{1}[a-zA-Z]{1})|'
+        r'([a-zA-Z0-9][-_.a-zA-Z0-9]{0,61}[a-zA-Z0-9]))\.'
+        r'([a-zA-Z]{2,13}|[a-zA-Z0-9-]{2,30}\.[a-zA-Z]{2,3})$'
+    )
+    return bool(pattern.match(domain))
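+
+# Illustrative behaviour of the pattern above (a sketch, not part of the
+# original tests):
+#   is_valid_domain('vpn.example.com')  -> True
+#   is_valid_domain('localhost')        -> False  (no dot-separated TLD)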
+
+
+'''
+    Description: count entries in a host list that are not valid domains
+    Param : host list
+    Return: ratio of non-domain entries
+'''
+
+
+def isNotDomain(hostList):
+    isNotdomainNum = 0
+    for item in hostList:
+        item = item.split(':')[0]
+        if not is_valid_domain(item):
+            isNotdomainNum += 1
+    if len(hostList) > 0:
+        return isNotdomainNum / len(hostList)
+
+    return 0
+
+
+'''
+    Description: count www.<word>.com style domains with length >= 17 in a host list
+    Param : host list
+    Return: ratio of matching domains
+'''
+
+
+def wwwDomain(hostList):
+ pattern = re.compile(r'(www)\.([a-zA-Z]{5,25})\.(com)')
+ cnt = 0
+ for item in hostList:
+ if is_valid_domain(item) and pattern.match(item) and len(item) >= 17:
+ cnt = cnt + 1
+ if len(hostList) > 0:
+ return cnt / len(hostList)
+
+ return 0
+
+
+def readYaml(path):
+    with open(path, encoding="utf-8") as file:
+        data = yaml.safe_load(file)
+    return data
+
+
+"""
+ 将列表拆分为指定长度的多个列表
+ :param lists: 初始列表
+ :param cut_len: 每个列表的长度
+ :return: 一个二维数组 [[x,x],[x,x]]
+ """
+
+
+def cut_list(lists, cut_len):
+ res_data = []
+ if len(lists) > cut_len:
+ for i in range(int(len(lists) / cut_len)):
+ cut_a = lists[cut_len * i:cut_len * (i + 1)]
+ res_data.append(cut_a)
+
+ last_data = lists[int(len(lists) / cut_len) * cut_len:]
+ if last_data:
+ res_data.append(last_data)
+ else:
+ res_data.append(lists)
+
+ return res_data
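+
+# A minimal usage sketch (illustrative only):
+#   cut_list([1, 2, 3, 4, 5], 2)  -> [[1, 2], [3, 4], [5]]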
+
+
+'''
+Description: query the IP geolocation database
+Param : ip
+Return: IP location info (ISP owner or country code)
+'''
+
+
+def queryIpDatabase(reader, ip, query_type):
+    # reader = awdb.open_database(path)
+    # query_type renamed from `type` to avoid shadowing the builtin
+    (record, prefix_len) = reader.get_with_prefix_len(ip)
+    if query_type == 'isp':
+        return bytes.decode(record.get('owner'))
+    if query_type == 'country':
+        return bytes.decode(record.get('areacode'))
+
+
+'''
+Description: match an ISP name against known VPN hosting providers
+Param : isp name
+Return: 1 if matched, 0 otherwise
+'''
+
+
+def ipReputation(providerStr):
+ providerList = ['IONOS SE', 'M247 Ltd', 'AltusHost B.V.', 'Packet Exchange Limited', 'Orion Network Limited',
+ 'DigitalOcean, LLC', 'Greenhost BV', 'UK-2 Limited', 'RouteLabel V.O.F.', 'InMotion Hosting, Inc.',
+ 'ONLINE S.A.S.', 'Linode, LLC', 'Hosting Services, Inc.', 'Performive LLC']
+ for item in providerList:
+ if item in providerStr or providerStr in item:
+ return 1
+ return 0
+
+
+'''
+Description: match a country code
+Param : country
+Return: 1 if matched, 0 otherwise
+'''
+
+
+def ipCountry(Country):
+ if Country == 'ET':
+ return 1
+ return 0
+
+
+'''
+Description: check whether the source IP belongs to the target ISP's address blocks
+Param :
+Return: True if it does, False otherwise
+param {str} ip_address
+'''
+
+
+def queryipBlock(data, ip):
+    # convert the dotted quad to a host-order integer for range comparison
+    intIp = socket.ntohl(struct.unpack("I", socket.inet_aton(str(ip)))[0])
+    for index, row in data.iterrows():
+        # inclusive bounds: block endpoints count as members of the block
+        if row['minip'] <= intIp <= row['maxip']:
+            return True
+    return False
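+
+# Worked example of the conversion above (illustrative): inet_aton('1.2.3.4')
+# yields b'\x01\x02\x03\x04'; unpacking natively and applying ntohl gives the
+# big-endian integer 0x01020304 == 16909060, so dotted quads compare as ints.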
+
+
+# '''
+# Description: collect ISP names based on the spur labelling results
+# Param :
+# Return:
+# '''
+# def collectIspName():
+# reader = awdb.open_database('developerKits/IP_city_single_WGS84.awdb')
+# spurLabel = pd.read_csv("externalTest/data/spur.csv",names=['ip','label'])
+# spurVPNip = list()
+# for index,row in spurLabel.iterrows():
+# if row['label']!='0':
+# spurVPNip.append(queryIpDatabase(reader,row['ip'],'isp'))
+# print(Counter(spurVPNip))
+
+
+'''
+Description: deduplicate the lines of a file
+Param :
+Return:
+'''
+
+
+def duplicateRemoval(readDir, writeDir):
+    lines_seen = set()
+    with open(readDir, "r") as infile, open(writeDir, "w") as outfile:
+        for line in infile:
+            if line not in lines_seen:
+                outfile.write(line)
+                lines_seen.add(line)
+
+
+'''
+Description: check whether an IP is a private (LAN) address
+Param :
+Return:
+param {*} ip
+'''
+
+
+def is_lan(ip):
+ try:
+ return ipaddress.ip_address(ip.strip()).is_private
+ except Exception as e:
+ return False
+
+
+def getAsnList(file_path):
+    '''
+    get asn list from file; the first field of each line is a label and the
+    remaining comma-separated fields are collected as ASN strings
+    :return: asn list
+    '''
+    asn_list = []
+    with open(file_path) as file:
+        for line in file:
+            # split() always yields at least one field, so no length check is needed
+            fields = line.split(',')
+            asn_list.extend([str(i.strip()) for i in fields[1:]])
+    return asn_list
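+
+# Assumed input layout (first field is a label, the rest are ASNs), e.g.:
+#   cyberghost,AS9009,AS136787,AS42159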
+
+
+def getFeatureList(file_path):
+ features = []
+ with open(file_path) as file:
+ lines = file.readlines()
+ for line in lines:
+ features.append(line.strip())
+ return features
+
+
+def is_valid_ip(ip_str):
+ try:
+ ipaddress.ip_address(ip_str)
+ return True
+ except ValueError:
+ return False
+
+
+def find_invalid_ip(ip_list):
+ error_ip_list = []
+ for ip in ip_list:
+ if not is_valid_ip(ip):
+ error_ip_list.append(ip)
+ return error_ip_list
+
+
+def filter_files_by_time_range(path, start_day, end_day, suffix='.csv'):
+    # collect the paths of files whose timestamp falls in [start_day, end_day)
+    result = []
+
+    start_day_time = datetime.datetime.strptime(start_day, '%Y-%m-%d').date()
+    end_day_time = datetime.datetime.strptime(end_day, '%Y-%m-%d').date()
+
+    # walk the directory tree and filter files by timestamp
+    for root, dirs, files in os.walk(path):
+        for file in files:
+            # keep only files with the expected suffix (default: .csv)
+            if len(suffix) > 0 and file.endswith(suffix):
+                filepath = os.path.join(root, file)
+                # the original intent was creation time, but access time is
+                # what is actually used here
+                # created_time = os.path.getctime(filepath)
+                created_time = os.path.getatime(filepath)
+                # convert the timestamp to a date
+                created_date = datetime.datetime.fromtimestamp(created_time).date()
+                if start_day_time <= created_date < end_day_time:
+                    result.append(filepath)
+        for d in dirs:
+            # recurse into each subdirectory and merge the results
+            result.extend(filter_files_by_time_range(os.path.join(root, d), start_day, end_day, suffix))
+
+    return list(set(result))
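+
+# Note: os.walk already descends into subdirectories, so the explicit
+# recursion above revisits each subtree; the trailing set() deduplicates
+# the overlapping results.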
+
+
+def filter_files_by_created_time(path, days, suffix='.csv'):
+    # build a [today - days, tomorrow) window and delegate to the range filter
+    end_day = datetime.date.today() + datetime.timedelta(days=1)
+    start_day = datetime.date.today() - datetime.timedelta(days=days)
+
+    return filter_files_by_time_range(path, start_day.strftime('%Y-%m-%d'), end_day.strftime('%Y-%m-%d'), suffix)
+
+
+def delete_dir_by_create_time(path, days):
+    # delete files under `path` older than `days` (by modification time)
+    ds = list(os.walk(path))
+    delta = datetime.timedelta(days=days)
+    now = datetime.datetime.now()
+
+    for d in ds:
+        os.chdir(d[0])
+        if d[2]:
+            for x in d[2]:
+                # despite the function name, this uses the modification time
+                mtime = datetime.datetime.fromtimestamp(os.path.getmtime(x))
+                if mtime < (now - delta):
+                    os.remove(x)
+    os.chdir(os.path.abspath(os.path.join(os.path.dirname(__file__), "../..")))
+
+
+class connectTest():
+ def __init__(self, config):
+ self.headTime = config['headTime']
+ self.tailTime = config['tailTime']
+ self.tableName = config['tableName']
+ self.timeZone = config['timeZone']
+ self.client = Client(user=config['username'], password=config['password'], host=config['host'],
+ port=config['port'], database=config['database'])
+ self.dbname = config['database']
+
+    def dataTest(self):
+        """
+        :return: 1: data present  0: no data  2: wrong configuration
+        """
+        # check whether the table contains any data in the query window
+        try:
+            testSQL = "select * from " + self.tableName + " where common_recv_time >= toDateTime('" \
+                      + str(self.headTime) + "', '" + self.timeZone + "') and common_recv_time < toDateTime('" \
+                      + str(self.tailTime) + "', '" + self.timeZone + "') limit 1 "
+
+            queryResult = self.client.execute(testSQL)
+
+            if len(queryResult) < 1:
+                return 0
+            return 1
+        except Exception:
+            # most likely a wrong database configuration in config.yaml
+            return 2
+
+
+def get_project_path():
+    # NOTE: shadowed by the get_project_path() redefined at the bottom of
+    # this module, so this cwd-based variant never takes effect
+    path = os.path.join(os.getcwd())
+    return path.rsplit('vpn-thwarting', 1)[0] + 'vpn-thwarting'
+
+
+def cal_psi(actual, predict, bins=10):
+    """
+    Description: compute the PSI (Population Stability Index) between the
+    actual and expected score distributions
+    :param actual: Array or Series of observed data, e.g. training-set scores
+    :param predict: Array or Series of expected data, e.g. test-set scores
+    :param bins: number of bins
+    :return:
+        psi: float, the PSI value
+        psi_df: DataFrame with the per-bin breakdown
+    """
+    actual_min = actual.min()  # minimum observed score
+    actual_max = actual.max()  # maximum observed score
+    binlen = (actual_max - actual_min) / bins
+    cuts = [actual_min + i * binlen for i in range(1, bins)]  # interior bin edges
+    cuts.insert(0, -float("inf"))
+    cuts.append(float("inf"))
+    actual_cuts = np.histogram(actual, bins=cuts)  # equal-width binning of actual
+    predict_cuts = np.histogram(predict, bins=cuts)  # bin predict with the same edges
+    actual_df = pd.DataFrame(actual_cuts[0], columns=['actual'])
+    predict_df = pd.DataFrame(predict_cuts[0], columns=['predict'])
+    psi_df = pd.merge(actual_df, predict_df, right_index=True, left_index=True)
+    # add 1 to the numerator so empty bins cannot zero out the PSI log term
+    psi_df['actual_rate'] = (psi_df['actual'] + 1) / psi_df['actual'].sum()
+    psi_df['predict_rate'] = (psi_df['predict'] + 1) / psi_df['predict'].sum()
+    psi_df['psi'] = (psi_df['actual_rate'] - psi_df['predict_rate']) * np.log(
+        psi_df['actual_rate'] / psi_df['predict_rate'])
+    psi = psi_df['psi'].sum()
+    return psi, psi_df
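+
+# A minimal sketch of calling cal_psi on synthetic scores (illustrative only):
+#   train_scores = pd.Series(np.random.rand(1000))
+#   test_scores = pd.Series(np.random.rand(1000))
+#   psi, psi_df = cal_psi(train_scores, test_scores, bins=10)
+#   # a PSI below 0.1 is commonly read as a stable score distribution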
+
+
+def get_file_line_count(file_path):
+    """
+    Description: count the number of lines in a file
+    :param file_path: file path
+    :return: line count
+    """
+    count = 0
+    with open(file_path, 'r', encoding='utf-8') as f:
+        for line in f:
+            count += 1
+    return count
+
+
+def check_internet(timeout=3, servername='www.baidu.com'):
+    """
+    Description: check whether the internet is reachable by connecting to a
+    well-known server; the timeout defaults to 3 seconds
+    :return: True or False
+    """
+    try:
+        socket.setdefaulttimeout(timeout)
+        host = socket.gethostbyname(servername)
+        # use the timeout parameter here too (the original hard-coded 2 seconds)
+        s = socket.create_connection((host, 80), timeout)
+        s.close()
+        return True
+    except Exception:
+        return False
+
+
+def get_project_path():
+    # return the grandparent directory of this file (the project root);
+    # this definition overrides the earlier cwd-based one above
+    return os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))
+
+
+
+
+
diff --git a/detection/tool/KnowledgeBaseTool.py b/detection/tool/KnowledgeBaseTool.py
new file mode 100644
index 0000000..1b55b51
--- /dev/null
+++ b/detection/tool/KnowledgeBaseTool.py
@@ -0,0 +1,98 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# @Time : 2023/6/12 16:09
+# @author : yinjiangyi
+# @File : KnowledgeBaseTool.py
+# @Function: client for the knowledge base HTTP API
+
+import json
+import sys
+import time
+from warnings import simplefilter
+import requests
+from tool import Functions, LoggingTool
+
+logger = LoggingTool.Logger().getLogger()
+simplefilter(action='ignore', category=FutureWarning)
+
+
+class KnowledgeApi:
+    def __init__(self, config):
+        self.config = config
+        self.api_address = config['knowledgebase']['host']
+        self.api_username = config['knowledgebase']['kb_username']
+        self.api_pin = config['knowledgebase']['api_pin']
+        self.api_path = config['knowledgebase']['api_path']
+        self.retry_max = config['knowledgebase']['api_retry_times']
+        self.request_timeout = config['knowledgebase']['api_timeout']
+        # self.api_token = self.get_api_token()
+        self.api_token = config['knowledgebase']['api_token']
+
+    def get_api_token(self):
+        url = "http://" + self.api_address + "/sys/login"
+        data = {
+            "username": self.api_username,
+            "pin": self.api_pin
+        }
+        try:
+            resp = requests.post(url, data=json.dumps(data),
+                                 headers={'Content-Type': 'application/json'}, timeout=self.request_timeout)
+            return json.loads(resp.text).get('data').get('token')
+        except Exception as e:
+            logger.error(e)
+            sys.exit()
+
+    def get_library_id(self, library_name):
+        url = "http://" + self.api_address + "/v1/knowledgeBase/list?name=" + library_name
+        header = {
+            "Cn-Authorization": self.api_token
+        }
+        resp = None
+        try:
+            resp = requests.get(url, headers=header, timeout=self.request_timeout)
+            return json.loads(resp.text).get('data').get('list')[0].get('knowledgeId')
+        except Exception as e:
+            # resp may be unset if the request itself failed
+            if resp is not None:
+                logger.error(resp.text)
+            logger.error(e)
+            sys.exit()
+
+    def file_import(self, file_path, knowledge_id, action, description=''):
+        url = 'http://' + self.api_address + self.api_path
+
+        param = {
+            "knowledgeId": knowledge_id,
+            "action": action,
+            "description": description
+        }
+
+        header = {
+            "Cn-Authorization": self.api_token
+        }
+
+        is_succeed = 0
+        response_code = -1
+        with open(file_path, "rb") as file:
+            file_object = {"file": file}
+            for i in range(self.retry_max):
+                try:
+                    resp = requests.post(url, files=file_object, params=param,
+                                         headers=header, timeout=self.request_timeout)
+                    resp = json.loads(resp.text)
+                    logger.info(resp)
+                    response_code = resp.get('code')
+                    if response_code == 200:
+                        is_succeed = 1
+                        break
+                except requests.RequestException as e:
+                    time.sleep(5)
+                    logger.error(e)
+                    continue
+
+        if not is_succeed:
+            logger.error('Import failed!')
+            sys.exit()
+        else:
+            logger.info('Import succeeded. Response code {}.'.format(response_code))
diff --git a/detection/tool/LoggingTool.py b/detection/tool/LoggingTool.py
new file mode 100644
index 0000000..f298964
--- /dev/null
+++ b/detection/tool/LoggingTool.py
@@ -0,0 +1,33 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# @Time : 2023/3/18 15:36
+# @author : yinjiangyi
+# @File : LoggingTool.py
+# @Function: project-wide logger configured from logging.yaml
+
+
+import yaml
+import logging.config
+import sys
+sys.path.append('..')
+from tool.Functions import get_project_path
+
+
+class Logger:
+    LOGGER_NAME = 'runLogger'
+
+    def getLogger(self, loggerName=LOGGER_NAME):
+        logging.config.dictConfig(load_config())
+        logger = logging.getLogger(loggerName)
+        return logger
+
+
+def load_config():
+    project_path = get_project_path()
+    path = project_path + '/logging.yaml'
+    with open(path, 'r') as f:
+        config = yaml.load(f, Loader=yaml.FullLoader)
+    # the log filename from logging.yaml is used as-is
+    return config
diff --git a/detection/tool/MariadbTool.py b/detection/tool/MariadbTool.py
new file mode 100644
index 0000000..7a839cd
--- /dev/null
+++ b/detection/tool/MariadbTool.py
@@ -0,0 +1,54 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# @Time : 2023/7/7 10:38
+# @author : yinjiangyi
+# @File : MariadbTool.py
+# @Function: thin wrapper around a pymysql connection
+
+import pymysql
+
+
+class MariadbUtil:
+
+    def __init__(self, host, port, user, password, database):
+        self.db = pymysql.connect(host=host,
+                                  port=port,
+                                  user=user,
+                                  password=password,
+                                  database=database)
+        self.cursor = self.db.cursor()
+
+    def query_sql(self, sql):
+        """query multiple rows"""
+        list_result = ()
+        try:
+            self.cursor.execute(sql)
+            list_result = self.cursor.fetchall()
+        except Exception as e:
+            print(e)
+        return list_result
+
+    def insert(self, sql):
+        """insert data"""
+        return self.__edit(sql)
+
+    def update(self, sql):
+        """update data"""
+        return self.__edit(sql)
+
+    def delete(self, sql):
+        """delete data"""
+        return self.__edit(sql)
+
+    def __edit(self, sql):
+        count = 0
+        try:
+            count = self.cursor.execute(sql)
+            self.db.commit()
+        except Exception as e:
+            print(e)
+        return count
+
+    def close(self):
+        self.cursor.close()
+        # also close the underlying connection, not just the cursor
+        self.db.close()
diff --git a/detection/tool/ProgramInit.py b/detection/tool/ProgramInit.py
new file mode 100644
index 0000000..a6b19b1
--- /dev/null
+++ b/detection/tool/ProgramInit.py
@@ -0,0 +1,24 @@
+'''
+Description: initialize (clear) the data files used by the detector
+Author: chenxu
+Date: 2022-09-20 10:53:28
+LastEditTime: 2022-11-02 15:01:07
+LastEditors:
+'''
+import Functions
+import os
+
+
+def dataFilerClear():
+    # clear detection data files and directories
+    Functions.clear('PsiphonServerIP.txt')
+    Functions.clear_dir('logs/', git_keep=True)
+    Functions.clear_dir('data/feature', git_keep=True)
+    Functions.clear_dir('data/result', git_keep=True)
+    Functions.clear_dir('data/model/pic')
+
+
+def modelClear():
+    os.remove("data/model/originModel.model")
+    os.remove('data/model/RFmodel.model')
+
+
+dataFilerClear()
diff --git a/detection/tool/ResultEvaluation.py b/detection/tool/ResultEvaluation.py
new file mode 100644
index 0000000..6ee58df
--- /dev/null
+++ b/detection/tool/ResultEvaluation.py
@@ -0,0 +1,28 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# @Time : 2023/3/27 19:34
+# @author : yinjiangyi
+# @File : ResultEvaluation.py
+# @Function: collect detection results for evaluation
+
+import os
+import pandas as pd
+
+from tool import Functions
+
+
+class LabelScraper:
+
+    def __init__(self, start_time, end_time):
+        self.start_time = start_time
+        self.end_time = end_time
+        self.detected_df = self.read_result()
+
+    def read_result(self):
+        # the two date arguments match filter_files_by_time_range, not
+        # filter_files_by_created_time (which takes a day count)
+        result_frames = []
+        files = Functions.filter_files_by_time_range('data/result', '2023-01-01', '2023-03-30')
+        for file in files:
+            result_frames.append(pd.read_csv(file, names=['ip', 'pred_y', 'score', 'time']))
+
+        if result_frames:
+            return pd.concat(result_frames, ignore_index=True)
+        return pd.DataFrame(columns=['ip', 'pred_y', 'score', 'time'])
diff --git a/detection/tool/__init__.py b/detection/tool/__init__.py
new file mode 100644
index 0000000..273b330
--- /dev/null
+++ b/detection/tool/__init__.py
@@ -0,0 +1,6 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# @Time : 2024/1/12 17:29
+# @author : yinjiangyi
+# @File : __init__.py
+# @Function:
diff --git a/detection/tool/__pycache__/Config.cpython-39.pyc b/detection/tool/__pycache__/Config.cpython-39.pyc
Binary files differ
new file mode 100644
index 0000000..f1f042c
--- /dev/null
+++ b/detection/tool/__pycache__/Config.cpython-39.pyc
diff --git a/detection/tool/__pycache__/Functions.cpython-39.pyc b/detection/tool/__pycache__/Functions.cpython-39.pyc
Binary files differ
new file mode 100644
index 0000000..1375d38
--- /dev/null
+++ b/detection/tool/__pycache__/Functions.cpython-39.pyc
diff --git a/detection/tool/__pycache__/KnowledgeBaseTool.cpython-39.pyc b/detection/tool/__pycache__/KnowledgeBaseTool.cpython-39.pyc
Binary files differ
new file mode 100644
index 0000000..11eda10
--- /dev/null
+++ b/detection/tool/__pycache__/KnowledgeBaseTool.cpython-39.pyc
diff --git a/detection/tool/__pycache__/LoggingTool.cpython-39.pyc b/detection/tool/__pycache__/LoggingTool.cpython-39.pyc
Binary files differ
new file mode 100644
index 0000000..2527566
--- /dev/null
+++ b/detection/tool/__pycache__/LoggingTool.cpython-39.pyc
diff --git a/detection/tool/__pycache__/MariadbTool.cpython-39.pyc b/detection/tool/__pycache__/MariadbTool.cpython-39.pyc
Binary files differ
new file mode 100644
index 0000000..5cfa28f
--- /dev/null
+++ b/detection/tool/__pycache__/MariadbTool.cpython-39.pyc
diff --git a/detection/tool/__pycache__/__init__.cpython-39.pyc b/detection/tool/__pycache__/__init__.cpython-39.pyc
Binary files differ
new file mode 100644
index 0000000..84ff087
--- /dev/null
+++ b/detection/tool/__pycache__/__init__.cpython-39.pyc
diff --git a/detection/vpn_detector.py b/detection/vpn_detector.py
new file mode 100644
index 0000000..c627aea
--- /dev/null
+++ b/detection/vpn_detector.py
@@ -0,0 +1,205 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# @Time : 2024/1/8 13:51
+# @author : yinjiangyi
+# @File : vpn_detector.py
+# @Function: parent class of the various vpn classes in the vpnservices package
+import argparse
+import datetime
+import socket
+
+import sys
+import os
+sys.path.append('..')
+
+import pandas as pd
+from tool.LoggingTool import Logger
+from clickhouse_driver import Client
+from tool.Config import Config
+from tool.KnowledgeBaseTool import KnowledgeApi
+import concurrent.futures
+
+
+class VpnDetector:
+    """
+    Parent class of the various vpn classes in the vpnservices package.
+    Responsibilities: load config, query data from the clickhouse database,
+    save results, and write data to the knowledge base.
+    """
+    def __init__(self, start_time, end_time):
+        self.config = self.load_config()
+        self.output_file_path = self.config['common']['output_path']
+        self.logger = Logger().getLogger()
+        self.kb = KnowledgeApi(self.config)
+        self.start_time = start_time
+        self.end_time = end_time
+        self.time_zone = self.config['common']['time_zone']
+        self.dbname = self.config['clickhouse']['db_name']
+        self.table_name = self.config['clickhouse']['table_name']
+
+        self.client = Client(user=self.config['clickhouse']['username'],
+                             password=self.config['clickhouse']['password'],
+                             host=self.config['clickhouse']['host'],
+                             port=self.config['clickhouse']['port'], database=self.dbname)
+
+    def load_config(self):
+        """
+        Load config
+        :return: config
+        """
+        config = Config().config
+        return config
+
+    def save_to_knowledgebase(self, object_list, object_type, vpn_service_name, plugin_id, plugin_name, output_filename, confidence='suspected'):
+        """
+        Write data to a local file and to the knowledge base
+        :param object_type: ip or domain
+        :param object_list: list of ip or domain
+        :param vpn_service_name: nordvpn, cyberghostvpn, etc.
+        :param plugin_id: plugin id
+        :param plugin_name: plugin name
+        :param output_filename: filename
+        :param confidence: one of three confidence levels: confirmed, suspected, tentative
+        :return:
+        """
+        if object_type == 'ip':
+            library_name = self.config['knowledgebase']['ip_library_name']
+        else:
+            library_name = self.config['knowledgebase']['domain_library_name']
+        knowledge_id = self.kb.get_library_id(library_name)
+
+        # convert result data into the required format https://docs.geedge.net/pages/viewpage.action?pageId=104760257
+        result_df = pd.DataFrame()
+        if object_type == 'ip':
+            result_df['ip1'] = object_list
+            result_df['ip2'] = object_list
+            result_df.insert(0, 'addr_format', 'Single')
+        if object_type == 'domain':
+            result_df['domain'] = object_list
+
+        result_df['plugin_id'] = plugin_id
+        result_df['plugin_name'] = plugin_name
+        result_df['vpn_service_name'] = vpn_service_name
+        result_df['method'] = 'passive_ml'
+        result_df['confidence'] = confidence
+        result_df['is_valid'] = 1
+
+        # save any non-empty result set (the original `> 1` skipped single results)
+        if len(result_df) > 0:
+            self.logger.info('Start to update data to knowledgebase')
+            result_path = os.path.join('data', plugin_name)
+            if not os.path.exists(result_path):
+                os.makedirs(result_path)
+            result_file = os.path.join(result_path, output_filename)
+            result_df.to_csv(result_file, index=False)
+
+            # upload to the knowledge base
+            knowledge_api = KnowledgeApi(self.config)
+            self.logger.info('[Updating knowledgebase]- {} num:{}'.format(object_type, len(object_list)))
+            description_str = "Update {} record(s).".format(len(object_list))
+            knowledge_api.file_import(result_file, knowledge_id, 'update', description_str)
+
+    def get_resolved_addr(self, server_name):
+        """
+        Resolve a server name to its ip address list
+        :param server_name: server name
+        :return: (server name, ip addr list or None)
+        """
+        resolved_addr = None
+        try:
+            resolved_addr = socket.gethostbyname_ex(server_name)[2]
+        except Exception:
+            # resolution failures are expected for guessed names; ignore them
+            # self.logger.error("Resolve failed. {}: {}".format(server_name, e))
+            pass
+
+        return server_name, resolved_addr
+
+    def resolve_dns_for_domain_list(self, domain_list, max_workers=100):
+        """
+        Resolve a domain list to an ip list
+        :param domain_list:
+        :param max_workers:
+        :return:
+        """
+        results = []
+        with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
+            futures = [executor.submit(self.get_resolved_addr, domain) for domain in domain_list]
+            for future in concurrent.futures.as_completed(futures):
+                try:
+                    server_name, resolved_addr = future.result()
+                    if resolved_addr is not None:
+                        results.extend(resolved_addr)
+                except Exception as e:
+                    self.logger.error(e)
+
+        return results
+
+
+# entry point
+if __name__ == '__main__':
+
+    parser = argparse.ArgumentParser(description='VPN detection')
+    parser.add_argument('-p', '--plugin', type=str, help='plugin name')
+    parser.add_argument('-m', '--mode', type=str, default='recent', help='recent or fixed')
+    parser.add_argument('-r', '--recent_interval', type=str, default='1h', help='recent time')
+    parser.add_argument('-s', '--start', type=str, default='', help='start time')
+    parser.add_argument('-e', '--end', type=str, default='', help='end time')
+
+    args = parser.parse_args()
+    plugin_name = args.plugin
+    mode = args.mode
+    start_time = args.start
+    end_time = args.end
+
+    # parameter verification
+    if mode == 'recent':
+        if start_time != '' or end_time != '':
+            print('Please input correct time format')
+            exit()
+        recent_interval = args.recent_interval
+        if recent_interval[-1] == 'h':
+            recent_interval = int(recent_interval[:-1])
+        elif recent_interval[-1] == 'd':
+            recent_interval = int(recent_interval[:-1]) * 24
+        else:
+            print('Please input correct recent interval')
+            exit()
+        # round the current time down to the hour and go back recent_interval hours
+        end_time = datetime.datetime.now().strftime("%Y-%m-%d %H:00:00")
+        start_time = (datetime.datetime.now() - datetime.timedelta(hours=recent_interval)).strftime("%Y-%m-%d %H:00:00")
+    elif mode == 'fixed':
+        if start_time == '' or end_time == '':
+            print('Please input correct time format')
+            exit()
+    else:
+        print('Please input correct time mode')
+        exit()
+
+    detector = None
+    if plugin_name == 'hotspotvpn_serverip':
+        from vpnservices.hotspotvpn_serverip import HotspotvpnServerip
+        detector = HotspotvpnServerip(start_time, end_time)
+    elif plugin_name == 'ipvanishvpn_servername':
+        from vpnservices.ipvanishvpn_servername import IpvanishvpnServername
+        detector = IpvanishvpnServername(start_time, end_time)
+    elif plugin_name == 'ipvanishvpn_serverip':
+        from vpnservices.ipvanishvpn_serverip import IpvanishvpnServerip
+        detector = IpvanishvpnServerip(start_time, end_time)
+    else:
+        print('Please input correct plugin name')
+        exit()
+
+    result_list = detector.find_server()
+    if len(result_list) > 0:
+        detector.save_to_knowledgebase(result_list, detector.object_type,
+                                       detector.vpn_service_name, detector.plugin_id,
+                                       detector.plugin_name, detector.output_file_name, detector.confidence)
diff --git a/detection/vpnservices/__init__.py b/detection/vpnservices/__init__.py
new file mode 100644
index 0000000..7b4a106
--- /dev/null
+++ b/detection/vpnservices/__init__.py
@@ -0,0 +1,6 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# @Time : 2024/1/8 11:48
+# @author : yinjiangyi
+# @File : __init__.py
+# @Function:
diff --git a/detection/vpnservices/__pycache__/__init__.cpython-39.pyc b/detection/vpnservices/__pycache__/__init__.cpython-39.pyc
Binary files differ
new file mode 100644
index 0000000..2a33991
--- /dev/null
+++ b/detection/vpnservices/__pycache__/__init__.cpython-39.pyc
diff --git a/detection/vpnservices/__pycache__/hotspotvpn_serverip.cpython-39.pyc b/detection/vpnservices/__pycache__/hotspotvpn_serverip.cpython-39.pyc
Binary files differ
new file mode 100644
index 0000000..bc2413a
--- /dev/null
+++ b/detection/vpnservices/__pycache__/hotspotvpn_serverip.cpython-39.pyc
diff --git a/detection/vpnservices/__pycache__/ipvanishvpn_serverip.cpython-39.pyc b/detection/vpnservices/__pycache__/ipvanishvpn_serverip.cpython-39.pyc
Binary files differ
new file mode 100644
index 0000000..5e3a0e2
--- /dev/null
+++ b/detection/vpnservices/__pycache__/ipvanishvpn_serverip.cpython-39.pyc
diff --git a/detection/vpnservices/__pycache__/ipvanishvpn_servername.cpython-39.pyc b/detection/vpnservices/__pycache__/ipvanishvpn_servername.cpython-39.pyc
Binary files differ
new file mode 100644
index 0000000..b6603a4
--- /dev/null
+++ b/detection/vpnservices/__pycache__/ipvanishvpn_servername.cpython-39.pyc
diff --git a/detection/vpnservices/hotspotvpn_serverip.py b/detection/vpnservices/hotspotvpn_serverip.py
new file mode 100644
index 0000000..39aa875
--- /dev/null
+++ b/detection/vpnservices/hotspotvpn_serverip.py
@@ -0,0 +1,63 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# @Time : 2024/1/11 15:45
+# @author : yinjiangyi
+# @File : hotspotvpn_serverip.py
+# @Function: detect hotspotvpn server ips
+
+from vpn_detector import VpnDetector
+import pandas as pd
+
+
+class HotspotvpnServerip(VpnDetector):
+    """
+    This class is used to detect hotspotvpn server ips
+    """
+
+    def __init__(self, start_time, end_time):
+        super().__init__(start_time, end_time)
+        self.plugin_config = self.load_config()['hotspotvpn_serverip']
+        self.plugin_id = self.plugin_config['plugin_id']
+        self.plugin_name = self.plugin_config['plugin_name']
+        self.object_type = self.plugin_config['object_type']
+        self.vpn_service_name = self.plugin_config['vpn_service_name']
+        self.confidence = self.plugin_config['confidence']
+        self.output_file_name = self.plugin_name + '_' + str(self.start_time).replace(' ', '_').replace(':', '')[:13] + '.csv'
+        self.start_time = start_time
+        self.end_time = end_time
+
+        self.sql = self.plugin_config['sql']
+        self.masquerade_domains = ["'" + i.strip() + "'" for i in self.plugin_config['domains'].split(',')]
+
+    def find_server(self):
+        """
+        Get hotspotvpn server ips from the clickhouse database
+        :return: hotspotvpn server ip list
+        """
+        self.logger.info('Start to query hotspotvpn server ip from clickhouse database')
+
+        # construct the query sql from the configured template
+        TIME_FILTER_PATTERN = "(common_recv_time > toDateTime('{$start_time}', '{$time_zone}')) AND(common_recv_time <= toDateTime('{$end_time}', '{$time_zone}'))"
+        time_filter = TIME_FILTER_PATTERN.replace("{$start_time}", str(self.start_time)).replace("{$end_time}", str(
+            self.end_time)).replace("{$time_zone}", self.time_zone)
+        self.sql = self.sql.replace("{$db_name}", self.dbname).replace("{$table_name}", self.table_name)
+        self.sql = self.sql.replace("{$time_filter}", time_filter)
+        self.sql = self.sql.replace("{$domain_list}", ','.join(self.masquerade_domains))
+
+        # self.logger.info("Sql for {}: {}".format(self.plugin_name, self.sql))
+
+        # query data from the clickhouse database
+        try:
+            hotspotvpn_serverip_df = pd.DataFrame(self.client.execute(self.sql))
+        finally:
+            self.client.disconnect()
+
+        if hotspotvpn_serverip_df.empty:
+            self.logger.info('No hotspotvpn server ip found from clickhouse database')
+            return []
+        hotspotvpn_serverip_list = hotspotvpn_serverip_df[0].drop_duplicates().tolist()
+        self.logger.info('Query hotspotvpn server ip from clickhouse database successfully. {} items found'
+                         .format(len(hotspotvpn_serverip_list)))
+
+        return hotspotvpn_serverip_list
diff --git a/detection/vpnservices/ipvanishvpn_serverip.py b/detection/vpnservices/ipvanishvpn_serverip.py
new file mode 100644
index 0000000..4d6daaa
--- /dev/null
+++ b/detection/vpnservices/ipvanishvpn_serverip.py
@@ -0,0 +1,91 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# @Time : 2024/1/12 10:00
+# @author : yinjiangyi
+# @File : ipvanishvpn_serverip.py
+# @Function: detect ipvanishvpn server ips
+
+import socket
+from vpn_detector import VpnDetector
+from tool.Functions import check_internet
+from tool.MariadbTool import MariadbUtil
+
+
+class IpvanishvpnServerip(VpnDetector):
+    """
+    This class is used to detect ipvanishvpn server ips
+    """
+
+    def __init__(self, start_time, end_time):
+        super().__init__(start_time, end_time)
+        self.plugin_config = self.load_config()['ipvanishvpn_serverip']
+        self.plugin_id = self.plugin_config['plugin_id']
+        self.plugin_name = self.plugin_config['plugin_name']
+        self.object_type = self.plugin_config['object_type']
+        self.vpn_service_name = self.plugin_config['vpn_service_name']
+        self.confidence = self.plugin_config['confidence']
+        self.output_file_name = self.plugin_name + '_' + str(self.start_time).replace(' ', '_').replace(':', '')[:13] + '.csv'
+        self.start_time = start_time
+        self.end_time = end_time
+
+        self.kb_sql = self.plugin_config['kb_sql']
+        self.kb_dbname = self.config['knowledgebase']['db_name']
+        self.kb_table_name = self.config['knowledgebase']['domain_library_name']
+
+        self.mariadb = MariadbUtil(self.config['mariadb']['host'], self.config['mariadb']['port'],
+                                   self.config['mariadb']['user'], str(self.config['mariadb']['pswd']),
+                                   self.config['mariadb']['db_name'])
+        self.mariadb_dbname = self.config['mariadb']['db_name']
+        self.mariadb_ip_tb_name = self.config['mariadb']['ip_table_name']
+        self.mariadb_domain_tb_name = self.config['mariadb']['domain_table_name']
+
+    def find_more_servernames(self, server_name_list):
+        """
+        Expand the observed ipvanish server name list: take each observed
+        5-character prefix and enumerate the numbered hosts 00-99 under it
+        :return: expanded server name list
+        """
+        prefix_list = []
+        expanded_server_names = []
+
+        for server_name in server_name_list:
+            domain = server_name.strip()
+            domain_prefix = domain[:5]
+            prefix_list.append(domain_prefix)
+
+        prefix_list = set(prefix_list)
+
+        for domain_prefix in prefix_list:
+            domain_list = [f"{domain_prefix}{str(index).zfill(2)}.vpn.ipvanish.com" for index in range(100)]
+            expanded_server_names.extend(domain_list)
+
+        return expanded_server_names
+
+    def find_server(self):
+        """
+        Get ipvanishvpn server ips by resolving known server names
+        :return: ipvanishvpn server ip list
+        """
+        self.kb_sql = self.kb_sql.replace("{$mariadb_dbname}", self.mariadb_dbname).replace("{$mariadb_domain_tablename}", self.mariadb_domain_tb_name)
+
+        ipvanish_servername_list = []
+        resolved_ip_list = []
+        try:
+            query_result = self.mariadb.query_sql(self.kb_sql)
+        finally:
+            self.mariadb.close()
+
+        if query_result:
+            ipvanish_servername_list = [i[0] for i in query_result]
+
+        # if the internet is reachable, expand the observed server names and
+        # resolve them externally to collect their ip addresses
+        if check_internet():
+            ipvanish_servername_list = self.find_more_servernames(ipvanish_servername_list)
+            resolved_ip_list = self.resolve_dns_for_domain_list(ipvanish_servername_list)
+
+        return resolved_ip_list
diff --git a/detection/vpnservices/ipvanishvpn_servername.py b/detection/vpnservices/ipvanishvpn_servername.py
new file mode 100644
index 0000000..3d82074
--- /dev/null
+++ b/detection/vpnservices/ipvanishvpn_servername.py
@@ -0,0 +1,62 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# @Time : 2024/1/11 15:45
+# @author : yinjiangyi
+# @File : ipvanishvpn_servername.py
+# @Function: detect ipvanish server names
+
+import sys
+sys.path.append('..')
+from vpn_detector import VpnDetector
+import pandas as pd
+
+
+class IpvanishvpnServername(VpnDetector):
+    """
+    This class is used to detect ipvanish server names
+    """
+
+    def __init__(self, start_time, end_time):
+        super().__init__(start_time, end_time)
+        self.plugin_config = self.load_config()['ipvanishvpn_servername']
+        self.plugin_id = self.plugin_config['plugin_id']
+        self.plugin_name = self.plugin_config['plugin_name']
+        self.object_type = self.plugin_config['object_type']
+        self.vpn_service_name = self.plugin_config['vpn_service_name']
+        self.confidence = self.plugin_config['confidence']
+        self.output_file_name = self.plugin_name + '_' + str(self.start_time).replace(' ', '_').replace(':', '')[:13] + '.csv'
+        self.start_time = start_time
+        self.end_time = end_time
+
+        self.sql = self.plugin_config['sql']
+
+    def find_server(self):
+        """
+        Get ipvanishvpn server names from the clickhouse database
+        :return: ipvanishvpn server name list
+        """
+        self.logger.info('Start to query ipvanishvpn server name from session record')
+
+        # construct the query sql from the configured template
+        TIME_FILTER_PATTERN = "(common_recv_time > toDateTime('{$start_time}', '{$time_zone}')) AND(common_recv_time <= toDateTime('{$end_time}', '{$time_zone}'))"
+        time_filter = TIME_FILTER_PATTERN.replace("{$start_time}", str(self.start_time)).replace("{$end_time}", str(
+            self.end_time)).replace("{$time_zone}", self.time_zone)
+        self.sql = self.sql.replace("{$db_name}", self.dbname).replace("{$table_name}", self.table_name)
+        self.sql = self.sql.replace("{$time_filter}", time_filter)
+        self.logger.info("Sql for {}: {}".format(self.plugin_name, self.sql))
+
+        # query data from the clickhouse database
+        try:
+            ipvanishvpn_servername_df = pd.DataFrame(self.client.execute(self.sql))
+        finally:
+            self.client.disconnect()
+
+        if ipvanishvpn_servername_df.empty:
+            self.logger.info('No ipvanishvpn server name found from clickhouse database')
+            return []
+        ipvanishvpn_servername_list = ipvanishvpn_servername_df[0].drop_duplicates().tolist()
+        self.logger.info('Query ipvanishvpn server name from clickhouse database successfully. {} items found'
+                         .format(len(ipvanishvpn_servername_list)))
+
+        return ipvanishvpn_servername_list
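Example invocations of the vpn_detector.py entry point above (illustrative only, assuming config.yaml and the plugins in this commit are in place):

    python vpn_detector.py -p hotspotvpn_serverip -m recent -r 1h
    python vpn_detector.py -p ipvanishvpn_servername -m fixed -s '2024-01-10 00:00:00' -e '2024-01-11 00:00:00'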
