From a301d6b402b7d0ed1cf9b163283edbda8ead2e60 Mon Sep 17 00:00:00 2001
From: wanglihui <949764788@qq.com>
Date: Wed, 15 Jul 2020 19:33:59 +0800
Subject: 抽象document父类
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
ip-learning-java-test/pom.xml | 15 +-
.../main/java/cn/ac/iie/dao/BaseArangoData.java | 58 +++-
.../java/cn/ac/iie/dao/BaseClickhouseData.java | 324 +++++++--------------
.../main/java/cn/ac/iie/dao/UpdateGraphData.java | 157 ++++++++++
.../java/cn/ac/iie/etl/UpdateEFqdnAddressIp.java | 58 ----
.../java/cn/ac/iie/etl/UpdateEIpVisitFqdn.java | 58 ----
.../src/main/java/cn/ac/iie/etl/UpdateVFqdn.java | 94 ------
.../src/main/java/cn/ac/iie/etl/UpdateVIP.java | 60 ----
.../cn/ac/iie/etl/read/ReadHistoryArangoData.java | 44 ---
.../cn/ac/iie/etl/relationship/LocateFqdn2Ip.java | 31 --
.../cn/ac/iie/etl/relationship/VisitIp2Fqdn.java | 17 --
.../java/cn/ac/iie/etl/update/Relationship.java | 191 ------------
.../src/main/java/cn/ac/iie/etl/update/Vertex.java | 116 --------
.../src/main/java/cn/ac/iie/etl/vertex/Fqdn.java | 20 --
.../src/main/java/cn/ac/iie/etl/vertex/Ip.java | 20 --
.../cn/ac/iie/service/UpdateEFqdnAddressIp.java | 58 ++++
.../java/cn/ac/iie/service/UpdateEIpVisitFqdn.java | 58 ++++
.../main/java/cn/ac/iie/service/UpdateVFqdn.java | 94 ++++++
.../src/main/java/cn/ac/iie/service/UpdateVIP.java | 60 ++++
.../cn/ac/iie/service/read/ReadClickhouseData.java | 251 ++++++++++++++++
.../ac/iie/service/read/ReadHistoryArangoData.java | 50 ++++
.../ac/iie/service/relationship/LocateFqdn2Ip.java | 76 +++++
.../service/relationship/LocateSubscriber2Ip.java | 32 ++
.../ac/iie/service/relationship/VisitIp2Fqdn.java | 20 ++
.../java/cn/ac/iie/service/update/Document.java | 118 ++++++++
.../cn/ac/iie/service/update/Relationship.java | 158 ++++++++++
.../main/java/cn/ac/iie/service/update/Vertex.java | 40 +++
.../main/java/cn/ac/iie/service/vertex/Fqdn.java | 21 ++
.../src/main/java/cn/ac/iie/service/vertex/Ip.java | 79 +++++
.../java/cn/ac/iie/service/vertex/Subscriber.java | 21 ++
.../cn/ac/iie/test/IpLearningApplicationTest.java | 71 +----
.../cn/ac/iie/test/ReadArangoDBThreadTest.java | 95 ------
.../main/java/cn/ac/iie/utils/ArangoDBConnect.java | 2 +-
.../src/main/resources/application.properties | 6 +-
.../src/main/resources/clickhouse.properties | 3 +-
.../src/main/resources/log4j.properties | 24 ++
.../src/test/java/cn/ac/iie/TestArango.java | 2 +-
37 files changed, 1506 insertions(+), 1096 deletions(-)
create mode 100644 ip-learning-java-test/src/main/java/cn/ac/iie/dao/UpdateGraphData.java
delete mode 100644 ip-learning-java-test/src/main/java/cn/ac/iie/etl/UpdateEFqdnAddressIp.java
delete mode 100644 ip-learning-java-test/src/main/java/cn/ac/iie/etl/UpdateEIpVisitFqdn.java
delete mode 100644 ip-learning-java-test/src/main/java/cn/ac/iie/etl/UpdateVFqdn.java
delete mode 100644 ip-learning-java-test/src/main/java/cn/ac/iie/etl/UpdateVIP.java
delete mode 100644 ip-learning-java-test/src/main/java/cn/ac/iie/etl/read/ReadHistoryArangoData.java
delete mode 100644 ip-learning-java-test/src/main/java/cn/ac/iie/etl/relationship/LocateFqdn2Ip.java
delete mode 100644 ip-learning-java-test/src/main/java/cn/ac/iie/etl/relationship/VisitIp2Fqdn.java
delete mode 100644 ip-learning-java-test/src/main/java/cn/ac/iie/etl/update/Relationship.java
delete mode 100644 ip-learning-java-test/src/main/java/cn/ac/iie/etl/update/Vertex.java
delete mode 100644 ip-learning-java-test/src/main/java/cn/ac/iie/etl/vertex/Fqdn.java
delete mode 100644 ip-learning-java-test/src/main/java/cn/ac/iie/etl/vertex/Ip.java
create mode 100644 ip-learning-java-test/src/main/java/cn/ac/iie/service/UpdateEFqdnAddressIp.java
create mode 100644 ip-learning-java-test/src/main/java/cn/ac/iie/service/UpdateEIpVisitFqdn.java
create mode 100644 ip-learning-java-test/src/main/java/cn/ac/iie/service/UpdateVFqdn.java
create mode 100644 ip-learning-java-test/src/main/java/cn/ac/iie/service/UpdateVIP.java
create mode 100644 ip-learning-java-test/src/main/java/cn/ac/iie/service/read/ReadClickhouseData.java
create mode 100644 ip-learning-java-test/src/main/java/cn/ac/iie/service/read/ReadHistoryArangoData.java
create mode 100644 ip-learning-java-test/src/main/java/cn/ac/iie/service/relationship/LocateFqdn2Ip.java
create mode 100644 ip-learning-java-test/src/main/java/cn/ac/iie/service/relationship/LocateSubscriber2Ip.java
create mode 100644 ip-learning-java-test/src/main/java/cn/ac/iie/service/relationship/VisitIp2Fqdn.java
create mode 100644 ip-learning-java-test/src/main/java/cn/ac/iie/service/update/Document.java
create mode 100644 ip-learning-java-test/src/main/java/cn/ac/iie/service/update/Relationship.java
create mode 100644 ip-learning-java-test/src/main/java/cn/ac/iie/service/update/Vertex.java
create mode 100644 ip-learning-java-test/src/main/java/cn/ac/iie/service/vertex/Fqdn.java
create mode 100644 ip-learning-java-test/src/main/java/cn/ac/iie/service/vertex/Ip.java
create mode 100644 ip-learning-java-test/src/main/java/cn/ac/iie/service/vertex/Subscriber.java
delete mode 100644 ip-learning-java-test/src/main/java/cn/ac/iie/test/ReadArangoDBThreadTest.java
create mode 100644 ip-learning-java-test/src/main/resources/log4j.properties
diff --git a/ip-learning-java-test/pom.xml b/ip-learning-java-test/pom.xml
index 73c4361..f124e52 100644
--- a/ip-learning-java-test/pom.xml
+++ b/ip-learning-java-test/pom.xml
@@ -10,6 +10,19 @@
+
+
+ org.slf4j
+ slf4j-api
+ 1.7.21
+
+
+
+ org.slf4j
+ slf4j-log4j12
+ 1.7.21
+
+
ru.yandex.clickhouse
clickhouse-jdbc
@@ -31,7 +44,7 @@
com.arangodb
arangodb-java-driver
- 4.2.2
+ 6.6.3
diff --git a/ip-learning-java-test/src/main/java/cn/ac/iie/dao/BaseArangoData.java b/ip-learning-java-test/src/main/java/cn/ac/iie/dao/BaseArangoData.java
index c00523c..d90ee44 100644
--- a/ip-learning-java-test/src/main/java/cn/ac/iie/dao/BaseArangoData.java
+++ b/ip-learning-java-test/src/main/java/cn/ac/iie/dao/BaseArangoData.java
@@ -1,39 +1,74 @@
package cn.ac.iie.dao;
import cn.ac.iie.config.ApplicationConfig;
-import cn.ac.iie.etl.read.ReadHistoryArangoData;
+import cn.ac.iie.service.read.ReadHistoryArangoData;
import cn.ac.iie.utils.ArangoDBConnect;
+import cn.ac.iie.utils.ClickhouseConnect;
import cn.ac.iie.utils.ExecutorThreadPool;
import com.arangodb.ArangoCursor;
import com.arangodb.entity.BaseDocument;
import com.arangodb.entity.BaseEdgeDocument;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.Enumeration;
import java.util.concurrent.ConcurrentHashMap;
+/**
+ * 获取arangoDB历史数据
+ */
public class BaseArangoData {
- public static ConcurrentHashMap v_Fqdn_Map = new ConcurrentHashMap<>();
- public static ConcurrentHashMap v_Ip_Map = new ConcurrentHashMap<>();
+ private static final Logger LOG = LoggerFactory.getLogger(BaseArangoData.class);
+
+ public static ConcurrentHashMap v_Fqdn_Map = new ConcurrentHashMap<>();
+ public static ConcurrentHashMap v_Ip_Map = new ConcurrentHashMap<>();
+ public static ConcurrentHashMap v_Subscriber_Map = new ConcurrentHashMap<>();
public static ConcurrentHashMap e_Fqdn_Address_Ip_Map = new ConcurrentHashMap<>();
public static ConcurrentHashMap e_Ip_Visit_Fqdn_Map = new ConcurrentHashMap<>();
+ public static ConcurrentHashMap e_Subsciber_Locate_Ip_Map = new ConcurrentHashMap<>();
private static ArangoDBConnect arangoDBConnect = ArangoDBConnect.getInstance();
private static ExecutorThreadPool threadPool = ExecutorThreadPool.getInstance();
public void baseDocumentDataMap(){
- readHistoryData("FQDN", v_Fqdn_Map);
- readHistoryData("IP", v_Ip_Map);
- readHistoryData("R_LOCATE_FQDN2IP", e_Fqdn_Address_Ip_Map);
- readHistoryData("R_VISIT_IP2FQDN", e_Ip_Visit_Fqdn_Map);
+ long startA = System.currentTimeMillis();
+ readHistoryData("FQDN", v_Fqdn_Map,BaseDocument.class);
+ readHistoryData("IP", v_Ip_Map,BaseDocument.class);
+ readHistoryData("SUBSCRIBER",v_Subscriber_Map,BaseDocument.class);
+// readHistoryData("R_LOCATE_FQDN2IP", e_Fqdn_Address_Ip_Map);
+// readHistoryData("R_VISIT_IP2FQDN", e_Ip_Visit_Fqdn_Map);
+// readHistoryData("R_LOCATE_SUBSCRIBER2IP",e_Subsciber_Locate_Ip_Map);
+ threadPool.shutdown();
+ threadPool.awaitThreadTask();
+ LOG.info("v_Fqdn_Map大小:"+v_Fqdn_Map.size());
+ LOG.info("v_Ip_Map大小:"+v_Ip_Map.size());
+ LOG.info("v_Subscriber_Map大小:"+v_Subscriber_Map.size());
+ LOG.info("e_Fqdn_Address_Ip_Map大小:"+e_Fqdn_Address_Ip_Map.size());
+ LOG.info("e_Ip_Visit_Fqdn_Map大小:"+e_Ip_Visit_Fqdn_Map.size());
+ LOG.info("e_Subsciber_Locate_Ip_Map大小:"+e_Subsciber_Locate_Ip_Map.size());
+ long lastA = System.currentTimeMillis();
+ LOG.info("读取ArangoDb时间:"+(lastA - startA));
+ }
+
+ public static void main(String[] args) {
+ new BaseArangoData().readHistoryData("IP", v_Ip_Map,BaseDocument.class);
threadPool.shutdown();
threadPool.awaitThreadTask();
+ ArrayList baseEdgeDocuments = new ArrayList<>();
+ Enumeration keys = v_Ip_Map.keys();
+ arangoDBConnect.overwrite(baseEdgeDocuments,"IP");
+ arangoDBConnect.clean();
+
}
- private void readHistoryData(String table, ConcurrentHashMap map){
+ private void readHistoryData(String table, ConcurrentHashMap map, Class type){
try {
long[] timeRange = getTimeRange(table);
for (int i = 0; i < ApplicationConfig.THREAD_POOL_NUMBER; i++) {
String sql = getQuerySql(timeRange, i, table);
- ReadHistoryArangoData readHistoryArangoData = new ReadHistoryArangoData(arangoDBConnect, sql, map);
+ ReadHistoryArangoData readHistoryArangoData = new ReadHistoryArangoData<>(arangoDBConnect, sql, map,type);
threadPool.executor(readHistoryArangoData);
}
}catch (Exception e){
@@ -55,9 +90,9 @@ public class BaseArangoData {
minTime = Long.parseLong(doc.getAttribute("min_time").toString());
}
long lastTime = System.currentTimeMillis();
- System.out.println("查询最大最小时间用时:" + (lastTime - startTime));
+ LOG.info(sql+"\n查询最大最小时间用时:" + (lastTime - startTime));
}else {
- System.out.println("获取最大最小时间异常");
+ LOG.warn("获取ArangoDb时间范围为空");
}
}catch (Exception e){
e.printStackTrace();
@@ -75,5 +110,4 @@ public class BaseArangoData {
return "FOR doc IN "+table+" filter doc.FIRST_FOUND_TIME >= "+minThreadTime+" and doc.FIRST_FOUND_TIME <= "+maxThreadTime+" RETURN doc";
}
-
}
diff --git a/ip-learning-java-test/src/main/java/cn/ac/iie/dao/BaseClickhouseData.java b/ip-learning-java-test/src/main/java/cn/ac/iie/dao/BaseClickhouseData.java
index 9d084a1..b33b73c 100644
--- a/ip-learning-java-test/src/main/java/cn/ac/iie/dao/BaseClickhouseData.java
+++ b/ip-learning-java-test/src/main/java/cn/ac/iie/dao/BaseClickhouseData.java
@@ -1,287 +1,185 @@
package cn.ac.iie.dao;
import cn.ac.iie.config.ApplicationConfig;
-import cn.ac.iie.etl.UpdateEFqdnAddressIp;
-import cn.ac.iie.etl.UpdateEIpVisitFqdn;
-import cn.ac.iie.etl.UpdateVFqdn;
-import cn.ac.iie.etl.UpdateVIP;
import cn.ac.iie.utils.ClickhouseConnect;
-import cn.ac.iie.utils.TopDomainUtils;
import com.alibaba.druid.pool.DruidPooledConnection;
import com.arangodb.entity.BaseDocument;
import com.arangodb.entity.BaseEdgeDocument;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
-import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.HashMap;
-public class BaseClickhouseData {
- private static final ClickhouseConnect manger = ClickhouseConnect.getInstance();
- private static HashMap>> vFqdnMap = new HashMap<>();
- private static HashMap> vIpMap = new HashMap<>();
- private static HashMap> eFqdnAddressIpMap = new HashMap<>();
- private static HashMap> eIpVisitFqdnMap = new HashMap<>();
- public Connection connection;
- public Statement pstm;
+import static cn.ac.iie.service.read.ReadClickhouseData.*;
- public BaseClickhouseData(){}
+/**
+ * 读取clickhouse数据,封装到map
+ * @author wlh
+ */
+public class BaseClickhouseData {
+ private static final Logger LOG = LoggerFactory.getLogger(BaseClickhouseData.class);
- private static long[] getTimeLimit(){
- long maxTime = ApplicationConfig.READ_CLICKHOUSE_MAX_TIME;
- long minTime = ApplicationConfig.READ_CLICKHOUSE_MIN_TIME;
- return new long[]{maxTime,minTime};
- }
+ private static ClickhouseConnect manger = ClickhouseConnect.getInstance();
+ static HashMap>> vFqdnMap = new HashMap<>();
+ static HashMap>> vIpMap = new HashMap<>();
+ static HashMap>> vSubscriberMap = new HashMap<>();
+ static HashMap>> eFqdnAddressIpMap = new HashMap<>();
+ static HashMap>> eIpVisitFqdnMap = new HashMap<>();
+ static HashMap>> eSubsciberLocateIpMap = new HashMap<>();
- static {
- for (int i = 0;i < ApplicationConfig.THREAD_POOL_NUMBER;i++){
- vFqdnMap.put(i,new HashMap<>());
- }
- System.out.println("V_FQDN resultMap初始化完成");
- for (int i = 0;i < ApplicationConfig.THREAD_POOL_NUMBER;i++){
- vIpMap.put(i,new HashMap<>());
- }
- System.out.println("V_IP resultMap初始化完成");
- for (int i = 0;i < ApplicationConfig.THREAD_POOL_NUMBER;i++){
- eFqdnAddressIpMap.put(i,new HashMap<>());
- }
- System.out.println("E_ADDRESS_V_FQDN_TO_V_IP resultMap初始化完成");
- for (int i = 0;i < ApplicationConfig.THREAD_POOL_NUMBER;i++){
- eIpVisitFqdnMap.put(i,new HashMap<>());
- }
- System.out.println("E_VISIT_V_IP_TO_V_FQDN resultMap初始化完成");
- }
+ private DruidPooledConnection connection;
+ private Statement statement;
- public static void BaseVFqdn(){
- BaseVDomainFromReferer();
- long[] timeLimit = getTimeLimit();
- long maxTime = timeLimit[0];
- long minTime = timeLimit[1];
- String where = "recv_time >= "+minTime+" and recv_time <= "+maxTime+" and media_domain != '' ";
- String sql = "SELECT media_domain AS FQDN_NAME,MIN(recv_time) AS FIRST_FOUND_TIME,MAX(recv_time) AS LAST_FOUND_TIME,COUNT( * ) AS FQDN_COUNT_TOTAL FROM media_expire_patch WHERE "+where+" GROUP BY media_domain";
- System.out.println(sql);
+ void baseVertexFqdn() {
+ initializeMap(vFqdnMap);
+ LOG.info("FQDN resultMap初始化完成");
+ String sql = getVertexFqdnSql();
long start = System.currentTimeMillis();
try {
- DruidPooledConnection connection = manger.getConnection();
- Statement statement = connection.createStatement();
+ connection = manger.getConnection();
+ statement = connection.createStatement();
ResultSet resultSet = statement.executeQuery(sql);
- while (resultSet.next()){
- String fqdnName = resultSet.getString("FQDN_NAME");
- long firstFoundTime = resultSet.getLong("FIRST_FOUND_TIME");
- long lastFoundTime = resultSet.getLong("LAST_FOUND_TIME");
- long fqdnCountTotal = resultSet.getLong("FQDN_COUNT_TOTAL");
- BaseDocument newDoc = new BaseDocument();
- newDoc.setKey(fqdnName);
- newDoc.addAttribute("FQDN_NAME",fqdnName);
- newDoc.addAttribute("FIRST_FOUND_TIME",firstFoundTime);
- newDoc.addAttribute("LAST_FOUND_TIME",lastFoundTime);
- newDoc.addAttribute("FQDN_COUNT_TOTAL",fqdnCountTotal);
- int i = fqdnName.hashCode() % ApplicationConfig.THREAD_POOL_NUMBER;
- HashMap> documentHashMap = vFqdnMap.getOrDefault(i, new HashMap<>());
- ArrayList documentArrayList = documentHashMap.getOrDefault(fqdnName, new ArrayList<>());
- documentArrayList.add(newDoc);
- documentHashMap.put(fqdnName,documentArrayList);
+ while (resultSet.next()) {
+ BaseDocument newDoc = getVertexFqdnDocument(resultSet);
+ if (newDoc != null) {
+ putMapByHashcode(newDoc,vFqdnMap);
+ }
}
long last = System.currentTimeMillis();
- System.out.println("读取clickhouse v_FQDN时间:"+(last - start));
- for (int i = 0;i < ApplicationConfig.THREAD_POOL_NUMBER;i++){
- HashMap> baseDocumentHashMap = vFqdnMap.get(i);
- UpdateVFqdn updateVFqdn = new UpdateVFqdn(baseDocumentHashMap);
- updateVFqdn.run();
- }
- }catch (Exception e){
- e.printStackTrace();
+ LOG.info(sql + "\n读取clickhouse v_FQDN时间:" + (last - start));
+ } catch (Exception e) {
+ LOG.error(e.toString());
+ }finally {
+ manger.clear(statement,connection);
}
}
- private static void BaseVDomainFromReferer(){
- long[] timeLimit = getTimeLimit();
- long maxTime = timeLimit[0];
- long minTime = timeLimit[1];
- String where = "recv_time >= "+minTime+" and recv_time <= "+maxTime+" and s1_referer != '' ";
- String sql = "SELECT s1_referer AS FQDN_NAME,MIN(recv_time) AS FIRST_FOUND_TIME,MAX(recv_time) AS LAST_FOUND_TIME,COUNT( * ) AS FQDN_COUNT_TOTAL FROM media_expire_patch WHERE "+where+" GROUP BY s1_referer";
- System.out.println(sql);
+ void baseVertexIp() {
+ initializeMap(vIpMap);
+ LOG.info("IP resultMap初始化完成");
+ String sql = getVertexIpSql();
long start = System.currentTimeMillis();
try {
- DruidPooledConnection connection = manger.getConnection();
- Statement statement = connection.createStatement();
+ connection = manger.getConnection();
+ statement = connection.createStatement();
ResultSet resultSet = statement.executeQuery(sql);
- while (resultSet.next()){
- String referer = resultSet.getString("FQDN_NAME");
- String fqdnName = TopDomainUtils.getDomainFromUrl(referer);
- long firstFoundTime = resultSet.getLong("FIRST_FOUND_TIME");
- long lastFoundTime = resultSet.getLong("LAST_FOUND_TIME");
- long fqdnCountTotal = resultSet.getLong("FQDN_COUNT_TOTAL");
- BaseDocument newDoc = new BaseDocument();
- newDoc.setKey(fqdnName);
- newDoc.addAttribute("FQDN_NAME",fqdnName);
- newDoc.addAttribute("FIRST_FOUND_TIME",firstFoundTime);
- newDoc.addAttribute("LAST_FOUND_TIME",lastFoundTime);
- newDoc.addAttribute("FQDN_COUNT_TOTAL",fqdnCountTotal);
- int i = fqdnName.hashCode() % ApplicationConfig.THREAD_POOL_NUMBER;
- HashMap> documentHashMap = vFqdnMap.getOrDefault(i, new HashMap<>());
- ArrayList documentArrayList = documentHashMap.getOrDefault(fqdnName, new ArrayList<>());
- documentArrayList.add(newDoc);
- documentHashMap.put(fqdnName,documentArrayList);
+ while (resultSet.next()) {
+ BaseDocument newDoc = getVertexIpDocument(resultSet);
+ putMapByHashcode(newDoc,vIpMap);
}
long last = System.currentTimeMillis();
- System.out.println("读取clickhouse v_FQDN时间:"+(last - start));
- }catch (Exception e){
- e.printStackTrace();
+ LOG.info(sql + "\n读取clickhouse v_IP时间:" + (last - start));
+ } catch (Exception e) {
+ LOG.error(e.toString());
+ }finally {
+ manger.clear(statement,connection);
}
}
- public static void BaseVIp(){
- long[] timeLimit = getTimeLimit();
- long maxTime = timeLimit[0];
- long minTime = timeLimit[1];
- String where = " recv_time >= "+minTime+" and recv_time <= "+maxTime;
- String sql = "SELECT IP,location,MIN(recv_time) AS FIRST_FOUND_TIME,MAX(recv_time) AS LAST_FOUND_TIME,COUNT(*) AS IP_COUNT_TOTAL FROM(( SELECT s1_s_ip AS IP, s1_s_location_region AS location, recv_time FROM media_expire_patch where "+where+" ) UNION ALL ( SELECT s1_d_ip AS IP, s1_d_location_region AS location, recv_time FROM media_expire_patch where "+where+" )) GROUP BY IP,location";
- System.out.println(sql);
+ void baseVertexSubscriber(){
+ initializeMap(vSubscriberMap);
+ LOG.info("SUBSCRIBER resultMap初始化完成");
+ String sql = getVertexSubscriberSql();
long start = System.currentTimeMillis();
try {
- DruidPooledConnection connection = manger.getConnection();
- Statement statement = connection.createStatement();
+ connection = manger.getConnection();
+ statement = connection.createStatement();
ResultSet resultSet = statement.executeQuery(sql);
while (resultSet.next()){
- String ip = resultSet.getString("IP");
- String location = resultSet.getString("location");
- long firstFoundTime = resultSet.getLong("FIRST_FOUND_TIME");
- long lastFoundTime = resultSet.getLong("LAST_FOUND_TIME");
- long ipCountTotal = resultSet.getLong("IP_COUNT_TOTAL");
- BaseDocument newDoc = new BaseDocument();
- newDoc.setKey(ip);
- newDoc.addAttribute("IP",ip);
- newDoc.addAttribute("IP_LOCATION",location);
- newDoc.addAttribute("FIRST_FOUND_TIME",firstFoundTime);
- newDoc.addAttribute("LAST_FOUND_TIME",lastFoundTime);
- newDoc.addAttribute("IP_COUNT_TOTAL",ipCountTotal);
- int i = ip.hashCode() % ApplicationConfig.THREAD_POOL_NUMBER;
- HashMap documentHashMap = vIpMap.getOrDefault(i, new HashMap());
- documentHashMap.put(ip,newDoc);
+ BaseDocument newDoc = getVertexSubscriberDocument(resultSet);
+ putMapByHashcode(newDoc,vSubscriberMap);
}
long last = System.currentTimeMillis();
- System.out.println("读取clickhouse v_IP时间:"+(last - start));
- for (int i = 0;i < ApplicationConfig.THREAD_POOL_NUMBER;i++){
- HashMap baseDocumentHashMap = vIpMap.get(i);
- UpdateVIP updateVIp = new UpdateVIP(baseDocumentHashMap);
- updateVIp.run();
- }
+ LOG.info(sql + "\n读取clickhouse v_SUBSCRIBER时间:" + (last - start));
}catch (Exception e){
+ LOG.error(sql + "\n读取clickhouse v_SUBSCRIBER失败");
e.printStackTrace();
+ }finally {
+ manger.clear(statement,connection);
}
}
- public static void BaseEFqdnAddressIp(){
- long[] timeLimit = getTimeLimit();
- long maxTime = timeLimit[0];
- long minTime = timeLimit[1];
- String where = "recv_time >= "+minTime+" and recv_time <= "+maxTime +" AND media_domain != '' AND s1_d_ip != '' ";
- String sql = "SELECT media_domain AS V_FQDN,s1_d_ip AS V_IP,MIN( recv_time ) AS FIRST_FOUND_TIME,MAX( recv_time ) AS LAST_FOUND_TIME,COUNT( * ) AS COUNT_TOTAL FROM media_expire_patch WHERE "+where+" GROUP BY s1_d_ip,media_domain";
- System.out.println(sql);
+ void baseRelationshipSubscriberLocateIp(){
+ initializeMap(eSubsciberLocateIpMap);
+ LOG.info("R_LOCATE_SUBSCRIBER2IP");
+ String sql = getRelationshipSubsciberLocateIpSql();
long start = System.currentTimeMillis();
try {
- DruidPooledConnection connection = manger.getConnection();
- Statement statement = connection.createStatement();
+ connection = manger.getConnection();
+ statement = connection.createStatement();
ResultSet resultSet = statement.executeQuery(sql);
while (resultSet.next()){
- String vFqdn = resultSet.getString("V_FQDN");
- String vIp = resultSet.getString("V_IP");
- long firstFoundTime = resultSet.getLong("FIRST_FOUND_TIME");
- long lastFoundTime = resultSet.getLong("LAST_FOUND_TIME");
- long countTotal = resultSet.getLong("COUNT_TOTAL");
- String key = vFqdn+"-"+vIp;
- BaseEdgeDocument newDoc = new BaseEdgeDocument();
- newDoc.setKey(key);
- newDoc.setFrom("V_FQDN/"+vFqdn);
- newDoc.setTo("V_IP/"+vIp);
- newDoc.addAttribute("FIRST_FOUND_TIME",firstFoundTime);
- newDoc.addAttribute("LAST_FOUND_TIME",lastFoundTime);
- newDoc.addAttribute("COUNT_TOTAL",countTotal);
- int i = key.hashCode() % ApplicationConfig.THREAD_POOL_NUMBER;
- HashMap documentHashMap = eFqdnAddressIpMap.getOrDefault(i, new HashMap());
- documentHashMap.put(key,newDoc);
+ BaseEdgeDocument newDoc = getRelationshipSubsciberLocateIpDocument(resultSet);
+ putMapByHashcode(newDoc,eSubsciberLocateIpMap);
}
long last = System.currentTimeMillis();
- System.out.println("读取clickhouse EFqdnAddressIp时间:"+(last - start));
- for (int i = 0;i < ApplicationConfig.THREAD_POOL_NUMBER;i++){
- HashMap baseDocumentHashMap = eFqdnAddressIpMap.get(i);
- UpdateEFqdnAddressIp updateEFqdnAddressIp = new UpdateEFqdnAddressIp(baseDocumentHashMap);
- updateEFqdnAddressIp.run();
- }
+ LOG.info(sql + "\n读取clickhouse ESubsciberLocateIp时间:" + (last - start));
}catch (Exception e){
+ LOG.error(sql + "\n读取clickhouse ESubsciberLocateIp失败");
e.printStackTrace();
+ }finally {
+ manger.clear(statement,connection);
}
}
- public static void BaseEdgeFqdnSameFqdn(){
- long[] timeLimit = getTimeLimit();
- long maxTime = timeLimit[0];
- long minTime = timeLimit[1];
- String where = "recv_time >= "+minTime+" and recv_time <= "+maxTime +" AND media_domain != '' AND s1_d_ip != '' ";
- String sql = "SELECT s1_domain AS V_FQDN,s1_referer,MIN(recv_time) AS FIRST_FOUND_TIME,MAX( recv_time ) AS LAST_FOUND_TIME,COUNT( * ) AS COUNT_TOTAL FROM media_expire_patch WHERE "+where+" GROUP BY s1_referer,s1_domain";
- System.out.println(sql);
+ void baseRelationshipFqdnAddressIp() {
+ initializeMap(eFqdnAddressIpMap);
+ LOG.info("R_LOCATE_FQDN2IP resultMap初始化完成");
+ String sql = getRelationshipFqdnAddressIpSql();
+ long start = System.currentTimeMillis();
try {
- DruidPooledConnection connection = manger.getConnection();
- Statement statement = connection.createStatement();
+ connection = manger.getConnection();
+ statement = connection.createStatement();
ResultSet resultSet = statement.executeQuery(sql);
- while (resultSet.next()){
- String vFqdn = resultSet.getString("V_FQDN");
- String referer = resultSet.getString("s1_referer");
- String refererDomain = TopDomainUtils.getDomainFromUrl(referer);
- long firstFoundTime = resultSet.getLong("FIRST_FOUND_TIME");
- long lastFoundTime = resultSet.getLong("LAST_FOUND_TIME");
- long countTotal = resultSet.getLong("COUNT_TOTAL");
- String key = vFqdn+"-"+refererDomain;
+ while (resultSet.next()) {
+ BaseEdgeDocument newDoc = getRelationFqdnAddressIpDocument(resultSet);
+ putMapByHashcode(newDoc,eFqdnAddressIpMap);
}
- }catch (Exception e){
- e.printStackTrace();
+ long last = System.currentTimeMillis();
+ LOG.info(sql + "\n读取clickhouse EFqdnAddressIp时间:" + (last - start));
+ } catch (Exception e) {
+ LOG.error(e.toString());
+ }finally {
+ manger.clear(statement,connection);
}
-
}
- public static void BaseEIpVisitFqdn(){
- long[] timeLimit = getTimeLimit();
- long maxTime = timeLimit[0];
- long minTime = timeLimit[1];
- String where = "recv_time >= "+minTime+" and recv_time <= "+maxTime+" AND s1_s_ip != '' AND media_domain != '' ";
- String sql = "SELECT s1_s_ip AS V_IP,media_domain AS V_FQDN,MIN( recv_time ) AS FIRST_FOUND_TIME,MAX( recv_time ) AS LAST_FOUND_TIME,COUNT( * ) AS COUNT_TOTAL FROM media_expire_patch WHERE "+where+" GROUP BY s1_s_ip,media_domain";
- System.out.println(sql);
+ void baseRelationshipIpVisitFqdn() {
+ initializeMap(eIpVisitFqdnMap);
+ LOG.info("R_VISIT_IP2FQDN resultMap初始化完成");
+ String sql = getRelationshipIpVisitFqdnSql();
long start = System.currentTimeMillis();
try {
- DruidPooledConnection connection = manger.getConnection();
- Statement statement = connection.createStatement();
+ connection = manger.getConnection();
+ statement = connection.createStatement();
ResultSet resultSet = statement.executeQuery(sql);
- while (resultSet.next()){
- String vIp = resultSet.getString("V_IP");
- String vFqdn = resultSet.getString("V_FQDN");
- long firstFoundTime = resultSet.getLong("FIRST_FOUND_TIME");
- long lastFoundTime = resultSet.getLong("LAST_FOUND_TIME");
- long countTotal = resultSet.getLong("COUNT_TOTAL");
- String key = vIp +"-"+ vFqdn;
- BaseEdgeDocument newDoc = new BaseEdgeDocument();
- newDoc.setKey(key);
- newDoc.setFrom("V_IP/"+vIp);
- newDoc.setTo("V_FQDN/"+vFqdn);
- newDoc.addAttribute("FIRST_FOUND_TIME",firstFoundTime);
- newDoc.addAttribute("LAST_FOUND_TIME",lastFoundTime);
- newDoc.addAttribute("COUNT_TOTAL",countTotal);
- int i = key.hashCode() % ApplicationConfig.THREAD_POOL_NUMBER;
- HashMap documentHashMap = eIpVisitFqdnMap.getOrDefault(i, new HashMap());
- documentHashMap.put(key,newDoc);
+ while (resultSet.next()) {
+ BaseEdgeDocument newDoc = getRelationIpVisitFqdnDocument(resultSet);
+ putMapByHashcode(newDoc,eIpVisitFqdnMap);
}
long last = System.currentTimeMillis();
- System.out.println("读取clickhouse EIpVisitFqdn时间:"+(last - start));
- for (int i = 0;i < ApplicationConfig.THREAD_POOL_NUMBER;i++){
- HashMap baseDocumentHashMap = eIpVisitFqdnMap.get(i);
- UpdateEIpVisitFqdn updateEIpVisitFqdn = new UpdateEIpVisitFqdn(baseDocumentHashMap);
- updateEIpVisitFqdn.run();
+ LOG.info(sql + "\n读取clickhouse EIpVisitFqdn时间:" + (last - start));
+ } catch (Exception e) {
+ LOG.error(e.toString());
+ }finally {
+ manger.clear(statement,connection);
+ }
+ }
+
+ private void initializeMap(HashMap>> map){
+ try {
+ for (int i = 0; i < ApplicationConfig.THREAD_POOL_NUMBER; i++) {
+ map.put(i, new HashMap<>());
}
}catch (Exception e){
e.printStackTrace();
+ LOG.error("初始化数据失败");
}
}
+
}
diff --git a/ip-learning-java-test/src/main/java/cn/ac/iie/dao/UpdateGraphData.java b/ip-learning-java-test/src/main/java/cn/ac/iie/dao/UpdateGraphData.java
new file mode 100644
index 0000000..825543b
--- /dev/null
+++ b/ip-learning-java-test/src/main/java/cn/ac/iie/dao/UpdateGraphData.java
@@ -0,0 +1,157 @@
+package cn.ac.iie.dao;
+
+import cn.ac.iie.config.ApplicationConfig;
+import cn.ac.iie.service.relationship.LocateFqdn2Ip;
+import cn.ac.iie.service.relationship.LocateSubscriber2Ip;
+import cn.ac.iie.service.relationship.VisitIp2Fqdn;
+import cn.ac.iie.service.vertex.Fqdn;
+import cn.ac.iie.service.vertex.Ip;
+import cn.ac.iie.service.vertex.Subscriber;
+import cn.ac.iie.utils.ArangoDBConnect;
+import cn.ac.iie.utils.ExecutorThreadPool;
+import com.arangodb.entity.BaseDocument;
+import com.arangodb.entity.BaseEdgeDocument;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.concurrent.CountDownLatch;
+
+/**
+ * 更新图数据库业务类
+ */
+public class UpdateGraphData {
+ private static final Logger LOG = LoggerFactory.getLogger(UpdateGraphData.class);
+ private static ExecutorThreadPool pool = ExecutorThreadPool.getInstance();
+ private static ArangoDBConnect arangoManger = ArangoDBConnect.getInstance();
+
+ private CountDownLatch countDownLatch;
+
+ public void updateArango(){
+ long startC = System.currentTimeMillis();
+ try {
+ BaseClickhouseData baseClickhouseData = new BaseClickhouseData();
+ baseClickhouseData.baseVertexFqdn();
+ updateVertexFqdn();
+
+ baseClickhouseData.baseVertexIp();
+ updateVertexIp();
+
+// baseClickhouseData.baseRelationshipFqdnAddressIp();
+// updateRelationFqdnAddressIp();
+
+// baseClickhouseData.baseRelationshipIpVisitFqdn();
+// updateRelationIpVisitFqdn();
+
+ baseClickhouseData.baseVertexSubscriber();
+ updateVertexSubscriber();
+
+// baseClickhouseData.baseRelationshipSubscriberLocateIp();
+// updateRelationshipSubsciberLocateIp();
+ }catch (Exception e){
+ e.printStackTrace();
+ }finally {
+ arangoManger.clean();
+ }
+ long lastC = System.currentTimeMillis();
+ LOG.info("更新ArangoDb时间:"+(lastC - startC));
+ }
+
+ private void updateVertexFqdn(){
+ try {
+ countDownLatch = new CountDownLatch(ApplicationConfig.THREAD_POOL_NUMBER);
+ for (int i = 0; i < ApplicationConfig.THREAD_POOL_NUMBER; i++) {
+ HashMap> stringArrayListHashMap = BaseClickhouseData.vFqdnMap.get(i);
+ LOG.info("vFqdn baseDocumentHashMap大小:" + stringArrayListHashMap.size());
+ Fqdn updateFqdn = new Fqdn(stringArrayListHashMap, arangoManger, "FQDN", BaseArangoData.v_Fqdn_Map,countDownLatch);
+ updateFqdn.run();
+ }
+ countDownLatch.await();
+ LOG.info("---------FQDN vertex 更新完毕---------");
+ }catch (Exception e){
+ e.printStackTrace();
+ }
+ }
+
+ private void updateVertexSubscriber(){
+ try {
+ countDownLatch = new CountDownLatch(ApplicationConfig.THREAD_POOL_NUMBER);
+ for (int i = 0; i < ApplicationConfig.THREAD_POOL_NUMBER; i++) {
+ HashMap> stringArrayListHashMap = BaseClickhouseData.vSubscriberMap.get(i);
+ LOG.info("vSubscriber baseDocumentHashMap大小:" + stringArrayListHashMap.size());
+ Subscriber updateSubscriber = new Subscriber(stringArrayListHashMap, arangoManger, "SUBSCRIBER", BaseArangoData.v_Subscriber_Map,countDownLatch);
+ updateSubscriber.run();
+ }
+ countDownLatch.await();
+ LOG.info("---------SUBSCRIBER vertex 更新完毕---------");
+ }catch (Exception e){
+ e.printStackTrace();
+ }
+ }
+
+ private void updateRelationshipSubsciberLocateIp(){
+ try {
+ countDownLatch = new CountDownLatch(ApplicationConfig.THREAD_POOL_NUMBER);
+ for (int i = 0; i < ApplicationConfig.THREAD_POOL_NUMBER; i++) {
+ HashMap> baseDocumentHashMap = BaseClickhouseData.eSubsciberLocateIpMap.get(i);
+ LOG.info("ESubsciberLocateIp baseDocumentHashMap大小:" + baseDocumentHashMap.size());
+ LocateSubscriber2Ip locateSubscriber2Ip = new LocateSubscriber2Ip(baseDocumentHashMap, arangoManger, "R_LOCATE_SUBSCRIBER2IP", BaseArangoData.e_Subsciber_Locate_Ip_Map, countDownLatch);
+ locateSubscriber2Ip.run();
+ }
+ countDownLatch.await();
+ LOG.info("------------R_LOCATE_SUBSCRIBER2IP relationship 更新完毕----------------");
+ }catch (Exception e){
+ e.printStackTrace();
+ }
+ }
+
+ private void updateVertexIp(){
+ try {
+ countDownLatch = new CountDownLatch(ApplicationConfig.THREAD_POOL_NUMBER);
+ for (int i = 0; i < ApplicationConfig.THREAD_POOL_NUMBER; i++) {
+ HashMap> stringArrayListHashMap = BaseClickhouseData.vIpMap.get(i);
+ LOG.info("vIp baseDocumentHashMap大小:" + stringArrayListHashMap.size());
+ Ip updateIp = new Ip(stringArrayListHashMap, arangoManger, "IP", BaseArangoData.v_Ip_Map, countDownLatch);
+ updateIp.run();
+ }
+ countDownLatch.await();
+ LOG.info("----------IP vertex 更新完毕-------------");
+ }catch (Exception e){
+ e.printStackTrace();
+ }
+ }
+
+ private void updateRelationFqdnAddressIp(){
+ try {
+ countDownLatch = new CountDownLatch(ApplicationConfig.THREAD_POOL_NUMBER);
+ for (int i = 0; i < ApplicationConfig.THREAD_POOL_NUMBER; i++) {
+ HashMap> baseDocumentHashMap = BaseClickhouseData.eFqdnAddressIpMap.get(i);
+ LOG.info("EFqdnAddressIp baseDocumentHashMap大小:" + baseDocumentHashMap.size());
+ LocateFqdn2Ip fqdnAddressIp = new LocateFqdn2Ip(baseDocumentHashMap, arangoManger, "R_LOCATE_FQDN2IP", BaseArangoData.e_Fqdn_Address_Ip_Map, countDownLatch);
+ fqdnAddressIp.run();
+ }
+ countDownLatch.await();
+ LOG.info("------------R_LOCATE_FQDN2IP relationship 更新完毕----------------");
+ }catch (Exception e){
+ e.printStackTrace();
+ }
+ }
+
+ private void updateRelationIpVisitFqdn(){
+ try {
+ countDownLatch = new CountDownLatch(ApplicationConfig.THREAD_POOL_NUMBER);
+ for (int i = 0; i < ApplicationConfig.THREAD_POOL_NUMBER; i++) {
+ HashMap> baseDocumentHashMap = BaseClickhouseData.eIpVisitFqdnMap.get(i);
+ LOG.info("EIpVisitFqdn baseDocumentHashMap大小:" + baseDocumentHashMap.size());
+ VisitIp2Fqdn ipVisitFqdn = new VisitIp2Fqdn(baseDocumentHashMap,arangoManger,"R_VISIT_IP2FQDN",BaseArangoData.e_Ip_Visit_Fqdn_Map,countDownLatch);
+ ipVisitFqdn.run();
+ }
+ countDownLatch.await();
+ LOG.info("---------------R_VISIT_IP2FQDN ralationship 更新完毕----------------");
+ }catch (Exception e){
+ e.printStackTrace();
+ }
+ }
+
+}
diff --git a/ip-learning-java-test/src/main/java/cn/ac/iie/etl/UpdateEFqdnAddressIp.java b/ip-learning-java-test/src/main/java/cn/ac/iie/etl/UpdateEFqdnAddressIp.java
deleted file mode 100644
index 0abbc05..0000000
--- a/ip-learning-java-test/src/main/java/cn/ac/iie/etl/UpdateEFqdnAddressIp.java
+++ /dev/null
@@ -1,58 +0,0 @@
-package cn.ac.iie.etl;
-
-import cn.ac.iie.config.ApplicationConfig;
-import cn.ac.iie.dao.BaseArangoData;
-import cn.ac.iie.utils.ArangoDBConnect;
-import com.arangodb.entity.BaseDocument;
-import com.arangodb.entity.BaseEdgeDocument;
-
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.Set;
-
-public class UpdateEFqdnAddressIp implements Runnable {
- private HashMap<String, BaseEdgeDocument> documentHashMap;
-
- private static ArangoDBConnect arangoManger = ArangoDBConnect.getInstance();
-
- public UpdateEFqdnAddressIp(HashMap<String, BaseEdgeDocument> documentHashMap) {
- this.documentHashMap = documentHashMap;
- }
- @Override
- public void run() {
- Set<String> keySet = documentHashMap.keySet();
- ArrayList<BaseEdgeDocument> docInsert = new ArrayList<>();
- ArrayList<BaseEdgeDocument> docUpdate = new ArrayList<>();
- int i = 0;
- try {
- for (String key:keySet){
- BaseEdgeDocument newEdgeDocument = documentHashMap.getOrDefault(key, null);
- if (newEdgeDocument != null){
- i += 1;
- BaseEdgeDocument edgeDocument = BaseArangoData.e_Fqdn_Address_Ip_Map.getOrDefault(key, null);
- if (edgeDocument != null){
- Object lastFoundTime = newEdgeDocument.getAttribute("LAST_FOUND_TIME");
- long countTotal = Long.parseLong(newEdgeDocument.getAttribute("COUNT_TOTAL").toString());
- long updateCountTotal = Long.parseLong(edgeDocument.getAttribute("COUNT_TOTAL").toString());
- edgeDocument.addAttribute("LAST_FOUND_TIME",lastFoundTime);
- edgeDocument.addAttribute("COUNT_TOTAL",countTotal+updateCountTotal);
- docInsert.add(edgeDocument);
- }else {
- docUpdate.add(newEdgeDocument);
- }
- }
- if (i >= ApplicationConfig.UPDATE_ARANGO_BATCH){
- arangoManger.insertAndUpdate(docInsert,docUpdate,"E_ADDRESS_V_FQDN_TO_V_IP");
- System.out.println("更新"+i);
- i = 0;
- }
- }
- if (i != 0){
- arangoManger.insertAndUpdate(docInsert,docUpdate,"E_ADDRESS_V_FQDN_TO_V_IP");
- System.out.println("更新"+i);
- }
- }catch (Exception e){
- e.printStackTrace();
- }
- }
-}
diff --git a/ip-learning-java-test/src/main/java/cn/ac/iie/etl/UpdateEIpVisitFqdn.java b/ip-learning-java-test/src/main/java/cn/ac/iie/etl/UpdateEIpVisitFqdn.java
deleted file mode 100644
index a07dadf..0000000
--- a/ip-learning-java-test/src/main/java/cn/ac/iie/etl/UpdateEIpVisitFqdn.java
+++ /dev/null
@@ -1,58 +0,0 @@
-package cn.ac.iie.etl;
-
-import cn.ac.iie.config.ApplicationConfig;
-import cn.ac.iie.dao.BaseArangoData;
-import cn.ac.iie.utils.ArangoDBConnect;
-import com.arangodb.entity.BaseDocument;
-import com.arangodb.entity.BaseEdgeDocument;
-
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.Set;
-
-public class UpdateEIpVisitFqdn implements Runnable {
- private HashMap<String, BaseEdgeDocument> documentHashMap;
-
- private static final ArangoDBConnect arangoManger = ArangoDBConnect.getInstance();
-
- public UpdateEIpVisitFqdn(HashMap<String, BaseEdgeDocument> documentHashMap) {
- this.documentHashMap = documentHashMap;
- }
- @Override
- public void run() {
- Set<String> keySet = documentHashMap.keySet();
- ArrayList<BaseEdgeDocument> docInsert = new ArrayList<>();
- ArrayList<BaseEdgeDocument> docUpdate = new ArrayList<>();
- int i = 0;
- try {
- for (String key:keySet){
- BaseEdgeDocument newEdgeDocument = documentHashMap.getOrDefault(key, null);
- if (newEdgeDocument != null){
- i += 1;
- BaseEdgeDocument edgeDocument = BaseArangoData.e_Ip_Visit_Fqdn_Map.getOrDefault(key, null);
- if (edgeDocument != null){
- Object lastFoundTime = newEdgeDocument.getAttribute("LAST_FOUND_TIME");
- long countTotal = Long.parseLong(newEdgeDocument.getAttribute("COUNT_TOTAL").toString());
- long updateCountTotal = Long.parseLong(edgeDocument.getAttribute("COUNT_TOTAL").toString());
- edgeDocument.addAttribute("LAST_FOUND_TIME",lastFoundTime);
- edgeDocument.addAttribute("COUNT_TOTAL",countTotal+updateCountTotal);
- docInsert.add(edgeDocument);
- }else {
- docUpdate.add(newEdgeDocument);
- }
- }
- if (i >= ApplicationConfig.UPDATE_ARANGO_BATCH){
- arangoManger.insertAndUpdate(docInsert,docUpdate,"E_VISIT_V_IP_TO_V_FQDN");
- System.out.println("更新"+i);
- i = 0;
- }
- }
- if (i != 0){
- arangoManger.insertAndUpdate(docInsert,docUpdate,"E_VISIT_V_IP_TO_V_FQDN");
- System.out.println("更新"+i);
- }
- }catch (Exception e){
- e.printStackTrace();
- }
- }
-}
diff --git a/ip-learning-java-test/src/main/java/cn/ac/iie/etl/UpdateVFqdn.java b/ip-learning-java-test/src/main/java/cn/ac/iie/etl/UpdateVFqdn.java
deleted file mode 100644
index 037dc40..0000000
--- a/ip-learning-java-test/src/main/java/cn/ac/iie/etl/UpdateVFqdn.java
+++ /dev/null
@@ -1,94 +0,0 @@
-package cn.ac.iie.etl;
-
-import cn.ac.iie.config.ApplicationConfig;
-import cn.ac.iie.dao.BaseArangoData;
-import cn.ac.iie.utils.ArangoDBConnect;
-import com.arangodb.entity.BaseDocument;
-
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.Map;
-import java.util.Set;
-
-public class UpdateVFqdn implements Runnable{
-
- private HashMap<String, ArrayList<BaseDocument>> documentHashMap;
-
- private static ArangoDBConnect arangoManger = ArangoDBConnect.getInstance();
-
- public UpdateVFqdn(HashMap<String, ArrayList<BaseDocument>> documentHashMap) {
- this.documentHashMap = documentHashMap;
- }
-
- @Override
- public void run() {
- Set<String> keySet = documentHashMap.keySet();
- ArrayList<BaseDocument> docInsert = new ArrayList<>();
- ArrayList<BaseDocument> docUpdate = new ArrayList<>();
- int i = 0;
- try {
- for (String key:keySet){
- ArrayList<BaseDocument> documentArrayList = documentHashMap.getOrDefault(key, null);
- BaseDocument newDocument = mergeVFqdn(documentArrayList);
-
- if (newDocument != null){
- i += 1;
- BaseDocument document = BaseArangoData.v_Fqdn_Map.getOrDefault(key, null);
- if (document != null){
- Object lastFoundTime = newDocument.getAttribute("LAST_FOUND_TIME");
- long fqdnCountTotal = Long.parseLong(newDocument.getAttribute("FQDN_COUNT_TOTAL").toString());
- long countTotal = Long.parseLong(document.getAttribute("FQDN_COUNT_TOTAL").toString());
- document.addAttribute("LAST_FOUND_TIME",lastFoundTime);
- document.addAttribute("FQDN_COUNT_TOTAL",countTotal+fqdnCountTotal);
- docUpdate.add(document);
- }else {
- docInsert.add(newDocument);
- }
- }
- if (i >= ApplicationConfig.UPDATE_ARANGO_BATCH){
- arangoManger.insertAndUpdate(docInsert,docUpdate,"V_FQDN");
- System.out.println("更新"+i);
- i = 0;
- }
- }
- if (i != 0){
- arangoManger.insertAndUpdate(docInsert,docUpdate,"V_FQDN");
- System.out.println("更新"+i);
- }
- }catch (Exception e){
- e.printStackTrace();
- }
-
- }
-
- private BaseDocument mergeVFqdn(ArrayList<BaseDocument> documentArrayList){
- if (documentArrayList == null || documentArrayList.isEmpty()){
- return null;
- }else if (documentArrayList.size() == 1){
- return documentArrayList.get(0);
- }else {
- BaseDocument document = new BaseDocument();
- Map<String, Object> properties = document.getProperties();
- for (BaseDocument doc:documentArrayList){
- if (properties.isEmpty()){
- document = doc;
- properties = doc.getProperties();
- }else {
- long firstFoundTime = Long.parseLong(properties.getOrDefault("FIRST_FOUND_TIME", 0L).toString());
- long docFirstFoundTime = Long.parseLong(doc.getAttribute("FIRST_FOUND_TIME").toString());
- properties.put("FIRST_FOUND_TIME",firstFoundTime<docFirstFoundTime? firstFoundTime:docFirstFoundTime);
-
- long lastFoundTime = Long.parseLong(properties.getOrDefault("LAST_FOUND_TIME", 0L).toString());
- long docLastFoundTime = Long.parseLong(doc.getAttribute("LAST_FOUND_TIME").toString());
- properties.put("LAST_FOUND_TIME",lastFoundTime>docLastFoundTime? lastFoundTime:docLastFoundTime);
-
- long fqdnCountTotal = Long.parseLong(properties.getOrDefault("FQDN_COUNT_TOTAL", 0L).toString());
- long docFqdnCountTotal = Long.parseLong(doc.getAttribute("FQDN_COUNT_TOTAL").toString());
- properties.put("FQDN_COUNT_TOTAL",fqdnCountTotal+docFqdnCountTotal);
- }
- }
- document.setProperties(properties);
- return document;
- }
- }
-}
diff --git a/ip-learning-java-test/src/main/java/cn/ac/iie/etl/UpdateVIP.java b/ip-learning-java-test/src/main/java/cn/ac/iie/etl/UpdateVIP.java
deleted file mode 100644
index 12a906a..0000000
--- a/ip-learning-java-test/src/main/java/cn/ac/iie/etl/UpdateVIP.java
+++ /dev/null
@@ -1,60 +0,0 @@
-package cn.ac.iie.etl;
-
-
-import cn.ac.iie.config.ApplicationConfig;
-import cn.ac.iie.dao.BaseArangoData;
-import cn.ac.iie.utils.ArangoDBConnect;
-import com.arangodb.entity.BaseDocument;
-
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.Set;
-
-public class UpdateVIP implements Runnable {
-
- private HashMap<String, BaseDocument> documentHashMap;
-
- private static final ArangoDBConnect arangoManger = ArangoDBConnect.getInstance();
-
- public UpdateVIP(HashMap<String, BaseDocument> documentHashMap) {
- this.documentHashMap = documentHashMap;
- }
-
- @Override
- public void run() {
- Set<String> keySet = documentHashMap.keySet();
- ArrayList<BaseDocument> docInsert = new ArrayList<>();
- ArrayList<BaseDocument> docUpdate = new ArrayList<>();
- int i = 0;
- try {
- for (String key:keySet){
- BaseDocument newDocument = documentHashMap.getOrDefault(key, null);
- if (newDocument != null){
- i += 1;
- BaseDocument document = BaseArangoData.v_Ip_Map.getOrDefault(key, null);
- if (document != null){
- Object lastFoundTime = newDocument.getAttribute("LAST_FOUND_TIME");
- long ipCountTotal = Long.parseLong(newDocument.getAttribute("IP_COUNT_TOTAL").toString());
- long countTotal = Long.parseLong(document.getAttribute("IP_COUNT_TOTAL").toString());
- document.addAttribute("LAST_FOUND_TIME",lastFoundTime);
- document.addAttribute("IP_COUNT_TOTAL",countTotal+ipCountTotal);
- docUpdate.add(document);
- }else {
- docInsert.add(newDocument);
- }
- }
- if (i >= ApplicationConfig.UPDATE_ARANGO_BATCH){
- arangoManger.insertAndUpdate(docInsert,docUpdate,"V_IP");
- System.out.println("更新"+i);
- i = 0;
- }
- }
- if (i != 0){
- arangoManger.insertAndUpdate(docInsert,docUpdate,"V_IP");
- System.out.println("更新"+i);
- }
- }catch (Exception e){
- e.printStackTrace();
- }
- }
-}
diff --git a/ip-learning-java-test/src/main/java/cn/ac/iie/etl/read/ReadHistoryArangoData.java b/ip-learning-java-test/src/main/java/cn/ac/iie/etl/read/ReadHistoryArangoData.java
deleted file mode 100644
index 971b29b..0000000
--- a/ip-learning-java-test/src/main/java/cn/ac/iie/etl/read/ReadHistoryArangoData.java
+++ /dev/null
@@ -1,44 +0,0 @@
-package cn.ac.iie.etl.read;
-
-import cn.ac.iie.utils.ArangoDBConnect;
-import com.arangodb.ArangoCursor;
-import com.arangodb.entity.BaseEdgeDocument;
-
-import java.util.List;
-import java.util.concurrent.ConcurrentHashMap;
-
-/**
- * @author wlh
- * 多线程全量读取arangoDb历史数据,封装到map
- */
-public class ReadHistoryArangoData extends Thread {
- private ArangoDBConnect arangoDBConnect;
- private String query;
- private ConcurrentHashMap<String, BaseEdgeDocument> map;
-
- public ReadHistoryArangoData(ArangoDBConnect arangoDBConnect, String query, ConcurrentHashMap<String, BaseEdgeDocument> map) {
- this.arangoDBConnect = arangoDBConnect;
- this.query = query;
- this.map = map;
- }
-
- @Override
- public void run() {
- String name = Thread.currentThread().getName();
- System.out.println(name + ":" + query);
- long s = System.currentTimeMillis();
- ArangoCursor<BaseEdgeDocument> docs = arangoDBConnect.executorQuery(query, BaseEdgeDocument.class);
- if (docs != null){
- List<BaseEdgeDocument> baseDocuments = docs.asListRemaining();
- int i = 0;
- for (BaseEdgeDocument doc : baseDocuments) {
- String key = doc.getKey();
- map.put(key, doc);
- i++;
- }
- System.out.println(name + ":共处理数据" + i);
- long l = System.currentTimeMillis();
- System.out.println(name + "运行时间:" + (l - s));
- }
- }
-}
diff --git a/ip-learning-java-test/src/main/java/cn/ac/iie/etl/relationship/LocateFqdn2Ip.java b/ip-learning-java-test/src/main/java/cn/ac/iie/etl/relationship/LocateFqdn2Ip.java
deleted file mode 100644
index 3988096..0000000
--- a/ip-learning-java-test/src/main/java/cn/ac/iie/etl/relationship/LocateFqdn2Ip.java
+++ /dev/null
@@ -1,31 +0,0 @@
-package cn.ac.iie.etl.relationship;
-
-import cn.ac.iie.etl.update.Relationship;
-import cn.ac.iie.utils.ArangoDBConnect;
-import com.arangodb.entity.BaseEdgeDocument;
-
-import java.util.HashMap;
-import java.util.Map;
-import java.util.concurrent.ConcurrentHashMap;
-
-public class LocateFqdn2Ip extends Relationship {
-
- public LocateFqdn2Ip(HashMap<String, HashMap<String, BaseEdgeDocument>> newDocumentHashMap,
- ArangoDBConnect arangoManger,
- String collectionName,
- ConcurrentHashMap<String, BaseEdgeDocument> historyDocumentMap) {
- super(newDocumentHashMap, arangoManger, collectionName, historyDocumentMap);
- }
-
- @Override
- protected void mergeFunction(Map<String, Object> properties, BaseEdgeDocument schemaEdgeDoc){
- super.mergeFunction(properties,schemaEdgeDoc);
- super.mergeDistinctClientIp(properties,schemaEdgeDoc);
- }
-
- @Override
- protected void updateFunction(BaseEdgeDocument newEdgeDocument, BaseEdgeDocument historyEdgeDocument) {
- super.updateFunction(newEdgeDocument, historyEdgeDocument);
- super.updateDistinctClientIp(newEdgeDocument, historyEdgeDocument);
- }
-}
diff --git a/ip-learning-java-test/src/main/java/cn/ac/iie/etl/relationship/VisitIp2Fqdn.java b/ip-learning-java-test/src/main/java/cn/ac/iie/etl/relationship/VisitIp2Fqdn.java
deleted file mode 100644
index efafb16..0000000
--- a/ip-learning-java-test/src/main/java/cn/ac/iie/etl/relationship/VisitIp2Fqdn.java
+++ /dev/null
@@ -1,17 +0,0 @@
-package cn.ac.iie.etl.relationship;
-
-import cn.ac.iie.etl.update.Relationship;
-import cn.ac.iie.utils.ArangoDBConnect;
-import com.arangodb.entity.BaseEdgeDocument;
-
-import java.util.HashMap;
-import java.util.concurrent.ConcurrentHashMap;
-
-public class VisitIp2Fqdn extends Relationship {
- public VisitIp2Fqdn(HashMap<String, HashMap<String, BaseEdgeDocument>> newDocumentHashMap,
- ArangoDBConnect arangoManger,
- String collectionName,
- ConcurrentHashMap<String, BaseEdgeDocument> historyDocumentMap) {
- super(newDocumentHashMap, arangoManger, collectionName, historyDocumentMap);
- }
-}
diff --git a/ip-learning-java-test/src/main/java/cn/ac/iie/etl/update/Relationship.java b/ip-learning-java-test/src/main/java/cn/ac/iie/etl/update/Relationship.java
deleted file mode 100644
index 78141ae..0000000
--- a/ip-learning-java-test/src/main/java/cn/ac/iie/etl/update/Relationship.java
+++ /dev/null
@@ -1,191 +0,0 @@
-package cn.ac.iie.etl.update;
-
-import cn.ac.iie.config.ApplicationConfig;
-import cn.ac.iie.utils.ArangoDBConnect;
-import com.arangodb.entity.BaseEdgeDocument;
-
-import java.util.*;
-import java.util.concurrent.ConcurrentHashMap;
-
-public class Relationship extends Thread {
-
- protected HashMap<String, HashMap<String, BaseEdgeDocument>> newDocumentHashMap;
- protected ArangoDBConnect arangoManger;
- protected String collectionName;
- protected ConcurrentHashMap<String, BaseEdgeDocument> historyDocumentMap;
-
- public Relationship(HashMap<String, HashMap<String, BaseEdgeDocument>> newDocumentHashMap,
- ArangoDBConnect arangoManger,
- String collectionName,
- ConcurrentHashMap<String, BaseEdgeDocument> historyDocumentMap) {
- this.newDocumentHashMap = newDocumentHashMap;
- this.arangoManger = arangoManger;
- this.collectionName = collectionName;
- this.historyDocumentMap = historyDocumentMap;
- }
-
- @Override
- public void run() {
- Set<String> keySet = newDocumentHashMap.keySet();
- ArrayList<BaseEdgeDocument> docInsert = new ArrayList<>();
- int i = 0;
- try {
- for (String key : keySet) {
- HashMap<String, BaseEdgeDocument> newEdgeDocumentSchemaMap = newDocumentHashMap.getOrDefault(key, null);
- if (newEdgeDocumentSchemaMap != null) {
- BaseEdgeDocument newEdgeDocument = mergeRelationship(newEdgeDocumentSchemaMap);
- i += 1;
- BaseEdgeDocument historyEdgeDocument = historyDocumentMap.getOrDefault(key, null);
- updateRelationship(newEdgeDocument,historyEdgeDocument,docInsert);
- if (i >= ApplicationConfig.UPDATE_ARANGO_BATCH) {
- arangoManger.overwrite(docInsert, collectionName);
- System.out.println("更新"+collectionName+":" + i);
- i = 0;
- }
- }
- }
- if (i != 0) {
- arangoManger.overwrite(docInsert, collectionName);
- System.out.println("更新"+collectionName+":" + i);
- }
- } catch (Exception e) {
- e.printStackTrace();
- System.out.println(e.toString());
- }
- }
-
- private BaseEdgeDocument mergeRelationship(HashMap<String, BaseEdgeDocument> newEdgeDocumentSchemaMap) {
- BaseEdgeDocument newBaseEdgeDocument = new BaseEdgeDocument();
- Set<String> schemaSets = newEdgeDocumentSchemaMap.keySet();
- Map<String, Object> properties = newBaseEdgeDocument.getProperties();
-
- for (String schema : schemaSets) {
- BaseEdgeDocument schemaEdgeDoc = newEdgeDocumentSchemaMap.get(schema);
- if (!properties.isEmpty()) {
- mergeFunction(properties, schemaEdgeDoc);
- } else {
- newBaseEdgeDocument = schemaEdgeDoc;
- properties = schemaEdgeDoc.getProperties();
- }
- setSchemaCount(schema, schemaEdgeDoc, properties);
- }
- properties.remove("COUNT_TOTAL");
- checkSchemaProperty(properties);
-
- newBaseEdgeDocument.setProperties(properties);
- return newBaseEdgeDocument;
- }
-
- private void updateRelationship(BaseEdgeDocument newEdgeDocument,BaseEdgeDocument historyEdgeDocument,ArrayList<BaseEdgeDocument> docInsert){
- if (historyEdgeDocument != null) {
- updateFunction(newEdgeDocument, historyEdgeDocument);
- docInsert.add(historyEdgeDocument);
- } else {
- docInsert.add(newEdgeDocument);
- }
- }
-
- protected void updateFunction(BaseEdgeDocument newEdgeDocument,BaseEdgeDocument historyEdgeDocument){
- updateFoundTime(newEdgeDocument,historyEdgeDocument);
- setSchemaCntByHistory(historyEdgeDocument,"TLS_CNT_RECENT","TLS_CNT_TOTAL",newEdgeDocument);
- setSchemaCntByHistory(historyEdgeDocument,"HTTP_CNT_RECENT","HTTP_CNT_TOTAL",newEdgeDocument);
-// updateDistinctClientIp(newEdgeDocument,historyEdgeDocument);
- }
-
- protected void updateFoundTime(BaseEdgeDocument newEdgeDocument,BaseEdgeDocument historyEdgeDocument){
- Object lastFoundTime = newEdgeDocument.getAttribute("LAST_FOUND_TIME");
- historyEdgeDocument.addAttribute("LAST_FOUND_TIME", lastFoundTime);
- }
-
- protected void setSchemaCntByHistory(BaseEdgeDocument historyEdgeDocument,String schema,String totalSchema,BaseEdgeDocument newEdgeDocument){
- long countTotal = Long.parseLong(newEdgeDocument.getAttribute(totalSchema).toString());
- long updateCountTotal = Long.parseLong(historyEdgeDocument.getAttribute(totalSchema).toString());
-
- ArrayList<Long> cntRecent = (ArrayList<Long>) historyEdgeDocument.getAttribute(schema);
- Long[] cntRecentsSrc = cntRecent.toArray(new Long[cntRecent.size()]);
- Long[] cntRecentsDst = new Long[7];
- System.arraycopy(cntRecentsSrc, 0, cntRecentsDst, 1, cntRecentsSrc.length - 1);
- cntRecentsDst[0] = countTotal;
-
- historyEdgeDocument.addAttribute(schema, cntRecentsDst);
- historyEdgeDocument.addAttribute(totalSchema, countTotal + updateCountTotal);
- }
-
- protected void mergeFunction(Map<String, Object> properties, BaseEdgeDocument schemaEdgeDoc) {
- mergeFoundTime(properties, schemaEdgeDoc);
-// mergeDistinctClientIp(properties,schemaEdgeDoc);
- }
-
- protected void mergeDistinctClientIp(Map<String, Object> properties, BaseEdgeDocument schemaEdgeDoc){
- String[] schemaDistCipRecents = (String[]) schemaEdgeDoc.getAttribute("DIST_CIP_RECENT");
- String[] distCipRecents = (String[]) properties.get("DIST_CIP_RECENT");
- Object[] mergeClientIp = distinctIp(schemaDistCipRecents, distCipRecents);
- properties.put("DIST_CIP_RECENT", mergeClientIp);
- properties.put("DIST_CIP_TOTAL",mergeClientIp);
- }
-
- protected void updateDistinctClientIp(BaseEdgeDocument newEdgeDocument,BaseEdgeDocument edgeDocument){
- ArrayList<String> distCipTotal = (ArrayList<String>) edgeDocument.getAttribute("DIST_CIP_TOTAL");
- String[] distCipTotalsSrc = distCipTotal.toArray(new String[distCipTotal.size()]);
-
- Object[] distCipRecentsSrc = (Object[])newEdgeDocument.getAttribute("DIST_CIP_RECENT");
- if (distCipTotalsSrc.length == 30) {
- Object[] distCipTotals = distinctIp(distCipTotalsSrc, distCipRecentsSrc);
- edgeDocument.addAttribute("DIST_CIP_TOTAL", distCipTotals);
- }
- edgeDocument.addAttribute("DIST_CIP_RECENT", distCipRecentsSrc);
- }
-
- protected Object[] distinctIp(Object[] distCipTotalsSrc,Object[] distCipRecentsSrc){
- HashSet