diff options
| author | gujinkai <[email protected]> | 2023-11-23 18:20:02 +0800 |
|---|---|---|
| committer | gujinkai <[email protected]> | 2023-11-23 18:20:02 +0800 |
| commit | 08c9b83955dedb843cdb09104c3aa5d8bd57b2fd (patch) | |
| tree | 54d3b2cdf47438016f39bd7cfc9aeb98700156e0 | |
| parent | 56603f5aff1bd9b0d89f79e1365d5da7c134d83b (diff) | |
refactor: refactor tagUtils logic
11 files changed, 461 insertions, 433 deletions
diff --git a/platform-etl/src/main/java/com/zdjizhi/etl/knowledge/KnowledgeManager.java b/platform-etl/src/main/java/com/zdjizhi/etl/knowledge/KnowledgeManager.java index 437a1a8..1b2f861 100644 --- a/platform-etl/src/main/java/com/zdjizhi/etl/knowledge/KnowledgeManager.java +++ b/platform-etl/src/main/java/com/zdjizhi/etl/knowledge/KnowledgeManager.java @@ -7,6 +7,9 @@ import com.zdjizhi.base.common.CommonInternalConfig; import com.zdjizhi.base.utils.FileUtils; import com.zdjizhi.base.utils.HttpClientUtils; import com.zdjizhi.etl.utils.*; +import com.zdjizhi.etl.utils.tag.AppTagUtils; +import com.zdjizhi.etl.utils.tag.DomainTagUtils; +import com.zdjizhi.etl.utils.tag.IpTagUtils; import org.apache.flink.configuration.Configuration; import java.io.ByteArrayInputStream; @@ -75,10 +78,6 @@ public class KnowledgeManager { Map<Long, String> domainTagMap = new HashMap<>(); Map<Long, String> appTagMap = new HashMap<>(); - List<String> ipInvalid = new ArrayList<>(); - List<String> domainInvalid = new ArrayList<>(); - List<String> appInvalid = new ArrayList<>(); - Map<Long, String> ipTagUrl = new HashMap<>(); Map<Long, String> domainTagUrl = new HashMap<>(); Map<Long, String> appTagUrl = new HashMap<>(); @@ -104,23 +103,14 @@ public class KnowledgeManager { if (configuration.get(CommonInternalConfig.IP_TAG_TYPE).equals(type)) { ipTagMap.put(id, sha256); ipTagUrl.put(id, fileUrl); - if (isValid != 1) { - ipInvalid.add(name); - } } if (configuration.get(CommonInternalConfig.DOMAIN_TAG_TYPE).equals(type)) { domainTagMap.put(id, sha256); domainTagUrl.put(id, fileUrl); - if (isValid != 1) { - domainInvalid.add(name); - } } if (configuration.get(CommonInternalConfig.APP_TAG_TYPE).equals(type)) { appTagMap.put(id, sha256); appTagUrl.put(id, fileUrl); - if (isValid != 1) { - appInvalid.add(name); - } } } else { String fileName = type + "." + format; @@ -140,8 +130,6 @@ public class KnowledgeManager { IPUtils.load(); - TagUtils.updateInvalid(ipInvalid, domainInvalid, appInvalid); - if (!checkSha256(ipTagKnowLedgeFileSha256, ipTagMap)) { ipTagKnowLedgeFileSha256 = ipTagMap; Map<Long, byte[]> contents = new HashMap<>(ipTagMap.size()); @@ -149,7 +137,7 @@ public class KnowledgeManager { byte[] downloadBytes = downloadFile(ipTagUrl.get(s), ipTagMap.get(s), 1); contents.put(s, downloadBytes); } - TagUtils.updateIpTag(contents); + IpTagUtils.updateTag(contents); } if (!checkSha256(domainTagKnowLedgeFileSha256, domainTagMap)) { domainTagKnowLedgeFileSha256 = domainTagMap; @@ -158,7 +146,7 @@ public class KnowledgeManager { byte[] downloadBytes = downloadFile(domainTagUrl.get(s), domainTagMap.get(s), 1); contents.put(s, downloadBytes); } - TagUtils.updateDomainTag(contents); + DomainTagUtils.updateTag(contents); } if (!checkSha256(appTagKnowLedgeFileSha256, appTagMap)) { appTagKnowLedgeFileSha256 = appTagMap; @@ -167,7 +155,7 @@ public class KnowledgeManager { byte[] downloadBytes = downloadFile(appTagUrl.get(s), appTagMap.get(s), 1); contents.put(s, downloadBytes); } - TagUtils.updateAppTag(contents); + AppTagUtils.updateTag(contents); } } catch (Exception e) { e.printStackTrace(System.out); diff --git a/platform-etl/src/main/java/com/zdjizhi/etl/operator/EtlProcessFunc.java b/platform-etl/src/main/java/com/zdjizhi/etl/operator/EtlProcessFunc.java index a211bf3..ca75f09 100644 --- a/platform-etl/src/main/java/com/zdjizhi/etl/operator/EtlProcessFunc.java +++ b/platform-etl/src/main/java/com/zdjizhi/etl/operator/EtlProcessFunc.java @@ -6,6 +6,9 @@ import com.zdjizhi.etl.common.CommonConfig; import com.zdjizhi.etl.knowledge.NacosManager; import com.zdjizhi.etl.rule.RuleUpdateListener; import com.zdjizhi.etl.utils.*; +import com.zdjizhi.etl.utils.tag.AppTagUtils; +import com.zdjizhi.etl.utils.tag.DomainTagUtils; +import com.zdjizhi.etl.utils.tag.IpTagUtils; import org.apache.flink.configuration.Configuration; import org.apache.flink.streaming.api.functions.ProcessFunction; import org.apache.flink.util.Collector; @@ -54,7 +57,9 @@ public class EtlProcessFunc extends ProcessFunction<String, CnRecordLog> { PsiphonUtils.setPsiphonInfo(recordLog); IocDarkWebUtils.setIocDarkWebInfo(recordLog); IocMalwareUtils.setIocMalwareInfo(recordLog); - TagUtils.setTag(recordLog); + IpTagUtils.setTag(recordLog); + DomainTagUtils.setTag(recordLog); + AppTagUtils.setTag(recordLog); out.collect(recordLog); } catch (Exception e) { LOG.error("ETL解析错误\nsource:{} \nrecordLog:{} \n", value, recordLog, e); diff --git a/platform-etl/src/main/java/com/zdjizhi/etl/rule/RuleUpdateListener.java b/platform-etl/src/main/java/com/zdjizhi/etl/rule/RuleUpdateListener.java index 2eba80b..31e111a 100644 --- a/platform-etl/src/main/java/com/zdjizhi/etl/rule/RuleUpdateListener.java +++ b/platform-etl/src/main/java/com/zdjizhi/etl/rule/RuleUpdateListener.java @@ -5,7 +5,7 @@ import com.zdjizhi.base.rule.cache.RuleCache; import com.zdjizhi.base.rule.info.RuleInfo; import com.zdjizhi.etl.utils.IocDarkWebUtils; import com.zdjizhi.etl.utils.IocMalwareUtils; -import com.zdjizhi.etl.utils.TagUtils; +import com.zdjizhi.etl.utils.tag.BaseTagUtils; import org.apache.flink.configuration.Configuration; import java.util.Collection; @@ -76,7 +76,7 @@ public class RuleUpdateListener implements RuleCache.UpdateListener<Long, RuleIn } else if (configuration.get(CommonInternalConfig.IOC_MALWARE_TYPE).equals(ruleConfig.getLibrarySource())) { IocMalwareUtils.updateInternalRuleId(key, value); } else { - TagUtils.updateInternalRuleId(key, value); + BaseTagUtils.updateInternalRuleId(key, value); } } @@ -101,7 +101,7 @@ public class RuleUpdateListener implements RuleCache.UpdateListener<Long, RuleIn } else if (configuration.get(CommonInternalConfig.IOC_MALWARE_TYPE).equals(ruleConfig.getLibrarySource())) { IocMalwareUtils.removeInternalRuleId(key, value); } else { - TagUtils.removeInternalRuleId(key, value); + BaseTagUtils.removeInternalRuleId(key, value); } } } diff --git a/platform-etl/src/main/java/com/zdjizhi/etl/utils/IocDarkWebUtils.java b/platform-etl/src/main/java/com/zdjizhi/etl/utils/IocDarkWebUtils.java index 6f077d0..347ed8a 100644 --- a/platform-etl/src/main/java/com/zdjizhi/etl/utils/IocDarkWebUtils.java +++ b/platform-etl/src/main/java/com/zdjizhi/etl/utils/IocDarkWebUtils.java @@ -48,22 +48,26 @@ public class IocDarkWebUtils { public static void setIocDarkWebInfo(CnRecordLog cnRecordLog) { - String serverIp = cnRecordLog.getCommon_server_ip(); - if (ipMap.containsKey(serverIp)) { - String ipTag = ipMap.get(serverIp); - String ipTagLowerCase = ipTag.toLowerCase(); - if (tagRuleIdMap.containsKey(ipTagLowerCase)) { - cnRecordLog.putRuleIdAndIocType(tagRuleIdMap.get(ipTagLowerCase), "ip"); + try { + String serverIp = cnRecordLog.getCommon_server_ip(); + if (ipMap.containsKey(serverIp)) { + String ipTag = ipMap.get(serverIp); + String ipTagLowerCase = ipTag.toLowerCase(); + if (tagRuleIdMap.containsKey(ipTagLowerCase)) { + cnRecordLog.putRuleIdAndIocType(tagRuleIdMap.get(ipTagLowerCase), "ip"); + } + cnRecordLog.putServer_ip_tag(ipTag); } - cnRecordLog.putServer_ip_tag(ipTag); - } - String domainTag = getDomainTag(cnRecordLog.getDomain()); - if (domainTag != null) { - String domainTagLowerCase = domainTag.toLowerCase(); - if (tagRuleIdMap.containsKey(domainTagLowerCase)) { - cnRecordLog.putRuleIdAndIocType(tagRuleIdMap.get(domainTagLowerCase), "domain"); + String domainTag = getDomainTag(cnRecordLog.getDomain()); + if (domainTag != null) { + String domainTagLowerCase = domainTag.toLowerCase(); + if (tagRuleIdMap.containsKey(domainTagLowerCase)) { + cnRecordLog.putRuleIdAndIocType(tagRuleIdMap.get(domainTagLowerCase), "domain"); + } + cnRecordLog.putDomain_tag(domainTag); } - cnRecordLog.putDomain_tag(domainTag); + } catch (Exception e) { + LOG.error("ETL解析错误\nrecordLog:{} \n", cnRecordLog, e); } } diff --git a/platform-etl/src/main/java/com/zdjizhi/etl/utils/IocMalwareUtils.java b/platform-etl/src/main/java/com/zdjizhi/etl/utils/IocMalwareUtils.java index 1035484..954b4d1 100644 --- a/platform-etl/src/main/java/com/zdjizhi/etl/utils/IocMalwareUtils.java +++ b/platform-etl/src/main/java/com/zdjizhi/etl/utils/IocMalwareUtils.java @@ -53,32 +53,36 @@ public class IocMalwareUtils { public static void setIocMalwareInfo(CnRecordLog cnRecordLog) { - String serverIp = cnRecordLog.getCommon_server_ip(); - if (ipMap.containsKey(serverIp)) { - String ipTag = ipMap.get(serverIp); - if (ipTag != null) { - String ipTagLowerCase = ipTag.toLowerCase(); - cnRecordLog.putServer_ip_tag(ipTag); - if (tagRuleIdMap.containsKey(ipTagLowerCase)) { - cnRecordLog.putRuleIdAndIocType(tagRuleIdMap.get(ipTagLowerCase), "ip"); + try { + String serverIp = cnRecordLog.getCommon_server_ip(); + if (ipMap.containsKey(serverIp)) { + String ipTag = ipMap.get(serverIp); + if (ipTag != null) { + String ipTagLowerCase = ipTag.toLowerCase(); + cnRecordLog.putServer_ip_tag(ipTag); + if (tagRuleIdMap.containsKey(ipTagLowerCase)) { + cnRecordLog.putRuleIdAndIocType(tagRuleIdMap.get(ipTagLowerCase), "ip"); + } } } - } - String domainTag = getDomainTags(cnRecordLog.getDomain()); - if (domainTag != null) { - String domainTagLowerCase = domainTag.toLowerCase(); - cnRecordLog.putDomain_tag(domainTag); - if (tagRuleIdMap.containsKey(domainTagLowerCase)) { - cnRecordLog.putRuleIdAndIocType(tagRuleIdMap.get(domainTagLowerCase), "domain"); + String domainTag = getDomainTags(cnRecordLog.getDomain()); + if (domainTag != null) { + String domainTagLowerCase = domainTag.toLowerCase(); + cnRecordLog.putDomain_tag(domainTag); + if (tagRuleIdMap.containsKey(domainTagLowerCase)) { + cnRecordLog.putRuleIdAndIocType(tagRuleIdMap.get(domainTagLowerCase), "domain"); + } } - } - List<String> urlTags = getUrlTags(cnRecordLog.getHttp_url()); - for (String urlTag : urlTags) { - String urlTagLowerCase = urlTag.toLowerCase(); - if (tagRuleIdMap.containsKey(urlTagLowerCase)) { - cnRecordLog.putRuleIdAndIocType(tagRuleIdMap.get(urlTagLowerCase), "url"); + List<String> urlTags = getUrlTags(cnRecordLog.getHttp_url()); + for (String urlTag : urlTags) { + String urlTagLowerCase = urlTag.toLowerCase(); + if (tagRuleIdMap.containsKey(urlTagLowerCase)) { + cnRecordLog.putRuleIdAndIocType(tagRuleIdMap.get(urlTagLowerCase), "url"); + } + //url only need to detection } - //url only need to detection + } catch (Exception e) { + LOG.error("ETL解析错误\nrecordLog:{} \n", cnRecordLog, e); } } diff --git a/platform-etl/src/main/java/com/zdjizhi/etl/utils/TagUtils.java b/platform-etl/src/main/java/com/zdjizhi/etl/utils/TagUtils.java deleted file mode 100644 index 576216c..0000000 --- a/platform-etl/src/main/java/com/zdjizhi/etl/utils/TagUtils.java +++ /dev/null @@ -1,375 +0,0 @@ -package com.zdjizhi.etl.utils; - -import com.zdjizhi.base.common.CnRecordLog; -import com.zdjizhi.base.common.CommonInternalConfig; -import com.zdjizhi.base.rule.info.RuleInfo; -import inet.ipaddr.IPAddress; -import inet.ipaddr.IPAddressString; -import org.apache.commons.lang3.StringUtils; -import org.apache.flink.shaded.guava18.com.google.common.collect.Range; -import org.apache.flink.shaded.guava18.com.google.common.collect.RangeMap; -import org.apache.flink.shaded.guava18.com.google.common.collect.TreeRangeMap; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.ByteArrayInputStream; -import java.io.InputStreamReader; -import java.sql.Time; -import java.util.*; - -public class TagUtils { - - private static final Logger logger = LoggerFactory.getLogger(TagUtils.class); - - private static TreeRangeMap<IPAddress, Node> ipTagRules = TreeRangeMap.create(); - private static Trie<Node> fqdnTagFuzzyMatchRules = new Trie<>(); - private static Map<String, Node> fqdnTagFullMatchRules = new HashMap<>(); - private static Map<String, Node> appLabelTagRules = new HashMap<>(); - - private static Map<Long, List<Long>> libraryIdRuleIdMap = new HashMap<>(); - - private static List<String> ipInvalid = new ArrayList<>(); - private static List<String> domainInvalid = new ArrayList<>(); - private static List<String> appInvalid = new ArrayList<>(); - - public static void updateInvalid(List<String> list1, List<String> list2, List<String> list3) { - ipInvalid = list1; - domainInvalid = list2; - appInvalid = list3; - } - - public static void updateIpTag(Map<Long, byte[]> contents) { - TreeRangeMap<IPAddress, Node> newIpTagRules = TreeRangeMap.create(); - for (Long s : contents.keySet()) { - parseIpTagRules(newIpTagRules, s, contents.get(s)); - } - ipTagRules = newIpTagRules; - } - - public static void updateDomainTag(Map<Long, byte[]> contents) { - Trie<Node> newFqdnTagFuzzyMatchRules = new Trie<>(); - Map<String, Node> newFqdnTagFullMatchRules = new HashMap<>(); - for (Long s : contents.keySet()) { - parseFqdnTagRules(newFqdnTagFuzzyMatchRules, newFqdnTagFullMatchRules, s, contents.get(s)); - } - fqdnTagFuzzyMatchRules = newFqdnTagFuzzyMatchRules; - fqdnTagFullMatchRules = newFqdnTagFullMatchRules; - } - - public static void updateAppTag(Map<Long, byte[]> contents) { - Map<String, Node> newAppLabelTagRules = new HashMap<>(); - for (Long s : contents.keySet()) { - parseAppLabelTagRules(newAppLabelTagRules, s, contents.get(s)); - } - appLabelTagRules = newAppLabelTagRules; - } - - private static void parseIpTagRules(TreeRangeMap<IPAddress, Node> treeRangeMap, Long id, byte[] ipFileContent) { - try { - InputStreamReader reader = new InputStreamReader(new ByteArrayInputStream(ipFileContent)); - List<String> needColumns = new ArrayList<>(); - needColumns.add("tag_value"); - needColumns.add("addr_format"); - needColumns.add("ip1"); - needColumns.add("ip2"); - HighCsvReader csvReader = new HighCsvReader(reader, needColumns); - HighCsvReader.CsvIterator iterator = csvReader.getIterator(); - while (iterator.hasNext()) { - Map<String, String> line = iterator.next(); - try { - String tagValue = line.get("tag_value"); - String addrFormat = line.get("addr_format"); - String ip1 = line.get("ip1"); - String ip2 = line.get("ip2"); - - IPAddress startIpAddress = null; - IPAddress endIpAddress = null; - if ("Single".equals(addrFormat)) { - IPAddress ipAddress = new IPAddressString(ip1).getAddress(); - if (ipAddress == null) { - continue; - } - startIpAddress = ipAddress; - endIpAddress = ipAddress; - } else if ("Range".equals(addrFormat)) { - IPAddress ipAddress1 = new IPAddressString(ip1).getAddress(); - IPAddress ipAddress2 = new IPAddressString(ip2).getAddress(); - if (ipAddress1 == null || ipAddress2 == null) { - continue; - } - startIpAddress = ipAddress1; - endIpAddress = ipAddress2; - } else if ("CIDR".equals(addrFormat)) { - IPAddress cidrIpAddress = new IPAddressString(ip1 + "/" + ip2).getAddress(); - if (cidrIpAddress == null) { - continue; - } - IPAddress ipAddressLower = cidrIpAddress.getLower(); - IPAddress ipAddressUpper = cidrIpAddress.getUpper(); - startIpAddress = ipAddressLower; - endIpAddress = ipAddressUpper; - } else { - System.out.println("unknown addrFormat: " + addrFormat); - continue; - } - - RangeMap<IPAddress, Node> ipAddressListRangeMap = treeRangeMap.subRangeMap(Range.closed(startIpAddress, endIpAddress)); - Map<Range<IPAddress>, Node> rangeListMap = ipAddressListRangeMap.asMapOfRanges(); - TreeRangeMap<IPAddress, Node> subRangeMap = TreeRangeMap.create(); - Node node = new Node(); - node.getTags().add(tagValue); - node.put(id, "ip"); - subRangeMap.put(Range.closed(startIpAddress, endIpAddress), node); - for (Range<IPAddress> ipAddressRange : rangeListMap.keySet()) { - Node ipAddressNode = ipAddressListRangeMap.get(ipAddressRange.lowerEndpoint()); - ipAddressNode.getTags().add(tagValue); - node.put(id, "ip"); - subRangeMap.put(ipAddressRange, ipAddressNode); - } - treeRangeMap.putAll(subRangeMap); - } catch (Exception lineException) { - System.out.println("ip tag line: " + line.toString() + " parse error:" + lineException); - lineException.printStackTrace(System.out); - } - } - } catch (Exception e) { - logger.error("parse ip rules id" + id + " failed", e); - } - - - } - - private static void parseFqdnTagRules(Trie<Node> fqdnTagsFuzzy, Map<String, Node> fqdnTagsFull, Long id, byte[] fqdnFileContent) { - try { - InputStreamReader reader = new InputStreamReader(new ByteArrayInputStream(fqdnFileContent)); - List<String> needColumns = new ArrayList<>(); - needColumns.add("tag_value"); - needColumns.add("domain"); - HighCsvReader csvReader = new HighCsvReader(reader, needColumns); - HighCsvReader.CsvIterator iterator = csvReader.getIterator(); - while (iterator.hasNext()) { - Map<String, String> line = iterator.next(); - String tagValue = line.get("tag_value"); - String rule = line.get("domain"); - - if (rule == null || rule.length() == 0) { - continue; - } - if (!rule.startsWith("$") && !rule.startsWith("*")) { - continue; - } - String newRule = rule.substring(1); - if (rule.startsWith("*")){ - Node node = new Node(); - node.getTags().add(tagValue); - node.put(id, "domain"); - fqdnTagsFuzzy.put(StringUtils.reverse(newRule), node); - } else { - if (fqdnTagsFull.containsKey(newRule)) { - Node node = fqdnTagsFull.get(newRule); - node.getTags().add(tagValue); - node.put(id, "domain"); - } else { - Node node = new Node(); - node.getTags().add(tagValue); - node.put(id, "domain"); - fqdnTagsFull.put(newRule, node); - } - } - } - } catch (Exception e) { - logger.error("parse fqdn rules id" + id + " failed", e); - } - - - } - - private static void parseAppLabelTagRules(Map<String, Node> appTags, Long id, byte[] appLabelFileContent) { - try { - InputStreamReader reader = new InputStreamReader(new ByteArrayInputStream(appLabelFileContent)); - List<String> needColumns = new ArrayList<>(); - needColumns.add("tag_value"); - needColumns.add("app_name"); - HighCsvReader csvReader = new HighCsvReader(reader, needColumns); - HighCsvReader.CsvIterator iterator = csvReader.getIterator(); - while (iterator.hasNext()) { - Map<String, String> line = iterator.next(); - String tagValue = line.get("tag_value"); - String appName = line.get("app_name"); - - if (appTags.containsKey(appName)) { - Node node = appTags.get(appName); - node.getTags().add(tagValue); - node.put(id, "app"); - } else { - Node node = new Node(); - node.getTags().add(tagValue); - node.put(id, "app"); - appTags.put(appName, node); - } - } - } catch (Exception e) { - logger.error("parse app rules id" + id + " failed", e); - } - } - - public static void updateInternalRuleId(Long key, RuleInfo ruleInfo) { - System.out.println(TimeUtils.getCurrentTime() + ": tagUtils update rule: " + ruleInfo.getName() + ":" + ruleInfo.getRuleId()); - long ruleId = ruleInfo.getRuleId(); - RuleInfo.RuleConfig ruleConfig = ruleInfo.getRuleConfig(); - if (ruleConfig instanceof RuleInfo.IndicatorMatchConfig) { - Long libraryId = ((RuleInfo.IndicatorMatchConfig) ruleConfig).getLibraryId(); - if (libraryIdRuleIdMap.containsKey(libraryId)) { - libraryIdRuleIdMap.get(libraryId).add(ruleId); - } else { - List<Long> ruleIdList = new ArrayList<>(); - ruleIdList.add(ruleId); - libraryIdRuleIdMap.put(libraryId, ruleIdList); - } - } - } - - public static void removeInternalRuleId(Long key, RuleInfo ruleInfo) { - System.out.println(TimeUtils.getCurrentTime() + ": tagUtils remove rule : " + ruleInfo.getName() + ":" + ruleInfo.getRuleId()); - RuleInfo.RuleConfig ruleConfig = ruleInfo.getRuleConfig(); - if (ruleConfig instanceof RuleInfo.IndicatorMatchConfig) { - Long libraryId = ((RuleInfo.IndicatorMatchConfig) ruleConfig).getLibraryId(); - if (libraryIdRuleIdMap.containsKey(libraryId)) { - libraryIdRuleIdMap.get(libraryId).remove(key); - } - } - } - - private static void setRuleIdAndIocType(CnRecordLog entity, Node node) { - for (int i = 0; i < node.knowledgeIds.size(); i++) { - Long libraryId = node.knowledgeIds.get(i); - String libraryType = node.knowledgeTypes.get(i); - if (libraryIdRuleIdMap.containsKey(libraryId)) { - for (Long ruleId : libraryIdRuleIdMap.get(libraryId)) { - entity.getRule_id_list().add(ruleId); - entity.getIoc_type_list().add(libraryType); - } - } - } - - } - - public static void setTag(CnRecordLog entity) { - try { - setIpTags(entity); - setFqdnTags(entity); - setAppLabelTags(entity); - } catch (Exception e) { - logger.error("ETL解析错误\nrecordLog:{} \n", entity, e); - } - - } - - private static void setIpTags(CnRecordLog entity) { - Node clientIpNode = getIpNode(entity.getCommon_client_ip()); - entity.putClient_ip_tags(clientIpNode.getTags()); - //client ip not need detection - /*setRuleIdAndIocType(entity, clientIpNode);*/ - Node serverIpNode = getIpNode(entity.getCommon_server_ip()); - entity.putServer_ip_tags(serverIpNode.getTags()); - setRuleIdAndIocType(entity, serverIpNode); - } - - private static Node getIpNode(String ip) { - Node node = null; - IPAddress address = new IPAddressString(ip).getAddress(); - if (address != null) { - node = ipTagRules.get(address); - } - return node == null ? new Node() : node; - } - - private static void setFqdnTags(CnRecordLog entity) { - Node domainNode = getFqdnTags(entity.getDomain()); - entity.putDomain_tags(domainNode.getTags()); - setRuleIdAndIocType(entity, domainNode); - } - - private static Node getFqdnTags(String fqdn) { - Node node = new Node(); - if (fqdn == null || fqdn.length() == 0) { - return node; - } - - List<Node> nodes = fqdnTagFuzzyMatchRules.get(StringUtils.reverse(fqdn)); - node.mergeAll(nodes); - - if (fqdnTagFullMatchRules.containsKey(fqdn)) { - Node currentNode = fqdnTagFullMatchRules.get(fqdn); - node.merge(currentNode); - } - - return node; - } - - private static void setAppLabelTags(CnRecordLog entity) { - Node appLabelNode = getAppLabelNode(entity.getCommon_app_label()); - entity.putApp_tags(appLabelNode.getTags()); - setRuleIdAndIocType(entity, appLabelNode); - } - - private static Node getAppLabelNode(String appLable) { - if (appLable == null || appLable.length() == 0) { - return new Node(); - } - - if (appLabelTagRules.containsKey(appLable)) { - return appLabelTagRules.get(appLable); - } - - return new Node(); - } - - private static boolean isMatch(String rule, String context) { - return context != null && context.endsWith(rule); - } - - private static class Node { - private List<String> tags = new ArrayList<>(); - private List<Long> knowledgeIds = new ArrayList<>(); - - private List<String> knowledgeTypes = new ArrayList<>(); - - public List<String> getTags() { - return tags; - } - - public void put(Long id, String type) { - if (id == null || type == null) { - return; - } - knowledgeIds.add(id); - knowledgeTypes.add(type); - } - - public void merge(Node node) { - this.tags.addAll(node.getTags()); - this.knowledgeIds.addAll(node.knowledgeIds); - this.knowledgeTypes.addAll(node.knowledgeTypes); - } - - public void mergeAll(List<Node> nodes) { - for (Node node : nodes) { - this.merge(node); - } - } - } - public static void close() { - ipTagRules.clear(); - ipTagRules = null; - fqdnTagFuzzyMatchRules = null; - fqdnTagFullMatchRules.clear(); - fqdnTagFullMatchRules = null; - appLabelTagRules.clear(); - appLabelTagRules = null; - ipInvalid = null; - domainInvalid = null; - appInvalid = null; - } - -} diff --git a/platform-etl/src/main/java/com/zdjizhi/etl/utils/tag/AppTagUtils.java b/platform-etl/src/main/java/com/zdjizhi/etl/utils/tag/AppTagUtils.java new file mode 100644 index 0000000..dd21540 --- /dev/null +++ b/platform-etl/src/main/java/com/zdjizhi/etl/utils/tag/AppTagUtils.java @@ -0,0 +1,79 @@ +package com.zdjizhi.etl.utils.tag; + +import com.zdjizhi.base.common.CnRecordLog; +import com.zdjizhi.etl.utils.HighCsvReader; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.ByteArrayInputStream; +import java.io.InputStreamReader; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +public class AppTagUtils { + + private static final Logger logger = LoggerFactory.getLogger(AppTagUtils.class); + + private static Map<String, Node> appLabelTagRules = new HashMap<>(); + + public static void updateTag(Map<Long, byte[]> contents) { + Map<String, Node> newAppLabelTagRules = new HashMap<>(); + for (Long s : contents.keySet()) { + parseAppLabelTagRules(newAppLabelTagRules, s, contents.get(s)); + } + appLabelTagRules = newAppLabelTagRules; + } + + public static void setTag(CnRecordLog entity) { + try { + Node appLabelNode = getAppLabelNode(entity.getCommon_app_label()); + entity.putApp_tags(appLabelNode.getTags()); + BaseTagUtils.setRuleIdAndIocType(entity, appLabelNode); + } catch (Exception e) { + logger.error("ETL解析错误\nrecordLog:{} \n", entity, e); + } + } + + private static Node getAppLabelNode(String appLable) { + if (appLable == null || appLable.length() == 0) { + return new Node(); + } + + if (appLabelTagRules.containsKey(appLable)) { + return appLabelTagRules.get(appLable); + } + + return new Node(); + } + + private static void parseAppLabelTagRules(Map<String, Node> appTags, Long id, byte[] appLabelFileContent) { + try { + InputStreamReader reader = new InputStreamReader(new ByteArrayInputStream(appLabelFileContent)); + List<String> needColumns = new ArrayList<>(); + needColumns.add("tag_value"); + needColumns.add("app_name"); + HighCsvReader csvReader = new HighCsvReader(reader, needColumns); + HighCsvReader.CsvIterator iterator = csvReader.getIterator(); + while (iterator.hasNext()) { + Map<String, String> line = iterator.next(); + String tagValue = line.get("tag_value"); + String appName = line.get("app_name"); + + if (appTags.containsKey(appName)) { + Node node = appTags.get(appName); + node.getTags().add(tagValue); + node.put(id, "app"); + } else { + Node node = new Node(); + node.getTags().add(tagValue); + node.put(id, "app"); + appTags.put(appName, node); + } + } + } catch (Exception e) { + logger.error("parse app rules id" + id + " failed", e); + } + } +} diff --git a/platform-etl/src/main/java/com/zdjizhi/etl/utils/tag/BaseTagUtils.java b/platform-etl/src/main/java/com/zdjizhi/etl/utils/tag/BaseTagUtils.java new file mode 100644 index 0000000..15e7dbb --- /dev/null +++ b/platform-etl/src/main/java/com/zdjizhi/etl/utils/tag/BaseTagUtils.java @@ -0,0 +1,55 @@ +package com.zdjizhi.etl.utils.tag; + +import com.zdjizhi.base.common.CnRecordLog; +import com.zdjizhi.base.rule.info.RuleInfo; +import com.zdjizhi.etl.utils.TimeUtils; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +public class BaseTagUtils { + private static final Map<Long, List<Long>> libraryIdRuleIdMap = new HashMap<>(); + + public static void updateInternalRuleId(Long key, RuleInfo ruleInfo) { + System.out.println(TimeUtils.getCurrentTime() + ": tagUtils update rule: " + ruleInfo.getName() + ":" + ruleInfo.getRuleId()); + long ruleId = ruleInfo.getRuleId(); + RuleInfo.RuleConfig ruleConfig = ruleInfo.getRuleConfig(); + if (ruleConfig instanceof RuleInfo.IndicatorMatchConfig) { + Long libraryId = ((RuleInfo.IndicatorMatchConfig) ruleConfig).getLibraryId(); + if (libraryIdRuleIdMap.containsKey(libraryId)) { + libraryIdRuleIdMap.get(libraryId).add(ruleId); + } else { + List<Long> ruleIdList = new ArrayList<>(); + ruleIdList.add(ruleId); + libraryIdRuleIdMap.put(libraryId, ruleIdList); + } + } + } + + public static void removeInternalRuleId(Long key, RuleInfo ruleInfo) { + System.out.println(TimeUtils.getCurrentTime() + ": tagUtils remove rule : " + ruleInfo.getName() + ":" + ruleInfo.getRuleId()); + RuleInfo.RuleConfig ruleConfig = ruleInfo.getRuleConfig(); + if (ruleConfig instanceof RuleInfo.IndicatorMatchConfig) { + Long libraryId = ((RuleInfo.IndicatorMatchConfig) ruleConfig).getLibraryId(); + if (libraryIdRuleIdMap.containsKey(libraryId)) { + libraryIdRuleIdMap.get(libraryId).remove(key); + } + } + } + + protected static void setRuleIdAndIocType(CnRecordLog entity, Node node) { + for (int i = 0; i < node.knowledgeIds.size(); i++) { + Long libraryId = node.knowledgeIds.get(i); + String libraryType = node.knowledgeTypes.get(i); + if (libraryIdRuleIdMap.containsKey(libraryId)) { + for (Long ruleId : libraryIdRuleIdMap.get(libraryId)) { + entity.getRule_id_list().add(ruleId); + entity.getIoc_type_list().add(libraryType); + } + } + } + + } +} diff --git a/platform-etl/src/main/java/com/zdjizhi/etl/utils/tag/DomainTagUtils.java b/platform-etl/src/main/java/com/zdjizhi/etl/utils/tag/DomainTagUtils.java new file mode 100644 index 0000000..b876bce --- /dev/null +++ b/platform-etl/src/main/java/com/zdjizhi/etl/utils/tag/DomainTagUtils.java @@ -0,0 +1,103 @@ +package com.zdjizhi.etl.utils.tag; + +import com.zdjizhi.base.common.CnRecordLog; +import com.zdjizhi.etl.utils.HighCsvReader; +import com.zdjizhi.etl.utils.Trie; +import org.apache.commons.lang3.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.ByteArrayInputStream; +import java.io.InputStreamReader; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +public class DomainTagUtils { + + private static final Logger logger = LoggerFactory.getLogger(DomainTagUtils.class); + + private static Trie<Node> fqdnTagFuzzyMatchRules = new Trie<>(); + private static Map<String, Node> fqdnTagFullMatchRules = new HashMap<>(); + + public static void updateTag(Map<Long, byte[]> contents) { + Trie<Node> newFqdnTagFuzzyMatchRules = new Trie<>(); + Map<String, Node> newFqdnTagFullMatchRules = new HashMap<>(); + for (Long s : contents.keySet()) { + parseFqdnTagRules(newFqdnTagFuzzyMatchRules, newFqdnTagFullMatchRules, s, contents.get(s)); + } + fqdnTagFuzzyMatchRules = newFqdnTagFuzzyMatchRules; + fqdnTagFullMatchRules = newFqdnTagFullMatchRules; + } + + public static void setTag(CnRecordLog entity) { + try { + Node domainNode = getFqdnTags(entity.getDomain()); + entity.putDomain_tags(domainNode.getTags()); + BaseTagUtils.setRuleIdAndIocType(entity, domainNode); + } catch (Exception e) { + logger.error("ETL解析错误\nrecordLog:{} \n", entity, e); + } + } + + private static Node getFqdnTags(String fqdn) { + Node node = new Node(); + if (fqdn == null || fqdn.length() == 0) { + return node; + } + + List<Node> nodes = fqdnTagFuzzyMatchRules.get(StringUtils.reverse(fqdn)); + node.mergeAll(nodes); + + if (fqdnTagFullMatchRules.containsKey(fqdn)) { + Node currentNode = fqdnTagFullMatchRules.get(fqdn); + node.merge(currentNode); + } + + return node; + } + + private static void parseFqdnTagRules(Trie<Node> fqdnTagsFuzzy, Map<String, Node> fqdnTagsFull, Long id, byte[] fqdnFileContent) { + try { + InputStreamReader reader = new InputStreamReader(new ByteArrayInputStream(fqdnFileContent)); + List<String> needColumns = new ArrayList<>(); + needColumns.add("tag_value"); + needColumns.add("domain"); + HighCsvReader csvReader = new HighCsvReader(reader, needColumns); + HighCsvReader.CsvIterator iterator = csvReader.getIterator(); + while (iterator.hasNext()) { + Map<String, String> line = iterator.next(); + String tagValue = line.get("tag_value"); + String rule = line.get("domain"); + + if (rule == null || rule.length() == 0) { + continue; + } + if (!rule.startsWith("$") && !rule.startsWith("*")) { + continue; + } + String newRule = rule.substring(1); + if (rule.startsWith("*")) { + Node node = new Node(); + node.getTags().add(tagValue); + node.put(id, "domain"); + fqdnTagsFuzzy.put(StringUtils.reverse(newRule), node); + } else { + if (fqdnTagsFull.containsKey(newRule)) { + Node node = fqdnTagsFull.get(newRule); + node.getTags().add(tagValue); + node.put(id, "domain"); + } else { + Node node = new Node(); + node.getTags().add(tagValue); + node.put(id, "domain"); + fqdnTagsFull.put(newRule, node); + } + } + } + } catch (Exception e) { + logger.error("parse fqdn rules id" + id + " failed", e); + } + } +} diff --git a/platform-etl/src/main/java/com/zdjizhi/etl/utils/tag/IpTagUtils.java b/platform-etl/src/main/java/com/zdjizhi/etl/utils/tag/IpTagUtils.java new file mode 100644 index 0000000..b2bf6fa --- /dev/null +++ b/platform-etl/src/main/java/com/zdjizhi/etl/utils/tag/IpTagUtils.java @@ -0,0 +1,130 @@ +package com.zdjizhi.etl.utils.tag; + +import com.zdjizhi.base.common.CnRecordLog; +import com.zdjizhi.etl.utils.HighCsvReader; +import inet.ipaddr.IPAddress; +import inet.ipaddr.IPAddressString; +import org.apache.flink.shaded.guava18.com.google.common.collect.Range; +import org.apache.flink.shaded.guava18.com.google.common.collect.RangeMap; +import org.apache.flink.shaded.guava18.com.google.common.collect.TreeRangeMap; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.ByteArrayInputStream; +import java.io.InputStreamReader; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +public class IpTagUtils { + + private static final Logger logger = LoggerFactory.getLogger(IpTagUtils.class); + + private static TreeRangeMap<IPAddress, Node> ipTagRules = TreeRangeMap.create(); + + public static void updateTag(Map<Long, byte[]> contents) { + TreeRangeMap<IPAddress, Node> newIpTagRules = TreeRangeMap.create(); + for (Long s : contents.keySet()) { + parseIpTagRules(newIpTagRules, s, contents.get(s)); + } + ipTagRules = newIpTagRules; + } + + public static void setTag(CnRecordLog entity) { + try { + Node clientIpNode = getIpNode(entity.getCommon_client_ip()); + entity.putClient_ip_tags(clientIpNode.getTags()); + //client ip not need detection + /*BaseTagUtils.setRuleIdAndIocType(entity, clientIpNode);*/ + Node serverIpNode = getIpNode(entity.getCommon_server_ip()); + entity.putServer_ip_tags(serverIpNode.getTags()); + BaseTagUtils.setRuleIdAndIocType(entity, serverIpNode); + } catch (Exception e) { + logger.error("ETL解析错误\nrecordLog:{} \n", entity, e); + } + } + + private static Node getIpNode(String ip) { + Node node = null; + IPAddress address = new IPAddressString(ip).getAddress(); + if (address != null) { + node = ipTagRules.get(address); + } + return node == null ? new Node() : node; + } + + private static void parseIpTagRules(TreeRangeMap<IPAddress, Node> treeRangeMap, Long id, byte[] ipFileContent) { + try { + InputStreamReader reader = new InputStreamReader(new ByteArrayInputStream(ipFileContent)); + List<String> needColumns = new ArrayList<>(); + needColumns.add("tag_value"); + needColumns.add("addr_format"); + needColumns.add("ip1"); + needColumns.add("ip2"); + HighCsvReader csvReader = new HighCsvReader(reader, needColumns); + HighCsvReader.CsvIterator iterator = csvReader.getIterator(); + while (iterator.hasNext()) { + Map<String, String> line = iterator.next(); + try { + String tagValue = line.get("tag_value"); + String addrFormat = line.get("addr_format"); + String ip1 = line.get("ip1"); + String ip2 = line.get("ip2"); + + IPAddress startIpAddress = null; + IPAddress endIpAddress = null; + if ("Single".equals(addrFormat)) { + IPAddress ipAddress = new IPAddressString(ip1).getAddress(); + if (ipAddress == null) { + continue; + } + startIpAddress = ipAddress; + endIpAddress = ipAddress; + } else if ("Range".equals(addrFormat)) { + IPAddress ipAddress1 = new IPAddressString(ip1).getAddress(); + IPAddress ipAddress2 = new IPAddressString(ip2).getAddress(); + if (ipAddress1 == null || ipAddress2 == null) { + continue; + } + startIpAddress = ipAddress1; + endIpAddress = ipAddress2; + } else if ("CIDR".equals(addrFormat)) { + IPAddress cidrIpAddress = new IPAddressString(ip1 + "/" + ip2).getAddress(); + if (cidrIpAddress == null) { + continue; + } + IPAddress ipAddressLower = cidrIpAddress.getLower(); + IPAddress ipAddressUpper = cidrIpAddress.getUpper(); + startIpAddress = ipAddressLower; + endIpAddress = ipAddressUpper; + } else { + System.out.println("unknown addrFormat: " + addrFormat); + continue; + } + + RangeMap<IPAddress, Node> ipAddressListRangeMap = treeRangeMap.subRangeMap(Range.closed(startIpAddress, endIpAddress)); + Map<Range<IPAddress>, Node> rangeListMap = ipAddressListRangeMap.asMapOfRanges(); + TreeRangeMap<IPAddress, Node> subRangeMap = TreeRangeMap.create(); + Node node = new Node(); + node.getTags().add(tagValue); + node.put(id, "ip"); + subRangeMap.put(Range.closed(startIpAddress, endIpAddress), node); + for (Range<IPAddress> ipAddressRange : rangeListMap.keySet()) { + Node ipAddressNode = ipAddressListRangeMap.get(ipAddressRange.lowerEndpoint()); + ipAddressNode.getTags().add(tagValue); + node.put(id, "ip"); + subRangeMap.put(ipAddressRange, ipAddressNode); + } + treeRangeMap.putAll(subRangeMap); + } catch (Exception lineException) { + System.out.println("ip tag line: " + line.toString() + " parse error:" + lineException); + lineException.printStackTrace(System.out); + } + } + } catch (Exception e) { + logger.error("parse ip rules id" + id + " failed", e); + } + + + } +} diff --git a/platform-etl/src/main/java/com/zdjizhi/etl/utils/tag/Node.java b/platform-etl/src/main/java/com/zdjizhi/etl/utils/tag/Node.java new file mode 100644 index 0000000..af2f4dc --- /dev/null +++ b/platform-etl/src/main/java/com/zdjizhi/etl/utils/tag/Node.java @@ -0,0 +1,35 @@ +package com.zdjizhi.etl.utils.tag; + +import java.util.ArrayList; +import java.util.List; + +public class Node { + private List<String> tags = new ArrayList<>(); + protected List<Long> knowledgeIds = new ArrayList<>(); + + protected List<String> knowledgeTypes = new ArrayList<>(); + + public List<String> getTags() { + return tags; + } + + public void put(Long id, String type) { + if (id == null || type == null) { + return; + } + knowledgeIds.add(id); + knowledgeTypes.add(type); + } + + public void merge(Node node) { + this.tags.addAll(node.getTags()); + this.knowledgeIds.addAll(node.knowledgeIds); + this.knowledgeTypes.addAll(node.knowledgeTypes); + } + + public void mergeAll(List<Node> nodes) { + for (Node node : nodes) { + this.merge(node); + } + } +} |
