diff options
| author | gujinkai <[email protected]> | 2024-04-17 18:22:09 +0800 |
|---|---|---|
| committer | gujinkai <[email protected]> | 2024-04-17 18:22:09 +0800 |
| commit | 836aa56a117b1b7b594e02d38b956cdfe84ddc06 (patch) | |
| tree | 72852ce0bf74212daf31ed11b176836be79543ad | |
| parent | dab260a31aa561bf427fc89cfe23032be537e65d (diff) | |
[Feature][core] change match method of domain in IntelligenceIndicatorKnowledgeBaseHandler
4 files changed, 81 insertions, 15 deletions
diff --git a/groot-core/src/main/java/com/geedgenetworks/core/udf/knowlegdebase/handler/IntelligenceIndicatorKnowledgeBaseHandler.java b/groot-core/src/main/java/com/geedgenetworks/core/udf/knowlegdebase/handler/IntelligenceIndicatorKnowledgeBaseHandler.java index 53fa0de..232a61a 100644 --- a/groot-core/src/main/java/com/geedgenetworks/core/udf/knowlegdebase/handler/IntelligenceIndicatorKnowledgeBaseHandler.java +++ b/groot-core/src/main/java/com/geedgenetworks/core/udf/knowlegdebase/handler/IntelligenceIndicatorKnowledgeBaseHandler.java @@ -1,8 +1,10 @@ package com.geedgenetworks.core.udf.knowlegdebase.handler; +import com.geedgenetworks.core.utils.cn.common.Trie; import com.geedgenetworks.core.utils.cn.csv.HighCsvReader; import inet.ipaddr.IPAddress; import inet.ipaddr.IPAddressString; +import org.apache.commons.lang3.StringUtils; import org.apache.flink.shaded.guava18.com.google.common.collect.Range; import org.apache.flink.shaded.guava18.com.google.common.collect.TreeRangeMap; import org.slf4j.Logger; @@ -23,8 +25,12 @@ public class IntelligenceIndicatorKnowledgeBaseHandler extends AbstractSingleKno private TreeRangeMap<IPAddress, List<String>> ipTagMap = TreeRangeMap.create(); + // $开头,精确匹配 private HashMap<String, List<String>> domainTagMap = new HashMap<>(); + // *开头,模糊匹配 + private Trie<String> domainSuffix = new Trie<>(); + private IntelligenceIndicatorKnowledgeBaseHandler() { } @@ -50,6 +56,7 @@ public class IntelligenceIndicatorKnowledgeBaseHandler extends AbstractSingleKno HighCsvReader highCsvReader = new HighCsvReader(new InputStreamReader(new ByteArrayInputStream(content)), needColumns); TreeRangeMap<IPAddress, List<String>> newIpTagMap = TreeRangeMap.create(); HashMap<String, List<String>> newDomainMap = new HashMap<>((int) (highCsvReader.getLineNumber() / 0.75F + 1.0F)); + Trie<String> newDomainSuffix = new Trie<>(); HighCsvReader.CsvIterator iterator = highCsvReader.getIterator(); while (iterator.hasNext()) { Map<String, String> line = 
iterator.next(); @@ -104,10 +111,14 @@ public class IntelligenceIndicatorKnowledgeBaseHandler extends AbstractSingleKno }); newIpTagMap.putAll(subRangeMap); } else if ("Domain".equals(type)) { - if (newDomainMap.containsKey(domain)) { - newDomainMap.get(domain).addAll(tags); + String finalDomain = domain.substring(1); + if (domain.startsWith("$")) { + newDomainMap.computeIfAbsent(finalDomain, k -> new ArrayList<>()).addAll(tags); + } else if (domain.startsWith("*")) { + String reverseDomain = StringUtils.reverse(finalDomain); + tags.forEach(tag -> newDomainSuffix.put(reverseDomain, tag)); } else { - newDomainMap.put(domain, new ArrayList<>(tags)); + logger.warn("intelligence indicator find unknown domain: " + domain); } } } catch (Exception lineException) { @@ -116,6 +127,7 @@ public class IntelligenceIndicatorKnowledgeBaseHandler extends AbstractSingleKno } ipTagMap = newIpTagMap; domainTagMap = newDomainMap; + domainSuffix = newDomainSuffix; } catch (Exception e) { logger.error(this.getClass().getSimpleName() + " update error", e); return false; @@ -133,19 +145,13 @@ public class IntelligenceIndicatorKnowledgeBaseHandler extends AbstractSingleKno } public List<String> lookupByDomain(String domain) { + List<String> result = new ArrayList<>(); if (domain == null || domain.length() == 0) { - return new ArrayList<String>(); - } - if (domainTagMap.containsKey(domain)) { - return domainTagMap.get(domain); - } else { - int index = domain.indexOf(".") + 1; - if (index > 0) { - return lookupByDomain(domain.substring(index)); - } else { - return new ArrayList<String>(); - } + return result; } + Optional.ofNullable(domainTagMap.get(domain)).ifPresent(result::addAll); + result.addAll(domainSuffix.get(StringUtils.reverse(domain))); + return result; } @Override diff --git a/groot-core/src/main/java/com/geedgenetworks/core/utils/cn/common/Trie.java b/groot-core/src/main/java/com/geedgenetworks/core/utils/cn/common/Trie.java index 676815c..313aa4f 100644 --- 
a/groot-core/src/main/java/com/geedgenetworks/core/utils/cn/common/Trie.java +++ b/groot-core/src/main/java/com/geedgenetworks/core/utils/cn/common/Trie.java @@ -7,6 +7,27 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +/** + * Trie tree + * + * @param <T> data type + * @description The trie tree puts every character of the string into a node, and the data is stored in the last node. + * for example: + * how to store: + * if we put "baidu.com":"1" and "baidu.cn":"2" into the trie tree, the tree will be like this: + * root -> b -> a -> i -> d -> u -> . -> c -> o -> m + * -> n + * the data "1" is stored in the last node "m" and the data "2" is stored in the last node "n" + * then we put "baidu":"3" into the trie tree, the tree will be like this: + * root -> b -> a -> i -> d -> u -> . -> c -> o -> m + * -> n + * the data "3" will be stored in the node "u" + * <p> + * how to get: + * traverse the trie tree along the given string, and collect all the data found on the path + * if we get "baidu.com" from the trie tree, we will get "1" and "3" + * if we get "baidu.cn" from the trie tree, we will get "2" and "3" + */ public class Trie<T> { private final Node<T> root = new Node<>(); diff --git a/groot-core/src/test/java/com/geedgenetworks/core/udf/cn/IntelligenceIndicatorLookupTest.java b/groot-core/src/test/java/com/geedgenetworks/core/udf/cn/IntelligenceIndicatorLookupTest.java index b5df7e0..804c7ca 100644 --- a/groot-core/src/test/java/com/geedgenetworks/core/udf/cn/IntelligenceIndicatorLookupTest.java +++ b/groot-core/src/test/java/com/geedgenetworks/core/udf/cn/IntelligenceIndicatorLookupTest.java @@ -27,7 +27,7 @@ public class IntelligenceIndicatorLookupTest { void setUp() { runtimeContext = mockRuntimeContext(); - String content = "type,ip_addr_format,ip1,ip2,domain,tags\nIP,CIDR,116.178.65.0,25,ali.com,\"阿里1,云服务1\"\nDomain,CIDR,116.178.65.0,25,ali.com,\"阿里2,云服务2\""; + String content = 
"type,ip_addr_format,ip1,ip2,domain,tags\nIP,CIDR,116.178.65.0,25,ali.com,\"阿里1,云服务1\"\nDomain,CIDR,116.178.65.0,25,$ali.com,\"阿里2,云服务2\"\nDomain,CIDR,116.178.65.0,25,*baidu.com,\"阿里3,云服务3\""; mockKnowledgeBaseHandler(content); intelligenceIndicatorLookup = new IntelligenceIndicatorLookup(); @@ -117,6 +117,25 @@ public class IntelligenceIndicatorLookupTest { assertEquals(Arrays.asList("test", "test1", "阿里2", "云服务2"), evaluate.getExtractedFields().get("domain_tags")); } + @Test + void evaluate5() { + UDFContext udfContext = new UDFContext(); + Map<String, Object> parameters = new HashMap<>(); + parameters.put("kb_name", kbName); + parameters.put("option", "DOMAIN_TO_TAG"); + udfContext.setParameters(parameters); + udfContext.setLookup_fields(Collections.singletonList("domain")); + udfContext.setOutput_fields(Collections.singletonList("domain_tags")); + intelligenceIndicatorLookup.open(runtimeContext, udfContext); + + Event event = new Event(); + Map<String, Object> fields = new HashMap<>(); + fields.put("domain", "test.baidu.com"); + event.setExtractedFields(fields); + Event evaluate = intelligenceIndicatorLookup.evaluate(event); + assertEquals(Arrays.asList("阿里3", "云服务3"), evaluate.getExtractedFields().get("domain_tags")); + } + @AfterEach void afterAll() { clearState(); diff --git a/groot-core/src/test/java/com/geedgenetworks/core/udf/cn/TrieTest.java b/groot-core/src/test/java/com/geedgenetworks/core/udf/cn/TrieTest.java index b54d13d..312e41a 100644 --- a/groot-core/src/test/java/com/geedgenetworks/core/udf/cn/TrieTest.java +++ b/groot-core/src/test/java/com/geedgenetworks/core/udf/cn/TrieTest.java @@ -43,6 +43,26 @@ public class TrieTest { List<String> strings8 = trie.get(StringUtils.reverse("txj/r~/moc.elgoog.yxorpdeef//:ptth")); assertEquals(Arrays.asList("4"), strings8); + + Trie<String> trie1 = new Trie<>(); + + trie1.put("baidu.com", "1"); + trie1.put("baidu.cn", "2"); + trie1.put("baidu", "3"); + + List<String> list1 = trie1.get("baidu.com"); + 
assertEquals(Arrays.asList("3", "1"), list1); + + List<String> list2 = trie1.get("baidu.cn"); + assertEquals(Arrays.asList("3", "2"), list2); + + + Trie<String> trie2 = new Trie<>(); + trie2.put("baidu.com", "1"); + trie2.put("baidu.com", "2"); + trie2.put("baidu.com", "3"); + List<String> list = trie2.get("baidu.com.cn"); + assertEquals(Arrays.asList("1", "2", "3"), list); } @Test |
