summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author gujinkai <[email protected]> 2024-04-17 18:22:09 +0800
committer gujinkai <[email protected]> 2024-04-17 18:22:09 +0800
commit 836aa56a117b1b7b594e02d38b956cdfe84ddc06 (patch)
tree 72852ce0bf74212daf31ed11b176836be79543ad
parent dab260a31aa561bf427fc89cfe23032be537e65d (diff)
[Feature][core] change match method of domain in IntelligenceIndicatorKnowledgeBaseHandler
-rw-r--r-- groot-core/src/main/java/com/geedgenetworks/core/udf/knowlegdebase/handler/IntelligenceIndicatorKnowledgeBaseHandler.java 34
-rw-r--r-- groot-core/src/main/java/com/geedgenetworks/core/utils/cn/common/Trie.java 21
-rw-r--r-- groot-core/src/test/java/com/geedgenetworks/core/udf/cn/IntelligenceIndicatorLookupTest.java 21
-rw-r--r-- groot-core/src/test/java/com/geedgenetworks/core/udf/cn/TrieTest.java 20
4 files changed, 81 insertions, 15 deletions
diff --git a/groot-core/src/main/java/com/geedgenetworks/core/udf/knowlegdebase/handler/IntelligenceIndicatorKnowledgeBaseHandler.java b/groot-core/src/main/java/com/geedgenetworks/core/udf/knowlegdebase/handler/IntelligenceIndicatorKnowledgeBaseHandler.java
index 53fa0de..232a61a 100644
--- a/groot-core/src/main/java/com/geedgenetworks/core/udf/knowlegdebase/handler/IntelligenceIndicatorKnowledgeBaseHandler.java
+++ b/groot-core/src/main/java/com/geedgenetworks/core/udf/knowlegdebase/handler/IntelligenceIndicatorKnowledgeBaseHandler.java
@@ -1,8 +1,10 @@
package com.geedgenetworks.core.udf.knowlegdebase.handler;
+import com.geedgenetworks.core.utils.cn.common.Trie;
import com.geedgenetworks.core.utils.cn.csv.HighCsvReader;
import inet.ipaddr.IPAddress;
import inet.ipaddr.IPAddressString;
+import org.apache.commons.lang3.StringUtils;
import org.apache.flink.shaded.guava18.com.google.common.collect.Range;
import org.apache.flink.shaded.guava18.com.google.common.collect.TreeRangeMap;
import org.slf4j.Logger;
@@ -23,8 +25,12 @@ public class IntelligenceIndicatorKnowledgeBaseHandler extends AbstractSingleKno
private TreeRangeMap<IPAddress, List<String>> ipTagMap = TreeRangeMap.create();
+ // Entries prefixed with '$': exact match
private HashMap<String, List<String>> domainTagMap = new HashMap<>();
+ // Entries prefixed with '*': fuzzy match (matched by domain suffix)
+ private Trie<String> domainSuffix = new Trie<>();
+
private IntelligenceIndicatorKnowledgeBaseHandler() {
}
@@ -50,6 +56,7 @@ public class IntelligenceIndicatorKnowledgeBaseHandler extends AbstractSingleKno
HighCsvReader highCsvReader = new HighCsvReader(new InputStreamReader(new ByteArrayInputStream(content)), needColumns);
TreeRangeMap<IPAddress, List<String>> newIpTagMap = TreeRangeMap.create();
HashMap<String, List<String>> newDomainMap = new HashMap<>((int) (highCsvReader.getLineNumber() / 0.75F + 1.0F));
+ Trie<String> newDomainSuffix = new Trie<>();
HighCsvReader.CsvIterator iterator = highCsvReader.getIterator();
while (iterator.hasNext()) {
Map<String, String> line = iterator.next();
@@ -104,10 +111,14 @@ public class IntelligenceIndicatorKnowledgeBaseHandler extends AbstractSingleKno
});
newIpTagMap.putAll(subRangeMap);
} else if ("Domain".equals(type)) {
- if (newDomainMap.containsKey(domain)) {
- newDomainMap.get(domain).addAll(tags);
+ String finalDomain = domain.substring(1);
+ if (domain.startsWith("$")) {
+ newDomainMap.computeIfAbsent(finalDomain, k -> new ArrayList<>()).addAll(tags);
+ } else if (domain.startsWith("*")) {
+ String reverseDomain = StringUtils.reverse(finalDomain);
+ tags.forEach(tag -> newDomainSuffix.put(reverseDomain, tag));
} else {
- newDomainMap.put(domain, new ArrayList<>(tags));
+ logger.warn("intelligence indicator find unknown domain: " + domain);
}
}
} catch (Exception lineException) {
@@ -116,6 +127,7 @@ public class IntelligenceIndicatorKnowledgeBaseHandler extends AbstractSingleKno
}
ipTagMap = newIpTagMap;
domainTagMap = newDomainMap;
+ domainSuffix = newDomainSuffix;
} catch (Exception e) {
logger.error(this.getClass().getSimpleName() + " update error", e);
return false;
@@ -133,19 +145,13 @@ public class IntelligenceIndicatorKnowledgeBaseHandler extends AbstractSingleKno
}
public List<String> lookupByDomain(String domain) {
+ List<String> result = new ArrayList<>();
if (domain == null || domain.length() == 0) {
- return new ArrayList<String>();
- }
- if (domainTagMap.containsKey(domain)) {
- return domainTagMap.get(domain);
- } else {
- int index = domain.indexOf(".") + 1;
- if (index > 0) {
- return lookupByDomain(domain.substring(index));
- } else {
- return new ArrayList<String>();
- }
+ return result;
}
+ Optional.ofNullable(domainTagMap.get(domain)).ifPresent(result::addAll);
+ result.addAll(domainSuffix.get(StringUtils.reverse(domain)));
+ return result;
}
@Override
diff --git a/groot-core/src/main/java/com/geedgenetworks/core/utils/cn/common/Trie.java b/groot-core/src/main/java/com/geedgenetworks/core/utils/cn/common/Trie.java
index 676815c..313aa4f 100644
--- a/groot-core/src/main/java/com/geedgenetworks/core/utils/cn/common/Trie.java
+++ b/groot-core/src/main/java/com/geedgenetworks/core/utils/cn/common/Trie.java
@@ -7,6 +7,27 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
+/**
+ * Trie tree
+ *
+ * @param <T> data type
+ * @description A trie stores each character of a key in its own node; the data is stored in the node of the key's last character.
+ * for example:
+ * how to store:
+ * if we put "baidu.com":"1" and "baidu.cn":"2" into the trie, the tree looks like this:
+ * root -> b -> a -> i -> d -> u -> . -> c -> o -> m
+ *                                         -> n
+ * the data "1" is stored in the last node "m" and the data "2" is stored in the last node "n";
+ * if we then put "baidu":"3" into the trie, the tree keeps the same shape:
+ * root -> b -> a -> i -> d -> u -> . -> c -> o -> m
+ *                                         -> n
+ * and the data "3" is stored in the node "u"
+ * <p>
+ * how to get:
+ * traverse the trie along the given string and collect, in path order, the data of every stored key that is a prefix of it
+ * if we get "baidu.com" from the trie, we will get "3" and "1"
+ * if we get "baidu.cn" from the trie, we will get "3" and "2"
+ */
public class Trie<T> {
private final Node<T> root = new Node<>();
diff --git a/groot-core/src/test/java/com/geedgenetworks/core/udf/cn/IntelligenceIndicatorLookupTest.java b/groot-core/src/test/java/com/geedgenetworks/core/udf/cn/IntelligenceIndicatorLookupTest.java
index b5df7e0..804c7ca 100644
--- a/groot-core/src/test/java/com/geedgenetworks/core/udf/cn/IntelligenceIndicatorLookupTest.java
+++ b/groot-core/src/test/java/com/geedgenetworks/core/udf/cn/IntelligenceIndicatorLookupTest.java
@@ -27,7 +27,7 @@ public class IntelligenceIndicatorLookupTest {
void setUp() {
runtimeContext = mockRuntimeContext();
- String content = "type,ip_addr_format,ip1,ip2,domain,tags\nIP,CIDR,116.178.65.0,25,ali.com,\"阿里1,云服务1\"\nDomain,CIDR,116.178.65.0,25,ali.com,\"阿里2,云服务2\"";
+ String content = "type,ip_addr_format,ip1,ip2,domain,tags\nIP,CIDR,116.178.65.0,25,ali.com,\"阿里1,云服务1\"\nDomain,CIDR,116.178.65.0,25,$ali.com,\"阿里2,云服务2\"\nDomain,CIDR,116.178.65.0,25,*baidu.com,\"阿里3,云服务3\"";
mockKnowledgeBaseHandler(content);
intelligenceIndicatorLookup = new IntelligenceIndicatorLookup();
@@ -117,6 +117,25 @@ public class IntelligenceIndicatorLookupTest {
assertEquals(Arrays.asList("test", "test1", "阿里2", "云服务2"), evaluate.getExtractedFields().get("domain_tags"));
}
+ @Test
+ void evaluate5() {
+ UDFContext udfContext = new UDFContext();
+ Map<String, Object> parameters = new HashMap<>();
+ parameters.put("kb_name", kbName);
+ parameters.put("option", "DOMAIN_TO_TAG");
+ udfContext.setParameters(parameters);
+ udfContext.setLookup_fields(Collections.singletonList("domain"));
+ udfContext.setOutput_fields(Collections.singletonList("domain_tags"));
+ intelligenceIndicatorLookup.open(runtimeContext, udfContext);
+
+ Event event = new Event();
+ Map<String, Object> fields = new HashMap<>();
+ fields.put("domain", "test.baidu.com");
+ event.setExtractedFields(fields);
+ Event evaluate = intelligenceIndicatorLookup.evaluate(event);
+ assertEquals(Arrays.asList("阿里3", "云服务3"), evaluate.getExtractedFields().get("domain_tags"));
+ }
+
@AfterEach
void afterAll() {
clearState();
diff --git a/groot-core/src/test/java/com/geedgenetworks/core/udf/cn/TrieTest.java b/groot-core/src/test/java/com/geedgenetworks/core/udf/cn/TrieTest.java
index b54d13d..312e41a 100644
--- a/groot-core/src/test/java/com/geedgenetworks/core/udf/cn/TrieTest.java
+++ b/groot-core/src/test/java/com/geedgenetworks/core/udf/cn/TrieTest.java
@@ -43,6 +43,26 @@ public class TrieTest {
List<String> strings8 = trie.get(StringUtils.reverse("txj/r~/moc.elgoog.yxorpdeef//:ptth"));
assertEquals(Arrays.asList("4"), strings8);
+
+ Trie<String> trie1 = new Trie<>();
+
+ trie1.put("baidu.com", "1");
+ trie1.put("baidu.cn", "2");
+ trie1.put("baidu", "3");
+
+ List<String> list1 = trie1.get("baidu.com");
+ assertEquals(Arrays.asList("3", "1"), list1);
+
+ List<String> list2 = trie1.get("baidu.cn");
+ assertEquals(Arrays.asList("3", "2"), list2);
+
+
+ Trie<String> trie2 = new Trie<>();
+ trie2.put("baidu.com", "1");
+ trie2.put("baidu.com", "2");
+ trie2.put("baidu.com", "3");
+ List<String> list = trie2.get("baidu.com.cn");
+ assertEquals(Arrays.asList("1", "2", "3"), list);
}
@Test