summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author王宽 <[email protected]>2024-04-19 09:19:57 +0000
committer王宽 <[email protected]>2024-04-19 09:19:57 +0000
commit35d98fa5adfe514f33b9d674d30dbe3875fb6b68 (patch)
treea9af77d7e943d0f18e9630e7215192a59b0fcef9
parentdab260a31aa561bf427fc89cfe23032be537e65d (diff)
parente1416e693581bb062ce1d316403bfa28cb4fc973 (diff)
Merge branch 'feature/intelligence_indicator_domain_lookup' into 'develop'
[Feature][core] change match method of domain in IntelligenceIndicatorKnowledgeBaseHandler See merge request galaxy/platform/groot-stream!38
-rw-r--r--groot-core/src/main/java/com/geedgenetworks/core/udf/cn/IntelligenceIndicatorLookup.java4
-rw-r--r--groot-core/src/main/java/com/geedgenetworks/core/udf/cn/UserDefineTagLookup.java63
-rw-r--r--groot-core/src/main/java/com/geedgenetworks/core/udf/knowlegdebase/handler/IntelligenceIndicatorKnowledgeBaseHandler.java38
-rw-r--r--groot-core/src/main/java/com/geedgenetworks/core/utils/cn/common/Trie.java21
-rw-r--r--groot-core/src/test/java/com/geedgenetworks/core/udf/cn/IntelligenceIndicatorLookupTest.java21
-rw-r--r--groot-core/src/test/java/com/geedgenetworks/core/udf/cn/TrieTest.java20
6 files changed, 121 insertions, 46 deletions
diff --git a/groot-core/src/main/java/com/geedgenetworks/core/udf/cn/IntelligenceIndicatorLookup.java b/groot-core/src/main/java/com/geedgenetworks/core/udf/cn/IntelligenceIndicatorLookup.java
index e386437..545fbaa 100644
--- a/groot-core/src/main/java/com/geedgenetworks/core/udf/cn/IntelligenceIndicatorLookup.java
+++ b/groot-core/src/main/java/com/geedgenetworks/core/udf/cn/IntelligenceIndicatorLookup.java
@@ -37,7 +37,7 @@ public class IntelligenceIndicatorLookup extends AbstractKnowledgeUDF {
switch (option) {
case "IP_TO_TAG":
List<String> ipTags = knowledgeBaseHandler.lookupByIp(lookupValue);
- if (ipTags != null) {
+ if (ipTags != null && ipTags.size() > 0) {
hitCounter.inc();
if (event.getExtractedFields().get(outputFieldName) != null && event.getExtractedFields().get(outputFieldName) instanceof List) {
((List<String>) event.getExtractedFields().get(outputFieldName)).addAll(ipTags);
@@ -48,7 +48,7 @@ public class IntelligenceIndicatorLookup extends AbstractKnowledgeUDF {
break;
case "DOMAIN_TO_TAG":
List<String> domainTags = knowledgeBaseHandler.lookupByDomain(lookupValue);
- if (domainTags != null) {
+ if (domainTags != null && domainTags.size() > 0) {
hitCounter.inc();
if (event.getExtractedFields().get(outputFieldName) != null && event.getExtractedFields().get(outputFieldName) instanceof List) {
((List<String>) event.getExtractedFields().get(outputFieldName)).addAll(domainTags);
diff --git a/groot-core/src/main/java/com/geedgenetworks/core/udf/cn/UserDefineTagLookup.java b/groot-core/src/main/java/com/geedgenetworks/core/udf/cn/UserDefineTagLookup.java
index 3e924ab..0eaf2ad 100644
--- a/groot-core/src/main/java/com/geedgenetworks/core/udf/cn/UserDefineTagLookup.java
+++ b/groot-core/src/main/java/com/geedgenetworks/core/udf/cn/UserDefineTagLookup.java
@@ -44,39 +44,48 @@ public class UserDefineTagLookup extends AbstractKnowledgeWithRuleUDF {
switch (option) {
case "IP_TO_TAG":
List<AbstractMultipleKnowledgeBaseHandler.Node> ipNodes = ipKnowledgeBaseHandler.lookup(lookupValue);
- ipNodes.forEach(node -> {
- lookupTagsCounter.inc();
- tags.add(node.getTag());
- List<RuleKnowledgeBaseHandler.Rule> rules = ruleKnowledgeBaseHandler.lookupByKbId(node.getKbId());
- if (rules != null) {
- ruleHitCounter.inc();
- rules.forEach(rule -> ruleMetadata.addRule(rule.getRuleId(), IocType.IP.getType()));
- }
- });
+ if (ipNodes != null && ipNodes.size() > 0) {
+ hitCounter.inc();
+ ipNodes.forEach(node -> {
+ lookupTagsCounter.inc();
+ tags.add(node.getTag());
+ List<RuleKnowledgeBaseHandler.Rule> rules = ruleKnowledgeBaseHandler.lookupByKbId(node.getKbId());
+ if (rules != null) {
+ ruleHitCounter.inc();
+ rules.forEach(rule -> ruleMetadata.addRule(rule.getRuleId(), IocType.IP.getType()));
+ }
+ });
+ }
break;
case "DOMAIN_TO_TAG":
List<AbstractMultipleKnowledgeBaseHandler.Node> domainNodes = domainKnowledgeBaseHandler.lookup(lookupValue);
- domainNodes.forEach(node -> {
- lookupTagsCounter.inc();
- tags.add(node.getTag());
- List<RuleKnowledgeBaseHandler.Rule> rules = ruleKnowledgeBaseHandler.lookupByKbId(node.getKbId());
- if (rules != null) {
- ruleHitCounter.inc();
- rules.forEach(rule -> ruleMetadata.addRule(rule.getRuleId(), IocType.DOMAIN.getType()));
- }
- });
+ if (domainNodes != null && domainNodes.size() > 0) {
+ hitCounter.inc();
+ domainNodes.forEach(node -> {
+ lookupTagsCounter.inc();
+ tags.add(node.getTag());
+ List<RuleKnowledgeBaseHandler.Rule> rules = ruleKnowledgeBaseHandler.lookupByKbId(node.getKbId());
+ if (rules != null) {
+ ruleHitCounter.inc();
+ rules.forEach(rule -> ruleMetadata.addRule(rule.getRuleId(), IocType.DOMAIN.getType()));
+ }
+ });
+ }
break;
case "APP_TO_TAG":
List<AbstractMultipleKnowledgeBaseHandler.Node> appNodes = appKnowledgeBaseHandler.lookup(lookupValue);
- appNodes.forEach(node -> {
- lookupTagsCounter.inc();
- tags.add(node.getTag());
- List<RuleKnowledgeBaseHandler.Rule> rules = ruleKnowledgeBaseHandler.lookupByKbId(node.getKbId());
- if (rules != null) {
- ruleHitCounter.inc();
- rules.forEach(rule -> ruleMetadata.addRule(rule.getRuleId(), IocType.APP.getType()));
- }
- });
+ if (appNodes != null && appNodes.size() > 0) {
+ hitCounter.inc();
+ appNodes.forEach(node -> {
+ lookupTagsCounter.inc();
+ tags.add(node.getTag());
+ List<RuleKnowledgeBaseHandler.Rule> rules = ruleKnowledgeBaseHandler.lookupByKbId(node.getKbId());
+ if (rules != null) {
+ ruleHitCounter.inc();
+ rules.forEach(rule -> ruleMetadata.addRule(rule.getRuleId(), IocType.APP.getType()));
+ }
+ });
+ }
break;
default:
break;
diff --git a/groot-core/src/main/java/com/geedgenetworks/core/udf/knowlegdebase/handler/IntelligenceIndicatorKnowledgeBaseHandler.java b/groot-core/src/main/java/com/geedgenetworks/core/udf/knowlegdebase/handler/IntelligenceIndicatorKnowledgeBaseHandler.java
index 53fa0de..716f72f 100644
--- a/groot-core/src/main/java/com/geedgenetworks/core/udf/knowlegdebase/handler/IntelligenceIndicatorKnowledgeBaseHandler.java
+++ b/groot-core/src/main/java/com/geedgenetworks/core/udf/knowlegdebase/handler/IntelligenceIndicatorKnowledgeBaseHandler.java
@@ -1,8 +1,10 @@
package com.geedgenetworks.core.udf.knowlegdebase.handler;
+import com.geedgenetworks.core.utils.cn.common.Trie;
import com.geedgenetworks.core.utils.cn.csv.HighCsvReader;
import inet.ipaddr.IPAddress;
import inet.ipaddr.IPAddressString;
+import org.apache.commons.lang3.StringUtils;
import org.apache.flink.shaded.guava18.com.google.common.collect.Range;
import org.apache.flink.shaded.guava18.com.google.common.collect.TreeRangeMap;
import org.slf4j.Logger;
@@ -23,8 +25,12 @@ public class IntelligenceIndicatorKnowledgeBaseHandler extends AbstractSingleKno
private TreeRangeMap<IPAddress, List<String>> ipTagMap = TreeRangeMap.create();
+ // Entries prefixed with '$': exact match
private HashMap<String, List<String>> domainTagMap = new HashMap<>();
+ // Entries prefixed with '*': fuzzy (suffix) match
+ private Trie<String> domainSuffix = new Trie<>();
+
private IntelligenceIndicatorKnowledgeBaseHandler() {
}
@@ -50,6 +56,7 @@ public class IntelligenceIndicatorKnowledgeBaseHandler extends AbstractSingleKno
HighCsvReader highCsvReader = new HighCsvReader(new InputStreamReader(new ByteArrayInputStream(content)), needColumns);
TreeRangeMap<IPAddress, List<String>> newIpTagMap = TreeRangeMap.create();
HashMap<String, List<String>> newDomainMap = new HashMap<>((int) (highCsvReader.getLineNumber() / 0.75F + 1.0F));
+ Trie<String> newDomainSuffix = new Trie<>();
HighCsvReader.CsvIterator iterator = highCsvReader.getIterator();
while (iterator.hasNext()) {
Map<String, String> line = iterator.next();
@@ -104,10 +111,14 @@ public class IntelligenceIndicatorKnowledgeBaseHandler extends AbstractSingleKno
});
newIpTagMap.putAll(subRangeMap);
} else if ("Domain".equals(type)) {
- if (newDomainMap.containsKey(domain)) {
- newDomainMap.get(domain).addAll(tags);
+ String finalDomain = domain.substring(1);
+ if (domain.startsWith("$")) {
+ newDomainMap.computeIfAbsent(finalDomain, k -> new ArrayList<>()).addAll(tags);
+ } else if (domain.startsWith("*")) {
+ String reverseDomain = StringUtils.reverse(finalDomain);
+ tags.forEach(tag -> newDomainSuffix.put(reverseDomain, tag));
} else {
- newDomainMap.put(domain, new ArrayList<>(tags));
+ logger.warn("intelligence indicator find unknown domain: " + domain);
}
}
} catch (Exception lineException) {
@@ -116,6 +127,7 @@ public class IntelligenceIndicatorKnowledgeBaseHandler extends AbstractSingleKno
}
ipTagMap = newIpTagMap;
domainTagMap = newDomainMap;
+ domainSuffix = newDomainSuffix;
} catch (Exception e) {
logger.error(this.getClass().getSimpleName() + " update error", e);
return false;
@@ -124,28 +136,22 @@ public class IntelligenceIndicatorKnowledgeBaseHandler extends AbstractSingleKno
}
public List<String> lookupByIp(String ip) {
- List<String> tags = null;
+ List<String> tags = new ArrayList<>();
IPAddress address = new IPAddressString(ip).getAddress();
if (address != null) {
- tags = ipTagMap.get(address);
+ Optional.ofNullable(ipTagMap.get(address)).ifPresent(tags::addAll);
}
return tags;
}
public List<String> lookupByDomain(String domain) {
+ List<String> result = new ArrayList<>();
if (domain == null || domain.length() == 0) {
- return new ArrayList<String>();
- }
- if (domainTagMap.containsKey(domain)) {
- return domainTagMap.get(domain);
- } else {
- int index = domain.indexOf(".") + 1;
- if (index > 0) {
- return lookupByDomain(domain.substring(index));
- } else {
- return new ArrayList<String>();
- }
+ return result;
}
+ Optional.ofNullable(domainTagMap.get(domain)).ifPresent(result::addAll);
+ result.addAll(domainSuffix.get(StringUtils.reverse(domain)));
+ return result;
}
@Override
diff --git a/groot-core/src/main/java/com/geedgenetworks/core/utils/cn/common/Trie.java b/groot-core/src/main/java/com/geedgenetworks/core/utils/cn/common/Trie.java
index 676815c..4b7ddf7 100644
--- a/groot-core/src/main/java/com/geedgenetworks/core/utils/cn/common/Trie.java
+++ b/groot-core/src/main/java/com/geedgenetworks/core/utils/cn/common/Trie.java
@@ -7,6 +7,27 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
+/**
+ * Trie tree
+ *
+ * @param <T> data type
+ * @description A trie stores each character of a key in its own node, and the data is stored in the node of the key's last character.
+ * for example:
+ * how to store:
+ * if we put "baidu.com":"1" and "baidu.cn":"2" into the trie tree, the tree will be like this:
+ * root -> b -> a -> i -> d -> u -> . -> c -> o -> m
+ *                                         -> n
+ * the data "1" is stored in the last node "m" and the data "2" is stored in the last node "n"
+ * then we put "baidu":"3" into the trie tree, the tree will be like this:
+ * root -> b -> a -> i -> d -> u -> . -> c -> o -> m
+ *                                         -> n
+ * the data "3" will be stored in the node "u"
+ * <p>
+ * how to get:
+ * traverse the trie along the lookup string and collect the data of every node on that path
+ * if we get "baidu.com" from the trie tree, we will get "3" and "1"
+ * if we get "baidu.cn" from the trie tree, we will get "3" and "2"
+ */
public class Trie<T> {
private final Node<T> root = new Node<>();
diff --git a/groot-core/src/test/java/com/geedgenetworks/core/udf/cn/IntelligenceIndicatorLookupTest.java b/groot-core/src/test/java/com/geedgenetworks/core/udf/cn/IntelligenceIndicatorLookupTest.java
index b5df7e0..804c7ca 100644
--- a/groot-core/src/test/java/com/geedgenetworks/core/udf/cn/IntelligenceIndicatorLookupTest.java
+++ b/groot-core/src/test/java/com/geedgenetworks/core/udf/cn/IntelligenceIndicatorLookupTest.java
@@ -27,7 +27,7 @@ public class IntelligenceIndicatorLookupTest {
void setUp() {
runtimeContext = mockRuntimeContext();
- String content = "type,ip_addr_format,ip1,ip2,domain,tags\nIP,CIDR,116.178.65.0,25,ali.com,\"阿里1,云服务1\"\nDomain,CIDR,116.178.65.0,25,ali.com,\"阿里2,云服务2\"";
+ String content = "type,ip_addr_format,ip1,ip2,domain,tags\nIP,CIDR,116.178.65.0,25,ali.com,\"阿里1,云服务1\"\nDomain,CIDR,116.178.65.0,25,$ali.com,\"阿里2,云服务2\"\nDomain,CIDR,116.178.65.0,25,*baidu.com,\"阿里3,云服务3\"";
mockKnowledgeBaseHandler(content);
intelligenceIndicatorLookup = new IntelligenceIndicatorLookup();
@@ -117,6 +117,25 @@ public class IntelligenceIndicatorLookupTest {
assertEquals(Arrays.asList("test", "test1", "阿里2", "云服务2"), evaluate.getExtractedFields().get("domain_tags"));
}
+ @Test
+ void evaluate5() {
+ UDFContext udfContext = new UDFContext();
+ Map<String, Object> parameters = new HashMap<>();
+ parameters.put("kb_name", kbName);
+ parameters.put("option", "DOMAIN_TO_TAG");
+ udfContext.setParameters(parameters);
+ udfContext.setLookup_fields(Collections.singletonList("domain"));
+ udfContext.setOutput_fields(Collections.singletonList("domain_tags"));
+ intelligenceIndicatorLookup.open(runtimeContext, udfContext);
+
+ Event event = new Event();
+ Map<String, Object> fields = new HashMap<>();
+ fields.put("domain", "test.baidu.com");
+ event.setExtractedFields(fields);
+ Event evaluate = intelligenceIndicatorLookup.evaluate(event);
+ assertEquals(Arrays.asList("阿里3", "云服务3"), evaluate.getExtractedFields().get("domain_tags"));
+ }
+
@AfterEach
void afterAll() {
clearState();
diff --git a/groot-core/src/test/java/com/geedgenetworks/core/udf/cn/TrieTest.java b/groot-core/src/test/java/com/geedgenetworks/core/udf/cn/TrieTest.java
index b54d13d..312e41a 100644
--- a/groot-core/src/test/java/com/geedgenetworks/core/udf/cn/TrieTest.java
+++ b/groot-core/src/test/java/com/geedgenetworks/core/udf/cn/TrieTest.java
@@ -43,6 +43,26 @@ public class TrieTest {
List<String> strings8 = trie.get(StringUtils.reverse("txj/r~/moc.elgoog.yxorpdeef//:ptth"));
assertEquals(Arrays.asList("4"), strings8);
+
+ Trie<String> trie1 = new Trie<>();
+
+ trie1.put("baidu.com", "1");
+ trie1.put("baidu.cn", "2");
+ trie1.put("baidu", "3");
+
+ List<String> list1 = trie1.get("baidu.com");
+ assertEquals(Arrays.asList("3", "1"), list1);
+
+ List<String> list2 = trie1.get("baidu.cn");
+ assertEquals(Arrays.asList("3", "2"), list2);
+
+
+ Trie<String> trie2 = new Trie<>();
+ trie2.put("baidu.com", "1");
+ trie2.put("baidu.com", "2");
+ trie2.put("baidu.com", "3");
+ List<String> list = trie2.get("baidu.com.cn");
+ assertEquals(Arrays.asList("1", "2", "3"), list);
}
@Test