summary | refs | log | tree | commit | diff
path: root/scanner/expr_matcher
diff options
context:
space:
mode:
author: liuwentan <[email protected]> 2023-12-20 06:16:23 +0000
committer: liuwentan <[email protected]> 2023-12-20 06:16:23 +0000
commit: 580a594806928096fc220bda0033ed8c209f97b6 (patch)
tree: 7a32185de1d7901b644ba898cb179a5bcc8cb7e4 /scanner/expr_matcher
parent: e65239abe7c579f56c31391224380fe8ea82196a (diff)
[PATCH] Add bloom filter to optimize expr_matcher performance
Diffstat (limited to 'scanner/expr_matcher')
-rw-r--r-- scanner/expr_matcher/adapter_hs/adapter_hs.cpp 58
-rw-r--r-- scanner/expr_matcher/adapter_rs/adapter_rs.cpp 64
-rw-r--r-- scanner/expr_matcher/expr_matcher.cpp 3
-rw-r--r-- scanner/expr_matcher/expr_matcher_inc.h 2
4 files changed, 121 insertions, 6 deletions
diff --git a/scanner/expr_matcher/adapter_hs/adapter_hs.cpp b/scanner/expr_matcher/adapter_hs/adapter_hs.cpp
index 0140b7a..d978df4 100644
--- a/scanner/expr_matcher/adapter_hs/adapter_hs.cpp
+++ b/scanner/expr_matcher/adapter_hs/adapter_hs.cpp
@@ -18,6 +18,7 @@
#include "adapter_hs.h"
#include "uthash/uthash.h"
+#include "bloom/bloom.h"
#include "maat_utils.h"
#include "../../bool_matcher/bool_matcher.h"
@@ -66,6 +67,7 @@ struct hs_lit_engine {
size_t n_thread;
hs_database_t *hs_db;
hs_scratch_t **hs_scratches;
+ struct bloom **blooms;
struct hs_lit_stream **streams;
struct pattern_attribute *ref_pat_attr;
struct log_handle *logger;
@@ -76,6 +78,7 @@ struct hs_regex_engine {
size_t n_thread;
hs_database_t *hs_db;
hs_scratch_t **hs_scratches;
+ struct bloom **blooms;
struct hs_regex_stream **streams;
struct pattern_attribute *ref_pat_attr;
struct log_handle *logger;
@@ -163,6 +166,16 @@ void hs_lit_engine_free(void *hs_lit_engine)
hs_lit_inst->hs_db = NULL;
}
+ if (hs_lit_inst->blooms != NULL) {
+ for (i = 0; i < hs_lit_inst->n_thread; i++) {
+ if (hs_lit_inst->blooms[i] != NULL) {
+ bloom_free(hs_lit_inst->blooms[i]);
+ FREE(hs_lit_inst->blooms[i]);
+ }
+ }
+ FREE(hs_lit_inst->blooms);
+ }
+
if (hs_lit_inst->hs_scratches != NULL) {
for (i = 0; i < hs_lit_inst->n_thread; i++) {
if (hs_lit_inst->hs_scratches[i] != NULL) {
@@ -197,6 +210,12 @@ void *hs_lit_engine_new(struct expr_rule *rules, size_t n_rule,
hs_lit_inst->hs_db = (hs_database_t *)hs_lit_db;
hs_lit_inst->logger = logger;
hs_lit_inst->ref_pat_attr = pat_attr;
+ hs_lit_inst->blooms = ALLOC(struct bloom *, n_thread);
+ for (size_t i = 0; i < n_thread; i++) {
+ hs_lit_inst->blooms[i] = ALLOC(struct bloom, 1);
+ bloom_init2(hs_lit_inst->blooms[i], 1024, 0.001);
+ }
+
hs_lit_inst->hs_scratches = ALLOC(hs_scratch_t *, n_thread);
int ret = hs_alloc_scratches((hs_database_t *)hs_lit_db, hs_lit_inst->hs_scratches,
n_thread, logger);
@@ -228,6 +247,23 @@ static int matched_event_cb(unsigned int id, unsigned long long from,
unsigned long long pattern_id = id;
struct matched_pattern *matched_pat = (struct matched_pattern *)ctx;
+ unsigned long long *tmp_pat_id = NULL;
+ if (utarray_len(matched_pat->pattern_ids) < (MAX_HIT_PATTERN_NUM / 10)) {
+ for (size_t i = 0; i < utarray_len(matched_pat->pattern_ids); i++) {
+ tmp_pat_id = (unsigned long long *)utarray_eltptr(matched_pat->pattern_ids, i);
+ if (*tmp_pat_id == pattern_id) {
+ return 0;
+ }
+ }
+ } else {
+ if (bloom_check(matched_pat->ref_bloom, (char *)&pattern_id,
+ sizeof(unsigned long long)) == 1) {
+ return 0;
+ }
+ bloom_add(matched_pat->ref_bloom, (char *)&pattern_id,
+ sizeof(unsigned long long));
+ }
+
if (utarray_len(matched_pat->pattern_ids) >= MAX_HIT_PATTERN_NUM) {
return 0;
}
@@ -302,6 +338,7 @@ void *hs_lit_stream_open(void *hs_lit_engine, int thread_id)
lit_stream->thread_id = thread_id;
lit_stream->ref_hs_rt = hs_lit_inst;
lit_stream->matched_pat = ALLOC(struct matched_pattern, 1);
+ lit_stream->matched_pat->ref_bloom = hs_lit_inst->blooms[thread_id];
lit_stream->matched_pat->ref_pat_attr = hs_lit_inst->ref_pat_attr;
utarray_new(lit_stream->matched_pat->pattern_ids, &ut_hs_pattern_id_icd);
utarray_reserve(lit_stream->matched_pat->pattern_ids, MAX_HIT_PATTERN_NUM);
@@ -343,6 +380,7 @@ void hs_lit_stream_close(void *hs_lit_stream)
/* stream->hs_rt point to hs_instance->hs_rt which will call free
same as hs_attr */
stream->ref_hs_rt = NULL;
+ stream->matched_pat->ref_bloom = NULL;
stream->matched_pat->ref_pat_attr = NULL;
if (stream->matched_pat->pattern_ids != NULL) {
@@ -399,6 +437,7 @@ static int gather_hit_pattern_id(struct matched_pattern *matched_pat,
*n_pattern_id = array_index;
utarray_clear(matched_pat->pattern_ids);
+ bloom_reset(matched_pat->ref_bloom);
return 0;
}
@@ -479,6 +518,16 @@ void hs_regex_engine_free(void *hs_regex_engine)
hs_regex_inst->hs_db = NULL;
}
+ if (hs_regex_inst->blooms != NULL) {
+ for (i = 0; i < hs_regex_inst->n_thread; i++) {
+ if (hs_regex_inst->blooms[i] != NULL) {
+ bloom_free(hs_regex_inst->blooms[i]);
+ FREE(hs_regex_inst->blooms[i]);
+ }
+ }
+ FREE(hs_regex_inst->blooms);
+ }
+
if (hs_regex_inst->hs_scratches != NULL) {
for (i = 0; i < hs_regex_inst->n_thread; i++) {
if (hs_regex_inst->hs_scratches[i] != NULL) {
@@ -513,8 +562,13 @@ void *hs_regex_engine_new(struct expr_rule *rules, size_t n_rule,
hs_regex_inst->hs_db = (hs_database_t *)hs_regex_db;
hs_regex_inst->ref_pat_attr = pat_attr;
hs_regex_inst->logger = logger;
- hs_regex_inst->hs_scratches = ALLOC(hs_scratch_t *, n_thread);
+ hs_regex_inst->blooms = ALLOC(struct bloom *, n_thread);
+ for (size_t i = 0; i < n_thread; i++) {
+ hs_regex_inst->blooms[i] = ALLOC(struct bloom, 1);
+ bloom_init2(hs_regex_inst->blooms[i], 1024, 0.001);
+ }
+ hs_regex_inst->hs_scratches = ALLOC(hs_scratch_t *, n_thread);
int ret = hs_alloc_scratches((hs_database_t *)hs_regex_db,
hs_regex_inst->hs_scratches,
n_thread, logger);
@@ -570,6 +624,7 @@ void hs_regex_stream_close(void *hs_regex_stream)
/* stream->hs_rt point to hs_instance->hs_rt which will call free
same as hs_attr */
stream->ref_hs_rt = NULL;
+ stream->matched_pat->ref_bloom = NULL;
stream->matched_pat->ref_pat_attr = NULL;
if (stream->matched_pat->pattern_ids != NULL) {
@@ -595,6 +650,7 @@ void *hs_regex_stream_open(void *hs_regex_engine, int thread_id)
regex_stream->thread_id = thread_id;
regex_stream->ref_hs_rt = hs_regex_inst;
regex_stream->matched_pat = ALLOC(struct matched_pattern, 1);
+ regex_stream->matched_pat->ref_bloom = hs_regex_inst->blooms[thread_id];
regex_stream->matched_pat->ref_pat_attr = hs_regex_inst->ref_pat_attr;
utarray_new(regex_stream->matched_pat->pattern_ids, &ut_hs_pattern_id_icd);
utarray_reserve(regex_stream->matched_pat->pattern_ids, MAX_HIT_PATTERN_NUM);
diff --git a/scanner/expr_matcher/adapter_rs/adapter_rs.cpp b/scanner/expr_matcher/adapter_rs/adapter_rs.cpp
index dbc6880..795c3e9 100644
--- a/scanner/expr_matcher/adapter_rs/adapter_rs.cpp
+++ b/scanner/expr_matcher/adapter_rs/adapter_rs.cpp
@@ -63,7 +63,8 @@ struct rs_regex_stream {
struct rs_lit_engine {
size_t n_thread;
rs_database_t *rs_db;
- struct rs_lit_stream **streams; /* per thread */
+ struct bloom **blooms;
+ struct rs_lit_stream **streams; /* per thread */
struct pattern_attribute *ref_pat_attr;
struct log_handle *logger;
};
@@ -72,7 +73,8 @@ struct rs_lit_engine {
struct rs_regex_engine {
size_t n_thread;
rs_database_t *rs_db;
- struct rs_regex_stream **streams; /* per thread */
+ struct bloom **blooms;
+ struct rs_regex_stream **streams; /* per thread */
struct pattern_attribute *ref_pat_attr;
struct log_handle *logger;
};
@@ -187,6 +189,23 @@ static int matched_event_cb(unsigned int id, int pos_offset, int from, int to,
unsigned long long pattern_id = id;
struct matched_pattern *matched_pat = (struct matched_pattern *)ctx;
+ unsigned long long *tmp_pat_id = NULL;
+ if (utarray_len(matched_pat->pattern_ids) < (MAX_HIT_PATTERN_NUM / 10)) {
+ for (size_t i = 0; i < utarray_len(matched_pat->pattern_ids); i++) {
+ tmp_pat_id = (unsigned long long *)utarray_eltptr(matched_pat->pattern_ids, i);
+ if (*tmp_pat_id == pattern_id) {
+ return 0;
+ }
+ }
+ } else {
+ if (bloom_check(matched_pat->ref_bloom, (char *)&pattern_id,
+ sizeof(unsigned long long)) == 1) {
+ return 0;
+ }
+ bloom_add(matched_pat->ref_bloom, (char *)&pattern_id,
+ sizeof(unsigned long long));
+ }
+
if (utarray_len(matched_pat->pattern_ids) >= MAX_HIT_PATTERN_NUM) {
return 0;
}
@@ -264,6 +283,7 @@ static int gather_hit_pattern_id(struct matched_pattern *matched_pat,
*n_pattern_id = array_index;
utarray_clear(matched_pat->pattern_ids);
+ bloom_reset(matched_pat->ref_bloom);
return 0;
}
@@ -281,6 +301,16 @@ void rs_lit_engine_free(void *rs_lit_engine)
rs_lit_inst->rs_db = NULL;
}
+ if (rs_lit_inst->blooms != NULL) {
+ for (size_t i = 0; i < rs_lit_inst->n_thread; i++) {
+ if (rs_lit_inst->blooms[i] != NULL) {
+ bloom_free(rs_lit_inst->blooms[i]);
+ FREE(rs_lit_inst->blooms[i]);
+ }
+ }
+ FREE(rs_lit_inst->blooms);
+ }
+
if (rs_lit_inst->streams != NULL) {
for (size_t i = 0; i < rs_lit_inst->n_thread; i++) {
if (rs_lit_inst->streams[i] != NULL) {
@@ -306,8 +336,14 @@ void *rs_lit_engine_new(struct expr_rule *rules, size_t n_rule,
rs_lit_inst->rs_db = (rs_database_t *)rs_lit_db;
rs_lit_inst->ref_pat_attr = pat_attr;
rs_lit_inst->logger = logger;
- rs_lit_inst->streams = ALLOC(struct rs_lit_stream *, n_thread);
+ rs_lit_inst->blooms = ALLOC(struct bloom *, n_thread);
+ for (size_t i = 0; i < n_thread; i++) {
+ rs_lit_inst->blooms[i] = ALLOC(struct bloom, 1);
+ bloom_init2(rs_lit_inst->blooms[i], 1024, 0.001);
+ }
+
+ rs_lit_inst->streams = ALLOC(struct rs_lit_stream *, n_thread);
for (size_t i = 0; i < n_thread; i++) {
rs_lit_inst->streams[i] = (struct rs_lit_stream *)rs_lit_stream_open(rs_lit_inst, i);
}
@@ -354,6 +390,7 @@ void *rs_lit_stream_open(void *rs_lit_engine, int thread_id)
lit_stream->thread_id = thread_id;
lit_stream->ref_rs_rt = rs_lit_inst;
lit_stream->matched_pat = ALLOC(struct matched_pattern, 1);
+ lit_stream->matched_pat->ref_bloom = rs_lit_inst->blooms[thread_id];
lit_stream->matched_pat->ref_pat_attr = rs_lit_inst->ref_pat_attr;
utarray_new(lit_stream->matched_pat->pattern_ids, &ut_rs_pattern_id_icd);
utarray_reserve(lit_stream->matched_pat->pattern_ids, MAX_HIT_PATTERN_NUM);
@@ -387,6 +424,7 @@ void rs_lit_stream_close(void *rs_lit_stream)
/* rs_stream->rs_rt point to rs_instance->rs_rt which will call free
same as rs_attr */
lit_stream->ref_rs_rt = NULL;
+ lit_stream->matched_pat->ref_bloom = NULL;
lit_stream->matched_pat->ref_pat_attr = NULL;
if (lit_stream->matched_pat->pattern_ids != NULL) {
@@ -434,6 +472,16 @@ void rs_regex_engine_free(void *rs_regex_engine)
rs_regex_inst->rs_db = NULL;
}
+ if (rs_regex_inst->blooms != NULL) {
+ for (size_t i = 0; i < rs_regex_inst->n_thread; i++) {
+ if (rs_regex_inst->blooms[i] != NULL) {
+ bloom_free(rs_regex_inst->blooms[i]);
+ FREE(rs_regex_inst->blooms[i]);
+ }
+ }
+ FREE(rs_regex_inst->blooms);
+ }
+
if (rs_regex_inst->streams != NULL) {
for (size_t i = 0; i < rs_regex_inst->n_thread; i++) {
if (rs_regex_inst->streams[i] != NULL) {
@@ -459,8 +507,14 @@ void *rs_regex_engine_new(struct expr_rule *rules, size_t n_rule,
rs_regex_inst->rs_db = (rs_database_t *)rs_regex_db;
rs_regex_inst->ref_pat_attr = pat_attr;
rs_regex_inst->logger = logger;
+
+ rs_regex_inst->blooms = ALLOC(struct bloom *, n_thread);
+ for (size_t i = 0; i < n_thread; i++) {
+ rs_regex_inst->blooms[i] = ALLOC(struct bloom, 1);
+ bloom_init2(rs_regex_inst->blooms[i], 1024, 0.001);
+ }
+
rs_regex_inst->streams = ALLOC(struct rs_regex_stream *, n_thread);
-
for (size_t i = 0; i < n_thread; i++) {
rs_regex_inst->streams[i] = (struct rs_regex_stream *)rs_regex_stream_open(rs_regex_inst, i);
}
@@ -507,6 +561,7 @@ void *rs_regex_stream_open(void *rs_regex_engine, int thread_id)
regex_stream->thread_id = thread_id;
regex_stream->ref_rs_rt = rs_regex_inst;
regex_stream->matched_pat = ALLOC(struct matched_pattern, 1);
+ regex_stream->matched_pat->ref_bloom = rs_regex_inst->blooms[thread_id];
regex_stream->matched_pat->ref_pat_attr = rs_regex_inst->ref_pat_attr;
utarray_new(regex_stream->matched_pat->pattern_ids, &ut_rs_pattern_id_icd);
utarray_reserve(regex_stream->matched_pat->pattern_ids, MAX_HIT_PATTERN_NUM);
@@ -540,6 +595,7 @@ void rs_regex_stream_close(void *rs_regex_stream)
/* rs_stream->rs_rt point to rs_instance->rs_rt which will call free
same as rs_attr */
regex_stream->ref_rs_rt = NULL;
+ regex_stream->matched_pat->ref_bloom = NULL;
regex_stream->matched_pat->ref_pat_attr = NULL;
if (regex_stream->matched_pat->pattern_ids != NULL) {
diff --git a/scanner/expr_matcher/expr_matcher.cpp b/scanner/expr_matcher/expr_matcher.cpp
index 16ec4ee..3a8f9e9 100644
--- a/scanner/expr_matcher/expr_matcher.cpp
+++ b/scanner/expr_matcher/expr_matcher.cpp
@@ -13,6 +13,7 @@
#include <sys/syscall.h>
#include "log/log.h"
+#include "bloom/bloom.h"
#include "maat_utils.h"
#include "../bool_matcher/bool_matcher.h"
#include "expr_matcher_inc.h"
@@ -409,7 +410,7 @@ static int expr_matcher_bool_matcher_match(struct bool_matcher *bm, struct bool_
unsigned long long unique_pat_ids[n_hit_pattern];
size_t n_unique_pat_id = 0;
- qsort(hit_pattern_ids, n_hit_pattern, sizeof(unsigned long long *), compare_pattern_id);
+ qsort(hit_pattern_ids, n_hit_pattern, sizeof(unsigned long long), compare_pattern_id);
for (size_t i = 0; i < n_hit_pattern; i++) {
tmp_pat_id = hit_pattern_ids[i];
diff --git a/scanner/expr_matcher/expr_matcher_inc.h b/scanner/expr_matcher/expr_matcher_inc.h
index 57782ed..c575508 100644
--- a/scanner/expr_matcher/expr_matcher_inc.h
+++ b/scanner/expr_matcher/expr_matcher_inc.h
@@ -18,6 +18,7 @@ extern "C"
#include <stddef.h>
#include "uthash/utarray.h"
+#include "bloom/bloom.h"
#include "expr_matcher.h"
#define MAX_HIT_PATTERN_NUM 1024
@@ -36,6 +37,7 @@ struct pattern_attribute {
struct matched_pattern {
UT_array *pattern_ids;
+ struct bloom *ref_bloom;
struct pattern_attribute *ref_pat_attr;
size_t scan_data_len;
};