summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorliuwentan <[email protected]>2023-11-24 11:05:52 +0800
committerliuwentan <[email protected]>2023-11-24 11:05:52 +0800
commit179c983b124fd73c18586ccb4816dd222a90339e (patch)
treeb77afafe9414994d03048f03089efb08b31bc9f9
parenta0cd830eaa1162fc35d71690319525fe8d20f980 (diff)
[FEATURE]support switch expr engine automatically
-rw-r--r--include/maat.h3
-rw-r--r--scanner/expr_matcher/adapter_hs/adapter_hs.cpp1037
-rw-r--r--scanner/expr_matcher/adapter_hs/adapter_hs.h74
-rw-r--r--scanner/expr_matcher/adapter_rs/adapter_rs.cpp763
-rw-r--r--scanner/expr_matcher/adapter_rs/adapter_rs.h90
-rw-r--r--scanner/expr_matcher/expr_matcher.cpp549
-rw-r--r--scanner/expr_matcher/expr_matcher.h37
-rw-r--r--scanner/expr_matcher/expr_matcher_inc.h47
-rw-r--r--src/maat_api.c6
-rw-r--r--src/maat_expr.c64
-rw-r--r--src/maat_table.c11
-rw-r--r--test/expr_matcher_gtest.cpp2
-rw-r--r--test/maat_framework_gtest.cpp2
13 files changed, 1476 insertions, 1209 deletions
diff --git a/include/maat.h b/include/maat.h
index 7af3bf1..8beaf67 100644
--- a/include/maat.h
+++ b/include/maat.h
@@ -58,7 +58,8 @@ enum maat_update_type {
enum maat_expr_engine {
MAAT_EXPR_ENGINE_HS = 0, //hyperscan(default engine)
- MAAT_EXPR_ENGINE_RS //rulescan
+ MAAT_EXPR_ENGINE_RS, //rulescan
+ MAAT_EXPR_ENGINE_AUTO
};
struct ip_addr {
diff --git a/scanner/expr_matcher/adapter_hs/adapter_hs.cpp b/scanner/expr_matcher/adapter_hs/adapter_hs.cpp
index c5ebfa9..fc0a2b5 100644
--- a/scanner/expr_matcher/adapter_hs/adapter_hs.cpp
+++ b/scanner/expr_matcher/adapter_hs/adapter_hs.cpp
@@ -17,13 +17,10 @@
#include <sys/syscall.h>
#include "adapter_hs.h"
-#include "uthash/utarray.h"
#include "uthash/uthash.h"
#include "maat_utils.h"
#include "../../bool_matcher/bool_matcher.h"
-#define MAX_HIT_PATTERN_NUM 1024
-
pid_t hs_gettid()
{
return syscall(SYS_gettid);
@@ -39,7 +36,7 @@ static const char *hs_module_name_str(const char *name)
#define MODULE_ADAPTER_HS hs_module_name_str("maat.adapter_hs")
-struct adpt_hs_compile_data {
+struct hs_compile_data {
enum expr_pattern_type pat_type;
unsigned int *ids;
unsigned int *flags;
@@ -48,61 +45,44 @@ struct adpt_hs_compile_data {
unsigned int n_patterns;
};
-struct adapter_hs_scratch {
- hs_scratch_t **literal_scratches;
- hs_scratch_t **regex_scratches;
- struct bool_expr_match **bool_match_buffs;
-};
-
-struct adapter_hs_stream {
+struct hs_lit_stream {
int thread_id;
- hs_stream_t *literal_stream;
- hs_stream_t *regex_stream;
- struct adapter_hs_runtime *ref_hs_rt;
+ hs_stream_t *hs_stream;
+ struct hs_lit_engine *ref_hs_rt;
struct matched_pattern *matched_pat;
struct log_handle *logger;
};
-/* adapter_hs runtime */
-struct adapter_hs_runtime {
- hs_database_t *literal_db;
- hs_database_t *regex_db;
-
- struct adapter_hs_scratch *scratch;
- struct adapter_hs_stream **streams;
- struct bool_matcher *bm;
-};
-
-/* adapter_hs instance */
-struct adapter_hs {
- size_t n_worker_thread;
- size_t n_expr;
- size_t n_patterns;
- struct adapter_hs_runtime *hs_rt;
- struct pattern_attribute *hs_attr;
+struct hs_regex_stream {
+ int thread_id;
+ hs_stream_t *hs_stream;
+ struct hs_regex_engine *ref_hs_rt;
+ struct matched_pattern *matched_pat;
struct log_handle *logger;
};
-struct pattern_offset {
- long long start;
- long long end;
-};
-
-struct pattern_attribute {
- long long pattern_id;
- enum expr_match_mode match_mode;
- struct pattern_offset offset;
+/* hs literal runtime */
+struct hs_lit_engine {
+ size_t n_thread;
+ hs_database_t *hs_db;
+ hs_scratch_t **hs_scratches;
+ struct hs_lit_stream **streams;
+ struct pattern_attribute *ref_pat_attr;
+ struct log_handle *logger;
};
-struct matched_pattern {
- UT_array *pattern_ids;
- size_t n_patterns;
- struct pattern_attribute *ref_hs_attr;
- size_t scan_data_len;
+/* hs regex runtime */
+struct hs_regex_engine {
+ size_t n_thread;
+ hs_database_t *hs_db;
+ hs_scratch_t **hs_scratches;
+ struct hs_regex_stream **streams;
+ struct pattern_attribute *ref_pat_attr;
+ struct log_handle *logger;
};
-static int _hs_alloc_scratch(hs_database_t *db, hs_scratch_t **scratches,
- size_t n_worker_thread, struct log_handle *logger)
+static int hs_alloc_scratches(hs_database_t *db, hs_scratch_t **scratches,
+ size_t n_worker_thread, struct log_handle *logger)
{
size_t scratch_size = 0;
@@ -134,205 +114,6 @@ static int _hs_alloc_scratch(hs_database_t *db, hs_scratch_t **scratches,
return 0;
}
-static int adpt_hs_alloc_scratch(struct adapter_hs_runtime *hs_rt,
- size_t n_worker_thread,
- enum expr_pattern_type pattern_type,
- struct log_handle *logger)
-{
- int ret = 0;
-
- if (pattern_type == EXPR_PATTERN_TYPE_STR) {
- hs_rt->scratch->literal_scratches = ALLOC(hs_scratch_t *, n_worker_thread);
- ret = _hs_alloc_scratch(hs_rt->literal_db, hs_rt->scratch->literal_scratches,
- n_worker_thread, logger);
- if (ret < 0) {
- FREE(hs_rt->scratch->literal_scratches);
- return -1;
- }
- } else {
- hs_rt->scratch->regex_scratches = ALLOC(hs_scratch_t *, n_worker_thread);
- ret = _hs_alloc_scratch(hs_rt->regex_db, hs_rt->scratch->regex_scratches,
- n_worker_thread, logger);
- if (ret < 0) {
- FREE(hs_rt->scratch->regex_scratches);
- return -1;
- }
- }
-
- return 0;
-}
-
-/**
- * @brief build hs block database for literal string and regex expression respectively
- *
- * @retval 0(success) -1(failed)
-*/
-static int adpt_hs_build_database(struct adapter_hs_runtime *hs_rt,
- struct adpt_hs_compile_data *literal_cd,
- struct adpt_hs_compile_data *regex_cd,
- struct log_handle *logger)
-{
- hs_error_t err;
- hs_compile_error_t *compile_err = NULL;
-
- if (NULL == hs_rt || (NULL == literal_cd && NULL == regex_cd)) {
- return -1;
- }
-
- if (literal_cd != NULL) {
- err = hs_compile_lit_multi((const char *const *)literal_cd->patterns,
- literal_cd->flags,literal_cd->ids,
- literal_cd->pattern_lens, literal_cd->n_patterns,
- HS_MODE_STREAM | HS_MODE_SOM_HORIZON_SMALL, NULL,
- &hs_rt->literal_db, &compile_err);
- if (err != HS_SUCCESS) {
- if (compile_err) {
- log_fatal(logger, MODULE_ADAPTER_HS, "[%s:%d] compile error: %s",
- __FUNCTION__, __LINE__, compile_err->message);
- }
-
- hs_free_compile_error(compile_err);
- return -1;
- }
- }
-
- if (regex_cd != NULL) {
- err = hs_compile_multi((const char *const *)regex_cd->patterns,
- regex_cd->flags, regex_cd->ids, regex_cd->n_patterns,
- HS_MODE_STREAM,
- NULL, &hs_rt->regex_db, &compile_err);
- if (err != HS_SUCCESS) {
- if (compile_err) {
- log_fatal(logger, MODULE_ADAPTER_HS, "[%s:%d] compile error: %s",
- __FUNCTION__, __LINE__, compile_err->message);
- }
- hs_free_compile_error(compile_err);
- return -1;
- }
- }
-
- return 0;
-}
-
-static struct adpt_hs_compile_data *
-adpt_hs_compile_data_new(enum expr_pattern_type pat_type, size_t n_patterns)
-{
- struct adpt_hs_compile_data *hs_cd = ALLOC(struct adpt_hs_compile_data, 1);
- hs_cd->pat_type = pat_type;
- hs_cd->patterns = ALLOC(char *, n_patterns);
- hs_cd->pattern_lens = ALLOC(size_t, n_patterns);
- hs_cd->n_patterns = n_patterns;
- hs_cd->ids = ALLOC(unsigned int, n_patterns);
- hs_cd->flags = ALLOC(unsigned int, n_patterns);
-
- return hs_cd;
-}
-
-static void adpt_hs_compile_data_free(struct adpt_hs_compile_data *hs_cd)
-{
- if (NULL == hs_cd) {
- return;
- }
-
- if (hs_cd->patterns != NULL) {
- for (size_t i = 0; i < hs_cd->n_patterns; i++) {
- FREE(hs_cd->patterns[i]);
- }
-
- FREE(hs_cd->patterns);
- }
-
- if (hs_cd->pattern_lens != NULL) {
- FREE(hs_cd->pattern_lens);
- }
-
- if (hs_cd->ids != NULL) {
- FREE(hs_cd->ids);
- }
-
- if (hs_cd->flags != NULL) {
- FREE(hs_cd->flags);
- }
-
- FREE(hs_cd);
-}
-
-static void populate_compile_data(struct adpt_hs_compile_data *compile_data,
- int index, int pattern_id, char *pat,
- size_t pat_len, int case_sensitive)
-{
- compile_data->ids[index] = pattern_id;
-
- /* set flags */
- if (compile_data->pat_type == EXPR_PATTERN_TYPE_STR) {
- compile_data->flags[index] |= HS_FLAG_SOM_LEFTMOST;
- }
-
- if (case_sensitive == EXPR_CASE_INSENSITIVE) {
- compile_data->flags[index] |= HS_FLAG_CASELESS;
- }
-
- compile_data->pattern_lens[index] = pat_len;
- compile_data->patterns[index] = ALLOC(char, pat_len + 1);
- memcpy(compile_data->patterns[index], pat, pat_len);
-}
-
-static struct bool_expr *bool_exprs_new(struct expr_rule *rules, size_t n_rule,
- struct pattern_attribute *pattern_attr,
- struct adpt_hs_compile_data *literal_cd,
- struct adpt_hs_compile_data *regex_cd,
- size_t *n_pattern)
-{
- uint32_t pattern_index = 0;
- uint32_t literal_index = 0;
- uint32_t regex_index = 0;
-
- struct bool_expr *bool_exprs = ALLOC(struct bool_expr, n_rule);
- if (NULL == bool_exprs) {
- return NULL;
- }
-
- /* populate adpt_hs_compile_data and bool_expr */
- for (size_t i = 0; i < n_rule; i++) {
-
- for (size_t j = 0; j < rules[i].n_patterns; j++) {
- pattern_attr[pattern_index].pattern_id = pattern_index;
- pattern_attr[pattern_index].match_mode = rules[i].patterns[j].match_mode;
-
- if (pattern_attr[pattern_index].match_mode == EXPR_MATCH_MODE_SUB ||
- pattern_attr[pattern_index].match_mode == EXPR_MATCH_MODE_EXACTLY) {
- pattern_attr[pattern_index].offset.start = rules[i].patterns[j].start_offset;
- pattern_attr[pattern_index].offset.end = rules[i].patterns[j].end_offset;
- }
-
- /* literal pattern */
- if (rules[i].patterns[j].type == EXPR_PATTERN_TYPE_STR) {
- populate_compile_data(literal_cd, literal_index, pattern_index,
- rules[i].patterns[j].pat, rules[i].patterns[j].pat_len,
- rules[i].patterns[j].case_sensitive);
- literal_index++;
- } else {
- /* regex pattern */
- populate_compile_data(regex_cd, regex_index, pattern_index,
- rules[i].patterns[j].pat, rules[i].patterns[j].pat_len,
- rules[i].patterns[j].case_sensitive);
- regex_index++;
- }
-
- bool_exprs[i].items[j].item_id = pattern_index++;
- bool_exprs[i].items[j].not_flag = 0;
- }
-
- bool_exprs[i].expr_id = rules[i].expr_id;
- bool_exprs[i].item_num = rules[i].n_patterns;
- bool_exprs[i].user_tag = rules[i].tag;
- }
-
- *n_pattern = pattern_index;
-
- return bool_exprs;
-}
-
static int verify_regex_expression(const char *regex_str, struct log_handle *logger)
{
hs_expr_info_t *info = NULL;
@@ -359,7 +140,7 @@ static int verify_regex_expression(const char *regex_str, struct log_handle *log
return 1;
}
-int adapter_hs_verify_regex_expression(const char *regex_expr, struct log_handle *logger)
+int hs_verify_regex_expression(const char *regex_expr, struct log_handle *logger)
{
if (NULL == regex_expr) {
return 0;
@@ -368,204 +149,72 @@ int adapter_hs_verify_regex_expression(const char *regex_expr, struct log_handle
return verify_regex_expression(regex_expr, logger);
}
-void *adapter_hs_new(struct expr_rule *rules, size_t n_rule,
- size_t n_literal_pattern, size_t n_regex_pattern,
- size_t n_worker_thread, struct log_handle *logger)
+void hs_lit_engine_free(void *hs_lit_engine)
{
- /* get the sum of pattern */
- size_t i = 0;
- struct adpt_hs_compile_data *literal_cd = NULL;
- struct adpt_hs_compile_data *regex_cd = NULL;
-
- if (n_literal_pattern > 0) {
- literal_cd = adpt_hs_compile_data_new(EXPR_PATTERN_TYPE_STR, n_literal_pattern);
- }
-
- if (n_regex_pattern > 0) {
- regex_cd = adpt_hs_compile_data_new(EXPR_PATTERN_TYPE_REG, n_regex_pattern);
- }
-
- size_t pattern_cnt = n_literal_pattern + n_regex_pattern;
- struct adapter_hs *hs_inst = ALLOC(struct adapter_hs, 1);
- hs_inst->hs_attr = ALLOC(struct pattern_attribute, pattern_cnt);
- hs_inst->logger = logger;
- hs_inst->n_worker_thread = n_worker_thread;
- hs_inst->n_expr = n_rule;
-
- struct bool_expr *bool_exprs = bool_exprs_new(rules, n_rule, hs_inst->hs_attr,
- literal_cd, regex_cd, &pattern_cnt);
- if (NULL == bool_exprs) {
- return NULL;
- }
- hs_inst->n_patterns = pattern_cnt;
-
- /* create bool matcher */
- size_t mem_size = 0;
- int hs_ret = 0;
-
- hs_inst->hs_rt = ALLOC(struct adapter_hs_runtime, 1);
-
- //hs_rt->bm
- hs_inst->hs_rt->bm = bool_matcher_new(bool_exprs, n_rule, &mem_size);
- if (hs_inst->hs_rt->bm != NULL) {
- log_info(logger, MODULE_ADAPTER_HS,
- "Adapter_hs module: build bool matcher of %zu expressions"
- " with %zu bytes memory", n_rule, mem_size);
- } else {
- log_fatal(logger, MODULE_ADAPTER_HS,
- "[%s:%d] Adapter_hs module: build bool matcher failed",
- __FUNCTION__, __LINE__);
-
- hs_ret = -1;
- }
- FREE(bool_exprs);
-
- /* build hs database hs_rt->literal_db & hs_rt->regex_db */
- int ret = adpt_hs_build_database(hs_inst->hs_rt, literal_cd, regex_cd, logger);
- if (ret < 0) {
- hs_ret = -1;
- }
-
- if (literal_cd != NULL) {
- adpt_hs_compile_data_free(literal_cd);
- literal_cd = NULL;
- }
-
- if (regex_cd != NULL) {
- adpt_hs_compile_data_free(regex_cd);
- regex_cd = NULL;
+ if (NULL == hs_lit_engine) {
+ return;
}
- if (hs_ret < 0) {
- goto error;
- }
+ struct hs_lit_engine *hs_lit_inst = (struct hs_lit_engine *)hs_lit_engine;
+ size_t i = 0;
- /* alloc scratch */
- hs_inst->hs_rt->scratch = ALLOC(struct adapter_hs_scratch, 1);
- hs_inst->hs_rt->scratch->bool_match_buffs = ALLOC(struct bool_expr_match *,
- n_worker_thread);
- for (i = 0; i < n_worker_thread; i++) {
- hs_inst->hs_rt->scratch->bool_match_buffs[i] = ALLOC(struct bool_expr_match,
- MAX_HIT_EXPR_NUM);
+ if (hs_lit_inst->hs_db != NULL) {
+ hs_free_database(hs_lit_inst->hs_db);
+ hs_lit_inst->hs_db = NULL;
}
- /* literal and regex scratch can't reuse */
- if (n_literal_pattern > 0) {
- ret = adpt_hs_alloc_scratch(hs_inst->hs_rt, n_worker_thread,
- EXPR_PATTERN_TYPE_STR, logger);
- if (ret < 0) {
- goto error;
+ if (hs_lit_inst->hs_scratches != NULL) {
+ for (i = 0; i < hs_lit_inst->n_thread; i++) {
+ if (hs_lit_inst->hs_scratches[i] != NULL) {
+ hs_free_scratch(hs_lit_inst->hs_scratches[i]);
+ hs_lit_inst->hs_scratches[i] = NULL;
+ }
}
+ FREE(hs_lit_inst->hs_scratches);
}
- if (n_regex_pattern > 0) {
- ret = adpt_hs_alloc_scratch(hs_inst->hs_rt, n_worker_thread,
- EXPR_PATTERN_TYPE_REG, logger);
- if (ret < 0) {
- goto error;
+ if (hs_lit_inst->streams != NULL) {
+ for (i = 0; i < hs_lit_inst->n_thread; i++) {
+ if (hs_lit_inst->streams[i] != NULL) {
+ hs_lit_stream_close(hs_lit_inst->streams[i]);
+ hs_lit_inst->streams[i] = NULL;
+ }
}
+ FREE(hs_lit_inst->streams);
}
- hs_inst->hs_rt->streams = ALLOC(struct adapter_hs_stream *, n_worker_thread);
- for (i = 0; i < n_worker_thread; i++) {
- hs_inst->hs_rt->streams[i] = (struct adapter_hs_stream *)adapter_hs_stream_open(hs_inst, i);
- }
-
- return hs_inst;
-error:
- adapter_hs_free(hs_inst);
- return NULL;
+ FREE(hs_lit_inst);
}
-void adapter_hs_free(void *hs_instance)
+void *hs_lit_engine_new(struct expr_rule *rules, size_t n_rule,
+ struct pattern_attribute *pat_attr,
+ void *hs_lit_db, size_t n_thread,
+ struct log_handle *logger)
{
- if (NULL == hs_instance) {
- return;
- }
-
- struct adapter_hs *hs_inst = (struct adapter_hs *)hs_instance;
- size_t i = 0;
-
- if (hs_inst->hs_rt != NULL) {
- if (hs_inst->hs_rt->literal_db != NULL) {
- hs_free_database(hs_inst->hs_rt->literal_db);
- hs_inst->hs_rt->literal_db = NULL;
- }
-
- if (hs_inst->hs_rt->regex_db != NULL) {
- hs_free_database(hs_inst->hs_rt->regex_db);
- hs_inst->hs_rt->regex_db = NULL;
- }
-
- if (hs_inst->hs_rt->scratch != NULL) {
- if (hs_inst->hs_rt->scratch->literal_scratches != NULL) {
- for (i = 0; i < hs_inst->n_worker_thread; i++) {
- if (hs_inst->hs_rt->scratch->literal_scratches[i] != NULL) {
- hs_free_scratch(hs_inst->hs_rt->scratch->literal_scratches[i]);
- hs_inst->hs_rt->scratch->literal_scratches[i] = NULL;
- }
- }
- FREE(hs_inst->hs_rt->scratch->literal_scratches);
- }
-
- if (hs_inst->hs_rt->scratch->regex_scratches != NULL) {
- for (i = 0; i < hs_inst->n_worker_thread; i++) {
- if (hs_inst->hs_rt->scratch->regex_scratches[i] != NULL) {
- hs_free_scratch(hs_inst->hs_rt->scratch->regex_scratches[i]);
- hs_inst->hs_rt->scratch->regex_scratches[i] = NULL;
- }
- }
- FREE(hs_inst->hs_rt->scratch->regex_scratches);
- }
-
- if (hs_inst->hs_rt->scratch->bool_match_buffs != NULL) {
- for (i = 0; i < hs_inst->n_worker_thread; i++) {
- if (hs_inst->hs_rt->scratch->bool_match_buffs[i] != NULL) {
- FREE(hs_inst->hs_rt->scratch->bool_match_buffs[i]);
- }
- }
-
- FREE(hs_inst->hs_rt->scratch->bool_match_buffs);
- }
-
- FREE(hs_inst->hs_rt->scratch);
- }
-
- if (hs_inst->hs_rt->bm != NULL) {
- bool_matcher_free(hs_inst->hs_rt->bm);
- hs_inst->hs_rt->bm = NULL;
- }
-
- if (hs_inst->hs_rt->streams != NULL) {
- for (i = 0; i < hs_inst->n_worker_thread; i++) {
- if (hs_inst->hs_rt->streams[i] != NULL) {
- adapter_hs_stream_close(hs_inst->hs_rt->streams[i]);
- hs_inst->hs_rt->streams[i] = NULL;
- }
- }
- FREE(hs_inst->hs_rt->streams);
- }
-
- FREE(hs_inst->hs_rt);
+ struct hs_lit_engine *hs_lit_inst = ALLOC(struct hs_lit_engine, 1);
+
+ hs_lit_inst->n_thread = n_thread;
+ hs_lit_inst->hs_db = (hs_database_t *)hs_lit_db;
+ hs_lit_inst->logger = logger;
+ hs_lit_inst->ref_pat_attr = pat_attr;
+ hs_lit_inst->hs_scratches = ALLOC(hs_scratch_t *, n_thread);
+ int ret = hs_alloc_scratches((hs_database_t *)hs_lit_db, hs_lit_inst->hs_scratches,
+ n_thread, logger);
+ if (ret < 0) {
+ log_fatal(logger, MODULE_ADAPTER_HS,
+ "[%s:%d]alloc scratches for hs lit runtime failed.",
+ __FUNCTION__, __LINE__);
+ FREE(hs_lit_inst->hs_scratches);
+ FREE(hs_lit_inst);
+ return NULL;
}
- if (hs_inst->hs_attr != NULL) {
- FREE(hs_inst->hs_attr);
+ hs_lit_inst->streams = ALLOC(struct hs_lit_stream *, n_thread);
+ for (size_t i = 0; i < n_thread; i++) {
+ hs_lit_inst->streams[i] = (struct hs_lit_stream *)hs_lit_stream_open(hs_lit_inst, i);
}
- FREE(hs_inst);
-}
-
-static inline int compare_pattern_id(const void *a, const void *b)
-{
- long long ret = *(const unsigned long long *)a - *(const unsigned long long *)b;
- if (ret == 0) {
- return 0;
- } else if(ret < 0) {
- return -1;
- } else {
- return 1;
- }
+ return hs_lit_inst;
}
/**
@@ -579,16 +228,12 @@ static int matched_event_cb(unsigned int id, unsigned long long from,
unsigned long long pattern_id = id;
struct matched_pattern *matched_pat = (struct matched_pattern *)ctx;
- if (id > matched_pat->n_patterns || id < 0) {
- return 0;
- }
-
if (utarray_len(matched_pat->pattern_ids) >= MAX_HIT_PATTERN_NUM) {
return 0;
}
int ret = 0;
- struct pattern_attribute pat_attr = matched_pat->ref_hs_attr[id];
+ struct pattern_attribute pat_attr = matched_pat->ref_pat_attr[id];
switch (pat_attr.match_mode) {
case EXPR_MATCH_MODE_EXACTLY:
if (0 == from && matched_pat->scan_data_len == to) {
@@ -643,89 +288,62 @@ static int matched_event_cb(unsigned int id, unsigned long long from,
}
UT_icd ut_hs_pattern_id_icd = {sizeof(unsigned long long), NULL, NULL, NULL};
-void *adapter_hs_stream_open(void *hs_instance, int thread_id)
+void *hs_lit_stream_open(void *hs_lit_engine, int thread_id)
{
- if (NULL == hs_instance || thread_id < 0) {
+ if (NULL == hs_lit_engine || thread_id < 0) {
return NULL;
}
- struct adapter_hs *hs_inst = (struct adapter_hs *)hs_instance;
- struct adapter_hs_stream *hs_stream = ALLOC(struct adapter_hs_stream, 1);
+ struct hs_lit_engine *hs_lit_inst = (struct hs_lit_engine *)hs_lit_engine;
+ struct hs_lit_stream *lit_stream = ALLOC(struct hs_lit_stream, 1);
hs_error_t err;
- hs_stream->logger = hs_inst->logger;
- hs_stream->thread_id = thread_id;
- hs_stream->ref_hs_rt = hs_inst->hs_rt;
- hs_stream->matched_pat = ALLOC(struct matched_pattern, 1);
- hs_stream->matched_pat->ref_hs_attr = hs_inst->hs_attr;
- hs_stream->matched_pat->n_patterns = hs_inst->n_patterns;
- utarray_new(hs_stream->matched_pat->pattern_ids, &ut_hs_pattern_id_icd);
- utarray_reserve(hs_stream->matched_pat->pattern_ids, MAX_HIT_PATTERN_NUM);
-
- int err_count = 0;
- if (hs_inst->hs_rt->literal_db != NULL) {
- err = hs_open_stream(hs_inst->hs_rt->literal_db, 0,
- &hs_stream->literal_stream);
- if (err != HS_SUCCESS) {
- log_fatal(hs_inst->logger, MODULE_ADAPTER_HS,
- "hs_open_stream failed, hs err:%d", err);
- err_count++;
- }
- }
+ lit_stream->logger = hs_lit_inst->logger;
+ lit_stream->thread_id = thread_id;
+ lit_stream->ref_hs_rt = hs_lit_inst;
+ lit_stream->matched_pat = ALLOC(struct matched_pattern, 1);
+ lit_stream->matched_pat->ref_pat_attr = hs_lit_inst->ref_pat_attr;
+ utarray_new(lit_stream->matched_pat->pattern_ids, &ut_hs_pattern_id_icd);
+ utarray_reserve(lit_stream->matched_pat->pattern_ids, MAX_HIT_PATTERN_NUM);
- if (hs_inst->hs_rt->regex_db != NULL) {
- err = hs_open_stream(hs_inst->hs_rt->regex_db, 0,
- &hs_stream->regex_stream);
+ if (hs_lit_inst->hs_db != NULL) {
+ err = hs_open_stream(hs_lit_inst->hs_db, 0, &lit_stream->hs_stream);
if (err != HS_SUCCESS) {
- log_fatal(hs_inst->logger, MODULE_ADAPTER_HS,
+ log_fatal(hs_lit_inst->logger, MODULE_ADAPTER_HS,
"hs_open_stream failed, hs err:%d", err);
- err_count++;
+ goto error;
}
}
- if (err_count > 0) {
- goto error;
- }
-
- return hs_stream;
+ return lit_stream;
error:
- if (hs_stream->literal_stream != NULL) {
- hs_close_stream(hs_stream->literal_stream, NULL, NULL, NULL);
- hs_stream->literal_stream = NULL;
- }
-
- if (hs_stream->regex_stream != NULL) {
- hs_close_stream(hs_stream->regex_stream, NULL, NULL, NULL);
- hs_stream->regex_stream = NULL;
+ if (lit_stream->hs_stream != NULL) {
+ hs_close_stream(lit_stream->hs_stream, NULL, NULL, NULL);
+ lit_stream->hs_stream = NULL;
}
- FREE(hs_stream);
+ FREE(lit_stream);
return NULL;
}
-void adapter_hs_stream_close(void *hs_stream)
+void hs_lit_stream_close(void *hs_lit_stream)
{
- if (NULL == hs_stream) {
+ if (NULL == hs_lit_stream) {
return;
}
- struct adapter_hs_stream *stream = (struct adapter_hs_stream *)hs_stream;
+ struct hs_lit_stream *stream = (struct hs_lit_stream *)hs_lit_stream;
if (stream->ref_hs_rt != NULL) {
- if (stream->literal_stream != NULL) {
- hs_close_stream(stream->literal_stream, NULL, NULL, NULL);
- stream->literal_stream = NULL;
- }
-
- if (stream->regex_stream != NULL) {
- hs_close_stream(stream->regex_stream, NULL, NULL, NULL);
- stream->regex_stream = NULL;
+ if (stream->hs_stream != NULL) {
+ hs_close_stream(stream->hs_stream, NULL, NULL, NULL);
+ stream->hs_stream = NULL;
}
}
/* stream->hs_rt point to hs_instance->hs_rt which will call free
same as hs_attr */
stream->ref_hs_rt = NULL;
- stream->matched_pat->ref_hs_attr = NULL;
+ stream->matched_pat->ref_pat_attr = NULL;
if (stream->matched_pat->pattern_ids != NULL) {
utarray_free(stream->matched_pat->pattern_ids);
@@ -736,84 +354,273 @@ void adapter_hs_stream_close(void *hs_stream)
FREE(stream);
}
-static void adapter_hs_stream_reset(struct adapter_hs_stream *hs_stream)
+static void hs_lit_stream_reset(struct hs_lit_stream *hs_lit_stream)
{
- if (NULL == hs_stream) {
+ if (NULL == hs_lit_stream) {
return;
}
- struct adapter_hs_scratch *scratch = hs_stream->ref_hs_rt->scratch;
- if (hs_stream->literal_stream != NULL) {
- hs_reset_stream(hs_stream->literal_stream, 0,
- scratch->literal_scratches[hs_stream->thread_id],
- matched_event_cb, hs_stream->matched_pat);
+ hs_scratch_t **scratches = hs_lit_stream->ref_hs_rt->hs_scratches;
+ if (hs_lit_stream->hs_stream != NULL) {
+ hs_reset_stream(hs_lit_stream->hs_stream, 0,
+ scratches[hs_lit_stream->thread_id],
+ matched_event_cb, hs_lit_stream->matched_pat);
}
+}
- if (hs_stream->regex_stream != NULL) {
- hs_reset_stream(hs_stream->regex_stream, 0,
- scratch->regex_scratches[hs_stream->thread_id],
- matched_event_cb, hs_stream->matched_pat);
+static void hs_regex_stream_reset(struct hs_regex_stream *hs_regex_stream)
+{
+ if (NULL == hs_regex_stream) {
+ return;
}
- utarray_clear(hs_stream->matched_pat->pattern_ids);
+ hs_scratch_t **scratches = hs_regex_stream->ref_hs_rt->hs_scratches;
+ if (hs_regex_stream->hs_stream != NULL) {
+ hs_reset_stream(hs_regex_stream->hs_stream, 0,
+ scratches[hs_regex_stream->thread_id],
+ matched_event_cb, hs_regex_stream->matched_pat);
+ }
}
-int adapter_hs_scan_match(struct bool_matcher *bm, UT_array *pattern_ids,
- struct bool_expr_match *match_buff, size_t buff_size,
- struct expr_scan_result *results, size_t n_result,
- size_t *n_hit_result)
+static int gather_hit_pattern_id(struct matched_pattern *matched_pat,
+ unsigned long long *pattern_id_array,
+ size_t array_size, size_t *n_pattern_id)
{
- size_t n_pattern_id = utarray_len(pattern_ids);
- if (0 == n_pattern_id) {
- *n_hit_result = 0;
+ size_t pattern_id_cnt = utarray_len(matched_pat->pattern_ids);
+ if (0 == pattern_id_cnt) {
+ *n_pattern_id = 0;
return 0;
}
- utarray_sort(pattern_ids, compare_pattern_id);
+ size_t array_index = 0;
+ for (size_t i = 0; i < pattern_id_cnt && array_index < array_size; i++) {
+ pattern_id_array[array_index++] = *(unsigned long long *)utarray_eltptr(matched_pat->pattern_ids, i);
+ }
+
+ *n_pattern_id = array_index;
+ utarray_clear(matched_pat->pattern_ids);
+
+ return 0;
+}
+
+int hs_lit_stream_scan(void *hs_lit_stream, const char *data, size_t data_len,
+ unsigned long long *pattern_id_array, size_t array_size,
+ size_t *n_pattern_id)
+{
+ hs_error_t err;
+
+ if (NULL == hs_lit_stream || NULL == data || 0 == data_len ||
+ NULL == pattern_id_array || 0 == array_size || NULL == n_pattern_id) {
+ return -1;
+ }
+
+ /*
+ In streaming mode, a non-zero return from the user-specified event-handler
+ function has consequences for the rest of that stream's lifetime: when a
+ non-zero return occurs, it signals that no more of the stream should be
+ scanned. Consequently if the user makes a subsequent call to
+ `hs_scan_stream` on a stream whose processing was terminated in this way,
+ hs_scan_stream will return `HS_SCAN_TERMINATED`. This case has not been
+ demonstrated in pcapscan, as its callback always returns 0.
+ */
+ struct hs_lit_stream *lit_stream = (struct hs_lit_stream *)hs_lit_stream;
+ int thread_id = lit_stream->thread_id;
+ hs_scratch_t **scratches = lit_stream->ref_hs_rt->hs_scratches;
+ lit_stream->matched_pat->scan_data_len = data_len;
+
+ if (lit_stream->hs_stream != NULL) {
+ if (scratches != NULL) {
+ err = hs_scan_stream(lit_stream->hs_stream, data, data_len,
+ 0, scratches[thread_id], matched_event_cb,
+ lit_stream->matched_pat);
+ if (err != HS_SUCCESS) {
+ return -1;
+ }
+ } else {
+ log_fatal(lit_stream->logger, MODULE_ADAPTER_HS,
+ "literal scratches is null, thread_id:%d", thread_id);
+ return -1;
+ }
+ }
+
+ return gather_hit_pattern_id(lit_stream->matched_pat, pattern_id_array,
+ array_size, n_pattern_id);
+}
+
+int hs_lit_engine_scan(void *hs_lit_engine, int thread_id,
+ const char *data, size_t data_len,
+ unsigned long long *pattern_id_array,
+ size_t array_size, size_t *n_pattern_id)
+{
+ if (NULL == hs_lit_engine || NULL == data || 0 == data_len) {
+ return -1;
+ }
+
+ struct hs_lit_engine *hs_lit_inst = (struct hs_lit_engine *)hs_lit_engine;
+ struct hs_lit_stream *hs_lit_stream = hs_lit_inst->streams[thread_id];
+ assert(hs_lit_stream != NULL);
+
+ hs_lit_stream_reset(hs_lit_stream);
+ return hs_lit_stream_scan(hs_lit_stream, data, data_len, pattern_id_array,
+ array_size, n_pattern_id);
+}
+
+void hs_regex_engine_free(void *hs_regex_engine)
+{
+ if (NULL == hs_regex_engine) {
+ return;
+ }
+
+ struct hs_regex_engine *hs_regex_inst = (struct hs_regex_engine *)hs_regex_engine;
+ size_t i = 0;
+
+ if (hs_regex_inst->hs_db != NULL) {
+ hs_free_database(hs_regex_inst->hs_db);
+ hs_regex_inst->hs_db = NULL;
+ }
- unsigned long long prev_pattern_id = 0xFFFFFFFFFFFFFFFF;
- unsigned long long tmp_pattern_id = 0;
- size_t n_unique_pattern_id = 0;
- unsigned long long unique_pattern_ids[n_pattern_id];
+ if (hs_regex_inst->hs_scratches != NULL) {
+ for (i = 0; i < hs_regex_inst->n_thread; i++) {
+ if (hs_regex_inst->hs_scratches[i] != NULL) {
+ hs_free_scratch(hs_regex_inst->hs_scratches[i]);
+ hs_regex_inst->hs_scratches[i] = NULL;
+ }
+ }
+ FREE(hs_regex_inst->hs_scratches);
+ }
- for (size_t i = 0; i < n_pattern_id; i++) {
- tmp_pattern_id = *(unsigned long long *)utarray_eltptr(pattern_ids, i);
- if (tmp_pattern_id != prev_pattern_id) {
- unique_pattern_ids[n_unique_pattern_id++] = tmp_pattern_id;
- prev_pattern_id = tmp_pattern_id;
+ if (hs_regex_inst->streams != NULL) {
+ for (i = 0; i < hs_regex_inst->n_thread; i++) {
+ if (hs_regex_inst->streams[i] != NULL) {
+ hs_regex_stream_close(hs_regex_inst->streams[i]);
+ hs_regex_inst->streams[i] = NULL;
+ }
}
+ FREE(hs_regex_inst->streams);
}
- int bool_matcher_ret = bool_matcher_match(bm, unique_pattern_ids,
- n_unique_pattern_id,
- match_buff, buff_size);
- if (bool_matcher_ret < 0) {
- goto next;
+ FREE(hs_regex_inst);
+}
+
+void *hs_regex_engine_new(struct expr_rule *rules, size_t n_rule,
+ struct pattern_attribute *pat_attr,
+ void *hs_regex_db, size_t n_thread,
+ struct log_handle *logger)
+{
+ struct hs_regex_engine *hs_regex_inst = ALLOC(struct hs_regex_engine, 1);
+
+ hs_regex_inst->n_thread = n_thread;
+ hs_regex_inst->hs_db = (hs_database_t *)hs_regex_db;
+ hs_regex_inst->ref_pat_attr = pat_attr;
+ hs_regex_inst->logger = logger;
+ hs_regex_inst->hs_scratches = ALLOC(hs_scratch_t *, n_thread);
+
+ int ret = hs_alloc_scratches((hs_database_t *)hs_regex_db,
+ hs_regex_inst->hs_scratches,
+ n_thread, logger);
+ if (ret < 0) {
+ log_fatal(logger, MODULE_ADAPTER_HS,
+ "[%s:%d]alloc scratches for hs regex runtime failed.",
+ __FUNCTION__, __LINE__);
+ FREE(hs_regex_inst->hs_scratches);
+ FREE(hs_regex_inst);
+ return NULL;
+ }
+
+ hs_regex_inst->streams = ALLOC(struct hs_regex_stream *, n_thread);
+ for (size_t i = 0; i < n_thread; i++) {
+ hs_regex_inst->streams[i] = (struct hs_regex_stream *)hs_regex_stream_open(hs_regex_inst, i);
+ }
+
+ return hs_regex_inst;
+}
+
+int hs_regex_engine_scan(void *hs_regex_engine, int thread_id,
+ const char *data, size_t data_len,
+ unsigned long long *pattern_id_array,
+ size_t array_size, size_t *n_pattern_id)
+{
+ if (NULL == hs_regex_engine || NULL == data || 0 == data_len) {
+ return -1;
+ }
+
+ struct hs_regex_engine *hs_regex_inst = (struct hs_regex_engine *)hs_regex_engine;
+ struct hs_regex_stream *hs_regex_stream = hs_regex_inst->streams[thread_id];
+ assert(hs_regex_stream != NULL);
+
+ hs_regex_stream_reset(hs_regex_stream);
+ return hs_regex_stream_scan(hs_regex_stream, data, data_len, pattern_id_array,
+ array_size, n_pattern_id);
+}
+
+void hs_regex_stream_close(void *hs_regex_stream)
+{
+ if (NULL == hs_regex_stream) {
+ return;
+ }
+
+ struct hs_regex_stream *stream = (struct hs_regex_stream *)hs_regex_stream;
+ if (stream->ref_hs_rt != NULL) {
+ if (stream->hs_stream != NULL) {
+ hs_close_stream(stream->hs_stream, NULL, NULL, NULL);
+ stream->hs_stream = NULL;
+ }
+ }
+
+ /* stream->hs_rt point to hs_instance->hs_rt which will call free
+ same as hs_attr */
+ stream->ref_hs_rt = NULL;
+ stream->matched_pat->ref_pat_attr = NULL;
+
+ if (stream->matched_pat->pattern_ids != NULL) {
+ utarray_free(stream->matched_pat->pattern_ids);
+ stream->matched_pat->pattern_ids = NULL;
}
- if (bool_matcher_ret > (int)n_result) {
- bool_matcher_ret = n_result;
+ FREE(stream->matched_pat);
+ FREE(stream);
+}
+
+void *hs_regex_stream_open(void *hs_regex_engine, int thread_id)
+{
+ if (NULL == hs_regex_engine || thread_id < 0) {
+ return NULL;
}
+
+ struct hs_regex_engine *hs_regex_inst = (struct hs_regex_engine *)hs_regex_engine;
+ struct hs_regex_stream *regex_stream = ALLOC(struct hs_regex_stream, 1);
+ hs_error_t err;
+
+ regex_stream->logger = hs_regex_inst->logger;
+ regex_stream->thread_id = thread_id;
+ regex_stream->ref_hs_rt = hs_regex_inst;
+ regex_stream->matched_pat = ALLOC(struct matched_pattern, 1);
+ regex_stream->matched_pat->ref_pat_attr = hs_regex_inst->ref_pat_attr;
+ utarray_new(regex_stream->matched_pat->pattern_ids, &ut_hs_pattern_id_icd);
+ utarray_reserve(regex_stream->matched_pat->pattern_ids, MAX_HIT_PATTERN_NUM);
- for (int index = 0; index < bool_matcher_ret; index++) {
- results[index].rule_id = match_buff[index].expr_id;
- results[index].user_tag = match_buff[index].user_tag;
+ if (hs_regex_inst->hs_db != NULL) {
+ err = hs_open_stream(hs_regex_inst->hs_db, 0, &regex_stream->hs_stream);
+ if (err != HS_SUCCESS) {
+ log_fatal(hs_regex_inst->logger, MODULE_ADAPTER_HS,
+ "hs_open_stream failed, hs err:%d", err);
+ goto error;
+ }
}
- *n_hit_result = bool_matcher_ret;
-next:
- utarray_clear(pattern_ids);
- return bool_matcher_ret;
+ return regex_stream;
+error:
+ hs_regex_stream_close(regex_stream);
+ return NULL;
}
-int adapter_hs_scan_stream(void *hs_stream, const char *data, size_t data_len,
- struct expr_scan_result *results, size_t n_result,
- size_t *n_hit_result)
+int hs_regex_stream_scan(void *hs_regex_stream, const char *data, size_t data_len,
+ unsigned long long *pattern_id_array, size_t array_size,
+ size_t *n_pattern_id)
{
hs_error_t err;
- if (NULL == hs_stream || NULL == data || 0 == data_len ||
- NULL == results || 0 == n_result || NULL == n_hit_result) {
+ if (NULL == hs_regex_stream || NULL == data || 0 == data_len) {
return -1;
}
@@ -826,69 +633,145 @@ int adapter_hs_scan_stream(void *hs_stream, const char *data, size_t data_len,
hs_scan_stream will return `HS_SCAN_TERMINATED`. This case has not been
demonstrated in pcapscan, as its callback always returns 0.
*/
-
- int err_count = 0;
- struct adapter_hs_stream *stream = (struct adapter_hs_stream *)hs_stream;
- int thread_id = stream->thread_id;
- struct adapter_hs_scratch *scratch = stream->ref_hs_rt->scratch;
- stream->matched_pat->scan_data_len = data_len;
-
- int err_scratch_flag = 0;
- if (stream->literal_stream != NULL) {
- if (scratch->literal_scratches != NULL) {
- err = hs_scan_stream(stream->literal_stream, data, data_len,
- 0, scratch->literal_scratches[thread_id],
- matched_event_cb, stream->matched_pat);
+ struct hs_regex_stream *regex_stream = (struct hs_regex_stream *)hs_regex_stream;
+ int thread_id = regex_stream->thread_id;
+ hs_scratch_t **scratches = regex_stream->ref_hs_rt->hs_scratches;
+ regex_stream->matched_pat->scan_data_len = data_len;
+
+ if (regex_stream->hs_stream != NULL) {
+ if (scratches != NULL) {
+ err = hs_scan_stream(regex_stream->hs_stream, data, data_len,
+ 0, scratches[thread_id], matched_event_cb,
+ regex_stream->matched_pat);
if (err != HS_SUCCESS) {
- err_count++;
+ return -1;
}
} else {
- log_fatal(stream->logger, MODULE_ADAPTER_HS,
- "literal_scratches is null, thread_id:%d", thread_id);
- err_scratch_flag++;
+ log_fatal(regex_stream->logger, MODULE_ADAPTER_HS,
+ "literal scratches is null, thread_id:%d", thread_id);
+ return -1;
}
}
- if (stream->regex_stream != NULL) {
- if (scratch->regex_scratches != NULL) {
- err = hs_scan_stream(stream->regex_stream, data, data_len,
- 0, scratch->regex_scratches[thread_id],
- matched_event_cb, stream->matched_pat);
- if (err != HS_SUCCESS) {
- err_count++;
- }
- } else {
- log_fatal(stream->logger, MODULE_ADAPTER_HS,
- "regex_scratches is null, thread_id:%d", thread_id);
- err_scratch_flag++;
+ return gather_hit_pattern_id(regex_stream->matched_pat, pattern_id_array,
+ array_size, n_pattern_id);
+}
+
+void *hs_compile_data_new(enum expr_pattern_type pat_type, size_t n_patterns)
+{
+ struct hs_compile_data *hs_cd = ALLOC(struct hs_compile_data, 1);
+
+ hs_cd->pat_type = pat_type;
+ hs_cd->patterns = ALLOC(char *, n_patterns);
+ hs_cd->pattern_lens = ALLOC(size_t, n_patterns);
+ hs_cd->n_patterns = n_patterns;
+ hs_cd->ids = ALLOC(unsigned int, n_patterns);
+ hs_cd->flags = ALLOC(unsigned int, n_patterns);
+
+ return hs_cd;
+}
+
+void hs_compile_data_free(void *compile_data)
+{
+ if (NULL == compile_data) {
+ return;
+ }
+
+ struct hs_compile_data *hs_cd = (struct hs_compile_data *)compile_data;
+ if (hs_cd->patterns != NULL) {
+ for (size_t i = 0; i < hs_cd->n_patterns; i++) {
+ FREE(hs_cd->patterns[i]);
}
+
+ FREE(hs_cd->patterns);
}
- if (err_count == 2) {
- return -1;
+ if (hs_cd->pattern_lens != NULL) {
+ FREE(hs_cd->pattern_lens);
+ }
+
+ if (hs_cd->ids != NULL) {
+ FREE(hs_cd->ids);
+ }
+
+ if (hs_cd->flags != NULL) {
+ FREE(hs_cd->flags);
+ }
+
+ FREE(hs_cd);
+}
+
+void hs_populate_compile_data(void *compile_data, size_t index, int pattern_id,
+ char *pat, size_t pat_len, int case_sensitive)
+{
+ struct hs_compile_data *hs_cd = (struct hs_compile_data *)compile_data;
+
+ hs_cd->ids[index] = pattern_id;
+
+ /* set flags */
+ if (hs_cd->pat_type == EXPR_PATTERN_TYPE_STR) {
+ hs_cd->flags[index] |= HS_FLAG_SOM_LEFTMOST;
+ }
+
+ if (case_sensitive == EXPR_CASE_INSENSITIVE) {
+ hs_cd->flags[index] |= HS_FLAG_CASELESS;
}
- if (err_scratch_flag != 0) {
+ hs_cd->pattern_lens[index] = pat_len;
+ hs_cd->patterns[index] = ALLOC(char, pat_len + 1);
+ memcpy(hs_cd->patterns[index], pat, pat_len);
+}
+
+int hs_build_lit_db(void **hs_lit_db, void *compile_data, struct log_handle *logger)
+{
+ if (NULL == hs_lit_db || NULL == compile_data) {
return -1;
}
- return adapter_hs_scan_match(stream->ref_hs_rt->bm, stream->matched_pat->pattern_ids,
- scratch->bool_match_buffs[thread_id], MAX_HIT_EXPR_NUM,
- results, n_result, n_hit_result);
+ struct hs_compile_data *lit_cd = (struct hs_compile_data *)compile_data;
+ hs_compile_error_t *compile_err = NULL;
+
+ if (lit_cd != NULL) {
+ hs_error_t err = hs_compile_lit_multi((const char *const *)lit_cd->patterns,
+ lit_cd->flags,lit_cd->ids, lit_cd->pattern_lens,
+ lit_cd->n_patterns,
+ HS_MODE_STREAM | HS_MODE_SOM_HORIZON_SMALL,
+ NULL, (hs_database_t **)hs_lit_db, &compile_err);
+ if (err != HS_SUCCESS) {
+ if (compile_err) {
+ log_error(logger, MODULE_ADAPTER_HS, "[%s:%d] compile error: %s",
+ __FUNCTION__, __LINE__, compile_err->message);
+ }
+
+ hs_free_compile_error(compile_err);
+ return -1;
+ }
+ }
+
+ return 0;
}
-int adapter_hs_scan(void *hs_instance, int thread_id, const char *data, size_t data_len,
- struct expr_scan_result *results, size_t n_result, size_t *n_hit_result)
+int hs_build_regex_db(void **hs_regex_db, void *compile_data, struct log_handle *logger)
{
- if (NULL == hs_instance || NULL == data || (0 == data_len) ||
- NULL == results || 0 == n_result || NULL == n_hit_result) {
+ if (NULL == hs_regex_db || NULL == compile_data) {
return -1;
}
- struct adapter_hs *hs_inst = (struct adapter_hs *)hs_instance;
- struct adapter_hs_stream *hs_stream = hs_inst->hs_rt->streams[thread_id];
- assert(hs_stream != NULL);
+ struct hs_compile_data *regex_cd = (struct hs_compile_data *)compile_data;
+ hs_compile_error_t *compile_err = NULL;
- adapter_hs_stream_reset(hs_stream);
- return adapter_hs_scan_stream(hs_stream, data, data_len, results, n_result, n_hit_result);
+ hs_error_t err = hs_compile_multi((const char *const *)regex_cd->patterns,
+ regex_cd->flags, regex_cd->ids, regex_cd->n_patterns,
+ HS_MODE_STREAM, NULL, (hs_database_t **)hs_regex_db,
+ &compile_err);
+ if (err != HS_SUCCESS) {
+ if (compile_err) {
+ log_error(logger, MODULE_ADAPTER_HS, "[%s:%d] compile error: %s",
+ __FUNCTION__, __LINE__, compile_err->message);
+ }
+ hs_free_compile_error(compile_err);
+ return -1;
+ }
+
+ return 0;
} \ No newline at end of file
diff --git a/scanner/expr_matcher/adapter_hs/adapter_hs.h b/scanner/expr_matcher/adapter_hs/adapter_hs.h
index c9de7d2..aaeca91 100644
--- a/scanner/expr_matcher/adapter_hs/adapter_hs.h
+++ b/scanner/expr_matcher/adapter_hs/adapter_hs.h
@@ -21,52 +21,82 @@ extern "C"
#include "log/log.h"
#include "../expr_matcher.h"
+#include "../expr_matcher_inc.h"
+
+int hs_verify_regex_expression(const char *regex_expr, struct log_handle *logger);
-int adapter_hs_verify_regex_expression(const char *regex_expr, struct log_handle *logger);
/**
- * @brief new adapter_hs instance
+ * @brief new adapter_hs literal instance
*
* @param rules: logic AND expression's array
* @param n_rule: the number of logic AND expression's array
* @param nr_worker_threads: the number of scan threads which will call adapter_hs_scan()
*
- * @retval the pointer to adapter_hs instance
+ * @retval the pointer to adapter_hs literal instance
*/
-void *adapter_hs_new(struct expr_rule *rules, size_t n_rule,
- size_t n_literal_pattern, size_t n_regex_pattern,
- size_t n_worker_thread, struct log_handle *logger);
+void *hs_lit_engine_new(struct expr_rule *rules, size_t n_rule,
+ struct pattern_attribute *pat_attr,
+ void *hs_lit_db, size_t n_thread,
+ struct log_handle *logger);
+
+void *hs_regex_engine_new(struct expr_rule *rules, size_t n_rule,
+ struct pattern_attribute *pat_attr,
+ void *hs_regex_db, size_t n_thread,
+ struct log_handle *logger);
+
+void hs_lit_engine_free(void *hs_lit_engine);
+
+void hs_regex_engine_free(void *hs_regex_engine);
/**
* @brief scan input data to match logic AND expression, return all matched expr_id
*
- * @param instance: adapter_hs instance obtained by adapter_hs_new()
+ * @param hs_lit: adapter_hs literal instance obtained by adapter_hs_lit_new()
* @param thread_id: the thread_id of caller
* @param data: data to be scanned
* @param data_len: the length of data to be scanned
* @param results: the array of expr_id
* @param n_results: number of elements in array of expr_id
*/
-int adapter_hs_scan(void *hs_instance, int thread_id, const char *data, size_t data_len,
- struct expr_scan_result *results, size_t n_result, size_t *n_hit_result);
+int hs_lit_engine_scan(void *hs_lit_engine, int thread_id,
+ const char *data, size_t data_len,
+ unsigned long long *pattern_id_array,
+ size_t array_size, size_t *n_pattern_id);
-/**
- * @brief destroy adapter_hs instance
- *
- * @param instance: adapter_hs instance obtained by adapter_hs_new()
-*/
-void adapter_hs_free(void *instance);
+int hs_regex_engine_scan(void *hs_regex_engine, int thread_id,
+ const char *data, size_t data_len,
+ unsigned long long *pattern_id_array,
+ size_t array_size, size_t *n_pattern_id);
+
+void *hs_lit_stream_open(void *hs_lit_engine, int thread_id);
+
+void *hs_regex_stream_open(void *hs_regex_engine, int thread_id);
+
+void hs_lit_stream_close(void *hs_lit_stream);
+
+void hs_regex_stream_close(void *hs_regex_stream);
+
+int hs_lit_stream_scan(void *hs_lit_stream, const char *data, size_t data_len,
+ unsigned long long *pattern_id_array, size_t array_size,
+ size_t *n_pattern_id);
+
+int hs_regex_stream_scan(void *hs_regex_stream, const char *data, size_t data_len,
+ unsigned long long *pattern_id_array, size_t array_size,
+ size_t *n_pattern_id);
/**
- * @brief open adapter_hs stream after adapter_hs instance initialized for stream scan
- *
+ * @brief build database
*/
-void *adapter_hs_stream_open(void *hs_instance, int thread_id);
+void *hs_compile_data_new(enum expr_pattern_type pat_type, size_t n_patterns);
+
+void hs_compile_data_free(void *compile_data);
+
+void hs_populate_compile_data(void *compile_data, size_t index, int pattern_id,
+ char *pat, size_t pat_len, int case_sensitive);
-int adapter_hs_scan_stream(void *stream, const char *data, size_t data_len,
- struct expr_scan_result *results, size_t n_result,
- size_t *n_hit_result);
+int hs_build_lit_db(void **hs_lit_db, void *compile_data, struct log_handle *logger);
-void adapter_hs_stream_close(void *stream);
+int hs_build_regex_db(void **hs_regex_db, void *compile_data, struct log_handle *logger);
#ifdef __cplusplus
}
diff --git a/scanner/expr_matcher/adapter_rs/adapter_rs.cpp b/scanner/expr_matcher/adapter_rs/adapter_rs.cpp
index 369c385..dbc6880 100644
--- a/scanner/expr_matcher/adapter_rs/adapter_rs.cpp
+++ b/scanner/expr_matcher/adapter_rs/adapter_rs.cpp
@@ -17,13 +17,10 @@
#include "rulescan.h"
#include "adapter_rs.h"
-#include "uthash/utarray.h"
#include "uthash/uthash.h"
#include "maat_utils.h"
#include "../../bool_matcher/bool_matcher.h"
-#define MAX_HIT_PATTERN_NUM 1024
-
pid_t rs_gettid()
{
return syscall(SYS_gettid);
@@ -39,62 +36,48 @@ static const char *rs_module_name_str(const char *name)
#define MODULE_ADAPTER_RS rs_module_name_str("maat.adapter_rs")
-struct adpt_rs_compile_data {
+struct rs_compile_data {
struct scan_pattern *patterns;
size_t n_patterns;
};
-struct adapter_rs_stream {
+struct rs_lit_stream {
int thread_id;
size_t offset; /* current stream offset */
- rs_stream_t *literal_stream;
- rs_stream_t *regex_stream;
- struct adapter_rs_runtime *ref_rs_rt;
-
+ rs_stream_t *rs_stream;
+ struct rs_lit_engine *ref_rs_rt;
+ struct matched_pattern *matched_pat;
struct log_handle *logger;
};
-/* adapter_rs runtime */
-struct adapter_rs_runtime {
- rs_database_t *literal_db;
- rs_database_t *regex_db;
-
- struct bool_expr_match **bool_match_buffs; /* per thread */
- struct adapter_rs_stream **streams; /* per thread */
- struct matched_pattern **matched_pats; /* per thread */
- struct bool_matcher *bm;
-};
-
-/* adapter_rs instance */
-struct adapter_rs {
- size_t n_worker_thread;
- size_t n_expr;
- size_t n_patterns;
- struct adapter_rs_runtime *rs_rt;
- struct pattern_attribute *rs_attr;
+struct rs_regex_stream {
+ int thread_id;
+ size_t offset; /* current stream offset */
+ rs_stream_t *rs_stream;
+ struct rs_regex_engine *ref_rs_rt;
+ struct matched_pattern *matched_pat;
struct log_handle *logger;
};
-struct pattern_offset {
- long long start;
- long long end;
-};
-
-struct pattern_attribute {
- long long pattern_id;
- enum expr_match_mode match_mode;
- struct pattern_offset offset;
- size_t pattern_len;
+/* adapter_rs literal runtime */
+struct rs_lit_engine {
+ size_t n_thread;
+ rs_database_t *rs_db;
+ struct rs_lit_stream **streams; /* per thread */
+ struct pattern_attribute *ref_pat_attr;
+ struct log_handle *logger;
};
-struct matched_pattern {
- UT_array *pattern_ids;
- size_t n_patterns;
- struct pattern_attribute *ref_rs_attr;
+/* adapter_rs regex runtime */
+struct rs_regex_engine {
+ size_t n_thread;
+ rs_database_t *rs_db;
+ struct rs_regex_stream **streams; /* per thread */
+ struct pattern_attribute *ref_pat_attr;
+ struct log_handle *logger;
};
-int adapter_rs_verify_regex_expression(const char *regex_expr,
- struct log_handle *logger)
+int rs_verify_regex_expression(const char *regex_expr, struct log_handle *logger)
{
int ret = rs_verify_regex(regex_expr);
if (ret == 0) {
@@ -110,20 +93,16 @@ int adapter_rs_verify_regex_expression(const char *regex_expr,
*
* @retval 0(success) -1(failed)
*/
-static int adpt_rs_build_database(struct adapter_rs_runtime *rs_rt,
- size_t n_worker_thread,
- struct adpt_rs_compile_data *literal_cd,
- struct adpt_rs_compile_data *regex_cd,
- struct log_handle *logger)
+int rs_build_lit_db(void **rs_lit_db, void *compile_data, struct log_handle *logger)
{
- if (NULL == rs_rt) {
+ if (NULL == rs_lit_db) {
return -1;
}
- int ret = 0;
- if (literal_cd != NULL) {
- ret = rs_compile_lit(literal_cd->patterns, literal_cd->n_patterns,
- &rs_rt->literal_db);
+ struct rs_compile_data *lit_cd = (struct rs_compile_data *)compile_data;
+ if (lit_cd != NULL) {
+ int ret = rs_compile_lit(lit_cd->patterns, lit_cd->n_patterns,
+ (rs_database_t **)rs_lit_db);
if (ret < 0) {
log_fatal(logger, MODULE_ADAPTER_RS, "[%s:%d] compile error",
__FUNCTION__, __LINE__);
@@ -131,13 +110,25 @@ static int adpt_rs_build_database(struct adapter_rs_runtime *rs_rt,
}
}
+ return 0;
+}
+
+int rs_build_regex_db(void **rs_regex_db, size_t n_thread, void *compile_data,
+ struct log_handle *logger)
+{
+ if (NULL == rs_regex_db) {
+ return -1;
+ }
+
+ struct rs_compile_data *regex_cd = (struct rs_compile_data *)compile_data;
if (regex_cd != NULL) {
size_t n_failed_pats = 0;
- ret = rs_compile_regex(regex_cd->patterns, regex_cd->n_patterns,
- n_worker_thread, &rs_rt->regex_db, &n_failed_pats);
+ int ret = rs_compile_regex(regex_cd->patterns, regex_cd->n_patterns,
+ n_thread, (rs_database_t **)rs_regex_db,
+ &n_failed_pats);
if (ret < 0) {
- log_fatal(logger, MODULE_ADAPTER_RS, "[%s:%d] compile error",
- __FUNCTION__, __LINE__);
+ log_fatal(logger, MODULE_ADAPTER_RS, "[%s:%d] compile error",
+ __FUNCTION__, __LINE__);
return -1;
}
}
@@ -145,21 +136,22 @@ static int adpt_rs_build_database(struct adapter_rs_runtime *rs_rt,
return 0;
}
-static struct adpt_rs_compile_data *adpt_rs_compile_data_new(size_t n_patterns)
+void *rs_compile_data_new(enum expr_pattern_type pat_type, size_t n_patterns)
{
- struct adpt_rs_compile_data *rs_cd = ALLOC(struct adpt_rs_compile_data, 1);
+ struct rs_compile_data *rs_cd = ALLOC(struct rs_compile_data, 1);
rs_cd->patterns = ALLOC(struct scan_pattern, n_patterns);
rs_cd->n_patterns = n_patterns;
return rs_cd;
}
-static void adpt_rs_compile_data_free(struct adpt_rs_compile_data *rs_cd)
+void rs_compile_data_free(void *compile_data)
{
- if (NULL == rs_cd) {
+ if (NULL == compile_data) {
return;
}
+ struct rs_compile_data *rs_cd = (struct rs_compile_data *)compile_data;
if (rs_cd->patterns != NULL) {
for (size_t i = 0; i < rs_cd->n_patterns; i++) {
if (rs_cd->patterns[i].pattern != NULL) {
@@ -173,247 +165,16 @@ static void adpt_rs_compile_data_free(struct adpt_rs_compile_data *rs_cd)
FREE(rs_cd);
}
-static void populate_compile_data(struct adpt_rs_compile_data *compile_data,
- size_t index, long long pattern_id, char *pat,
- size_t pat_len, int case_sensitive)
-{
- compile_data->patterns[index].id = pattern_id;
- compile_data->patterns[index].case_sensitive = case_sensitive;
- compile_data->patterns[index].pattern = ALLOC(char, pat_len + 1);
- memcpy(compile_data->patterns[index].pattern, pat, pat_len);
- compile_data->patterns[index].pattern_len = pat_len;
-}
-
-static struct bool_expr *bool_exprs_new(struct expr_rule *rules, size_t n_rule,
- struct pattern_attribute *pattern_attr,
- struct adpt_rs_compile_data *literal_cd,
- struct adpt_rs_compile_data *regex_cd,
- size_t *n_pattern)
+void rs_populate_compile_data(void *compile_data, size_t index, int pattern_id,
+ char *pat, size_t pat_len, int case_sensitive)
{
- long long pattern_idx = 0;
- size_t literal_idx = 0;
- size_t regex_idx = 0;
+ struct rs_compile_data *rs_cd = (struct rs_compile_data *)compile_data;
- struct bool_expr *bool_exprs = ALLOC(struct bool_expr, n_rule);
-
- /* populate adpt_rs_compile_data and bool_expr */
- for (size_t i = 0; i < n_rule; i++) {
-
- for (size_t j = 0; j < rules[i].n_patterns; j++) {
- pattern_attr[pattern_idx].pattern_id = pattern_idx;
- pattern_attr[pattern_idx].match_mode = rules[i].patterns[j].match_mode;
- pattern_attr[pattern_idx].pattern_len = rules[i].patterns[j].pat_len;
-
- if (pattern_attr[pattern_idx].match_mode == EXPR_MATCH_MODE_SUB ||
- pattern_attr[pattern_idx].match_mode == EXPR_MATCH_MODE_EXACTLY) {
- pattern_attr[pattern_idx].offset.start = rules[i].patterns[j].start_offset;
- pattern_attr[pattern_idx].offset.end = rules[i].patterns[j].end_offset;
- }
-
- /* literal pattern */
- if (rules[i].patterns[j].type == EXPR_PATTERN_TYPE_STR) {
- populate_compile_data(literal_cd, literal_idx, pattern_idx,
- rules[i].patterns[j].pat, rules[i].patterns[j].pat_len,
- rules[i].patterns[j].case_sensitive);
- literal_idx++;
- } else {
- /* regex pattern */
- populate_compile_data(regex_cd, regex_idx, pattern_idx,
- rules[i].patterns[j].pat, rules[i].patterns[j].pat_len,
- rules[i].patterns[j].case_sensitive);
- regex_idx++;
- }
-
- bool_exprs[i].items[j].item_id = pattern_idx++;
- bool_exprs[i].items[j].not_flag = 0;
- }
-
- bool_exprs[i].expr_id = rules[i].expr_id;
- bool_exprs[i].item_num = rules[i].n_patterns;
- bool_exprs[i].user_tag = rules[i].tag;
- }
-
- *n_pattern = pattern_idx;
-
- return bool_exprs;
-}
-
-UT_icd ut_rs_pattern_id_icd = {sizeof(unsigned long long), NULL, NULL, NULL};
-void *adapter_rs_new(struct expr_rule *rules, size_t n_rule,
- size_t n_literal_pattern, size_t n_regex_pattern,
- size_t n_worker_thread, struct log_handle *logger)
-{
- /* get the sum of pattern */
- size_t i = 0;
- struct adpt_rs_compile_data *literal_cd = NULL;
- struct adpt_rs_compile_data *regex_cd = NULL;
-
- if (n_literal_pattern > 0) {
- literal_cd = adpt_rs_compile_data_new(n_literal_pattern);
- }
-
- if (n_regex_pattern > 0) {
- regex_cd = adpt_rs_compile_data_new(n_regex_pattern);
- }
-
- size_t pattern_cnt = n_literal_pattern + n_regex_pattern;
- struct adapter_rs *rs_inst = ALLOC(struct adapter_rs, 1);
- rs_inst->rs_attr = ALLOC(struct pattern_attribute, pattern_cnt);
- rs_inst->logger = logger;
- rs_inst->n_worker_thread = n_worker_thread;
- rs_inst->n_expr = n_rule;
-
- struct bool_expr *bool_exprs = bool_exprs_new(rules, n_rule, rs_inst->rs_attr,
- literal_cd, regex_cd, &pattern_cnt);
- if (NULL == bool_exprs) {
- return NULL;
- }
- rs_inst->n_patterns = pattern_cnt;
-
- /* create bool matcher */
- size_t mem_size = 0;
- int rs_ret = 0;
-
- rs_inst->rs_rt = ALLOC(struct adapter_rs_runtime, 1);
-
- //rs_rt->bm
- rs_inst->rs_rt->bm = bool_matcher_new(bool_exprs, n_rule, &mem_size);
- if (rs_inst->rs_rt->bm != NULL) {
- log_info(logger, MODULE_ADAPTER_RS,
- "Adapter_rs module: build bool matcher of %zu expressions"
- " with %zu bytes memory", n_rule, mem_size);
- } else {
- log_fatal(logger, MODULE_ADAPTER_RS,
- "[%s:%d] Adapter_rs module: build bool matcher failed",
- __FUNCTION__, __LINE__);
-
- rs_ret = -1;
- }
- FREE(bool_exprs);
-
- /* build rs database rs_rt->literal_db & rs_rt->regex_db */
- int ret = adpt_rs_build_database(rs_inst->rs_rt, n_worker_thread,
- literal_cd, regex_cd, logger);
- if (ret < 0) {
- rs_ret = -1;
- }
-
- if (literal_cd != NULL) {
- adpt_rs_compile_data_free(literal_cd);
- literal_cd = NULL;
- }
-
- if (regex_cd != NULL) {
- adpt_rs_compile_data_free(regex_cd);
- regex_cd = NULL;
- }
-
- if (rs_ret < 0) {
- goto error;
- }
-
- /* alloc scratch */
- rs_inst->rs_rt->bool_match_buffs = ALLOC(struct bool_expr_match *, n_worker_thread);
- for (i = 0; i < n_worker_thread; i++) {
- rs_inst->rs_rt->bool_match_buffs[i] = ALLOC(struct bool_expr_match, MAX_HIT_EXPR_NUM);
- }
-
- rs_inst->rs_rt->streams = ALLOC(struct adapter_rs_stream *, n_worker_thread);
- for (i = 0; i < n_worker_thread; i++) {
- rs_inst->rs_rt->streams[i] = (struct adapter_rs_stream *)adapter_rs_stream_open(rs_inst, i);
- }
-
- rs_inst->rs_rt->matched_pats = ALLOC(struct matched_pattern *, n_worker_thread);
- for (i = 0; i < n_worker_thread; i++) {
- rs_inst->rs_rt->matched_pats[i] = ALLOC(struct matched_pattern, 1);
- rs_inst->rs_rt->matched_pats[i]->ref_rs_attr = rs_inst->rs_attr;
- rs_inst->rs_rt->matched_pats[i]->n_patterns = rs_inst->n_patterns;
- utarray_new(rs_inst->rs_rt->matched_pats[i]->pattern_ids, &ut_rs_pattern_id_icd);
- utarray_reserve(rs_inst->rs_rt->matched_pats[i]->pattern_ids, MAX_HIT_PATTERN_NUM);
- }
-
- return rs_inst;
-error:
- adapter_rs_free(rs_inst);
- return NULL;
-}
-
-void adapter_rs_free(void *rs_instance)
-{
- if (NULL == rs_instance) {
- return;
- }
-
- size_t i = 0;
- struct adapter_rs *rs_inst = (struct adapter_rs *)rs_instance;
-
- if (rs_inst->rs_rt != NULL) {
- if (rs_inst->rs_rt->literal_db != NULL) {
- rs_free_database(rs_inst->rs_rt->literal_db);
- rs_inst->rs_rt->literal_db = NULL;
- }
-
- if (rs_inst->rs_rt->regex_db != NULL) {
- rs_free_database(rs_inst->rs_rt->regex_db);
- rs_inst->rs_rt->regex_db = NULL;
- }
-
- if (rs_inst->rs_rt->bool_match_buffs != NULL) {
- for (i = 0; i < rs_inst->n_worker_thread; i++) {
- if (rs_inst->rs_rt->bool_match_buffs[i] != NULL) {
- FREE(rs_inst->rs_rt->bool_match_buffs[i]);
- }
- }
-
- FREE(rs_inst->rs_rt->bool_match_buffs);
- }
-
- if (rs_inst->rs_rt->bm != NULL) {
- bool_matcher_free(rs_inst->rs_rt->bm);
- rs_inst->rs_rt->bm = NULL;
- }
-
- if (rs_inst->rs_rt->streams != NULL) {
- for (i = 0; i < rs_inst->n_worker_thread; i++) {
- if (rs_inst->rs_rt->streams[i] != NULL) {
- adapter_rs_stream_close(rs_inst->rs_rt->streams[i]);
- rs_inst->rs_rt->streams[i] = NULL;
- }
- }
- FREE(rs_inst->rs_rt->streams);
- }
-
- if (rs_inst->rs_rt->matched_pats != NULL) {
- for (i = 0; i < rs_inst->n_worker_thread; i++) {
- if (rs_inst->rs_rt->matched_pats[i] != NULL) {
- utarray_free(rs_inst->rs_rt->matched_pats[i]->pattern_ids);
- rs_inst->rs_rt->matched_pats[i]->pattern_ids = NULL;
- FREE(rs_inst->rs_rt->matched_pats[i]);
- }
- }
- FREE(rs_inst->rs_rt->matched_pats);
- }
-
- FREE(rs_inst->rs_rt);
- }
-
- if (rs_inst->rs_attr != NULL) {
- FREE(rs_inst->rs_attr);
- }
-
- FREE(rs_inst);
-}
-
-static inline int compare_pattern_id(const void *a, const void *b)
-{
- long long ret = *(const unsigned long long *)a - *(const unsigned long long *)b;
- if (ret == 0) {
- return 0;
- } else if(ret < 0) {
- return -1;
- } else {
- return 1;
- }
+ rs_cd->patterns[index].id = pattern_id;
+ rs_cd->patterns[index].case_sensitive = case_sensitive;
+ rs_cd->patterns[index].pattern = ALLOC(char, pat_len + 1);
+ memcpy(rs_cd->patterns[index].pattern, pat, pat_len);
+ rs_cd->patterns[index].pattern_len = pat_len;
}
/**
@@ -426,16 +187,12 @@ static int matched_event_cb(unsigned int id, int pos_offset, int from, int to,
unsigned long long pattern_id = id;
struct matched_pattern *matched_pat = (struct matched_pattern *)ctx;
- if (pattern_id > matched_pat->n_patterns || id < 0) {
- return 0;
- }
-
if (utarray_len(matched_pat->pattern_ids) >= MAX_HIT_PATTERN_NUM) {
return 0;
}
int ret = 0;
- struct pattern_attribute pat_attr = matched_pat->ref_rs_attr[id];
+ struct pattern_attribute pat_attr = matched_pat->ref_pat_attr[id];
switch (pat_attr.match_mode) {
case EXPR_MATCH_MODE_EXACTLY:
@@ -490,205 +247,329 @@ static int matched_event_cb(unsigned int id, int pos_offset, int from, int to,
return 0;
}
-void *adapter_rs_stream_open(void *rs_instance, int thread_id)
+static int gather_hit_pattern_id(struct matched_pattern *matched_pat,
+ unsigned long long *pattern_id_array,
+ size_t array_size, size_t *n_pattern_id)
{
- if (NULL == rs_instance || thread_id < 0) {
- return NULL;
+ size_t pattern_id_cnt = utarray_len(matched_pat->pattern_ids);
+ if (0 == pattern_id_cnt) {
+ *n_pattern_id = 0;
+ return 0;
}
-
- struct adapter_rs *rs_inst = (struct adapter_rs *)rs_instance;
- struct adapter_rs_stream *rs_stream = ALLOC(struct adapter_rs_stream, 1);
- rs_stream->logger = rs_inst->logger;
- rs_stream->thread_id = thread_id;
- rs_stream->ref_rs_rt = rs_inst->rs_rt;
-
- int err_count = 0;
- if (rs_inst->rs_rt->literal_db != NULL) {
- rs_stream->literal_stream = rs_open_stream(rs_inst->rs_rt->literal_db, 0, 128);
- if (NULL == rs_stream->literal_stream) {
- log_fatal(rs_inst->logger, MODULE_ADAPTER_RS, "rs_open_stream failed");
- err_count++;
- }
+ size_t array_index = 0;
+ for (size_t i = 0; i < pattern_id_cnt && array_index < array_size; i++) {
+ pattern_id_array[array_index++] = *(unsigned long long *)utarray_eltptr(matched_pat->pattern_ids, i);
}
- if (rs_inst->rs_rt->regex_db != NULL) {
- rs_stream->regex_stream = rs_open_stream(rs_inst->rs_rt->regex_db, 0, 128);
- if (NULL == rs_stream->regex_stream) {
- log_fatal(rs_inst->logger, MODULE_ADAPTER_RS, "rs_open_stream failed");
- err_count++;
- }
- }
+ *n_pattern_id = array_index;
+ utarray_clear(matched_pat->pattern_ids);
+
+ return 0;
+}
- if (err_count > 0) {
- goto error;
+void rs_lit_engine_free(void *rs_lit_engine)
+{
+ if (NULL == rs_lit_engine) {
+ return;
}
- return rs_stream;
-error:
- if (rs_stream->literal_stream != NULL) {
- rs_close_stream(rs_stream->literal_stream);
- rs_stream->literal_stream = NULL;
+ struct rs_lit_engine *rs_lit_inst = (struct rs_lit_engine *)rs_lit_engine;
+
+ if (rs_lit_inst->rs_db != NULL) {
+ rs_free_database(rs_lit_inst->rs_db);
+ rs_lit_inst->rs_db = NULL;
}
- if (rs_stream->regex_stream != NULL) {
- rs_close_stream(rs_stream->regex_stream);
- rs_stream->regex_stream = NULL;
+ if (rs_lit_inst->streams != NULL) {
+ for (size_t i = 0; i < rs_lit_inst->n_thread; i++) {
+ if (rs_lit_inst->streams[i] != NULL) {
+ rs_lit_stream_close(rs_lit_inst->streams[i]);
+ rs_lit_inst->streams[i] = NULL;
+ }
+ }
+ FREE(rs_lit_inst->streams);
}
- FREE(rs_stream);
- return NULL;
+ FREE(rs_lit_inst);
}
-void adapter_rs_stream_close(void *rs_stream)
+UT_icd ut_rs_pattern_id_icd = {sizeof(unsigned long long), NULL, NULL, NULL};
+void *rs_lit_engine_new(struct expr_rule *rules, size_t n_rule,
+ struct pattern_attribute *pat_attr,
+ void *rs_lit_db, size_t n_thread,
+ struct log_handle *logger)
{
- if (NULL == rs_stream) {
- return;
- }
+ struct rs_lit_engine *rs_lit_inst = ALLOC(struct rs_lit_engine, 1);
+
+ rs_lit_inst->n_thread = n_thread;
+ rs_lit_inst->rs_db = (rs_database_t *)rs_lit_db;
+ rs_lit_inst->ref_pat_attr = pat_attr;
+ rs_lit_inst->logger = logger;
+ rs_lit_inst->streams = ALLOC(struct rs_lit_stream *, n_thread);
- struct adapter_rs_stream *stream = (struct adapter_rs_stream *)rs_stream;
- if (stream->ref_rs_rt != NULL) {
- if (stream->literal_stream != NULL) {
- rs_close_stream(stream->literal_stream);
- stream->literal_stream = NULL;
- }
-
- if (stream->regex_stream != NULL) {
- rs_close_stream(stream->regex_stream);
- stream->regex_stream = NULL;
- }
+ for (size_t i = 0; i < n_thread; i++) {
+ rs_lit_inst->streams[i] = (struct rs_lit_stream *)rs_lit_stream_open(rs_lit_inst, i);
}
- /* rs_stream->rs_rt point to rs_instance->rs_rt which will call free
- same as rs_attr */
- stream->ref_rs_rt = NULL;
- FREE(stream);
+ return rs_lit_inst;
}
-int adapter_rs_scan_match(struct bool_matcher *bm, UT_array *pattern_ids,
- struct bool_expr_match *match_buff, size_t buff_size,
- struct expr_scan_result *results, size_t n_result,
- size_t *n_hit_result)
+int rs_lit_engine_scan(void *rs_lit_engine, int thread_id,
+ const char *data, size_t data_len,
+ unsigned long long *pattern_id_array,
+ size_t array_size, size_t *n_pattern_id)
{
- size_t n_pattern_id = utarray_len(pattern_ids);
- if (0 == n_pattern_id) {
- *n_hit_result = 0;
- return 0;
+ if (NULL == rs_lit_engine || NULL == data || (0 == data_len) ||
+ NULL == pattern_id_array || 0 == array_size || NULL == n_pattern_id) {
+ return -1;
}
- utarray_sort(pattern_ids, compare_pattern_id);
+ struct rs_lit_engine *rs_lit_inst = (struct rs_lit_engine *)rs_lit_engine;
+ struct rs_lit_stream *rs_lit_stream = rs_lit_inst->streams[thread_id];
+ assert(rs_lit_stream != NULL);
- unsigned long long prev_pattern_id = 0xFFFFFFFFFFFFFFFF;
- unsigned long long tmp_pattern_id = 0;
- size_t n_unique_pattern_id = 0;
- unsigned long long unique_pattern_ids[n_pattern_id];
+ if (rs_lit_inst->rs_db != NULL) {
+ int ret = rs_scan(rs_lit_inst->rs_db, thread_id, data, data_len,
+ 0, matched_event_cb, rs_lit_stream->matched_pat);
+ if (ret < 0) {
+ return -1;
+ }
+ }
- for (size_t i = 0; i < n_pattern_id; i++) {
- tmp_pattern_id = *(unsigned long long *)utarray_eltptr(pattern_ids, i);
- if (tmp_pattern_id != prev_pattern_id) {
- unique_pattern_ids[n_unique_pattern_id++] = tmp_pattern_id;
- prev_pattern_id = tmp_pattern_id;
+ return gather_hit_pattern_id(rs_lit_stream->matched_pat, pattern_id_array,
+ array_size, n_pattern_id);
+}
+
+void *rs_lit_stream_open(void *rs_lit_engine, int thread_id)
+{
+ if (NULL == rs_lit_engine || thread_id < 0) {
+ return NULL;
+ }
+
+ struct rs_lit_engine *rs_lit_inst = (struct rs_lit_engine *)rs_lit_engine;
+ struct rs_lit_stream *lit_stream = ALLOC(struct rs_lit_stream, 1);
+
+ lit_stream->logger = rs_lit_inst->logger;
+ lit_stream->thread_id = thread_id;
+ lit_stream->ref_rs_rt = rs_lit_inst;
+ lit_stream->matched_pat = ALLOC(struct matched_pattern, 1);
+ lit_stream->matched_pat->ref_pat_attr = rs_lit_inst->ref_pat_attr;
+ utarray_new(lit_stream->matched_pat->pattern_ids, &ut_rs_pattern_id_icd);
+ utarray_reserve(lit_stream->matched_pat->pattern_ids, MAX_HIT_PATTERN_NUM);
+
+ if (rs_lit_inst->rs_db != NULL) {
+ lit_stream->rs_stream = rs_open_stream(rs_lit_inst->rs_db, 0, 128);
+ if (NULL == lit_stream->rs_stream) {
+ log_fatal(rs_lit_inst->logger, MODULE_ADAPTER_RS, "rs_open_stream failed");
+ FREE(lit_stream);
+ return NULL;
}
}
- int bool_matcher_ret = bool_matcher_match(bm, unique_pattern_ids,
- n_unique_pattern_id,
- match_buff, buff_size);
- if (bool_matcher_ret < 0) {
- goto next;
+ return lit_stream;
+}
+
+void rs_lit_stream_close(void *rs_lit_stream)
+{
+ if (NULL == rs_lit_stream) {
+ return;
}
- if (bool_matcher_ret > (int)n_result) {
- bool_matcher_ret = n_result;
+ struct rs_lit_stream *lit_stream = (struct rs_lit_stream *)rs_lit_stream;
+ if (lit_stream->ref_rs_rt != NULL) {
+ if (lit_stream->rs_stream != NULL) {
+ rs_close_stream(lit_stream->rs_stream);
+ lit_stream->rs_stream = NULL;
+ }
}
- for (int index = 0; index < bool_matcher_ret; index++) {
- results[index].rule_id = match_buff[index].expr_id;
- results[index].user_tag = match_buff[index].user_tag;
+ /* rs_stream->rs_rt point to rs_instance->rs_rt which will call free
+ same as rs_attr */
+ lit_stream->ref_rs_rt = NULL;
+ lit_stream->matched_pat->ref_pat_attr = NULL;
+
+ if (lit_stream->matched_pat->pattern_ids != NULL) {
+ utarray_free(lit_stream->matched_pat->pattern_ids);
+ lit_stream->matched_pat->pattern_ids = NULL;
}
- *n_hit_result = bool_matcher_ret;
-next:
- utarray_clear(pattern_ids);
- return bool_matcher_ret;
+ FREE(lit_stream->matched_pat);
+ FREE(lit_stream);
}
-int adapter_rs_scan_stream(void *rs_stream, const char *data, size_t data_len,
- struct expr_scan_result *results, size_t n_result,
- size_t *n_hit_result)
+int rs_lit_stream_scan(void *rs_lit_stream, const char *data, size_t data_len,
+ unsigned long long *pattern_id_array, size_t array_size,
+ size_t *n_pattern_id)
{
- if (NULL == rs_stream || NULL == data || 0 == data_len ||
- NULL == results || 0 == n_result || NULL == n_hit_result) {
+ if (NULL == rs_lit_stream || NULL == data || 0 == data_len ||
+ NULL == pattern_id_array || 0 == array_size || NULL == n_pattern_id) {
return -1;
}
- int ret = 0, err_count = 0;
- struct adapter_rs_stream *stream = (struct adapter_rs_stream *)rs_stream;
- int thread_id = stream->thread_id;
- struct adapter_rs_runtime *rs_rt = stream->ref_rs_rt;
- struct matched_pattern *matched_pat = rs_rt->matched_pats[thread_id];
-
- if (stream->literal_stream != NULL) {
- ret = rs_scan_stream(stream->literal_stream, data, data_len,
- matched_event_cb, matched_pat);
+ struct rs_lit_stream *lit_stream = (struct rs_lit_stream *)rs_lit_stream;
+
+ if (lit_stream->rs_stream != NULL) {
+ int ret = rs_scan_stream(lit_stream->rs_stream, data, data_len,
+ matched_event_cb, lit_stream->matched_pat);
if (ret < 0) {
- err_count++;
+ return -1;
}
}
- if (stream->regex_stream != NULL) {
- ret = rs_scan_stream(stream->regex_stream, data, data_len,
- matched_event_cb, matched_pat);
- if (ret < 0) {
- err_count++;
+ return gather_hit_pattern_id(lit_stream->matched_pat, pattern_id_array,
+ array_size, n_pattern_id);
+}
+
+void rs_regex_engine_free(void *rs_regex_engine)
+{
+ if (NULL == rs_regex_engine) {
+ return;
+ }
+
+ struct rs_regex_engine *rs_regex_inst = (struct rs_regex_engine *)rs_regex_engine;
+
+ if (rs_regex_inst->rs_db != NULL) {
+ rs_free_database(rs_regex_inst->rs_db);
+ rs_regex_inst->rs_db = NULL;
+ }
+
+ if (rs_regex_inst->streams != NULL) {
+ for (size_t i = 0; i < rs_regex_inst->n_thread; i++) {
+ if (rs_regex_inst->streams[i] != NULL) {
+ rs_regex_stream_close(rs_regex_inst->streams[i]);
+ rs_regex_inst->streams[i] = NULL;
+ }
}
+
+ FREE(rs_regex_inst->streams);
}
- if (err_count == 2) {
- return -1;
+ FREE(rs_regex_inst);
+}
+
+void *rs_regex_engine_new(struct expr_rule *rules, size_t n_rule,
+ struct pattern_attribute *pat_attr,
+ void *rs_regex_db, size_t n_thread,
+ struct log_handle *logger)
+{
+ struct rs_regex_engine *rs_regex_inst = ALLOC(struct rs_regex_engine, 1);
+
+ rs_regex_inst->n_thread = n_thread;
+ rs_regex_inst->rs_db = (rs_database_t *)rs_regex_db;
+ rs_regex_inst->ref_pat_attr = pat_attr;
+ rs_regex_inst->logger = logger;
+ rs_regex_inst->streams = ALLOC(struct rs_regex_stream *, n_thread);
+
+ for (size_t i = 0; i < n_thread; i++) {
+ rs_regex_inst->streams[i] = (struct rs_regex_stream *)rs_regex_stream_open(rs_regex_inst, i);
}
- return adapter_rs_scan_match(rs_rt->bm, matched_pat->pattern_ids,
- rs_rt->bool_match_buffs[thread_id],
- MAX_HIT_EXPR_NUM, results, n_result,
- n_hit_result);
+ return rs_regex_inst;
}
-int adapter_rs_scan(void *rs_instance, int thread_id, const char *data, size_t data_len,
- struct expr_scan_result *results, size_t n_result, size_t *n_hit_result)
+int rs_regex_engine_scan(void *rs_regex_engine, int thread_id,
+ const char *data, size_t data_len,
+ unsigned long long *pattern_id_array,
+ size_t array_size, size_t *n_pattern_id)
{
- if (NULL == rs_instance || NULL == data || (0 == data_len) ||
- NULL == results || 0 == n_result || NULL == n_hit_result) {
+ if (NULL == rs_regex_engine || NULL == data || (0 == data_len) ||
+ NULL == pattern_id_array || 0 == array_size || NULL == n_pattern_id) {
return -1;
}
- int ret = 0, err_count = 0;
- struct adapter_rs *rs_inst = (struct adapter_rs *)rs_instance;
- struct adapter_rs_runtime *rs_rt = rs_inst->rs_rt;
- struct matched_pattern *matched_pat = rs_rt->matched_pats[thread_id];
+ struct rs_regex_engine *rs_regex_inst = (struct rs_regex_engine *)rs_regex_engine;
+ struct rs_regex_stream *rs_regex_stream = rs_regex_inst->streams[thread_id];
+ assert(rs_regex_stream != NULL);
- if (rs_rt->literal_db != NULL) {
- ret = rs_scan(rs_rt->literal_db, thread_id, data, data_len,
- 0, matched_event_cb, matched_pat);
+ if (rs_regex_inst->rs_db != NULL) {
+ int ret = rs_scan(rs_regex_inst->rs_db, thread_id, data, data_len,
+ 0, matched_event_cb, rs_regex_stream->matched_pat);
if (ret < 0) {
- err_count++;
+ return -1;
}
}
-
- if (rs_rt->regex_db != NULL) {
- ret = rs_scan(rs_rt->regex_db, thread_id, data, data_len,
- 0, matched_event_cb, matched_pat);
- if (ret < 0) {
- err_count++;
+
+ return gather_hit_pattern_id(rs_regex_stream->matched_pat, pattern_id_array,
+ array_size, n_pattern_id);
+}
+
+/*
+ * Open a regex scan stream on the given rs_regex_engine.
+ *
+ * The stream gets its own matched_pattern buffer (pattern_ids reserved for
+ * MAX_HIT_PATTERN_NUM entries) and borrows the engine's logger and pattern
+ * attributes. A rulescan stream is only opened when the engine carries a
+ * database.
+ *
+ * @param rs_regex_engine: engine returned by rs_regex_engine_new()
+ * @param thread_id:       id of the calling scan thread, must be >= 0
+ *
+ * @retval the new stream, or NULL when an argument is invalid or
+ *         rs_open_stream() fails; release it with rs_regex_stream_close()
+ */
+void *rs_regex_stream_open(void *rs_regex_engine, int thread_id)
+{
+    if (NULL == rs_regex_engine || thread_id < 0) {
+        return NULL;
+    }
+
+    struct rs_regex_engine *rs_regex_inst = (struct rs_regex_engine *)rs_regex_engine;
+    struct rs_regex_stream *regex_stream = ALLOC(struct rs_regex_stream, 1);
+
+    regex_stream->logger = rs_regex_inst->logger;
+    regex_stream->thread_id = thread_id;
+    regex_stream->ref_rs_rt = rs_regex_inst;
+    regex_stream->matched_pat = ALLOC(struct matched_pattern, 1);
+    regex_stream->matched_pat->ref_pat_attr = rs_regex_inst->ref_pat_attr;
+    utarray_new(regex_stream->matched_pat->pattern_ids, &ut_rs_pattern_id_icd);
+    utarray_reserve(regex_stream->matched_pat->pattern_ids, MAX_HIT_PATTERN_NUM);
+
+    if (rs_regex_inst->rs_db != NULL) {
+        regex_stream->rs_stream = rs_open_stream(rs_regex_inst->rs_db, 0, 128);
+        if (NULL == regex_stream->rs_stream) {
+            log_fatal(rs_regex_inst->logger, MODULE_ADAPTER_RS, "rs_open_stream failed");
+            /* release the match buffer too, otherwise it leaks on this path */
+            utarray_free(regex_stream->matched_pat->pattern_ids);
+            FREE(regex_stream->matched_pat);
+            FREE(regex_stream);
+            return NULL;
         }
     }
-    if (err_count == 2) {
+    return regex_stream;
+}
+
+/*
+ * Close a stream obtained from rs_regex_stream_open() and release what the
+ * stream itself owns: the rulescan stream handle, the hit pattern id array
+ * and the matched_pattern wrapper. Passing NULL is a no-op.
+ */
+void rs_regex_stream_close(void *rs_regex_stream)
+{
+    struct rs_regex_stream *stream = (struct rs_regex_stream *)rs_regex_stream;
+    if (NULL == stream) {
+        return;
+    }
+
+    /* the rulescan handle is only closed while the engine reference is still set */
+    if (stream->ref_rs_rt != NULL && stream->rs_stream != NULL) {
+        rs_close_stream(stream->rs_stream);
+        stream->rs_stream = NULL;
+    }
+
+    /* borrowed pointers (engine, pattern attributes): their owner frees them,
+       so they are only cleared here */
+    stream->ref_rs_rt = NULL;
+
+    struct matched_pattern *matched = stream->matched_pat;
+    matched->ref_pat_attr = NULL;
+    if (matched->pattern_ids != NULL) {
+        utarray_free(matched->pattern_ids);
+        matched->pattern_ids = NULL;
+    }
+
+    FREE(stream->matched_pat);
+    FREE(stream);
+}
+
+int rs_regex_stream_scan(void *rs_regex_stream, const char *data, size_t data_len,
+ unsigned long long *pattern_id_array, size_t array_size,
+ size_t *n_pattern_id)
+{
+ if (NULL == rs_regex_stream || NULL == data || 0 == data_len ||
+ NULL == pattern_id_array || 0 == array_size || NULL == n_pattern_id) {
return -1;
}
+
+ struct rs_regex_stream *regex_stream = (struct rs_regex_stream *)rs_regex_stream;
+
+ if (regex_stream->rs_stream != NULL) {
+ int ret = rs_scan_stream(regex_stream->rs_stream, data, data_len,
+ matched_event_cb, regex_stream->matched_pat);
+ if (ret < 0) {
+ return -1;
+ }
+ }
- return adapter_rs_scan_match(rs_rt->bm, matched_pat->pattern_ids,
- rs_rt->bool_match_buffs[thread_id],
- MAX_HIT_EXPR_NUM, results, n_result,
- n_hit_result);
+ return gather_hit_pattern_id(regex_stream->matched_pat, pattern_id_array,
+ array_size, n_pattern_id);
} \ No newline at end of file
diff --git a/scanner/expr_matcher/adapter_rs/adapter_rs.h b/scanner/expr_matcher/adapter_rs/adapter_rs.h
index c43e553..31ba83a 100644
--- a/scanner/expr_matcher/adapter_rs/adapter_rs.h
+++ b/scanner/expr_matcher/adapter_rs/adapter_rs.h
@@ -21,55 +21,89 @@ extern "C"
#include "log/log.h"
#include "../expr_matcher.h"
+#include "../expr_matcher_inc.h"
-int adapter_rs_verify_regex_expression(const char *regex_expr,
- struct log_handle *logger);
+int rs_verify_regex_expression(const char *regex_expr, struct log_handle *logger);
/**
- * @brief new adapter_rs instance
+ * @brief new adapter_rs literal instance
*
* @param rules: logic AND expression's array
* @param n_rule: the number of logic AND expression's array
- * @param n_worker_threads: the number of scan threads which will call adapter_rs_scan()
+ * @param n_thread: the number of scan threads which will call rs_lit_engine_scan()
*
- * @retval the pointer to adapter_rs instance
+ * @retval the pointer to adapter_rs literal instance
*/
-void *adapter_rs_new(struct expr_rule *rules, size_t n_rule,
- size_t n_literal_pattern, size_t n_regex_pattern,
- size_t n_worker_thread, struct log_handle *logger);
+void *rs_lit_engine_new(struct expr_rule *rules, size_t n_rule,
+ struct pattern_attribute *pat_attr,
+ void *rs_lit_db, size_t n_thread,
+ struct log_handle *logger);
-void adapter_rs_free(void *rs_instance);
+void *rs_regex_engine_new(struct expr_rule *rules, size_t n_rule,
+ struct pattern_attribute *pat_attr,
+ void *rs_regex_db, size_t n_thread,
+ struct log_handle *logger);
+
+void rs_lit_engine_free(void *rs_lit_engine);
+
+void rs_regex_engine_free(void *rs_regex_engine);
/**
* @brief scan input data to match logic AND expression, return all matched expr_id
*
- * @param rs_instance: adapter_rs instance obtained by adapter_rs_new()
- * @param thread_id: the thread_id of caller
- * @param scan_data: data to be scanned
- * @param data_len: the length of data to be scanned
- * @param result_array: the array to store hit expr_id which allocated by caller
- * @param n_result_array: number of elements in array of expr_id
+ * @param rs_lit_engine: literal engine instance obtained by rs_lit_engine_new()
+ * @param thread_id: the thread_id of caller
+ * @param scan_data: data to be scanned
+ * @param data_len: the length of data to be scanned
+ * @param pattern_id_array: caller-allocated array that receives the hit pattern ids
+ * @param array_size: number of elements in pattern_id_array
+ * @param n_pattern_id: receives the number of pattern ids stored in pattern_id_array
*/
-int adapter_rs_scan(void *rs_instance, int thread_id,
- const char *scan_data, size_t data_len,
- struct expr_scan_result *result_array,
- size_t n_result_array, size_t *n_hit_results);
+int rs_lit_engine_scan(void *rs_lit_engine, int thread_id,
+ const char *scan_data, size_t data_len,
+ unsigned long long *pattern_id_array,
+ size_t array_size, size_t *n_pattern_id);
+
+/**
+ * @brief scan a block of data with the regex engine and collect hit pattern ids
+ *
+ * @param rs_regex_engine: regex engine instance obtained by rs_regex_engine_new()
+ * @param thread_id: the thread_id of caller
+ * @param scan_data: data to be scanned, must not be NULL
+ * @param data_len: the length of data to be scanned, must not be 0
+ * @param pattern_id_array: caller-allocated array that receives the hit pattern ids
+ * @param array_size: number of elements in pattern_id_array, must not be 0
+ * @param n_pattern_id: receives the number of pattern ids stored in pattern_id_array
+ *
+ * @retval -1 on invalid arguments or when the underlying scan fails
+ */
+int rs_regex_engine_scan(void *rs_regex_engine, int thread_id,
+                         const char *scan_data, size_t data_len,
+                         unsigned long long *pattern_id_array,
+                         size_t array_size, size_t *n_pattern_id);
/**
- * @brief
+ * @brief open stream for adapter_rs literal instance
*/
-void *adapter_rs_stream_open(void *rs_instance, int thread_id);
+void *rs_lit_stream_open(void *rs_lit_engine, int thread_id);
+
+void *rs_regex_stream_open(void *rs_regex_engine, int thread_id);
+
+void rs_lit_stream_close(void *rs_lit_stream);
+
+void rs_regex_stream_close(void *rs_regex_stream);
/**
- * @brief
+ * @brief scan stream by adapter_rs literal stream
*/
-int adapter_rs_scan_stream(void *rs_stream, const char *scan_data,
- size_t data_len, struct expr_scan_result *result_array,
- size_t n_result_array, size_t *n_hit_results);
+int rs_lit_stream_scan(void *rs_lit_stream, const char *scan_data, size_t data_len,
+ unsigned long long *pattern_id_array, size_t array_size,
+ size_t *n_pattern_id);
+
+int rs_regex_stream_scan(void *rs_regex_stream, const char *scan_data, size_t data_len,
+ unsigned long long *pattern_id_array, size_t array_size,
+ size_t *n_pattern_id);
+
/**
- * @brief
- */
-void adapter_rs_stream_close(void *rs_stream);
+ * @brief build database
+*/
+void *rs_compile_data_new(enum expr_pattern_type pat_type, size_t n_patterns);
+
+void rs_compile_data_free(void *compile_data);
+
+void rs_populate_compile_data(void *compile_data, size_t index, int pattern_id,
+ char *pat, size_t pat_len, int case_sensitive);
+
+int rs_build_lit_db(void **rs_lit_db, void *compile_data, struct log_handle *logger);
+
+int rs_build_regex_db(void **rs_regex_db, size_t n_thread, void *compile_data,
+ struct log_handle *logger);
#ifdef __cplusplus
}
diff --git a/scanner/expr_matcher/expr_matcher.cpp b/scanner/expr_matcher/expr_matcher.cpp
index 64fbe97..16ec4ee 100644
--- a/scanner/expr_matcher/expr_matcher.cpp
+++ b/scanner/expr_matcher/expr_matcher.cpp
@@ -13,8 +13,9 @@
#include <sys/syscall.h>
#include "log/log.h"
-#include "expr_matcher.h"
#include "maat_utils.h"
+#include "../bool_matcher/bool_matcher.h"
+#include "expr_matcher_inc.h"
#include "adapter_hs/adapter_hs.h"
#include "adapter_rs/adapter_rs.h"
@@ -34,153 +35,459 @@ static const char *expr_matcher_module_name_str(const char *name)
#define MODULE_EXPR_MATCHER expr_matcher_module_name_str("maat.expr_matcher")
struct expr_matcher {
+ size_t n_thread;
enum expr_engine_type engine_type;
- void *engine;
+ void *lit_runtime;
+ void *regex_runtime;
+ struct pattern_attribute *pat_attr;
+ struct bool_matcher *bm;
+ struct bool_expr_match **bool_match_buffs;
struct log_handle *logger;
};
struct expr_matcher_stream {
+ int thread_id;
enum expr_engine_type engine_type;
- void *handle;
+ void *lit_stream;
+ void *regex_stream;
+ struct expr_matcher *ref_matcher;
};
-struct expr_engine_operations {
+/*
+ * Per-engine hooks used by expr_matcher_new()/bool_exprs_new() to collect
+ * patterns into engine specific compile data and to build a database from it.
+ */
+struct db_operations {
+ /* engine this set of hooks belongs to */
+ enum expr_engine_type type;
+ /* allocate compile data able to hold n_pattern patterns of type pat_type */
+ void *(*compile_data_new)(enum expr_pattern_type pat_type, size_t n_pattern);
+ /* release compile data; called once the database has been built */
+ void (*compile_data_free)(void *compile_data);
+ /* store one pattern at slot `index` of compile_data under id `pattern_id` */
+ void (*populate_compile_data)(void *compile_data, size_t index, int pattern_id,
+ char *pat, size_t pat_len, int case_sensitive);
+ /* build the database into *lit_db; callers treat a negative return as failure */
+ int (*build_db)(void **lit_db, void *compile_data, struct log_handle *logger);
+};
+
+/*
+ * Literal database hooks, indexed by enum expr_engine_type.
+ * EXPR_ENGINE_TYPE_AUTO is the last enumerator and only serves as the array
+ * length here, so the table has entries for HS and RS only.
+ * Regex compile data does not go through this table: expr_matcher_new() and
+ * bool_exprs_new() call the hs_* functions for it directly.
+ */
+struct db_operations db_ops[EXPR_ENGINE_TYPE_AUTO] = {
+ {
+ .type = EXPR_ENGINE_TYPE_HS,
+ .compile_data_new = hs_compile_data_new,
+ .compile_data_free = hs_compile_data_free,
+ .populate_compile_data = hs_populate_compile_data,
+ .build_db = hs_build_lit_db
+ },
+ {
+ .type = EXPR_ENGINE_TYPE_RS,
+ .compile_data_new = rs_compile_data_new,
+ .compile_data_free = rs_compile_data_free,
+ .populate_compile_data = rs_populate_compile_data,
+ .build_db = rs_build_lit_db
+ }
+};
+
+struct engine_operations {
enum expr_engine_type type;
void *(*engine_new)(struct expr_rule *rules, size_t n_rule,
- size_t n_literal_pattern, size_t n_regex_pattern,
- size_t n_worker_thread, struct log_handle *logger);
+ struct pattern_attribute *pat_attr,
+ void *hs_lit_db, size_t n_thread,
+ struct log_handle *logger);
+
void (*engine_free)(void *engine);
- int (*engine_scan)(void *engine, int thread_id, const char *scan_data,
- size_t data_len, struct expr_scan_result *result_array,
- size_t n_result_array, size_t *n_hit_result);
- void *(*engine_stream_open)(void *engine, int thread_id);
- void (*engine_stream_close)(void *stream);
- int (*engine_scan_stream)(void *stream, const char *scan_data, size_t data_len,
- struct expr_scan_result *result_array, size_t n_result_array,
- size_t *n_hit_result);
+
+ int (*engine_scan)(void *engine, int thread_id,
+ const char *data, size_t data_len,
+ unsigned long long *pattern_id_array,
+ size_t array_size, size_t *n_pattern_id);
+
+ void *(*stream_open)(void *engine, int thread_id);
+
+ void (*stream_close)(void *stream);
+
+ int (*scan_stream)(void *stream, const char *data, size_t data_len,
+ unsigned long long *pattern_id_array, size_t array_size,
+ size_t *n_pattern_id);
};
-struct expr_engine_operations expr_engine_ops[EXPR_ENGINE_TYPE_MAX] = {
+struct engine_operations engine_ops[EXPR_ENGINE_TYPE_AUTO] = {
{
.type = EXPR_ENGINE_TYPE_HS,
- .engine_new = adapter_hs_new,
- .engine_free = adapter_hs_free,
- .engine_scan = adapter_hs_scan,
- .engine_stream_open = adapter_hs_stream_open,
- .engine_stream_close = adapter_hs_stream_close,
- .engine_scan_stream = adapter_hs_scan_stream
+ .engine_new = hs_lit_engine_new,
+ .engine_free = hs_lit_engine_free,
+ .engine_scan = hs_lit_engine_scan,
+ .stream_open = hs_lit_stream_open,
+ .stream_close = hs_lit_stream_close,
+ .scan_stream = hs_lit_stream_scan
},
{
.type = EXPR_ENGINE_TYPE_RS,
- .engine_new = adapter_rs_new,
- .engine_free = adapter_rs_free,
- .engine_scan = adapter_rs_scan,
- .engine_stream_open = adapter_rs_stream_open,
- .engine_stream_close = adapter_rs_stream_close,
- .engine_scan_stream = adapter_rs_scan_stream
+ .engine_new = rs_lit_engine_new,
+ .engine_free = rs_lit_engine_free,
+ .engine_scan = rs_lit_engine_scan,
+ .stream_open = rs_lit_stream_open,
+ .stream_close = rs_lit_stream_close,
+ .scan_stream = rs_lit_stream_scan
}
};
int expr_matcher_verify_regex_expression(const char *regex_expr,
struct log_handle *logger)
{
- int ret = adapter_hs_verify_regex_expression(regex_expr, logger);
+ int ret = hs_verify_regex_expression(regex_expr, logger);
if (ret == 0) {
return 0;
}
- return adapter_rs_verify_regex_expression(regex_expr, logger);
+ return rs_verify_regex_expression(regex_expr, logger);
}
-struct expr_matcher *
-expr_matcher_new(struct expr_rule *rules, size_t n_rule, enum expr_engine_type engine_type,
- size_t n_worker_thread, struct log_handle *logger)
+static int expr_rule_pattern_count(struct expr_rule *rules, size_t n_rule,
+ size_t *n_lit_pat, size_t *n_regex_pat,
+ struct log_handle *logger)
{
- if (NULL == rules || 0 == n_rule || 0 == n_worker_thread ||
- (engine_type != EXPR_ENGINE_TYPE_HS && engine_type != EXPR_ENGINE_TYPE_RS)) {
- log_fatal(logger, MODULE_EXPR_MATCHER, "[%s:%d]engine type:%d is illegal",
- __FUNCTION__, __LINE__, engine_type);
- return NULL;
- }
-
- size_t i = 0, j = 0;
- size_t literal_pat_num = 0;
+ size_t lit_pat_num = 0;
size_t regex_pat_num = 0;
- for (i = 0; i < n_rule; i++) {
+ for (size_t i = 0; i < n_rule; i++) {
if (rules[i].n_patterns > MAX_EXPR_PATTERN_NUM) {
- log_fatal(logger, MODULE_EXPR_MATCHER,
- "[%s:%d] the number of patterns in one expression should less than"
- " %d", __FUNCTION__, __LINE__, MAX_EXPR_PATTERN_NUM);
- return NULL;
+ log_fatal(logger, MODULE_EXPR_MATCHER,
+ "[%s:%d] the number of patterns in expr_rule(rule_id:%lld)"
+ " should less than %d", __FUNCTION__, __LINE__,
+ rules[i].expr_id, MAX_EXPR_PATTERN_NUM);
+ return -1;
}
- for (j = 0; j < rules[i].n_patterns; j++) {
+ for (size_t j = 0; j < rules[i].n_patterns; j++) {
/* pat_len should not 0 */
if (0 == rules[i].patterns[j].pat_len) {
log_fatal(logger, MODULE_EXPR_MATCHER,
"[%s:%d] expr pattern length should not 0",
__FUNCTION__, __LINE__);
- return NULL;
+ return -1;
}
if (rules[i].patterns[j].type == EXPR_PATTERN_TYPE_STR) {
- literal_pat_num++;
+ lit_pat_num++;
} else {
regex_pat_num++;
}
}
}
- if (0 == literal_pat_num && 0 == regex_pat_num) {
+ if (0 == lit_pat_num && 0 == regex_pat_num) {
log_fatal(logger, MODULE_EXPR_MATCHER,
- "[%s:%d] exprs has no valid pattern", __FUNCTION__, __LINE__);
- return NULL;
+ "[%s:%d] exprs has no valid pattern",
+ __FUNCTION__, __LINE__);
+ return -1;
+ }
+
+ *n_lit_pat = lit_pat_num;
+ *n_regex_pat = regex_pat_num;
+
+ return 0;
+}
+
+/*
+ * Translate expr_rules into the bool_expr array used to build the bool
+ * matcher and, in the same pass, fill pat_attr and the compile data.
+ *
+ * Pattern ids are assigned sequentially over all patterns of all rules, so
+ * literal and regex patterns share one id space. Literal patterns are added
+ * through db_ops[engine_type]; regex patterns always go through
+ * hs_populate_compile_data(), whatever engine_type is.
+ *
+ * @param rules/n_rule:        expressions to translate
+ * @param engine_type:         selects the literal populate hook
+ * @param pat_attr:            caller-allocated array, one slot per pattern
+ * @param lit_compile_data:    compile data receiving literal patterns
+ * @param regex_compile_data:  compile data receiving regex patterns
+ *
+ * @retval array of n_rule bool_expr allocated with ALLOC; the caller frees it
+ */
+static struct bool_expr *bool_exprs_new(struct expr_rule *rules, size_t n_rule,
+                                        enum expr_engine_type engine_type,
+                                        struct pattern_attribute *pat_attr,
+                                        void *lit_compile_data, void *regex_compile_data)
+{
+    uint32_t next_pat_id = 0;   /* id shared by literal and regex patterns */
+    uint32_t n_lit = 0;         /* next free slot in lit_compile_data */
+    uint32_t n_regex = 0;       /* next free slot in regex_compile_data */
+
+    struct bool_expr *exprs = ALLOC(struct bool_expr, n_rule);
+
+    for (size_t r = 0; r < n_rule; r++) {
+        struct expr_rule *rule = &rules[r];
+        struct bool_expr *expr = &exprs[r];
+
+        for (size_t p = 0; p < rule->n_patterns; p++) {
+            struct expr_pattern *pat = &rule->patterns[p];
+            struct pattern_attribute *attr = &pat_attr[next_pat_id];
+
+            attr->pattern_id = next_pat_id;
+            attr->match_mode = pat->match_mode;
+
+            /* offsets are only recorded for SUB and EXACTLY match modes */
+            if (attr->match_mode == EXPR_MATCH_MODE_SUB ||
+                attr->match_mode == EXPR_MATCH_MODE_EXACTLY) {
+                attr->offset.start = pat->start_offset;
+                attr->offset.end = pat->end_offset;
+            }
+
+            if (pat->type == EXPR_PATTERN_TYPE_STR) {
+                /* literal pattern */
+                db_ops[engine_type].populate_compile_data(lit_compile_data, n_lit,
+                                                          next_pat_id, pat->pat,
+                                                          pat->pat_len,
+                                                          pat->case_sensitive);
+                n_lit++;
+            } else {
+                /* regex pattern */
+                hs_populate_compile_data(regex_compile_data, n_regex, next_pat_id,
+                                         pat->pat, pat->pat_len,
+                                         pat->case_sensitive);
+                n_regex++;
+            }
+
+            expr->items[p].item_id = next_pat_id;
+            expr->items[p].not_flag = 0;
+            next_pat_id++;
+        }
+
+        expr->expr_id = rule->expr_id;
+        expr->item_num = rule->n_patterns;
+        expr->user_tag = rule->tag;
+    }
+
+    return exprs;
+}
+
+void expr_matcher_free(struct expr_matcher *matcher)
+{
+ if (NULL == matcher) {
+ return;
}
- void *engine = expr_engine_ops[engine_type].engine_new(rules, n_rule, literal_pat_num,
- regex_pat_num, n_worker_thread,
- logger);
- if (NULL == engine) {
+ if (matcher->lit_runtime != NULL) {
+ engine_ops[matcher->engine_type].engine_free(matcher->lit_runtime);
+ matcher->lit_runtime = NULL;
+ }
+
+ if (matcher->regex_runtime != NULL) {
+ hs_regex_engine_free(matcher->regex_runtime);
+ matcher->regex_runtime = NULL;
+ }
+
+ if (matcher->bm != NULL) {
+ bool_matcher_free(matcher->bm);
+ matcher->bm = NULL;
+ }
+
+ if (matcher->bool_match_buffs != NULL) {
+ for (size_t i = 0; i < matcher->n_thread; i++) {
+ if (matcher->bool_match_buffs[i] != NULL) {
+ FREE(matcher->bool_match_buffs[i]);
+ }
+ }
+
+ FREE(matcher->bool_match_buffs);
+ }
+
+ if (matcher->pat_attr != NULL) {
+ FREE(matcher->pat_attr);
+ }
+
+ FREE(matcher);
+}
+
+struct expr_matcher *expr_matcher_new(struct expr_rule *rules, size_t n_rule,
+ enum expr_engine_type engine_type,
+ size_t n_thread, struct log_handle *logger)
+{
+ if (NULL == rules || 0 == n_rule || 0 == n_thread ||
+ (engine_type != EXPR_ENGINE_TYPE_HS &&
+ engine_type != EXPR_ENGINE_TYPE_RS)) {
log_fatal(logger, MODULE_EXPR_MATCHER,
- "[%s:%d]expr_matcher engine_new failed.", __FUNCTION__, __LINE__);
+ "[%s:%d]engine type:%d is illegal",
+ __FUNCTION__, __LINE__, engine_type);
return NULL;
}
+ size_t lit_pat_cnt = 0;
+ size_t regex_pat_cnt = 0;
+ size_t pat_cnt = 0;
+
+ int ret = expr_rule_pattern_count(rules, n_rule, &lit_pat_cnt,
+ &regex_pat_cnt, logger);
+ if (ret < 0) {
+ return NULL;
+ }
+
+ pat_cnt = lit_pat_cnt + regex_pat_cnt;
+ void *lit_compile_data = NULL;
+ void *regex_compile_data = NULL;
+
+ if (lit_pat_cnt > 0) {
+ lit_compile_data = db_ops[engine_type].compile_data_new(EXPR_PATTERN_TYPE_STR,
+ lit_pat_cnt);
+ }
+
+ if (regex_pat_cnt > 0) {
+ regex_compile_data = hs_compile_data_new(EXPR_PATTERN_TYPE_REG, regex_pat_cnt);
+ }
+
+ struct pattern_attribute *pat_attr = ALLOC(struct pattern_attribute, pat_cnt);
+ struct bool_expr *bool_exprs = bool_exprs_new(rules, n_rule, engine_type,
+ pat_attr, lit_compile_data,
+ regex_compile_data);
+ size_t mem_size = 0;
+ int bm_ret = 0;
struct expr_matcher *matcher = ALLOC(struct expr_matcher, 1);
+
+ matcher->n_thread = n_thread;
+ matcher->pat_attr = pat_attr;
matcher->engine_type = engine_type;
- matcher->engine = engine;
matcher->logger = logger;
+ matcher->bm = bool_matcher_new(bool_exprs, n_rule, &mem_size);
+ if (matcher->bm != NULL) {
+ log_info(logger, MODULE_EXPR_MATCHER,
+ "expr_matcher module: build bool matcher of %zu expressions"
+ " with %zu bytes memory", n_rule, mem_size);
+ } else {
+ log_fatal(logger, MODULE_EXPR_MATCHER,
+ "[%s:%d] expr_matcher module: build bool matcher failed",
+ __FUNCTION__, __LINE__);
+ bm_ret = -1;
+ }
+ FREE(bool_exprs);
+
+ matcher->bool_match_buffs = ALLOC(struct bool_expr_match *, n_thread);
+ for (size_t i = 0; i < n_thread; i++) {
+ matcher->bool_match_buffs[i] = ALLOC(struct bool_expr_match, MAX_HIT_PATTERN_NUM);
+ }
+
+ void *lit_db = NULL;
+ if (lit_compile_data != NULL) {
+ ret = db_ops[engine_type].build_db(&lit_db, lit_compile_data, logger);
+ if (ret < 0) {
+ bm_ret = -1;
+ }
+ db_ops[engine_type].compile_data_free(lit_compile_data);
+ }
+
+ if (lit_db != NULL) {
+ matcher->lit_runtime = engine_ops[engine_type].engine_new(rules, n_rule, pat_attr,
+ lit_db, n_thread, logger);
+ if (NULL == matcher->lit_runtime) {
+ log_fatal(logger, MODULE_EXPR_MATCHER,
+ "[%s:%d]expr_matcher new lit runtime failed.",
+ __FUNCTION__, __LINE__);
+ bm_ret = -1;
+ }
+ }
+
+ void *regex_db = NULL;
+ if (regex_compile_data != NULL) {
+ ret = hs_build_regex_db(&regex_db, regex_compile_data, logger);
+ if (ret < 0) {
+ bm_ret = -1;
+ }
+ hs_compile_data_free(regex_compile_data);
+ }
+
+ if (regex_db != NULL) {
+ matcher->regex_runtime = hs_regex_engine_new(rules, n_rule, pat_attr,
+ regex_db, n_thread, logger);
+ if (NULL == matcher->regex_runtime) {
+ log_fatal(logger, MODULE_EXPR_MATCHER,
+ "[%s:%d]expr_matcher new regex runtime failed.",
+ __FUNCTION__, __LINE__);
+ bm_ret = -1;
+ }
+ }
+
+ if (bm_ret < 0) {
+ goto error;
+ }
+
return matcher;
+error:
+ expr_matcher_free(matcher);
+ return NULL;
}
-void expr_matcher_free(struct expr_matcher *matcher)
+/*
+ * qsort() comparator for arrays of unsigned long long pattern ids.
+ *
+ * The two values are compared directly. Subtracting them and storing the
+ * result in a signed long long gives the wrong sign once the ids are more
+ * than LLONG_MAX apart (e.g. 0 vs 0xFFFFFFFFFFFFFFFF).
+ *
+ * @retval -1, 0 or 1 when *a is smaller than, equal to or greater than *b
+ */
+static inline int compare_pattern_id(const void *a, const void *b)
 {
-    if (NULL == matcher) {
-        return;
+    unsigned long long lhs = *(const unsigned long long *)a;
+    unsigned long long rhs = *(const unsigned long long *)b;
+
+    if (lhs < rhs) {
+        return -1;
+    } else if (lhs > rhs) {
+        return 1;
+    } else {
+        return 0;
+    }
+}
+
+/*
+ * Turn the pattern ids hit by the scan engines into expression results.
+ *
+ * hit_pattern_ids is sorted in place and de-duplicated, the unique ids are
+ * fed to the bool matcher, and the matched expressions are copied into
+ * result_array (at most array_size of them).
+ *
+ * @param bm:              bool matcher built from the expressions
+ * @param match_buff:      scratch buffer for bool_matcher_match()
+ * @param buff_size:       number of elements in match_buff
+ * @param hit_pattern_ids: hit pattern ids, reordered by this call
+ * @param n_hit_pattern:   number of ids in hit_pattern_ids
+ * @param result_array:    caller-allocated array receiving the results
+ * @param array_size:      number of elements in result_array
+ * @param n_hit_result:    receives the number of results stored; it is left
+ *                         untouched when the bool matcher reports an error
+ *
+ * @retval the negative value returned by bool_matcher_match() on failure,
+ *         otherwise the number of results stored in result_array
+ */
+static int expr_matcher_bool_matcher_match(struct bool_matcher *bm, struct bool_expr_match *match_buff,
+                                           size_t buff_size, unsigned long long *hit_pattern_ids,
+                                           size_t n_hit_pattern, struct expr_scan_result *result_array,
+                                           size_t array_size, size_t *n_hit_result)
+{
+
+    unsigned long long prev_pat_id = 0xFFFFFFFFFFFFFFFF;
+    unsigned long long tmp_pat_id = 0;
+    /* keep at least one element so the array never has zero length */
+    unsigned long long unique_pat_ids[n_hit_pattern > 0 ? n_hit_pattern : 1];
+    size_t n_unique_pat_id = 0;
+
+    /* the element size is that of an id, not of a pointer to an id */
+    qsort(hit_pattern_ids, n_hit_pattern, sizeof(hit_pattern_ids[0]), compare_pattern_id);
+
+    /* ids are sorted, so duplicates are adjacent and one pass removes them */
+    for (size_t i = 0; i < n_hit_pattern; i++) {
+        tmp_pat_id = hit_pattern_ids[i];
+        if (tmp_pat_id != prev_pat_id) {
+            unique_pat_ids[n_unique_pat_id++] = tmp_pat_id;
+            prev_pat_id = tmp_pat_id;
+        }
     }
-    if (matcher->engine != NULL) {
-        expr_engine_ops[matcher->engine_type].engine_free(matcher->engine);
-        matcher->engine = NULL;
+    /* honour the caller supplied buffer size instead of a hard-coded constant */
+    int bool_matcher_ret = bool_matcher_match(bm, unique_pat_ids, n_unique_pat_id,
+                                              match_buff, buff_size);
+    if (bool_matcher_ret < 0) {
+        goto next;
     }
-
-    FREE(matcher);
+
+    if (bool_matcher_ret > (int)array_size) {
+        bool_matcher_ret = array_size;
+    }
+
+    for (int index = 0; index < bool_matcher_ret; index++) {
+        result_array[index].rule_id = match_buff[index].expr_id;
+        result_array[index].user_tag = match_buff[index].user_tag;
+    }
+    *n_hit_result = bool_matcher_ret;
+
+next:
+    return bool_matcher_ret;
 }
-int expr_matcher_match(struct expr_matcher *matcher, int thread_id, const char *scan_data,
- size_t data_len, struct expr_scan_result *result_array,
- size_t n_result_array, size_t *n_hit_results)
+int expr_matcher_match(struct expr_matcher *matcher, int thread_id,
+ const char *data, size_t data_len,
+ struct expr_scan_result *result_array,
+ size_t array_size, size_t *n_hit_result)
{
- if (NULL == matcher || thread_id < 0 || NULL == scan_data || 0 == data_len
- || NULL == result_array || 0 == n_result_array || NULL == n_hit_results) {
+ if (NULL == matcher || thread_id < 0 || NULL == data || 0 == data_len
+ || NULL == result_array || 0 == array_size || NULL == n_hit_result) {
+ return -1;
+ }
+
+ int err_count = 0;
+ unsigned long long lit_pattern_ids[MAX_HIT_PATTERN_NUM];
+ unsigned long long regex_pattern_ids[MAX_HIT_PATTERN_NUM];
+ size_t n_lit_pattern = 0;
+ size_t n_regex_pattern = 0;
+ size_t n_pattern = 0;
+
+ int ret = engine_ops[matcher->engine_type].engine_scan(matcher->lit_runtime, thread_id,
+ data, data_len, lit_pattern_ids,
+ MAX_HIT_PATTERN_NUM, &n_lit_pattern);
+ if (ret < 0) {
+ err_count++;
+ }
+
+ ret = hs_regex_engine_scan(matcher->regex_runtime, thread_id, data, data_len,
+ regex_pattern_ids, MAX_HIT_PATTERN_NUM, &n_regex_pattern);
+ if (ret < 0) {
+ err_count++;
+ }
+
+ if (err_count == 2) {
return -1;
}
- return expr_engine_ops[matcher->engine_type].engine_scan(matcher->engine, thread_id,
- scan_data, data_len, result_array,
- n_result_array, n_hit_results);
+ n_pattern = n_lit_pattern + n_regex_pattern;
+ if (n_pattern > MAX_HIT_PATTERN_NUM) {
+ n_pattern = MAX_HIT_PATTERN_NUM;
+ }
+
+ size_t j = 0;
+ for (size_t i = n_lit_pattern; i < n_pattern; i++, j++) {
+ lit_pattern_ids[i] = regex_pattern_ids[j];
+ }
+
+ struct bool_expr_match *match_buff = matcher->bool_match_buffs[thread_id];
+
+ return expr_matcher_bool_matcher_match(matcher->bm, match_buff, MAX_HIT_PATTERN_NUM,
+ lit_pattern_ids, n_pattern, result_array,
+ array_size, n_hit_result);
}
struct expr_matcher_stream *
@@ -190,34 +497,89 @@ expr_matcher_stream_open(struct expr_matcher *matcher, int thread_id)
return NULL;
}
- void *s_handle = expr_engine_ops[matcher->engine_type].engine_stream_open(matcher->engine,
- thread_id);
- if (NULL == s_handle) {
+ size_t err_count = 0;
+ void *lit_stream = engine_ops[matcher->engine_type].stream_open(matcher->lit_runtime,
+ thread_id);
+ if (NULL == lit_stream && matcher->lit_runtime != NULL) {
log_fatal(matcher->logger, MODULE_EXPR_MATCHER,
- "[%s:%d] expr_matcher engine_stream_open failed.",
+ "[%s:%d] expr_matcher open lit engine stream failed.",
__FUNCTION__, __LINE__);
+ err_count++;
+ }
+
+ void *regex_stream = hs_regex_stream_open(matcher->regex_runtime, thread_id);
+ if (NULL == regex_stream && matcher->regex_runtime != NULL) {
+ engine_ops[matcher->engine_type].stream_close(lit_stream);
+ log_fatal(matcher->logger, MODULE_EXPR_MATCHER,
+ "[%s:%d] expr_matcher open regex engine stream failed.",
+ __FUNCTION__, __LINE__);
+ err_count++;
+ }
+
+ if (err_count == 2) {
return NULL;
}
struct expr_matcher_stream *stream = ALLOC(struct expr_matcher_stream, 1);
stream->engine_type = matcher->engine_type;
- stream->handle = s_handle;
-
+ stream->thread_id = thread_id;
+ stream->lit_stream = lit_stream;
+ stream->regex_stream = regex_stream;
+ stream->ref_matcher = matcher;
+
return stream;
}
-int expr_matcher_stream_match(struct expr_matcher_stream *stream, const char *scan_data,
- size_t data_len, struct expr_scan_result *result_array,
- size_t n_result_array, size_t *n_hit_results)
+int expr_matcher_stream_match(struct expr_matcher_stream *stream,
+ const char *data, size_t data_len,
+ struct expr_scan_result *result_array,
+ size_t array_size, size_t *n_hit_result)
{
- if (NULL == stream || NULL == scan_data || 0 == data_len || NULL == result_array
- || 0 == n_result_array || NULL == n_hit_results) {
+ if (NULL == stream || NULL == data || 0 == data_len || NULL == result_array
+ || 0 == array_size || NULL == n_hit_result) {
+ return -1;
+ }
+
+ int err_count = 0;
+ unsigned long long lit_pattern_ids[MAX_HIT_PATTERN_NUM];
+ unsigned long long regex_pattern_ids[MAX_HIT_PATTERN_NUM];
+ size_t n_lit_pattern = 0;
+ size_t n_regex_pattern = 0;
+ size_t n_pattern = 0;
+
+ int ret = engine_ops[stream->engine_type].scan_stream(stream->lit_stream, data, data_len,
+ lit_pattern_ids, MAX_HIT_PATTERN_NUM,
+ &n_lit_pattern);
+ if (ret < 0) {
+ err_count++;
+ }
+
+ ret = hs_regex_stream_scan(stream->regex_stream, data, data_len, regex_pattern_ids,
+ MAX_HIT_PATTERN_NUM, &n_regex_pattern);
+ if (ret < 0) {
+ err_count++;
+ }
+
+ if (err_count == 2) {
return -1;
}
- return expr_engine_ops[stream->engine_type].engine_scan_stream(stream->handle, scan_data,
- data_len, result_array,
- n_result_array, n_hit_results);
+ n_pattern = n_lit_pattern + n_regex_pattern;
+ if (n_pattern > MAX_HIT_PATTERN_NUM) {
+ n_pattern = MAX_HIT_PATTERN_NUM;
+ }
+
+ size_t j = 0;
+ for (size_t i = n_lit_pattern; i < n_pattern; i++, j++) {
+ lit_pattern_ids[i] = regex_pattern_ids[j];
+ }
+
+ struct expr_matcher *matcher = stream->ref_matcher;
+ struct bool_expr_match *match_buff = matcher->bool_match_buffs[stream->thread_id];
+
+ return expr_matcher_bool_matcher_match(matcher->bm, match_buff, MAX_HIT_PATTERN_NUM,
+ lit_pattern_ids, n_pattern, result_array,
+ array_size, n_hit_result);
}
void expr_matcher_stream_close(struct expr_matcher_stream *stream)
@@ -226,10 +588,15 @@ void expr_matcher_stream_close(struct expr_matcher_stream *stream)
return;
}
- if (stream->handle != NULL) {
- expr_engine_ops[stream->engine_type].engine_stream_close(stream->handle);
- stream->handle = NULL;
+ if (stream->lit_stream != NULL) {
+ engine_ops[stream->engine_type].stream_close(stream->lit_stream);
+ stream->lit_stream = NULL;
}
+ if (stream->regex_stream != NULL) {
+ hs_regex_stream_close(stream->regex_stream);
+ stream->regex_stream = NULL;
+ }
+
FREE(stream);
} \ No newline at end of file
diff --git a/scanner/expr_matcher/expr_matcher.h b/scanner/expr_matcher/expr_matcher.h
index fb61854..75dbe94 100644
--- a/scanner/expr_matcher/expr_matcher.h
+++ b/scanner/expr_matcher/expr_matcher.h
@@ -21,12 +21,11 @@ extern "C"
#include "log/log.h"
#define MAX_EXPR_PATTERN_NUM 8 /* each AND expression consists of at most MAX_EXPR_PATTERN_NUM patterns */
-#define MAX_HIT_EXPR_NUM 1024
enum expr_engine_type {
- EXPR_ENGINE_TYPE_HS = 0, /* default engine */
+ EXPR_ENGINE_TYPE_HS = 0,
EXPR_ENGINE_TYPE_RS,
- EXPR_ENGINE_TYPE_MAX
+ EXPR_ENGINE_TYPE_AUTO
};
enum expr_pattern_type {
@@ -73,7 +72,7 @@ struct expr_scan_result {
/* logic AND expression, such as (rule1 & rule2) */
struct expr_rule {
long long expr_id; /* AND expression ID */
- size_t n_patterns;
+ size_t n_patterns;
struct expr_pattern patterns[MAX_EXPR_PATTERN_NUM];
void *tag; /* user defined data, return with hit result */
};
@@ -89,25 +88,26 @@ int expr_matcher_verify_regex_expression(const char *regex_expr,
* @param n_thread: the number of scan threads that will use this matcher
*
*/
-struct expr_matcher *
-expr_matcher_new(struct expr_rule *rules, size_t n_rule, enum expr_engine_type type,
- size_t n_worker_thread, struct log_handle *logger);
+struct expr_matcher *expr_matcher_new(struct expr_rule *rules, size_t n_rule,
+ enum expr_engine_type type, size_t n_thread,
+ struct log_handle *logger);
void expr_matcher_free(struct expr_matcher *matcher);
/**
* @brief scan input data to match logic AND expression, return all matched expr_id
*
- * @param matcher: expr_matcher instance obtained by expr_matcher_new()
- * @param thread_id: the thread_id of caller
- * @param scan_data: data to be scanned
- * @param data_len: the length of data to be scanned
- * @param result_array: the array to store hit expr_id which allocated by caller
+ * @param matcher: expr_matcher instance obtained by expr_matcher_new()
+ * @param thread_id: the thread_id of caller
+ * @param data: data to be scanned
+ * @param data_len: the length of data to be scanned
+ * @param result_array: the array to store hit expr_id which allocated by caller
* @param array_size: number of elements in result_array
*/
-int expr_matcher_match(struct expr_matcher *matcher, int thread_id, const char *scan_data,
- size_t data_len, struct expr_scan_result *result_array,
- size_t n_result_array, size_t *n_hit_results);
+int expr_matcher_match(struct expr_matcher *matcher, int thread_id,
+ const char *data, size_t data_len,
+ struct expr_scan_result *result_array,
+ size_t array_size, size_t *n_hit_result);
/**
* @brief
@@ -118,9 +118,10 @@ expr_matcher_stream_open(struct expr_matcher *matcher, int thread_id);
/**
* @brief
*/
-int expr_matcher_stream_match(struct expr_matcher_stream *stream, const char *scan_data,
- size_t data_len, struct expr_scan_result *result_array,
- size_t n_result_array, size_t *n_hit_results);
+int expr_matcher_stream_match(struct expr_matcher_stream *stream,
+ const char *data, size_t data_len,
+ struct expr_scan_result *result_array,
+ size_t array_size, size_t *n_hit_result);
/**
* @brief
diff --git a/scanner/expr_matcher/expr_matcher_inc.h b/scanner/expr_matcher/expr_matcher_inc.h
new file mode 100644
index 0000000..57782ed
--- /dev/null
+++ b/scanner/expr_matcher/expr_matcher_inc.h
@@ -0,0 +1,47 @@
+/*
+**********************************************************************************************
+* File: expr_matcher_inc.h
+* Description: for expr matcher internal use only
+* Authors: Liu wentan <[email protected]>
+* Date: 2023-06-30
+* Copyright: (c) Since 2023 Geedge Networks, Ltd. All rights reserved.
+***********************************************************************************************
+*/
+
+#ifndef _EXPR_MATCHER_INC_H_
+#define _EXPR_MATCHER_INC_H_
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#include <stddef.h>
+#include "uthash/utarray.h"
+#include "expr_matcher.h"
+
+#define MAX_HIT_PATTERN_NUM 1024
+
+struct pattern_offset {
+ long long start;
+ long long end;
+};
+
+struct pattern_attribute {
+ long long pattern_id;
+ enum expr_match_mode match_mode;
+ struct pattern_offset offset;
+ size_t pattern_len;
+};
+
+struct matched_pattern {
+ UT_array *pattern_ids;
+ struct pattern_attribute *ref_pat_attr;
+ size_t scan_data_len;
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/maat_api.c b/src/maat_api.c
index 923f5c0..5c17e96 100644
--- a/src/maat_api.c
+++ b/src/maat_api.c
@@ -76,7 +76,7 @@ struct maat_options* maat_options_new(void)
options->rule_update_checking_interval_ms = 1 * 1000;
options->gc_timeout_ms = 10 * 1000;
options->input_mode = DATA_SOURCE_NONE;
- options->expr_engine = MAAT_EXPR_ENGINE_HS;
+ options->expr_engine = MAAT_EXPR_ENGINE_AUTO;
options->log_level = 0;
return options;
@@ -265,7 +265,9 @@ int maat_options_set_expr_engine(struct maat_options *opts,
enum maat_expr_engine expr_engine)
{
if (NULL == opts ||
- (expr_engine != MAAT_EXPR_ENGINE_HS && expr_engine != MAAT_EXPR_ENGINE_RS)) {
+ (expr_engine != MAAT_EXPR_ENGINE_HS &&
+ expr_engine != MAAT_EXPR_ENGINE_RS &&
+ expr_engine != MAAT_EXPR_ENGINE_AUTO)) {
return -1;
}
diff --git a/src/maat_expr.c b/src/maat_expr.c
index 2da71a2..db1592d 100644
--- a/src/maat_expr.c
+++ b/src/maat_expr.c
@@ -18,6 +18,7 @@
#include "maat_kv.h"
#include "maat_limits.h"
#include "rcu_hash.h"
+#include "maat.h"
#include "maat_rule.h"
#include "maat_compile.h"
#include "maat_group.h"
@@ -26,6 +27,12 @@
#define MODULE_EXPR module_name_str("maat.expr")
+/*
+ When the engine type is EXPR_ENGINE_TYPE_AUTO, hyperscan (EXPR_ENGINE_TYPE_HS) is used if the
+ number of literal rules is at most 50000; otherwise rulescan (EXPR_ENGINE_TYPE_RS) is used.
+*/
+#define ENGINE_TYPE_SWITCH_THRESHOLD 50000
+
struct expr_schema {
int item_id_column;
int group_id_column;
@@ -35,7 +42,7 @@ struct expr_schema {
int match_method_column;
int is_hexbin_column;
int table_id;
- int expr_engine;
+ enum maat_expr_engine engine_type;
struct table_manager *ref_tbl_mgr;
};
@@ -80,7 +87,7 @@ struct expr_runtime {
struct log_handle *logger;
struct maat_garbage_bin *ref_garbage_bin;
- enum maat_expr_engine expr_engine;
+ enum expr_engine_type engine_type;
int district_num;
struct maat_kv_store *district_map;
struct maat_kv_store *tmp_district_map;
@@ -328,7 +335,7 @@ void *expr_schema_new(cJSON *json, struct table_manager *tbl_mgr,
{
char table_type[NAME_MAX] = {0};
struct expr_schema *expr_schema = ALLOC(struct expr_schema, 1);
- expr_schema->expr_engine = EXPR_ENGINE_TYPE_MAX;
+ expr_schema->engine_type = MAAT_EXPR_ENGINE_AUTO;
cJSON *custom_item = NULL;
cJSON *item = cJSON_GetObjectItem(json, "table_id");
@@ -348,9 +355,9 @@ void *expr_schema_new(cJSON *json, struct table_manager *tbl_mgr,
item = cJSON_GetObjectItem(json, "expr_engine");
if (item != NULL && item->type == cJSON_String) {
if (strcmp(item->valuestring, "hyperscan") == 0) {
- expr_schema->expr_engine = EXPR_ENGINE_TYPE_HS;
+ expr_schema->engine_type = MAAT_EXPR_ENGINE_HS;
} else if (strcmp(item->valuestring, "rulescan") == 0) {
- expr_schema->expr_engine = EXPR_ENGINE_TYPE_RS;
+ expr_schema->engine_type = MAAT_EXPR_ENGINE_RS;
} else {
log_fatal(logger, MODULE_EXPR,
"[%s:%d] expr table:<%s> schema has invalid expr_engine",
@@ -499,12 +506,7 @@ void *expr_runtime_new(void *expr_schema, size_t max_thread_num,
expr_rt->ref_garbage_bin = garbage_bin;
expr_rt->logger = logger;
expr_rt->district_map = maat_kv_store_new();
-
- if (schema->expr_engine != EXPR_ENGINE_TYPE_MAX) {
- expr_rt->expr_engine = schema->expr_engine;
- } else {
- expr_rt->expr_engine = table_manager_get_expr_engine(schema->ref_tbl_mgr);
- }
+ expr_rt->engine_type = schema->engine_type;
expr_rt->hit_cnt = alignment_int64_array_alloc(max_thread_num);
expr_rt->scan_cnt = alignment_int64_array_alloc(max_thread_num);
@@ -842,6 +844,18 @@ static void garbage_expr_matcher_free(void *expr_matcher, void *arg)
expr_matcher_free(matcher);
}
+const char *expr_engine_int2str(enum expr_engine_type type)
+{
+ switch (type) {
+ case EXPR_ENGINE_TYPE_HS:
+ return "hyperscan";
+ case EXPR_ENGINE_TYPE_RS:
+ return "rulescan";
+ default:
+ return "unknown";
+ }
+}
+
int expr_runtime_commit(void *expr_runtime, const char *table_name,
long long maat_rt_version)
{
@@ -867,6 +881,7 @@ int expr_runtime_commit(void *expr_runtime, const char *table_name,
int ret = 0;
size_t i = 0;
size_t real_rule_cnt = 0;
+ size_t real_lit_rule_cnt = 0;
size_t real_regex_rule_cnt = 0;
struct expr_rule *rules = NULL;
void **ex_data_array = NULL;
@@ -886,25 +901,30 @@ int expr_runtime_commit(void *expr_runtime, const char *table_name,
if (expr_item->expr_type == EXPR_TYPE_REGEX) {
real_regex_rule_cnt++;
+ } else {
+ real_lit_rule_cnt++;
}
}
}
+ if (expr_rt->engine_type == EXPR_ENGINE_TYPE_AUTO) {
+ if (real_lit_rule_cnt <= ENGINE_TYPE_SWITCH_THRESHOLD) {
+ expr_rt->engine_type = EXPR_ENGINE_TYPE_HS;
+ } else {
+ expr_rt->engine_type = EXPR_ENGINE_TYPE_RS;
+ }
+ }
+
struct expr_matcher *new_matcher = NULL;
struct expr_matcher *old_matcher = NULL;
if (rule_cnt > 0) {
- enum expr_engine_type engine_type = EXPR_ENGINE_TYPE_HS;
- if (expr_rt->expr_engine == MAAT_EXPR_ENGINE_RS) {
- engine_type = EXPR_ENGINE_TYPE_RS;
- }
-
struct timespec start, end;
clock_gettime(CLOCK_MONOTONIC, &start);
- new_matcher = expr_matcher_new(rules, real_rule_cnt, engine_type,
+ new_matcher = expr_matcher_new(rules, real_rule_cnt, expr_rt->engine_type,
expr_rt->n_worker_thread, expr_rt->logger);
clock_gettime(CLOCK_MONOTONIC, &end);
- long long time_elapse_ms = (end.tv_sec - start.tv_sec) * 1000 +
+ long long time_elapse_ms = (end.tv_sec - start.tv_sec) * 1000 +
(end.tv_nsec - start.tv_nsec) / 1000000;
if (NULL == new_matcher) {
@@ -914,10 +934,10 @@ int expr_runtime_commit(void *expr_runtime, const char *table_name,
ret = -1;
} else {
log_info(expr_rt->logger, MODULE_EXPR,
- "table[%s] has %zu rules, commit %zu expr rules(regex rules:%zu) "
- "and rebuild expr_matcher(%s) completed, version:%lld, consume:%lldms", table_name, rule_cnt,
- real_rule_cnt, real_regex_rule_cnt, engine_type == EXPR_ENGINE_TYPE_HS ? "hyperscan" : "rulescan",
- maat_rt_version, time_elapse_ms);
+ "table[%s] has %zu rules, commit %zu expr rules(literal_rules:%zu regex_rules:%zu)"
+ " and rebuild expr_matcher(%s) completed, version:%lld, consume:%lldms",
+ table_name, rule_cnt, real_rule_cnt, real_lit_rule_cnt, real_regex_rule_cnt,
+ expr_engine_int2str(expr_rt->engine_type), maat_rt_version, time_elapse_ms);
}
}
diff --git a/src/maat_table.c b/src/maat_table.c
index ea27b77..d6d7286 100644
--- a/src/maat_table.c
+++ b/src/maat_table.c
@@ -28,6 +28,7 @@
#include "maat_fqdn_plugin.h"
#include "maat_interval.h"
#include "maat_virtual.h"
+#include "expr_matcher/expr_matcher.h"
#define MODULE_TABLE module_name_str("maat.table")
@@ -48,7 +49,7 @@ struct table_manager {
struct rule_tag *accept_tags;
size_t n_accept_tag;
- enum maat_expr_engine expr_engine;
+ enum expr_engine_type engine_type;
int default_compile_table_id;
int g2g_table_id;
struct maat_kv_store *tbl_name2id_map;
@@ -750,7 +751,7 @@ int maat_default_compile_table_id(cJSON *json, struct log_handle *logger)
struct table_manager *
table_manager_create(const char *table_info_path, const char *accept_tags,
- enum maat_expr_engine expr_engine, struct maat_garbage_bin *garbage_bin,
+ enum maat_expr_engine engine_type, struct maat_garbage_bin *garbage_bin,
struct log_handle *logger)
{
if (NULL == table_info_path) {
@@ -793,7 +794,7 @@ table_manager_create(const char *table_info_path, const char *accept_tags,
tbl_mgr->logger = logger;
tbl_mgr->tbl_name2id_map = maat_kv_store_new();
tbl_mgr->conj_tbl_name2id_map = maat_kv_store_new();
- tbl_mgr->expr_engine = expr_engine;
+ tbl_mgr->engine_type = engine_type;
tbl_mgr->ref_garbage_bin = garbage_bin;
ret = register_tbl_name2id(tbl_mgr->tbl_name2id_map, root, table_info_path, logger);
@@ -1137,10 +1138,10 @@ int table_manager_get_valid_column(struct table_manager *tbl_mgr, int table_id)
enum maat_expr_engine table_manager_get_expr_engine(struct table_manager *tbl_mgr)
{
if (NULL == tbl_mgr) {
- return MAAT_EXPR_ENGINE_HS;
+ return EXPR_ENGINE_TYPE_HS;
}
- return tbl_mgr->expr_engine;
+ return tbl_mgr->engine_type;
}
size_t table_manager_accept_tags_count(struct table_manager *tbl_mgr)
diff --git a/test/expr_matcher_gtest.cpp b/test/expr_matcher_gtest.cpp
index bb70306..1f58fc8 100644
--- a/test/expr_matcher_gtest.cpp
+++ b/test/expr_matcher_gtest.cpp
@@ -1320,7 +1320,7 @@ int main(int argc, char **argv)
{
int ret = 0;
::testing::InitGoogleTest(&argc, argv);
- g_logger = log_handle_create("./adapter_hs_gtest.log", 0);
+ g_logger = log_handle_create("./expr_matcher_gtest.log", 0);
ret = RUN_ALL_TESTS();
diff --git a/test/maat_framework_gtest.cpp b/test/maat_framework_gtest.cpp
index 0e1b797..54b5cdb 100644
--- a/test/maat_framework_gtest.cpp
+++ b/test/maat_framework_gtest.cpp
@@ -787,7 +787,7 @@ protected:
maat_options_set_logger(opts, "./maat_framework_gtest.log", LOG_LEVEL_INFO);
maat_options_set_accept_tags(opts, accept_tags);
maat_options_set_hit_path_enabled(opts);
- //maat_options_set_expr_engine(opts, MAAT_EXPR_ENGINE_HS); //default
+ maat_options_set_expr_engine(opts, MAAT_EXPR_ENGINE_HS);
_shared_maat_inst = maat_new(opts, table_info_path);
maat_options_free(opts);