summary | refs | log | tree | commit | diff
path: root/scanner
diff options
context:
space:
mode:
author: liuwentan <[email protected]> 2023-08-10 16:10:50 +0800
committer: liuwentan <[email protected]> 2023-08-10 16:10:50 +0800
commit: 42f44802715b8d71ac02fd2363f5bafc7dba8581 (patch)
tree: 45abb9380554f8227974b61f4e758242ce71ef14 /scanner
parent: fb0cb5405d8236b23b5866952eda99e54d25aa5b (diff)
[FEATURE]expr_matcher support dual engine(hyperscan & rulescan) & benchmark
Diffstat (limited to 'scanner')
-rw-r--r-- scanner/CMakeLists.txt 5
-rw-r--r-- scanner/adapter_hs/adapter_hs.h 134
-rw-r--r-- scanner/expr_matcher/adapter_hs/adapter_hs.cpp (renamed from scanner/adapter_hs/adapter_hs.cpp) 240
-rw-r--r-- scanner/expr_matcher/adapter_hs/adapter_hs.h 75
-rw-r--r-- scanner/expr_matcher/adapter_rs/adapter_rs.cpp 708
-rw-r--r-- scanner/expr_matcher/adapter_rs/adapter_rs.h 78
-rw-r--r-- scanner/expr_matcher/expr_matcher.cpp 235
-rw-r--r-- scanner/expr_matcher/expr_matcher.h 134
8 files changed, 1337 insertions, 272 deletions
diff --git a/scanner/CMakeLists.txt b/scanner/CMakeLists.txt
index 429755e..7d45154 100644
--- a/scanner/CMakeLists.txt
+++ b/scanner/CMakeLists.txt
@@ -7,8 +7,9 @@ include_directories(${PROJECT_SOURCE_DIR}/src/inc_internal)
add_subdirectory(ip_matcher/IntervalIndex)
-add_library(adapter-static adapter_hs/adapter_hs.cpp bool_matcher/bool_matcher.cpp
+add_library(adapter-static bool_matcher/bool_matcher.cpp expr_matcher/expr_matcher.cpp
+ expr_matcher/adapter_hs/adapter_hs.cpp expr_matcher/adapter_rs/adapter_rs.cpp
fqdn_engine/fqdn_engine.cpp ip_matcher/ip_matcher.cpp ip_matcher/ipv4_match.cpp
ip_matcher/ipv6_match.cpp flag_matcher/flag_matcher.cpp interval_matcher/cgranges.c
interval_matcher/interval_matcher.cpp)
-target_link_libraries(adapter-static hyperscan_static hyperscan_runtime_static interval_index_static) \ No newline at end of file
+target_link_libraries(adapter-static hyperscan_static hyperscan_runtime_static rulescan_static interval_index_static) \ No newline at end of file
diff --git a/scanner/adapter_hs/adapter_hs.h b/scanner/adapter_hs/adapter_hs.h
deleted file mode 100644
index 672650a..0000000
--- a/scanner/adapter_hs/adapter_hs.h
+++ /dev/null
@@ -1,134 +0,0 @@
-/*
-**********************************************************************************************
-* File: adapter_hs.h
-* Description: wrapper for raw hyperscan
-* Authors: Liu WenTan <[email protected]>
-* Date: 2022-10-31
-* Copyright: (c) 2018-2022 Geedge Networks, Inc. All rights reserved.
-***********************************************************************************************
-*/
-
-#ifndef _ADAPTER_HS_H_
-#define _ADAPTER_HS_H_
-
-#ifdef __cplusplus
-extern "C"
-{
-#endif
-
-#include <stddef.h>
-#include <stdint.h>
-
-#include "log/log.h"
-
-#define MAX_EXPR_PATTERN_NUM 8
-
-struct adapter_hs;
-
-/* match method */
-enum hs_match_mode {
- HS_MATCH_MODE_INVALID = -1,
- HS_MATCH_MODE_EXACTLY = 1, /* scan data must match pattern exactly */
- HS_MATCH_MODE_PREFIX, /* pattern must in the head of scan_data */
- HS_MATCH_MODE_SUFFIX, /* pattern must in the end of scan_data */
- HS_MATCH_MODE_SUB /* pattern must in the range[l_offset, r_offset] of scan_data */
-};
-
-enum hs_pattern_type {
- HS_PATTERN_TYPE_STR = 0, /* pure literal string */
- HS_PATTERN_TYPE_REG /* regex expression */
-};
-
-enum hs_case_sensitive {
- HS_CASE_SENSITIVE = 0,
- HS_CASE_INSENSITIVE
-};
-
-struct hs_scan_result {
- long long rule_id;
- void *user_tag;
-};
-
-struct hs_pattern {
- enum hs_case_sensitive case_sensitive;
- enum hs_match_mode match_mode;
- enum hs_pattern_type pattern_type;
-
- int is_hexbin; /* 1(yes) 0(no) */
-
- /*
- * just match in scan_data's range of [start_offset, end_offset], -1 means no limits
- * for example:
- * [-1, end_offset] means the pattern must in scan_data's [0 ~ start_offset]
- * [start_offset, -1] means the pattern must in scan_data's [start_offset ~ data_end]
- */
- int start_offset;
- int end_offset;
-
- /* start pointer of pattern */
- char *pat;
- /* pattern length */
- size_t pat_len;
-};
-
-/* logic AND expression, such as (pattern1 & pattern2) */
-struct expr_rule {
- long long expr_id;
- size_t n_patterns;
- struct hs_pattern patterns[MAX_EXPR_PATTERN_NUM];
- void *user_tag;
-};
-
-int adapter_hs_verify_regex_expression(const char *regex_expr,
- struct log_handle *logger);
-/**
- * @brief new adapter_hs instance
- *
- * @param rules: logic AND expression's array
- * @param n_rule: the number of logic AND expression's array
- * @param nr_worker_threads: the number of scan threads which will call adapter_hs_scan()
- *
- * @retval the pointer to adapter_hs instance
-*/
-struct adapter_hs *adapter_hs_new(struct expr_rule *rules, size_t n_rule,
- size_t n_worker_thread, struct log_handle *logger);
-
-/**
- * @brief scan input data to match logic AND expression, return all matched expr_id
- *
- * @param instance: adapter_hs instance obtained by adapter_hs_new()
- * @param thread_id: the thread_id of caller
- * @param data: data to be scanned
- * @param data_len: the length of data to be scanned
- * @param results: the array of expr_id
- * @param n_results: number of elements in array of expr_id
-*/
-int adapter_hs_scan(struct adapter_hs *hs_instance, int thread_id,
- const char *data, size_t data_len,
- struct hs_scan_result *results,
- size_t n_result, size_t *n_hit_result);
-
-/**
- * @brief destroy adapter_hs instance
- *
- * @param instance: adapter_hs instance obtained by adapter_hs_new()
-*/
-void adapter_hs_free(struct adapter_hs *instance);
-
-struct adapter_hs_stream;
-/**
- * @brief open adapter_hs stream after adapter_hs instance initialized for stream scan
- *
-*/
-struct adapter_hs_stream *adapter_hs_stream_open(struct adapter_hs *hs_instance, int thread_id);
-
-int adapter_hs_scan_stream(struct adapter_hs_stream *stream, const char *data, size_t data_len,
- struct hs_scan_result *results, size_t n_result, size_t *n_hit_result);
-
-void adapter_hs_stream_close(struct adapter_hs_stream *stream);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif \ No newline at end of file
diff --git a/scanner/adapter_hs/adapter_hs.cpp b/scanner/expr_matcher/adapter_hs/adapter_hs.cpp
index d201e11..4d57dcf 100644
--- a/scanner/adapter_hs/adapter_hs.cpp
+++ b/scanner/expr_matcher/adapter_hs/adapter_hs.cpp
@@ -1,10 +1,10 @@
/*
**********************************************************************************************
-* File: adapter_hs.cpp
-* Description:
-* Authors: Liu WenTan <[email protected]>
+* File: adapter_hs.cpp
+* Description:
+* Authors: Liu wentan <[email protected]>
* Date: 2022-10-31
-* Copyright: (c) 2018-2022 Geedge Networks, Inc. All rights reserved.
+* Copyright: (c) Since 2022 Geedge Networks, Ltd. All rights reserved.
***********************************************************************************************
*/
@@ -20,9 +20,8 @@
#include "uthash/utarray.h"
#include "uthash/uthash.h"
#include "maat_utils.h"
-#include "../bool_matcher/bool_matcher.h"
+#include "../../bool_matcher/bool_matcher.h"
-#define MAX_OFFSET_NUM 1024
#define MAX_HIT_PATTERN_NUM 512
pid_t hs_gettid()
@@ -41,6 +40,7 @@ static const char *hs_module_name_str(const char *name)
#define MODULE_ADAPTER_HS hs_module_name_str("maat.adapter_hs")
struct adpt_hs_compile_data {
+ enum expr_pattern_type pat_type;
unsigned int *ids;
unsigned int *flags;
char **patterns;
@@ -56,7 +56,6 @@ struct adapter_hs_scratch {
struct adapter_hs_stream {
int thread_id;
- size_t n_expr;
hs_stream_t *literal_stream;
hs_stream_t *regex_stream;
struct adapter_hs_runtime *ref_hs_rt;
@@ -91,7 +90,7 @@ struct pattern_offset {
struct pattern_attribute {
long long pattern_id;
- enum hs_match_mode match_mode;
+ enum expr_match_mode match_mode;
struct pattern_offset offset;
};
@@ -137,12 +136,12 @@ static int _hs_alloc_scratch(hs_database_t *db, hs_scratch_t **scratches,
static int adpt_hs_alloc_scratch(struct adapter_hs_runtime *hs_rt,
size_t n_worker_thread,
- enum hs_pattern_type pattern_type,
+ enum expr_pattern_type pattern_type,
struct log_handle *logger)
{
int ret = 0;
- if (pattern_type == HS_PATTERN_TYPE_STR) {
+ if (pattern_type == EXPR_PATTERN_TYPE_STR) {
hs_rt->scratch->literal_scratches = ALLOC(hs_scratch_t *, n_worker_thread);
ret = _hs_alloc_scratch(hs_rt->literal_db, hs_rt->scratch->literal_scratches,
n_worker_thread, logger);
@@ -200,7 +199,7 @@ static int adpt_hs_build_database(struct adapter_hs_runtime *hs_rt,
if (regex_cd != NULL) {
err = hs_compile_multi((const char *const *)regex_cd->patterns,
regex_cd->flags, regex_cd->ids, regex_cd->n_patterns,
- HS_MODE_STREAM | HS_MODE_SOM_HORIZON_SMALL,
+ HS_MODE_STREAM,
NULL, &hs_rt->regex_db, &compile_err);
if (err != HS_SUCCESS) {
if (compile_err) {
@@ -215,9 +214,11 @@ static int adpt_hs_build_database(struct adapter_hs_runtime *hs_rt,
return 0;
}
-static struct adpt_hs_compile_data *adpt_hs_compile_data_new(size_t n_patterns)
+static struct adpt_hs_compile_data *
+adpt_hs_compile_data_new(enum expr_pattern_type pat_type, size_t n_patterns)
{
struct adpt_hs_compile_data *hs_cd = ALLOC(struct adpt_hs_compile_data, 1);
+ hs_cd->pat_type = pat_type;
hs_cd->patterns = ALLOC(char *, n_patterns);
hs_cd->pattern_lens = ALLOC(size_t, n_patterns);
hs_cd->n_patterns = n_patterns;
@@ -263,8 +264,11 @@ static void populate_compile_data(struct adpt_hs_compile_data *compile_data,
compile_data->ids[index] = pattern_id;
/* set flags */
- compile_data->flags[index] |= HS_FLAG_SOM_LEFTMOST;
- if (case_sensitive == HS_CASE_INSENSITIVE) {
+ if (compile_data->pat_type == EXPR_PATTERN_TYPE_STR) {
+ compile_data->flags[index] |= HS_FLAG_SOM_LEFTMOST;
+ }
+
+ if (case_sensitive == EXPR_CASE_INSENSITIVE) {
compile_data->flags[index] |= HS_FLAG_CASELESS;
}
@@ -295,14 +299,14 @@ static struct bool_expr *bool_exprs_new(struct expr_rule *rules, size_t n_rule,
pattern_attr[pattern_index].pattern_id = pattern_index;
pattern_attr[pattern_index].match_mode = rules[i].patterns[j].match_mode;
- if (pattern_attr[pattern_index].match_mode == HS_MATCH_MODE_SUB ||
- pattern_attr[pattern_index].match_mode == HS_MATCH_MODE_EXACTLY) {
+ if (pattern_attr[pattern_index].match_mode == EXPR_MATCH_MODE_SUB ||
+ pattern_attr[pattern_index].match_mode == EXPR_MATCH_MODE_EXACTLY) {
pattern_attr[pattern_index].offset.start = rules[i].patterns[j].start_offset;
pattern_attr[pattern_index].offset.end = rules[i].patterns[j].end_offset;
}
/* literal pattern */
- if (rules[i].patterns[j].pattern_type == HS_PATTERN_TYPE_STR) {
+ if (rules[i].patterns[j].type == EXPR_PATTERN_TYPE_STR) {
populate_compile_data(literal_cd, literal_index, pattern_index,
rules[i].patterns[j].pat, rules[i].patterns[j].pat_len,
rules[i].patterns[j].case_sensitive);
@@ -321,7 +325,7 @@ static struct bool_expr *bool_exprs_new(struct expr_rule *rules, size_t n_rule,
bool_exprs[i].expr_id = rules[i].expr_id;
bool_exprs[i].item_num = rules[i].n_patterns;
- bool_exprs[i].user_tag = rules[i].user_tag;
+ bool_exprs[i].user_tag = rules[i].tag;
}
*n_pattern = pattern_index;
@@ -345,81 +349,43 @@ static int verify_regex_expression(const char *regex_str, struct log_handle *log
FREE(info);
hs_free_compile_error(error);
- return -1;
+ return 0;
}
if (info != NULL) {
FREE(info);
}
- return 0;
+ return 1;
}
int adapter_hs_verify_regex_expression(const char *regex_expr, struct log_handle *logger)
{
if (NULL == regex_expr) {
- return -1;
+ return 0;
}
return verify_regex_expression(regex_expr, logger);
}
-struct adapter_hs *adapter_hs_new(struct expr_rule *rules, size_t n_rule,
- size_t n_worker_thread, struct log_handle *logger)
+void *adapter_hs_new(struct expr_rule *rules, size_t n_rule,
+ size_t n_literal_pattern, size_t n_regex_pattern,
+ size_t n_worker_thread, struct log_handle *logger)
{
- if (0 == n_worker_thread || NULL == rules || 0 == n_rule) {
- log_error(logger, MODULE_ADAPTER_HS,
- "[%s:%d] input parameters illegal!", __FUNCTION__, __LINE__);
- return NULL;
- }
-
/* get the sum of pattern */
- size_t i = 0, j = 0;
- size_t literal_pattern_num = 0;
- size_t regex_pattern_num = 0;
- for (i = 0; i < n_rule; i++) {
- if (rules[i].n_patterns > MAX_EXPR_PATTERN_NUM) {
- log_error(logger, MODULE_ADAPTER_HS,
- "[%s:%d] the number of patterns in one expression "
- "should less than %d", __FUNCTION__, __LINE__,
- MAX_EXPR_PATTERN_NUM);
- return NULL;
- }
-
- for (j = 0; j < rules[i].n_patterns; j++) {
- /* pat_len should not 0 */
- if (0 == rules[i].patterns[j].pat_len) {
- log_error(logger, MODULE_ADAPTER_HS,
- "[%s:%d] expr pattern length should not 0",
- __FUNCTION__, __LINE__);
- return NULL;
- }
-
- if (rules[i].patterns[j].pattern_type == HS_PATTERN_TYPE_STR) {
- literal_pattern_num++;
- } else {
- regex_pattern_num++;
- }
- }
- }
-
- if (0 == literal_pattern_num && 0 == regex_pattern_num) {
- log_error(logger, MODULE_ADAPTER_HS,
- "[%s:%d] exprs has no valid pattern", __FUNCTION__, __LINE__);
- return NULL;
- }
-
+ size_t i = 0;
struct adpt_hs_compile_data *literal_cd = NULL;
struct adpt_hs_compile_data *regex_cd = NULL;
- if (literal_pattern_num > 0) {
- literal_cd = adpt_hs_compile_data_new(literal_pattern_num);
+
+ if (n_literal_pattern > 0) {
+ literal_cd = adpt_hs_compile_data_new(EXPR_PATTERN_TYPE_STR, n_literal_pattern);
}
- if (regex_pattern_num > 0) {
- regex_cd = adpt_hs_compile_data_new(regex_pattern_num);
+ if (n_regex_pattern > 0) {
+ regex_cd = adpt_hs_compile_data_new(EXPR_PATTERN_TYPE_REG, n_regex_pattern);
}
- size_t pattern_cnt = literal_pattern_num + regex_pattern_num;
+ size_t pattern_cnt = n_literal_pattern + n_regex_pattern;
struct adapter_hs *hs_inst = ALLOC(struct adapter_hs, 1);
hs_inst->hs_attr = ALLOC(struct pattern_attribute, pattern_cnt);
hs_inst->logger = logger;
@@ -478,21 +444,21 @@ struct adapter_hs *adapter_hs_new(struct expr_rule *rules, size_t n_rule,
n_worker_thread);
for (i = 0; i < n_worker_thread; i++) {
hs_inst->hs_rt->scratch->bool_match_buffs[i] = ALLOC(struct bool_expr_match,
- hs_inst->n_expr);
+ MAX_HIT_EXPR_NUM);
}
/* literal and regex scratch can't reuse */
- if (literal_pattern_num > 0) {
+ if (n_literal_pattern > 0) {
ret = adpt_hs_alloc_scratch(hs_inst->hs_rt, n_worker_thread,
- HS_PATTERN_TYPE_STR, logger);
+ EXPR_PATTERN_TYPE_STR, logger);
if (ret < 0) {
goto error;
}
}
- if (regex_pattern_num > 0) {
+ if (n_regex_pattern > 0) {
ret = adpt_hs_alloc_scratch(hs_inst->hs_rt, n_worker_thread,
- HS_PATTERN_TYPE_REG, logger);
+ EXPR_PATTERN_TYPE_REG, logger);
if (ret < 0) {
goto error;
}
@@ -500,7 +466,7 @@ struct adapter_hs *adapter_hs_new(struct expr_rule *rules, size_t n_rule,
hs_inst->hs_rt->streams = ALLOC(struct adapter_hs_stream *, n_worker_thread);
for (i = 0; i < n_worker_thread; i++) {
- hs_inst->hs_rt->streams[i] = adapter_hs_stream_open(hs_inst, i);
+ hs_inst->hs_rt->streams[i] = (struct adapter_hs_stream *)adapter_hs_stream_open(hs_inst, i);
}
return hs_inst;
@@ -509,13 +475,15 @@ error:
return NULL;
}
-void adapter_hs_free(struct adapter_hs *hs_inst)
+void adapter_hs_free(void *hs_instance)
{
- if (NULL == hs_inst) {
+ if (NULL == hs_instance) {
return;
}
+ struct adapter_hs *hs_inst = (struct adapter_hs *)hs_instance;
size_t i = 0;
+
if (hs_inst->hs_rt != NULL) {
if (hs_inst->hs_rt->literal_db != NULL) {
hs_free_database(hs_inst->hs_rt->literal_db);
@@ -625,12 +593,12 @@ static int matched_event_cb(unsigned int id, unsigned long long from,
int ret = 0;
struct pattern_attribute pat_attr = matched_pat->ref_hs_attr[id];
switch (pat_attr.match_mode) {
- case HS_MATCH_MODE_EXACTLY:
+ case EXPR_MATCH_MODE_EXACTLY:
if (0 == from && matched_pat->scan_data_len == to) {
ret = 1;
}
break;
- case HS_MATCH_MODE_SUB:
+ case EXPR_MATCH_MODE_SUB:
if (pat_attr.offset.start == -1 &&
pat_attr.offset.end == -1) {
ret = 1;
@@ -656,12 +624,12 @@ static int matched_event_cb(unsigned int id, unsigned long long from,
ret = 1;
}
break;
- case HS_MATCH_MODE_PREFIX:
+ case EXPR_MATCH_MODE_PREFIX:
if (0 == from) {
ret = 1;
}
break;
- case HS_MATCH_MODE_SUFFIX:
+ case EXPR_MATCH_MODE_SUFFIX:
if (to == matched_pat->scan_data_len) {
ret = 1;
}
@@ -678,43 +646,42 @@ static int matched_event_cb(unsigned int id, unsigned long long from,
return 0;
}
-UT_icd ut_pattern_id_icd = {sizeof(unsigned long long), NULL, NULL, NULL};
-struct adapter_hs_stream *
-adapter_hs_stream_open(struct adapter_hs *hs_instance, int thread_id)
+UT_icd ut_hs_pattern_id_icd = {sizeof(unsigned long long), NULL, NULL, NULL};
+void *adapter_hs_stream_open(void *hs_instance, int thread_id)
{
if (NULL == hs_instance || thread_id < 0) {
return NULL;
}
+ struct adapter_hs *hs_inst = (struct adapter_hs *)hs_instance;
struct adapter_hs_stream *hs_stream = ALLOC(struct adapter_hs_stream, 1);
hs_error_t err;
- hs_stream->logger = hs_instance->logger;
+ hs_stream->logger = hs_inst->logger;
hs_stream->thread_id = thread_id;
- hs_stream->n_expr = hs_instance->n_expr;
- hs_stream->ref_hs_rt = hs_instance->hs_rt;
+ hs_stream->ref_hs_rt = hs_inst->hs_rt;
hs_stream->matched_pat = ALLOC(struct matched_pattern, 1);
- hs_stream->matched_pat->ref_hs_attr = hs_instance->hs_attr;
- hs_stream->matched_pat->n_patterns = hs_instance->n_patterns;
- utarray_new(hs_stream->matched_pat->pattern_ids, &ut_pattern_id_icd);
+ hs_stream->matched_pat->ref_hs_attr = hs_inst->hs_attr;
+ hs_stream->matched_pat->n_patterns = hs_inst->n_patterns;
+ utarray_new(hs_stream->matched_pat->pattern_ids, &ut_hs_pattern_id_icd);
utarray_reserve(hs_stream->matched_pat->pattern_ids, MAX_HIT_PATTERN_NUM);
int err_count = 0;
- if (hs_instance->hs_rt->literal_db != NULL) {
- err = hs_open_stream(hs_instance->hs_rt->literal_db, 0,
+ if (hs_inst->hs_rt->literal_db != NULL) {
+ err = hs_open_stream(hs_inst->hs_rt->literal_db, 0,
&hs_stream->literal_stream);
if (err != HS_SUCCESS) {
- log_error(hs_instance->logger, MODULE_ADAPTER_HS,
+ log_error(hs_inst->logger, MODULE_ADAPTER_HS,
"hs_open_stream failed, hs err:%d", err);
err_count++;
}
}
- if (hs_instance->hs_rt->regex_db != NULL) {
- err = hs_open_stream(hs_instance->hs_rt->regex_db, 0,
+ if (hs_inst->hs_rt->regex_db != NULL) {
+ err = hs_open_stream(hs_inst->hs_rt->regex_db, 0,
&hs_stream->regex_stream);
if (err != HS_SUCCESS) {
- log_error(hs_instance->logger, MODULE_ADAPTER_HS,
+ log_error(hs_inst->logger, MODULE_ADAPTER_HS,
"hs_open_stream failed, hs err:%d", err);
err_count++;
}
@@ -740,36 +707,37 @@ error:
return NULL;
}
-void adapter_hs_stream_close(struct adapter_hs_stream *hs_stream)
+void adapter_hs_stream_close(void *hs_stream)
{
if (NULL == hs_stream) {
return;
}
- if (hs_stream->ref_hs_rt != NULL) {
- if (hs_stream->literal_stream != NULL) {
- hs_close_stream(hs_stream->literal_stream, NULL, NULL, NULL);
- hs_stream->literal_stream = NULL;
+ struct adapter_hs_stream *stream = (struct adapter_hs_stream *)hs_stream;
+ if (stream->ref_hs_rt != NULL) {
+ if (stream->literal_stream != NULL) {
+ hs_close_stream(stream->literal_stream, NULL, NULL, NULL);
+ stream->literal_stream = NULL;
}
- if (hs_stream->regex_stream != NULL) {
- hs_close_stream(hs_stream->regex_stream, NULL, NULL, NULL);
- hs_stream->regex_stream = NULL;
+ if (stream->regex_stream != NULL) {
+ hs_close_stream(stream->regex_stream, NULL, NULL, NULL);
+ stream->regex_stream = NULL;
}
}
- /* hs_stream->hs_rt point to hs_instance->hs_rt which will call free
+ /* stream->ref_hs_rt points to hs_instance->hs_rt, which is freed with the instance;
same as hs_attr */
- hs_stream->ref_hs_rt = NULL;
- hs_stream->matched_pat->ref_hs_attr = NULL;
+ stream->ref_hs_rt = NULL;
+ stream->matched_pat->ref_hs_attr = NULL;
- if (hs_stream->matched_pat->pattern_ids != NULL) {
- utarray_free(hs_stream->matched_pat->pattern_ids);
- hs_stream->matched_pat->pattern_ids = NULL;
+ if (stream->matched_pat->pattern_ids != NULL) {
+ utarray_free(stream->matched_pat->pattern_ids);
+ stream->matched_pat->pattern_ids = NULL;
}
- FREE(hs_stream->matched_pat);
- FREE(hs_stream);
+ FREE(stream->matched_pat);
+ FREE(stream);
}
static void adapter_hs_stream_reset(struct adapter_hs_stream *hs_stream)
@@ -794,9 +762,9 @@ static void adapter_hs_stream_reset(struct adapter_hs_stream *hs_stream)
utarray_clear(hs_stream->matched_pat->pattern_ids);
}
-int adapter_hs_scan_stream(struct adapter_hs_stream *hs_stream, const char *data,
- size_t data_len, struct hs_scan_result *results,
- size_t n_result, size_t *n_hit_result)
+int adapter_hs_scan_stream(void *hs_stream, const char *data, size_t data_len,
+ struct expr_scan_result *results, size_t n_result,
+ size_t *n_hit_result)
{
hs_error_t err;
@@ -816,36 +784,37 @@ int adapter_hs_scan_stream(struct adapter_hs_stream *hs_stream, const char *data
*/
int err_count = 0;
- int thread_id = hs_stream->thread_id;
- struct adapter_hs_scratch *scratch = hs_stream->ref_hs_rt->scratch;
- hs_stream->matched_pat->scan_data_len = data_len;
+ struct adapter_hs_stream *stream = (struct adapter_hs_stream *)hs_stream;
+ int thread_id = stream->thread_id;
+ struct adapter_hs_scratch *scratch = stream->ref_hs_rt->scratch;
+ stream->matched_pat->scan_data_len = data_len;
int err_scratch_flag = 0;
- if (hs_stream->literal_stream != NULL) {
+ if (stream->literal_stream != NULL) {
if (scratch->literal_scratches != NULL) {
- err = hs_scan_stream(hs_stream->literal_stream, data, data_len,
+ err = hs_scan_stream(stream->literal_stream, data, data_len,
0, scratch->literal_scratches[thread_id],
- matched_event_cb, hs_stream->matched_pat);
+ matched_event_cb, stream->matched_pat);
if (err != HS_SUCCESS) {
err_count++;
}
} else {
- log_error(hs_stream->logger, MODULE_ADAPTER_HS,
+ log_error(stream->logger, MODULE_ADAPTER_HS,
"literal_scratches is null, thread_id:%d", thread_id);
err_scratch_flag++;
}
}
- if (hs_stream->regex_stream != NULL) {
+ if (stream->regex_stream != NULL) {
if (scratch->regex_scratches != NULL) {
- err = hs_scan_stream(hs_stream->regex_stream, data, data_len,
+ err = hs_scan_stream(stream->regex_stream, data, data_len,
0, scratch->regex_scratches[thread_id],
- matched_event_cb, hs_stream->matched_pat);
+ matched_event_cb, stream->matched_pat);
if (err != HS_SUCCESS) {
err_count++;
}
} else {
- log_error(hs_stream->logger, MODULE_ADAPTER_HS,
+ log_error(stream->logger, MODULE_ADAPTER_HS,
"regex_scratches is null, thread_id:%d", thread_id);
err_scratch_flag++;
}
@@ -859,7 +828,7 @@ int adapter_hs_scan_stream(struct adapter_hs_stream *hs_stream, const char *data
return -1;
}
- size_t n_pattern_id = utarray_len(hs_stream->matched_pat->pattern_ids);
+ size_t n_pattern_id = utarray_len(stream->matched_pat->pattern_ids);
if (0 == n_pattern_id) {
*n_hit_result = 0;
return 0;
@@ -868,13 +837,13 @@ int adapter_hs_scan_stream(struct adapter_hs_stream *hs_stream, const char *data
unsigned long long pattern_ids[n_pattern_id];
for (size_t i = 0; i < n_pattern_id; i++) {
- pattern_ids[i] = *(unsigned long long *)utarray_eltptr(hs_stream->matched_pat->pattern_ids, i);
+ pattern_ids[i] = *(unsigned long long *)utarray_eltptr(stream->matched_pat->pattern_ids, i);
}
int ret = 0;
struct bool_expr_match *bool_matcher_results = scratch->bool_match_buffs[thread_id];
- int bool_matcher_ret = bool_matcher_match(hs_stream->ref_hs_rt->bm, pattern_ids, n_pattern_id,
- bool_matcher_results, hs_stream->n_expr);
+ int bool_matcher_ret = bool_matcher_match(stream->ref_hs_rt->bm, pattern_ids, n_pattern_id,
+ bool_matcher_results, MAX_HIT_EXPR_NUM);
if (bool_matcher_ret < 0) {
ret = -1;
goto next;
@@ -891,22 +860,21 @@ int adapter_hs_scan_stream(struct adapter_hs_stream *hs_stream, const char *data
*n_hit_result = bool_matcher_ret;
next:
- utarray_clear(hs_stream->matched_pat->pattern_ids);
+ utarray_clear(stream->matched_pat->pattern_ids);
return ret;
}
-int adapter_hs_scan(struct adapter_hs *hs_instance, int thread_id,
- const char *data, size_t data_len,
- struct hs_scan_result *results,
- size_t n_result, size_t *n_hit_result)
+int adapter_hs_scan(void *hs_instance, int thread_id, const char *data, size_t data_len,
+ struct expr_scan_result *results, size_t n_result, size_t *n_hit_result)
{
if (NULL == hs_instance || NULL == data || (0 == data_len) ||
NULL == results || 0 == n_result || NULL == n_hit_result) {
return -1;
}
- struct adapter_hs_stream *hs_stream = hs_instance->hs_rt->streams[thread_id];
+ struct adapter_hs *hs_inst = (struct adapter_hs *)hs_instance;
+ struct adapter_hs_stream *hs_stream = hs_inst->hs_rt->streams[thread_id];
assert(hs_stream != NULL);
adapter_hs_stream_reset(hs_stream);
diff --git a/scanner/expr_matcher/adapter_hs/adapter_hs.h b/scanner/expr_matcher/adapter_hs/adapter_hs.h
new file mode 100644
index 0000000..c9de7d2
--- /dev/null
+++ b/scanner/expr_matcher/adapter_hs/adapter_hs.h
@@ -0,0 +1,75 @@
+/*
+**********************************************************************************************
+* File: adapter_hs.h
+* Description:
+* Authors: Liu wentan <[email protected]>
+* Date: 2022-10-31
+* Copyright: (c) Since 2022 Geedge Networks, Ltd. All rights reserved.
+***********************************************************************************************
+*/
+
+#ifndef _ADAPTER_HS_H_
+#define _ADAPTER_HS_H_
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "log/log.h"
+#include "../expr_matcher.h"
+
+int adapter_hs_verify_regex_expression(const char *regex_expr, struct log_handle *logger);
+/**
+ * @brief new adapter_hs instance
+ *
+ * @param rules: logic AND expression's array
+ * @param n_rule: the number of logic AND expression's array
+ * @param n_worker_thread: the number of scan threads which will call adapter_hs_scan()
+ *
+ * @retval the pointer to adapter_hs instance
+*/
+void *adapter_hs_new(struct expr_rule *rules, size_t n_rule,
+ size_t n_literal_pattern, size_t n_regex_pattern,
+ size_t n_worker_thread, struct log_handle *logger);
+
+/**
+ * @brief scan input data to match logic AND expression, return all matched expr_id
+ *
+ * @param hs_instance: adapter_hs instance obtained by adapter_hs_new()
+ * @param thread_id: the thread_id of caller
+ * @param data: data to be scanned
+ * @param data_len: the length of data to be scanned
+ * @param results: the array of expr_scan_result for the matched expressions
+ * @param n_result: number of elements in the results array
+*/
+int adapter_hs_scan(void *hs_instance, int thread_id, const char *data, size_t data_len,
+ struct expr_scan_result *results, size_t n_result, size_t *n_hit_result);
+
+/**
+ * @brief destroy adapter_hs instance
+ *
+ * @param instance: adapter_hs instance obtained by adapter_hs_new()
+*/
+void adapter_hs_free(void *instance);
+
+/**
+ * @brief open adapter_hs stream after adapter_hs instance initialized for stream scan
+ *
+*/
+void *adapter_hs_stream_open(void *hs_instance, int thread_id);
+
+int adapter_hs_scan_stream(void *stream, const char *data, size_t data_len,
+ struct expr_scan_result *results, size_t n_result,
+ size_t *n_hit_result);
+
+void adapter_hs_stream_close(void *stream);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif \ No newline at end of file
diff --git a/scanner/expr_matcher/adapter_rs/adapter_rs.cpp b/scanner/expr_matcher/adapter_rs/adapter_rs.cpp
new file mode 100644
index 0000000..1459ab1
--- /dev/null
+++ b/scanner/expr_matcher/adapter_rs/adapter_rs.cpp
@@ -0,0 +1,708 @@
+/*
+**********************************************************************************************
+* File: adapter_rs.cpp
+* Description:
+* Authors: Liu wentan <[email protected]>
+* Date: 2022-10-31
+* Copyright: (c) Since 2022 Geedge Networks, Ltd. All rights reserved.
+***********************************************************************************************
+*/
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stddef.h>
+#include <assert.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+
+#include "rulescan.h"
+#include "adapter_rs.h"
+#include "uthash/utarray.h"
+#include "uthash/uthash.h"
+#include "maat_utils.h"
+#include "../../bool_matcher/bool_matcher.h"
+
+#define MAX_HIT_PATTERN_NUM 512
+
+/* Return the kernel thread id of the calling thread via the raw SYS_gettid syscall. */
+pid_t rs_gettid()
+{
+    const long tid = syscall(SYS_gettid);
+
+    return tid;
+}
+
+/*
+ * Format "<name>(<tid>)" into a thread-local static buffer and return it.
+ * The returned string is overwritten by the next call made on the same thread.
+ */
+static const char *rs_module_name_str(const char *name)
+{
+    static __thread char tagged_name[64];
+
+    const pid_t tid = rs_gettid();
+    snprintf(tagged_name, sizeof(tagged_name), "%s(%d)", name, tid);
+
+    return tagged_name;
+}
+
+#define MODULE_ADAPTER_RS rs_module_name_str("maat.adapter_rs")
+
+struct adpt_rs_compile_data { /* batch of patterns handed to rs_compile_lit() / rs_compile_regex() */
+ struct scan_pattern *patterns; /* array allocated by adpt_rs_compile_data_new(); freed with this struct */
+ size_t n_patterns; /* number of elements allocated in patterns[] */
+};
+
+struct adapter_rs_stream { /* state of one stream scan; created by adapter_rs_stream_open() */
+ int thread_id; /* worker thread index; selects the per-thread matched_pats slot when scanning */
+ size_t offset; /* current stream offset */
+ rs_stream_t *literal_stream; /* opened on literal_db; stays NULL when there is no literal database */
+ rs_stream_t *regex_stream; /* opened on regex_db; stays NULL when there is no regex database */
+ struct adapter_rs_runtime *ref_rs_rt; /* borrowed from the owning adapter_rs; not freed with the stream */
+
+ struct log_handle *logger; /* copied from the owning adapter_rs */
+};
+
+/* adapter_rs runtime */
+struct adapter_rs_runtime {
+ rs_database_t *literal_db; /* filled by rs_compile_lit(); NULL when no literal patterns were compiled */
+ rs_database_t *regex_db; /* filled by rs_compile_regex(); NULL when no regex patterns were compiled */
+
+ struct bool_expr_match **bool_match_buffs; /* per thread; each buffer holds MAX_HIT_EXPR_NUM entries */
+ struct adapter_rs_stream **streams; /* per thread; opened in adapter_rs_new() */
+ struct matched_pattern **matched_pats; /* per thread; pattern ids collected by matched_event_cb() */
+ struct bool_matcher *bm; /* turns the set of hit pattern ids into hit expressions */
+};
+
+/* adapter_rs instance */
+struct adapter_rs {
+ size_t n_worker_thread; /* length of every per-thread array in rs_rt */
+ size_t n_expr; /* number of rules passed to adapter_rs_new() */
+ size_t n_patterns; /* number of patterns that bool_exprs_new() assigned an id to */
+ struct adapter_rs_runtime *rs_rt; /* owned; released in adapter_rs_free() */
+ struct pattern_attribute *rs_attr; /* owned array indexed by pattern id */
+ struct log_handle *logger; /* supplied by the caller of adapter_rs_new(); not freed here */
+};
+
+struct pattern_offset { /* range of the scanned data in which a hit of the pattern is accepted */
+ long long start; /* a hit is accepted when (from + pos_offset) >= start; -1 means no lower limit */
+ long long end; /* a hit is accepted when (to + pos_offset - 1) <= end; -1 means no upper limit */
+};
+
+struct pattern_attribute { /* per-pattern data consulted by matched_event_cb() to accept or reject a hit */
+ long long pattern_id; /* id assigned by bool_exprs_new(); equals the index of this entry */
+ enum expr_match_mode match_mode; /* copied from the rule's pattern */
+ struct pattern_offset offset; /* only filled for EXPR_MATCH_MODE_SUB and EXPR_MATCH_MODE_EXACTLY */
+ size_t pattern_len; /* copied from the rule's pat_len */
+};
+
+struct matched_pattern { /* per-thread callback context passed to rs_scan() / rs_scan_stream() */
+ UT_array *pattern_ids; /* accepted pattern ids, kept sorted and unique; cleared after each scan */
+ size_t n_patterns; /* number of entries in ref_rs_attr */
+ struct pattern_attribute *ref_rs_attr; /* borrowed from adapter_rs::rs_attr; not freed here */
+};
+
+/**
+ * @brief check a regex expression with rs_verify_regex()
+ *
+ * @param regex_expr: the regex expression to verify
+ * @param logger: log handle that receives the error message for an illegal expression
+ *
+ * @retval the value returned by rs_verify_regex(); 0 is treated as "illegal" and logged
+*/
+int adapter_rs_verify_regex_expression(const char *regex_expr,
+                                       struct log_handle *logger)
+{
+    const int verdict = rs_verify_regex(regex_expr);
+
+    if (0 != verdict) {
+        return verdict;
+    }
+
+    log_error(logger, MODULE_ADAPTER_RS,
+              "[%s:%d] illegal regex expression: \"%s\"",
+              __FUNCTION__, __LINE__, regex_expr);
+
+    return verdict;
+}
+/**
+ * @brief compile the literal and the regex pattern sets into rs_rt->literal_db
+ *        and rs_rt->regex_db respectively; a NULL compile data set is skipped
+ *
+ * @param rs_rt: runtime that receives the compiled databases
+ * @param n_worker_thread: forwarded to rs_compile_regex()
+ * @param literal_cd: literal patterns, may be NULL
+ * @param regex_cd: regex patterns, may be NULL
+ * @param logger: log handle
+ *
+ * @retval 0(success) -1(failed)
+*/
+static int adpt_rs_build_database(struct adapter_rs_runtime *rs_rt,
+                                  size_t n_worker_thread,
+                                  struct adpt_rs_compile_data *literal_cd,
+                                  struct adpt_rs_compile_data *regex_cd,
+                                  struct log_handle *logger)
+{
+    if (NULL == rs_rt) {
+        return -1;
+    }
+
+    if (literal_cd != NULL &&
+        rs_compile_lit(literal_cd->patterns, literal_cd->n_patterns,
+                       &rs_rt->literal_db) < 0) {
+        log_error(logger, MODULE_ADAPTER_RS, "[%s:%d] compile error",
+                  __FUNCTION__, __LINE__);
+        return -1;
+    }
+
+    if (NULL == regex_cd) {
+        return 0;
+    }
+
+    size_t n_failed_pats = 0;
+    if (rs_compile_regex(regex_cd->patterns, regex_cd->n_patterns,
+                         n_worker_thread, &rs_rt->regex_db, &n_failed_pats) < 0) {
+        log_error(logger, MODULE_ADAPTER_RS, "[%s:%d] compile error",
+                  __FUNCTION__, __LINE__);
+        return -1;
+    }
+
+    return 0;
+}
+
+/* Allocate a compile data set with room for n_patterns patterns. */
+static struct adpt_rs_compile_data *adpt_rs_compile_data_new(size_t n_patterns)
+{
+    struct adpt_rs_compile_data *cd = ALLOC(struct adpt_rs_compile_data, 1);
+
+    cd->n_patterns = n_patterns;
+    cd->patterns = ALLOC(struct scan_pattern, n_patterns);
+
+    return cd;
+}
+
+/* Release a compile data set together with every pattern copy it holds; NULL is accepted. */
+static void adpt_rs_compile_data_free(struct adpt_rs_compile_data *rs_cd)
+{
+    if (NULL == rs_cd) {
+        return;
+    }
+
+    if (rs_cd->patterns != NULL) {
+        size_t idx = 0;
+
+        while (idx < rs_cd->n_patterns) {
+            if (rs_cd->patterns[idx].pattern != NULL) {
+                FREE(rs_cd->patterns[idx].pattern);
+            }
+            idx++;
+        }
+
+        FREE(rs_cd->patterns);
+    }
+
+    FREE(rs_cd);
+}
+
+/*
+ * Fill slot `index` of compile_data with a private copy of the pattern text.
+ * The copy is allocated with one extra byte beyond pat_len.
+ */
+static void populate_compile_data(struct adpt_rs_compile_data *compile_data,
+                                  size_t index, long long pattern_id, char *pat,
+                                  size_t pat_len, int case_sensitive)
+{
+    struct scan_pattern *slot = &compile_data->patterns[index];
+
+    slot->id = pattern_id;
+    slot->case_sensitive = case_sensitive;
+    slot->pattern = ALLOC(char, pat_len + 1);
+    memcpy(slot->pattern, pat, pat_len);
+    slot->pattern_len = pat_len;
+}
+
+/*
+ * Walk every pattern of every rule once and, for each of them:
+ *   - assign the next consecutive pattern id (starting at 0),
+ *   - record its match attributes in pattern_attr[id],
+ *   - copy it into literal_cd or regex_cd depending on its type,
+ *   - reference it from the bool_expr that mirrors the rule.
+ *
+ * The number of pattern ids handed out is stored in *n_pattern. The returned
+ * array has n_rule elements and must be released by the caller.
+ */
+static struct bool_expr *bool_exprs_new(struct expr_rule *rules, size_t n_rule,
+                                        struct pattern_attribute *pattern_attr,
+                                        struct adpt_rs_compile_data *literal_cd,
+                                        struct adpt_rs_compile_data *regex_cd,
+                                        size_t *n_pattern)
+{
+    struct bool_expr *bool_exprs = ALLOC(struct bool_expr, n_rule);
+    long long next_pattern_id = 0;
+    size_t n_literal = 0;
+    size_t n_regex = 0;
+
+    for (size_t r = 0; r < n_rule; r++) {
+        struct bool_expr *expr = &bool_exprs[r];
+
+        for (size_t p = 0; p < rules[r].n_patterns; p++) {
+            struct pattern_attribute *attr = &pattern_attr[next_pattern_id];
+
+            attr->pattern_id = next_pattern_id;
+            attr->match_mode = rules[r].patterns[p].match_mode;
+            attr->pattern_len = rules[r].patterns[p].pat_len;
+
+            /* offsets are only recorded for the SUB and EXACTLY modes */
+            switch (attr->match_mode) {
+            case EXPR_MATCH_MODE_SUB:
+            case EXPR_MATCH_MODE_EXACTLY:
+                attr->offset.start = rules[r].patterns[p].start_offset;
+                attr->offset.end = rules[r].patterns[p].end_offset;
+                break;
+            default:
+                break;
+            }
+
+            if (EXPR_PATTERN_TYPE_STR == rules[r].patterns[p].type) {
+                /* literal pattern */
+                populate_compile_data(literal_cd, n_literal, next_pattern_id,
+                                      rules[r].patterns[p].pat,
+                                      rules[r].patterns[p].pat_len,
+                                      rules[r].patterns[p].case_sensitive);
+                n_literal++;
+            } else {
+                /* regex pattern */
+                populate_compile_data(regex_cd, n_regex, next_pattern_id,
+                                      rules[r].patterns[p].pat,
+                                      rules[r].patterns[p].pat_len,
+                                      rules[r].patterns[p].case_sensitive);
+                n_regex++;
+            }
+
+            expr->items[p].item_id = next_pattern_id;
+            expr->items[p].not_flag = 0;
+            next_pattern_id++;
+        }
+
+        expr->expr_id = rules[r].expr_id;
+        expr->item_num = rules[r].n_patterns;
+        expr->user_tag = rules[r].tag;
+    }
+
+    *n_pattern = next_pattern_id;
+
+    return bool_exprs;
+}
+
+/* element descriptor of the per-thread arrays that collect matched pattern ids */
+UT_icd ut_rs_pattern_id_icd = {sizeof(unsigned long long), NULL, NULL, NULL};
+
+/**
+ * @brief build an adapter_rs instance: the bool matcher over all rules, the
+ *        literal/regex rulescan databases and the per-thread scan resources
+ *
+ * @param rules: logic AND expression's array
+ * @param n_rule: the number of elements in rules
+ * @param n_literal_pattern: the total number of literal patterns in rules
+ * @param n_regex_pattern: the total number of regex patterns in rules
+ * @param n_worker_thread: the number of scan threads; sizes every per-thread array
+ * @param logger: log handle, stored in the instance
+ *
+ * @retval the pointer to the adapter_rs instance, or NULL on failure; every
+ *         failure path releases what had been allocated so far
+*/
+void *adapter_rs_new(struct expr_rule *rules, size_t n_rule,
+                     size_t n_literal_pattern, size_t n_regex_pattern,
+                     size_t n_worker_thread, struct log_handle *logger)
+{
+    size_t i = 0;
+    struct adpt_rs_compile_data *literal_cd = NULL;
+    struct adpt_rs_compile_data *regex_cd = NULL;
+
+    if (n_literal_pattern > 0) {
+        literal_cd = adpt_rs_compile_data_new(n_literal_pattern);
+    }
+
+    if (n_regex_pattern > 0) {
+        regex_cd = adpt_rs_compile_data_new(n_regex_pattern);
+    }
+
+    /* get the sum of pattern */
+    size_t pattern_cnt = n_literal_pattern + n_regex_pattern;
+    struct adapter_rs *rs_inst = ALLOC(struct adapter_rs, 1);
+    rs_inst->rs_attr = ALLOC(struct pattern_attribute, pattern_cnt);
+    rs_inst->logger = logger;
+    rs_inst->n_worker_thread = n_worker_thread;
+    rs_inst->n_expr = n_rule;
+
+    struct bool_expr *bool_exprs = bool_exprs_new(rules, n_rule, rs_inst->rs_attr,
+                                                  literal_cd, regex_cd, &pattern_cnt);
+    if (NULL == bool_exprs) {
+        /* Nothing else owns these objects yet, so release them here instead of
+         * leaking them. No goto: it would jump over the initialisations below. */
+        adpt_rs_compile_data_free(literal_cd);
+        adpt_rs_compile_data_free(regex_cd);
+        adapter_rs_free(rs_inst);
+        return NULL;
+    }
+    rs_inst->n_patterns = pattern_cnt;
+
+    /* create bool matcher */
+    size_t mem_size = 0;
+    int rs_ret = 0;
+
+    rs_inst->rs_rt = ALLOC(struct adapter_rs_runtime, 1);
+
+    //rs_rt->bm
+    rs_inst->rs_rt->bm = bool_matcher_new(bool_exprs, n_rule, &mem_size);
+    if (rs_inst->rs_rt->bm != NULL) {
+        log_info(logger, MODULE_ADAPTER_RS,
+                 "Adapter_rs module: build bool matcher of %zu expressions"
+                 " with %zu bytes memory", n_rule, mem_size);
+    } else {
+        log_error(logger, MODULE_ADAPTER_RS,
+                  "[%s:%d] Adapter_rs module: build bool matcher failed",
+                  __FUNCTION__, __LINE__);
+
+        rs_ret = -1;
+    }
+    FREE(bool_exprs);
+
+    /* build rs database rs_rt->literal_db & rs_rt->regex_db */
+    int ret = adpt_rs_build_database(rs_inst->rs_rt, n_worker_thread,
+                                     literal_cd, regex_cd, logger);
+    if (ret < 0) {
+        rs_ret = -1;
+    }
+
+    /* the compile data is no longer needed once the databases are built;
+     * adpt_rs_compile_data_free() accepts NULL */
+    adpt_rs_compile_data_free(literal_cd);
+    adpt_rs_compile_data_free(regex_cd);
+
+    if (rs_ret < 0) {
+        goto error;
+    }
+
+    /* alloc scratch */
+    rs_inst->rs_rt->bool_match_buffs = ALLOC(struct bool_expr_match *, n_worker_thread);
+    for (i = 0; i < n_worker_thread; i++) {
+        rs_inst->rs_rt->bool_match_buffs[i] = ALLOC(struct bool_expr_match, MAX_HIT_EXPR_NUM);
+    }
+
+    rs_inst->rs_rt->streams = ALLOC(struct adapter_rs_stream *, n_worker_thread);
+    for (i = 0; i < n_worker_thread; i++) {
+        rs_inst->rs_rt->streams[i] = (struct adapter_rs_stream *)adapter_rs_stream_open(rs_inst, i);
+    }
+
+    rs_inst->rs_rt->matched_pats = ALLOC(struct matched_pattern *, n_worker_thread);
+    for (i = 0; i < n_worker_thread; i++) {
+        rs_inst->rs_rt->matched_pats[i] = ALLOC(struct matched_pattern, 1);
+        rs_inst->rs_rt->matched_pats[i]->ref_rs_attr = rs_inst->rs_attr;
+        rs_inst->rs_rt->matched_pats[i]->n_patterns = rs_inst->n_patterns;
+        utarray_new(rs_inst->rs_rt->matched_pats[i]->pattern_ids, &ut_rs_pattern_id_icd);
+        utarray_reserve(rs_inst->rs_rt->matched_pats[i]->pattern_ids, MAX_HIT_PATTERN_NUM);
+    }
+
+    return rs_inst;
+error:
+    adapter_rs_free(rs_inst);
+    return NULL;
+}
+
+/**
+ * @brief destroy an adapter_rs instance and everything it owns: both rulescan
+ *        databases, the bool matcher and all per-thread buffers, streams and
+ *        matched-pattern sets
+ *
+ * @param rs_instance: instance obtained by adapter_rs_new(); NULL is ignored
+*/
+void adapter_rs_free(void *rs_instance)
+{
+    struct adapter_rs *rs_inst = (struct adapter_rs *)rs_instance;
+    if (NULL == rs_inst) {
+        return;
+    }
+
+    struct adapter_rs_runtime *rt = rs_inst->rs_rt;
+    const size_t n_thread = rs_inst->n_worker_thread;
+
+    if (rt != NULL) {
+        if (rt->literal_db != NULL) {
+            rs_free_database(rt->literal_db);
+            rt->literal_db = NULL;
+        }
+
+        if (rt->regex_db != NULL) {
+            rs_free_database(rt->regex_db);
+            rt->regex_db = NULL;
+        }
+
+        if (rt->bool_match_buffs != NULL) {
+            for (size_t t = 0; t < n_thread; t++) {
+                if (rt->bool_match_buffs[t] != NULL) {
+                    FREE(rt->bool_match_buffs[t]);
+                }
+            }
+
+            FREE(rt->bool_match_buffs);
+        }
+
+        if (rt->bm != NULL) {
+            bool_matcher_free(rt->bm);
+            rt->bm = NULL;
+        }
+
+        if (rt->streams != NULL) {
+            for (size_t t = 0; t < n_thread; t++) {
+                if (rt->streams[t] != NULL) {
+                    adapter_rs_stream_close(rt->streams[t]);
+                    rt->streams[t] = NULL;
+                }
+            }
+            FREE(rt->streams);
+        }
+
+        if (rt->matched_pats != NULL) {
+            for (size_t t = 0; t < n_thread; t++) {
+                if (rt->matched_pats[t] != NULL) {
+                    utarray_free(rt->matched_pats[t]->pattern_ids);
+                    FREE(rt->matched_pats[t]);
+                }
+            }
+            FREE(rt->matched_pats);
+        }
+
+        FREE(rs_inst->rs_rt);
+    }
+
+    if (rs_inst->rs_attr != NULL) {
+        FREE(rs_inst->rs_attr);
+    }
+
+    FREE(rs_inst);
+}
+
+/*
+ * Three-way comparison of two pattern ids (unsigned long long) for
+ * utarray_sort() / utarray_find(): returns -1, 0 or 1.
+ *
+ * The operands are compared directly. Storing their unsigned difference in a
+ * signed variable gives the wrong sign once the ids differ by 2^63 or more.
+ */
+static inline int compare_pattern_id(const void *a, const void *b)
+{
+    const unsigned long long lhs = *(const unsigned long long *)a;
+    const unsigned long long rhs = *(const unsigned long long *)b;
+
+    if (lhs < rhs) {
+        return -1;
+    }
+
+    return (lhs > rhs) ? 1 : 0;
+}
+
+/**
+ * @brief rulescan match callback: decide whether the hit satisfies the match
+ *        mode of the pattern and, if so, add its id to the sorted, duplicate-free
+ *        set in the per-thread struct matched_pattern
+ *
+ * @param id: pattern id
+ * @param pos_offset: added to from/to before they are compared with positions
+ * @param from: start of the hit as reported by rulescan
+ * @param to: end of the hit as reported by rulescan
+ * @param data_len: length of the data being scanned
+ * @param ctx: the struct matched_pattern of the scanning thread
+ *
+ * @retval always 0
+*/
+static int matched_event_cb(unsigned int id, int pos_offset, int from, int to,
+                            size_t data_len, void *ctx)
+{
+    struct matched_pattern *matched_pat = (struct matched_pattern *)ctx;
+    unsigned long long pattern_id = id;
+
+    /* valid ids are [0, n_patterns): an id equal to n_patterns would read one
+     * element past the end of ref_rs_attr */
+    if (pattern_id >= matched_pat->n_patterns) {
+        return 0;
+    }
+
+    if (utarray_len(matched_pat->pattern_ids) >= MAX_HIT_PATTERN_NUM) {
+        return 0;
+    }
+
+    // duplicate pattern_id
+    if (utarray_find(matched_pat->pattern_ids, &pattern_id, compare_pattern_id)) {
+        return 0;
+    }
+
+    const struct pattern_attribute *pat_attr = &matched_pat->ref_rs_attr[id];
+    const int hit_begin = from + pos_offset;
+    const int hit_end = to + pos_offset;
+    int accepted = 0;
+
+    switch (pat_attr->match_mode) {
+    case EXPR_MATCH_MODE_EXACTLY:
+        accepted = (0 == hit_begin && (int)data_len == hit_end);
+        break;
+    case EXPR_MATCH_MODE_SUB:
+        /* -1 on either side means "no limit" on that side */
+        accepted = (pat_attr->offset.start == -1 ||
+                    (long long)hit_begin >= pat_attr->offset.start) &&
+                   (pat_attr->offset.end == -1 ||
+                    (long long)(hit_end - 1) <= pat_attr->offset.end);
+        break;
+    case EXPR_MATCH_MODE_PREFIX:
+        accepted = (0 == hit_begin);
+        break;
+    case EXPR_MATCH_MODE_SUFFIX:
+        accepted = (hit_end == (int)data_len);
+        break;
+    default:
+        break;
+    }
+
+    if (accepted) {
+        /* keep the array sorted: utarray_find() above is a binary search */
+        utarray_push_back(matched_pat->pattern_ids, &pattern_id);
+        utarray_sort(matched_pat->pattern_ids, compare_pattern_id);
+    }
+
+    return 0;
+}
+
+/**
+ * @brief open a stream scan context bound to one worker thread
+ *
+ * A rulescan stream is opened on each database that exists (literal and/or
+ * regex). If any of them cannot be opened, everything is closed again.
+ *
+ * @param rs_instance: adapter_rs instance obtained by adapter_rs_new()
+ * @param thread_id: worker thread index, must be in [0, n_worker_thread)
+ *
+ * @retval the stream handle, or NULL on invalid arguments or open failure;
+ *         release it with adapter_rs_stream_close()
+*/
+void *adapter_rs_stream_open(void *rs_instance, int thread_id)
+{
+    if (NULL == rs_instance || thread_id < 0) {
+        return NULL;
+    }
+
+    struct adapter_rs *rs_inst = (struct adapter_rs *)rs_instance;
+
+    /* thread_id later indexes the per-thread arrays of the runtime, so an
+     * out-of-range id must be rejected here; the runtime itself must exist */
+    if (NULL == rs_inst->rs_rt || (size_t)thread_id >= rs_inst->n_worker_thread) {
+        return NULL;
+    }
+
+    struct adapter_rs_runtime *rs_rt = rs_inst->rs_rt;
+    struct adapter_rs_stream *rs_stream = ALLOC(struct adapter_rs_stream, 1);
+
+    rs_stream->logger = rs_inst->logger;
+    rs_stream->thread_id = thread_id;
+    rs_stream->ref_rs_rt = rs_rt;
+
+    int err_count = 0;
+    if (rs_rt->literal_db != NULL) {
+        rs_stream->literal_stream = rs_open_stream(rs_rt->literal_db, 0, 128);
+        if (NULL == rs_stream->literal_stream) {
+            log_error(rs_inst->logger, MODULE_ADAPTER_RS, "rs_open_stream failed");
+            err_count++;
+        }
+    }
+
+    if (rs_rt->regex_db != NULL) {
+        rs_stream->regex_stream = rs_open_stream(rs_rt->regex_db, 0, 128);
+        if (NULL == rs_stream->regex_stream) {
+            log_error(rs_inst->logger, MODULE_ADAPTER_RS, "rs_open_stream failed");
+            err_count++;
+        }
+    }
+
+    if (0 == err_count) {
+        return rs_stream;
+    }
+
+    /* roll back whichever stream did open */
+    if (rs_stream->literal_stream != NULL) {
+        rs_close_stream(rs_stream->literal_stream);
+        rs_stream->literal_stream = NULL;
+    }
+
+    if (rs_stream->regex_stream != NULL) {
+        rs_close_stream(rs_stream->regex_stream);
+        rs_stream->regex_stream = NULL;
+    }
+
+    FREE(rs_stream);
+    return NULL;
+}
+
+/**
+ * @brief close the rulescan streams held by a stream handle and free the handle
+ *
+ * @param rs_stream: handle obtained by adapter_rs_stream_open(); NULL is ignored
+*/
+void adapter_rs_stream_close(void *rs_stream)
+{
+    struct adapter_rs_stream *stream = (struct adapter_rs_stream *)rs_stream;
+    if (NULL == stream) {
+        return;
+    }
+
+    /* the underlying streams are only closed while the runtime reference is set */
+    const int has_runtime = (stream->ref_rs_rt != NULL);
+
+    if (has_runtime && stream->literal_stream != NULL) {
+        rs_close_stream(stream->literal_stream);
+        stream->literal_stream = NULL;
+    }
+
+    if (has_runtime && stream->regex_stream != NULL) {
+        rs_close_stream(stream->regex_stream);
+        stream->regex_stream = NULL;
+    }
+
+    /* ref_rs_rt points to rs_instance->rs_rt, which is released by
+       adapter_rs_free(); only the reference is dropped here */
+    stream->ref_rs_rt = NULL;
+    FREE(stream);
+}
+
+/**
+ * @brief scan one chunk of data on an open stream and report the hit expressions
+ *
+ * Every engine stream that exists (literal and/or regex) is fed with the data;
+ * the pattern ids accepted by matched_event_cb() are then resolved to
+ * expressions by the bool matcher.
+ *
+ * @param rs_stream: handle obtained by adapter_rs_stream_open()
+ * @param data: data to be scanned
+ * @param data_len: the length of data, must not be 0
+ * @param results: caller-provided array that receives the hit expressions
+ * @param n_result: the number of elements in results, must not be 0
+ * @param n_hit_result: output, number of elements written to results; it is 0
+ *                      whenever -1 is returned after the argument check
+ *
+ * @retval 0 on success (including "nothing hit" and the case where only one of
+ *         two engines failed), -1 on invalid arguments, when every engine that
+ *         was run failed, or when the bool matcher failed
+*/
+int adapter_rs_scan_stream(void *rs_stream, const char *data, size_t data_len,
+                           struct expr_scan_result *results, size_t n_result,
+                           size_t *n_hit_result)
+{
+    if (NULL == rs_stream || NULL == data || 0 == data_len ||
+        NULL == results || 0 == n_result || NULL == n_hit_result) {
+        return -1;
+    }
+
+    struct adapter_rs_stream *stream = (struct adapter_rs_stream *)rs_stream;
+    struct adapter_rs_runtime *rs_rt = stream->ref_rs_rt;
+    int thread_id = stream->thread_id;
+    struct matched_pattern *matched_pat = rs_rt->matched_pats[thread_id];
+    int n_scan = 0;
+    int n_failed = 0;
+
+    *n_hit_result = 0;
+
+    if (stream->literal_stream != NULL) {
+        n_scan++;
+        if (rs_scan_stream(stream->literal_stream, data, data_len,
+                           matched_event_cb, matched_pat) < 0) {
+            n_failed++;
+        }
+    }
+
+    if (stream->regex_stream != NULL) {
+        n_scan++;
+        if (rs_scan_stream(stream->regex_stream, data, data_len,
+                           matched_event_cb, matched_pat) < 0) {
+            n_failed++;
+        }
+    }
+
+    int ret = 0;
+    size_t n_pattern_id = utarray_len(matched_pat->pattern_ids);
+
+    if (n_scan > 0 && n_failed == n_scan) {
+        /* every engine that ran has failed; this also covers a single engine */
+        ret = -1;
+    } else if (n_pattern_id > 0) {
+        /* matched_event_cb() never stores more than MAX_HIT_PATTERN_NUM ids */
+        unsigned long long pattern_ids[MAX_HIT_PATTERN_NUM];
+        if (n_pattern_id > MAX_HIT_PATTERN_NUM) {
+            n_pattern_id = MAX_HIT_PATTERN_NUM;
+        }
+
+        for (size_t i = 0; i < n_pattern_id; i++) {
+            pattern_ids[i] = *(unsigned long long *)utarray_eltptr(matched_pat->pattern_ids, i);
+        }
+
+        struct bool_expr_match *bool_matcher_results = rs_rt->bool_match_buffs[thread_id];
+        int bool_matcher_ret = bool_matcher_match(rs_rt->bm, pattern_ids, n_pattern_id,
+                                                  bool_matcher_results, MAX_HIT_EXPR_NUM);
+        if (bool_matcher_ret < 0) {
+            ret = -1;
+        } else {
+            /* never write more than the caller's array can hold */
+            size_t n_hit = (size_t)bool_matcher_ret;
+            if (n_hit > n_result) {
+                n_hit = n_result;
+            }
+
+            for (size_t index = 0; index < n_hit; index++) {
+                results[index].rule_id = bool_matcher_results[index].expr_id;
+                results[index].user_tag = bool_matcher_results[index].user_tag;
+            }
+            *n_hit_result = n_hit;
+        }
+    }
+
+    /* always reset the per-thread set, also on failure, so that ids collected
+     * by this scan cannot leak into the next one on the same thread */
+    utarray_clear(matched_pat->pattern_ids);
+
+    return ret;
+}
+
+/**
+ * @brief scan a block of data and report the hit expressions
+ *
+ * Every database that exists (literal and/or regex) is scanned; the pattern ids
+ * accepted by matched_event_cb() are then resolved to expressions by the bool
+ * matcher.
+ *
+ * @param rs_instance: adapter_rs instance obtained by adapter_rs_new()
+ * @param thread_id: worker thread index, must be in [0, n_worker_thread)
+ * @param data: data to be scanned
+ * @param data_len: the length of data, must not be 0
+ * @param results: caller-provided array that receives the hit expressions
+ * @param n_result: the number of elements in results, must not be 0
+ * @param n_hit_result: output, number of elements written to results; it is 0
+ *                      whenever -1 is returned after the argument check
+ *
+ * @retval 0 on success (including "nothing hit" and the case where only one of
+ *         two engines failed), -1 on invalid arguments, when every engine that
+ *         was run failed, or when the bool matcher failed
+*/
+int adapter_rs_scan(void *rs_instance, int thread_id, const char *data, size_t data_len,
+                    struct expr_scan_result *results, size_t n_result, size_t *n_hit_result)
+{
+    if (NULL == rs_instance || NULL == data || (0 == data_len) ||
+        NULL == results || 0 == n_result || NULL == n_hit_result) {
+        return -1;
+    }
+
+    struct adapter_rs *rs_inst = (struct adapter_rs *)rs_instance;
+    struct adapter_rs_runtime *rs_rt = rs_inst->rs_rt;
+
+    /* thread_id indexes the per-thread arrays below */
+    if (NULL == rs_rt || thread_id < 0 ||
+        (size_t)thread_id >= rs_inst->n_worker_thread) {
+        return -1;
+    }
+
+    struct matched_pattern *matched_pat = rs_rt->matched_pats[thread_id];
+    int n_scan = 0;
+    int n_failed = 0;
+
+    *n_hit_result = 0;
+
+    if (rs_rt->literal_db != NULL) {
+        n_scan++;
+        if (rs_scan(rs_rt->literal_db, thread_id, data, data_len,
+                    0, matched_event_cb, matched_pat) < 0) {
+            n_failed++;
+        }
+    }
+
+    if (rs_rt->regex_db != NULL) {
+        n_scan++;
+        if (rs_scan(rs_rt->regex_db, thread_id, data, data_len,
+                    0, matched_event_cb, matched_pat) < 0) {
+            n_failed++;
+        }
+    }
+
+    int ret = 0;
+    size_t n_pattern_id = utarray_len(matched_pat->pattern_ids);
+
+    if (n_scan > 0 && n_failed == n_scan) {
+        /* every engine that ran has failed; this also covers a single engine */
+        ret = -1;
+    } else if (n_pattern_id > 0) {
+        /* matched_event_cb() never stores more than MAX_HIT_PATTERN_NUM ids */
+        unsigned long long pattern_ids[MAX_HIT_PATTERN_NUM];
+        if (n_pattern_id > MAX_HIT_PATTERN_NUM) {
+            n_pattern_id = MAX_HIT_PATTERN_NUM;
+        }
+
+        for (size_t i = 0; i < n_pattern_id; i++) {
+            pattern_ids[i] = *(unsigned long long *)utarray_eltptr(matched_pat->pattern_ids, i);
+        }
+
+        struct bool_expr_match *bool_matcher_results = rs_rt->bool_match_buffs[thread_id];
+        int bool_matcher_ret = bool_matcher_match(rs_rt->bm, pattern_ids, n_pattern_id,
+                                                  bool_matcher_results, MAX_HIT_EXPR_NUM);
+        if (bool_matcher_ret < 0) {
+            ret = -1;
+        } else {
+            /* never write more than the caller's array can hold */
+            size_t n_hit = (size_t)bool_matcher_ret;
+            if (n_hit > n_result) {
+                n_hit = n_result;
+            }
+
+            for (size_t index = 0; index < n_hit; index++) {
+                results[index].rule_id = bool_matcher_results[index].expr_id;
+                results[index].user_tag = bool_matcher_results[index].user_tag;
+            }
+            *n_hit_result = n_hit;
+        }
+    }
+
+    /* always reset the per-thread set, also on failure, so that ids collected
+     * by this scan cannot leak into the next one on the same thread */
+    utarray_clear(matched_pat->pattern_ids);
+
+    return ret;
+} \ No newline at end of file
diff --git a/scanner/expr_matcher/adapter_rs/adapter_rs.h b/scanner/expr_matcher/adapter_rs/adapter_rs.h
new file mode 100644
index 0000000..c43e553
--- /dev/null
+++ b/scanner/expr_matcher/adapter_rs/adapter_rs.h
@@ -0,0 +1,78 @@
+/*
+**********************************************************************************************
+* File: adapter_rs.h
+* Description:
+* Authors: Liu wentan <[email protected]>
+* Date: 2023-06-30
+* Copyright: (c) Since 2022 Geedge Networks, Ltd. All rights reserved.
+***********************************************************************************************
+*/
+
+#ifndef _ADAPTER_RS_H_
+#define _ADAPTER_RS_H_
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#include <stddef.h>
+
+#include "log/log.h"
+
+#include "../expr_matcher.h"
+
+int adapter_rs_verify_regex_expression(const char *regex_expr,
+ struct log_handle *logger);
+
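+/**
+ * @brief new adapter_rs instance
+ *
+ * @param rules: logic AND expression's array
+ * @param n_rule: the number of elements in the logic AND expression's array
+ * @param n_literal_pattern: the total number of literal string patterns in rules
+ * @param n_regex_pattern: the total number of regex patterns in rules
+ * @param n_worker_thread: the number of scan threads which will call adapter_rs_scan()
+ * @param logger: log handle
+ *
+ * @retval the pointer to adapter_rs instance, NULL on failure
+*/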
+void *adapter_rs_new(struct expr_rule *rules, size_t n_rule,
+ size_t n_literal_pattern, size_t n_regex_pattern,
+ size_t n_worker_thread, struct log_handle *logger);
+
+void adapter_rs_free(void *rs_instance);
+
+/**
+ * @brief scan input data to match logic AND expression, return all matched expr_id
+ *
+ * @param rs_instance: adapter_rs instance obtained by adapter_rs_new()
+ * @param thread_id: the thread_id of caller
+ * @param scan_data: data to be scanned
+ * @param data_len: the length of data to be scanned
+ * @param result_array: the array to store hit expr_id, allocated by the caller
+ * @param n_result_array: the number of elements in result_array
+ * @param n_hit_results: output, the number of elements written to result_array
+*/
+int adapter_rs_scan(void *rs_instance, int thread_id,
+ const char *scan_data, size_t data_len,
+ struct expr_scan_result *result_array,
+ size_t n_result_array, size_t *n_hit_results);
+
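+/**
+ * @brief open a stream scan context on rs_instance for the worker thread thread_id
+ *
+ * @retval the stream handle, or NULL on failure; release it with adapter_rs_stream_close()
+ */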
+void *adapter_rs_stream_open(void *rs_instance, int thread_id);
+
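+/**
+ * @brief scan one chunk of data on a stream obtained by adapter_rs_stream_open()
+ *
+ * @param rs_stream: the stream handle
+ * @param scan_data: data to be scanned
+ * @param data_len: the length of data to be scanned
+ * @param result_array: the array to store hit expr_id, allocated by the caller
+ * @param n_result_array: the number of elements in result_array
+ * @param n_hit_results: output, the number of elements written to result_array
+ */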
+int adapter_rs_scan_stream(void *rs_stream, const char *scan_data,
+ size_t data_len, struct expr_scan_result *result_array,
+ size_t n_result_array, size_t *n_hit_results);
+/**
+ * @brief close a stream obtained by adapter_rs_stream_open() and free it
+ */
+void adapter_rs_stream_close(void *rs_stream);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif \ No newline at end of file
diff --git a/scanner/expr_matcher/expr_matcher.cpp b/scanner/expr_matcher/expr_matcher.cpp
new file mode 100644
index 0000000..3a37383
--- /dev/null
+++ b/scanner/expr_matcher/expr_matcher.cpp
@@ -0,0 +1,235 @@
+/*
+**********************************************************************************************
+* File: expr_matcher.cpp
+* Description:
+* Authors: Liu wentan <[email protected]>
+* Date: 2023-06-30
+* Copyright: (c) Since 2023 Geedge Networks, Ltd. All rights reserved.
+***********************************************************************************************
+*/
+
+#include <unistd.h>
+#include <assert.h>
+#include <sys/syscall.h>
+
+#include "log/log.h"
+#include "expr_matcher.h"
+#include "maat_utils.h"
+#include "adapter_hs/adapter_hs.h"
+#include "adapter_rs/adapter_rs.h"
+
+/* Return the kernel thread id of the calling thread via the raw SYS_gettid syscall. */
+pid_t expr_matcher_gettid()
+{
+    const long tid = syscall(SYS_gettid);
+
+    return tid;
+}
+
+/*
+ * Format "<name>(<tid>)" into a thread-local static buffer and return it.
+ * The returned string is overwritten by the next call made on the same thread.
+ */
+static const char *expr_matcher_module_name_str(const char *name)
+{
+    static __thread char tagged_name[64];
+
+    const pid_t tid = expr_matcher_gettid();
+    snprintf(tagged_name, sizeof(tagged_name), "%s(%d)", name, tid);
+
+    return tagged_name;
+}
+
+#define MODULE_EXPR_MATCHER expr_matcher_module_name_str("maat.expr_matcher")
+
+struct expr_matcher { /* engine-independent handle returned by expr_matcher_new() */
+ enum expr_engine_type engine_type; /* index into expr_engine_ops[] */
+ void *engine; /* instance returned by the selected engine_new(); released through engine_free() */
+ struct log_handle *logger; /* supplied by the caller; not freed here */
+};
+
+struct expr_matcher_stream { /* engine-independent stream handle returned by expr_matcher_stream_open() */
+ enum expr_engine_type engine_type; /* copied from the matcher; index into expr_engine_ops[] */
+ void *handle; /* stream returned by engine_stream_open(); released through engine_stream_close() */
+};
+
+struct expr_engine_operations { /* entry points every expression engine adapter provides */
+ enum expr_engine_type type; /* engine this entry belongs to */
+ void *(*engine_new)(struct expr_rule *rules, size_t n_rule, /* build an engine instance from the rules */
+ size_t n_literal_pattern, size_t n_regex_pattern,
+ size_t n_worker_thread, struct log_handle *logger);
+ void (*engine_free)(void *engine); /* destroy an instance created by engine_new */
+ int (*engine_scan)(void *engine, int thread_id, const char *scan_data, /* block scan */
+ size_t data_len, struct expr_scan_result *result_array,
+ size_t n_result_array, size_t *n_hit_result);
+ void *(*engine_stream_open)(void *engine, int thread_id); /* open a stream on an instance */
+ void (*engine_stream_close)(void *stream); /* close a stream opened by engine_stream_open */
+ int (*engine_scan_stream)(void *stream, const char *scan_data, size_t data_len, /* stream scan */
+ struct expr_scan_result *result_array, size_t n_result_array,
+ size_t *n_hit_result);
+};
+
+struct expr_engine_operations expr_engine_ops[EXPR_ENGINE_TYPE_MAX] = { /* looked up by enum expr_engine_type, so entry order must follow the enum values */
+ { /* slot EXPR_ENGINE_TYPE_HS: adapter_hs entry points */
+ .type = EXPR_ENGINE_TYPE_HS,
+ .engine_new = adapter_hs_new,
+ .engine_free = adapter_hs_free,
+ .engine_scan = adapter_hs_scan,
+ .engine_stream_open = adapter_hs_stream_open,
+ .engine_stream_close = adapter_hs_stream_close,
+ .engine_scan_stream = adapter_hs_scan_stream
+ },
+ { /* slot EXPR_ENGINE_TYPE_RS: adapter_rs entry points */
+ .type = EXPR_ENGINE_TYPE_RS,
+ .engine_new = adapter_rs_new,
+ .engine_free = adapter_rs_free,
+ .engine_scan = adapter_rs_scan,
+ .engine_stream_open = adapter_rs_stream_open,
+ .engine_stream_close = adapter_rs_stream_close,
+ .engine_scan_stream = adapter_rs_scan_stream
+ }
+};
+
+/**
+ * @brief verify a regex expression against both engines
+ *
+ * @param regex_expr: the regex expression to verify
+ * @param logger: log handle forwarded to the adapters
+ *
+ * @retval 0 as soon as adapter_hs rejects the expression, otherwise the
+ *         result of adapter_rs_verify_regex_expression()
+*/
+int expr_matcher_verify_regex_expression(const char *regex_expr,
+                                         struct log_handle *logger)
+{
+    const int hs_verdict = adapter_hs_verify_regex_expression(regex_expr, logger);
+
+    if (0 != hs_verdict) {
+        return adapter_rs_verify_regex_expression(regex_expr, logger);
+    }
+
+    return 0;
+}
+
+/**
+ * @brief validate the rules, count their literal and regex patterns and build
+ *        a matcher on top of the selected engine
+ *
+ * @param rules: logic AND expression's array, must not be NULL
+ * @param n_rule: the number of elements in rules, must not be 0
+ * @param engine_type: EXPR_ENGINE_TYPE_HS or EXPR_ENGINE_TYPE_RS
+ * @param n_worker_thread: the number of scan threads, must not be 0
+ * @param logger: log handle, stored in the matcher
+ *
+ * @retval the matcher, or NULL when an argument is invalid, a rule has more
+ *         than MAX_EXPR_PATTERN_NUM patterns, a pattern has length 0, the rules
+ *         contain no pattern at all, or the engine cannot be built
+*/
+struct expr_matcher *
+expr_matcher_new(struct expr_rule *rules, size_t n_rule, enum expr_engine_type engine_type,
+                 size_t n_worker_thread, struct log_handle *logger)
+{
+    /* report the argument that is actually wrong instead of always blaming
+     * the engine type */
+    if (NULL == rules || 0 == n_rule || 0 == n_worker_thread) {
+        log_error(logger, MODULE_EXPR_MATCHER,
+                  "[%s:%d] rules is NULL or n_rule/n_worker_thread is 0",
+                  __FUNCTION__, __LINE__);
+        return NULL;
+    }
+
+    if (engine_type != EXPR_ENGINE_TYPE_HS && engine_type != EXPR_ENGINE_TYPE_RS) {
+        log_error(logger, MODULE_EXPR_MATCHER, "[%s:%d]engine type:%d is illegal",
+                  __FUNCTION__, __LINE__, (int)engine_type);
+        return NULL;
+    }
+
+    size_t i = 0, j = 0;
+    size_t literal_pat_num = 0;
+    size_t regex_pat_num = 0;
+
+    for (i = 0; i < n_rule; i++) {
+        /* exactly MAX_EXPR_PATTERN_NUM patterns is still allowed */
+        if (rules[i].n_patterns > MAX_EXPR_PATTERN_NUM) {
+            log_error(logger, MODULE_EXPR_MATCHER,
+                      "[%s:%d] the number of patterns in one expression should not exceed"
+                      " %d", __FUNCTION__, __LINE__, MAX_EXPR_PATTERN_NUM);
+            return NULL;
+        }
+
+        for (j = 0; j < rules[i].n_patterns; j++) {
+            /* pat_len should not 0 */
+            if (0 == rules[i].patterns[j].pat_len) {
+                log_error(logger, MODULE_EXPR_MATCHER,
+                          "[%s:%d] expr pattern length should not 0",
+                          __FUNCTION__, __LINE__);
+                return NULL;
+            }
+
+            if (rules[i].patterns[j].type == EXPR_PATTERN_TYPE_STR) {
+                literal_pat_num++;
+            } else {
+                regex_pat_num++;
+            }
+        }
+    }
+
+    if (0 == literal_pat_num && 0 == regex_pat_num) {
+        log_error(logger, MODULE_EXPR_MATCHER,
+                  "[%s:%d] exprs has no valid pattern", __FUNCTION__, __LINE__);
+        return NULL;
+    }
+
+    void *engine = expr_engine_ops[engine_type].engine_new(rules, n_rule, literal_pat_num,
+                                                           regex_pat_num, n_worker_thread,
+                                                           logger);
+    if (NULL == engine) {
+        log_error(logger, MODULE_EXPR_MATCHER,
+                  "[%s:%d]expr_matcher engine_new failed.", __FUNCTION__, __LINE__);
+        return NULL;
+    }
+
+    struct expr_matcher *matcher = ALLOC(struct expr_matcher, 1);
+    matcher->engine_type = engine_type;
+    matcher->engine = engine;
+    matcher->logger = logger;
+
+    return matcher;
+}
+
+/**
+ * @brief destroy a matcher together with its engine instance
+ *
+ * @param matcher: matcher obtained by expr_matcher_new(); NULL is ignored
+*/
+void expr_matcher_free(struct expr_matcher *matcher)
+{
+    if (matcher != NULL) {
+        if (matcher->engine != NULL) {
+            const struct expr_engine_operations *ops = &expr_engine_ops[matcher->engine_type];
+
+            ops->engine_free(matcher->engine);
+            matcher->engine = NULL;
+        }
+
+        FREE(matcher);
+    }
+}
+
+/**
+ * @brief block scan: validate the arguments and forward to the engine_scan of
+ *        the matcher's engine
+ *
+ * @retval -1 on invalid arguments, otherwise the return value of engine_scan
+*/
+int expr_matcher_match(struct expr_matcher *matcher, int thread_id, const char *scan_data,
+                       size_t data_len, struct expr_scan_result *result_array,
+                       size_t n_result_array, size_t *n_hit_results)
+{
+    const int args_ok = (matcher != NULL) && (thread_id >= 0) &&
+                        (scan_data != NULL) && (data_len != 0) &&
+                        (result_array != NULL) && (n_result_array != 0) &&
+                        (n_hit_results != NULL);
+    if (!args_ok) {
+        return -1;
+    }
+
+    const struct expr_engine_operations *ops = &expr_engine_ops[matcher->engine_type];
+
+    return ops->engine_scan(matcher->engine, thread_id, scan_data, data_len,
+                            result_array, n_result_array, n_hit_results);
+}
+
+/**
+ * @brief open an engine stream for thread_id and wrap it in an
+ *        engine-independent stream handle
+ *
+ * @retval the stream handle, or NULL on invalid arguments or when the engine
+ *         cannot open a stream
+*/
+struct expr_matcher_stream *
+expr_matcher_stream_open(struct expr_matcher *matcher, int thread_id)
+{
+    if (NULL == matcher || thread_id < 0) {
+        return NULL;
+    }
+
+    const enum expr_engine_type type = matcher->engine_type;
+    void *engine_stream = expr_engine_ops[type].engine_stream_open(matcher->engine,
+                                                                   thread_id);
+    if (engine_stream != NULL) {
+        struct expr_matcher_stream *stream = ALLOC(struct expr_matcher_stream, 1);
+
+        stream->engine_type = type;
+        stream->handle = engine_stream;
+
+        return stream;
+    }
+
+    log_error(matcher->logger, MODULE_EXPR_MATCHER,
+              "[%s:%d] expr_matcher engine_stream_open failed.",
+              __FUNCTION__, __LINE__);
+    return NULL;
+}
+
+/**
+ * @brief stream scan: validate the arguments and forward to the
+ *        engine_scan_stream of the stream's engine
+ *
+ * @retval -1 on invalid arguments, otherwise the return value of engine_scan_stream
+*/
+int expr_matcher_stream_match(struct expr_matcher_stream *stream, const char *scan_data,
+                              size_t data_len, struct expr_scan_result *result_array,
+                              size_t n_result_array, size_t *n_hit_results)
+{
+    const int args_ok = (stream != NULL) && (scan_data != NULL) && (data_len != 0) &&
+                        (result_array != NULL) && (n_result_array != 0) &&
+                        (n_hit_results != NULL);
+    if (!args_ok) {
+        return -1;
+    }
+
+    const struct expr_engine_operations *ops = &expr_engine_ops[stream->engine_type];
+
+    return ops->engine_scan_stream(stream->handle, scan_data, data_len,
+                                   result_array, n_result_array, n_hit_results);
+}
+
+/**
+ * @brief close the engine stream behind a stream handle and free the handle
+ *
+ * @param stream: handle obtained by expr_matcher_stream_open(); NULL is ignored
+*/
+void expr_matcher_stream_close(struct expr_matcher_stream *stream)
+{
+    if (stream != NULL) {
+        if (stream->handle != NULL) {
+            const struct expr_engine_operations *ops = &expr_engine_ops[stream->engine_type];
+
+            ops->engine_stream_close(stream->handle);
+            stream->handle = NULL;
+        }
+
+        FREE(stream);
+    }
+} \ No newline at end of file
diff --git a/scanner/expr_matcher/expr_matcher.h b/scanner/expr_matcher/expr_matcher.h
new file mode 100644
index 0000000..fb61854
--- /dev/null
+++ b/scanner/expr_matcher/expr_matcher.h
@@ -0,0 +1,134 @@
+/*
+**********************************************************************************************
+* File: expr_matcher.h
+* Description:
+* Authors: Liu wentan <[email protected]>
+* Date: 2023-06-30
+* Copyright: (c) Since 2023 Geedge Networks, Ltd. All rights reserved.
+***********************************************************************************************
+*/
+
+#ifndef _EXPR_MATCHER_H_
+#define _EXPR_MATCHER_H_
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#include <stddef.h>
+
+#include "log/log.h"
+
+#define MAX_EXPR_PATTERN_NUM 8 /* each AND expression consists of at most MAX_EXPR_PATTERN_NUM patterns */
+#define MAX_HIT_EXPR_NUM 1024 /* NOTE(review): not used in this header; presumably an upper bound on hit expressions per scan - confirm */
+
+enum expr_engine_type { /* selects which scan engine backs an expr_matcher */
+ EXPR_ENGINE_TYPE_HS = 0, /* default engine; presumably hyperscan (adapter_hs) - confirm */
+ EXPR_ENGINE_TYPE_RS, /* presumably rulescan (adapter_rs) - confirm */
+ EXPR_ENGINE_TYPE_MAX /* count of engine types above, not a selectable engine */
+};
+
+enum expr_pattern_type { /* how expr_pattern.pat is to be interpreted */
+ EXPR_PATTERN_TYPE_STR = 0, /* pure literal string */
+ EXPR_PATTERN_TYPE_REG = 1, /* regular expression */
+};
+
+enum expr_case_sensitive { /* case handling for a single pattern */
+ EXPR_CASE_INSENSITIVE = 0, /* letter case is ignored when matching */
+ EXPR_CASE_SENSITIVE /* letter case must match */
+};
+
+enum expr_match_mode { /* where in scan_data a pattern is allowed to match */
+ EXPR_MATCH_MODE_INVALID = -1, /* not a valid mode */
+ EXPR_MATCH_MODE_EXACTLY = 1, /* scan_data must match the pattern exactly */
+ EXPR_MATCH_MODE_PREFIX, /* pattern must be at the head of scan_data */
+ EXPR_MATCH_MODE_SUFFIX, /* pattern must be at the tail of scan_data */
+ EXPR_MATCH_MODE_SUB /* pattern must be within the range [l_offset, r_offset] of scan_data
+ (presumably expr_pattern.start_offset / end_offset - confirm) */
+};
+
+struct expr_pattern { /* one pattern (literal or regex) of an AND expression */
+ enum expr_pattern_type type;
+ enum expr_match_mode match_mode;
+ enum expr_case_sensitive case_sensitive;
+
+ /*
+ * only match within scan_data's range [start_offset, end_offset]; -1 means no limit
+ * for example:
+ * [-1, end_offset] means the pattern must be in scan_data's [0 ~ end_offset]
+ * [start_offset, -1] means the pattern must be in scan_data's [start_offset ~ data_end]
+ */
+ int start_offset;
+ int end_offset;
+
+ char *pat; /* pattern text; NOTE(review): ownership and NUL-termination are not stated here - confirm */
+ size_t pat_len; /* presumably the length of pat in bytes - confirm */
+};
+
+struct expr_scan_result { /* one element of the caller-allocated result_array */
+ long long rule_id; /* presumably the expr_id of the hit expr_rule - confirm */
+ void *user_tag; /* presumably the tag of the hit expr_rule - confirm */
+};
+
+/* logical AND expression over patterns, such as (pattern1 & pattern2) */
+struct expr_rule {
+ long long expr_id; /* AND expression ID */
+ size_t n_patterns; /* presumably the number of used entries in patterns[] - confirm */
+ struct expr_pattern patterns[MAX_EXPR_PATTERN_NUM];
+ void *tag; /* user-defined data, returned with the hit result */
+};
+
+int expr_matcher_verify_regex_expression(const char *regex_expr,
+ struct log_handle *logger); /* presumably checks that regex_expr is a usable regex; NOTE(review): return convention is not visible in this header - confirm */
+
+/**
+ * @brief new expr matcher instance
+ *
+ * @param rules: array of logic AND expressions
+ * @param n_rule: the number of elements in rules
+ * @param type: which scan engine to use, see enum expr_engine_type
+ * @param n_worker_thread: the number of scan threads which will call expr_matcher_match()
+ * @param logger: log handle; presumably used for error reporting - confirm
+ *
+ * @return NOTE(review): failure convention is not visible in this header;
+ *         presumably NULL on error - confirm
+*/
+struct expr_matcher *
+expr_matcher_new(struct expr_rule *rules, size_t n_rule, enum expr_engine_type type,
+ size_t n_worker_thread, struct log_handle *logger);
+
+void expr_matcher_free(struct expr_matcher *matcher); /* presumably releases a matcher obtained by expr_matcher_new() - confirm */
+
+/**
+ * @brief scan input data to match logic AND expression, return all matched expr_id
+ *
+ * @param matcher: expr_matcher instance obtained by expr_matcher_new()
+ * @param thread_id: the thread_id of caller, must be >= 0
+ * @param scan_data: data to be scanned
+ * @param data_len: the length of data to be scanned, must be > 0
+ * @param result_array: the array to store hit results, allocated by caller
+ * @param n_result_array: number of elements in result_array, must be > 0
+ * @param n_hit_results: output; presumably the number of results written to
+ *                       result_array - confirm
+ *
+ * @return -1 if any pointer argument is NULL, thread_id < 0, or
+ *         data_len / n_result_array is 0; otherwise the value returned by
+ *         the selected engine's scan operation
+*/
+int expr_matcher_match(struct expr_matcher *matcher, int thread_id, const char *scan_data,
+ size_t data_len, struct expr_scan_result *result_array,
+ size_t n_result_array, size_t *n_hit_results);
+
+/**
+ * @brief open a stream-mode scan context on matcher's engine
+ *
+ * @param matcher: expr_matcher instance, must not be NULL
+ * @param thread_id: the thread_id of caller, must be >= 0
+ *
+ * @return a stream to be released with expr_matcher_stream_close(),
+ *         or NULL on invalid arguments or when the engine fails to open a stream
+ */
+struct expr_matcher_stream *
+expr_matcher_stream_open(struct expr_matcher *matcher, int thread_id);
+
+/**
+ * @brief scan one chunk of data on a stream opened by expr_matcher_stream_open()
+ *
+ * @param stream: stream obtained by expr_matcher_stream_open()
+ * @param scan_data: data to be scanned
+ * @param data_len: the length of data to be scanned, must be > 0
+ * @param result_array: the array to store hit results, allocated by caller
+ * @param n_result_array: number of elements in result_array, must be > 0
+ * @param n_hit_results: output; presumably the number of results written to
+ *                       result_array - confirm
+ *
+ * @return -1 if any pointer argument is NULL or data_len / n_result_array is 0;
+ *         otherwise the value returned by the engine's stream scan operation
+ */
+int expr_matcher_stream_match(struct expr_matcher_stream *stream, const char *scan_data,
+ size_t data_len, struct expr_scan_result *result_array,
+ size_t n_result_array, size_t *n_hit_results);
+
+/**
+ * @brief close a stream opened by expr_matcher_stream_open() and free it
+ *
+ * @param stream: stream to close; NULL is ignored
+ */
+void expr_matcher_stream_close(struct expr_matcher_stream *stream);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif \ No newline at end of file