diff options
| author | liuwentan <[email protected]> | 2023-08-10 16:10:50 +0800 |
|---|---|---|
| committer | liuwentan <[email protected]> | 2023-08-10 16:10:50 +0800 |
| commit | 42f44802715b8d71ac02fd2363f5bafc7dba8581 (patch) | |
| tree | 45abb9380554f8227974b61f4e758242ce71ef14 /scanner | |
| parent | fb0cb5405d8236b23b5866952eda99e54d25aa5b (diff) | |
[FEATURE]expr_matcher support dual engine(hyperscan & rulescan) & benchmark
Diffstat (limited to 'scanner')
| -rw-r--r-- | scanner/CMakeLists.txt | 5 | ||||
| -rw-r--r-- | scanner/adapter_hs/adapter_hs.h | 134 | ||||
| -rw-r--r-- | scanner/expr_matcher/adapter_hs/adapter_hs.cpp (renamed from scanner/adapter_hs/adapter_hs.cpp) | 240 | ||||
| -rw-r--r-- | scanner/expr_matcher/adapter_hs/adapter_hs.h | 75 | ||||
| -rw-r--r-- | scanner/expr_matcher/adapter_rs/adapter_rs.cpp | 708 | ||||
| -rw-r--r-- | scanner/expr_matcher/adapter_rs/adapter_rs.h | 78 | ||||
| -rw-r--r-- | scanner/expr_matcher/expr_matcher.cpp | 235 | ||||
| -rw-r--r-- | scanner/expr_matcher/expr_matcher.h | 134 |
8 files changed, 1337 insertions, 272 deletions
diff --git a/scanner/CMakeLists.txt b/scanner/CMakeLists.txt index 429755e..7d45154 100644 --- a/scanner/CMakeLists.txt +++ b/scanner/CMakeLists.txt @@ -7,8 +7,9 @@ include_directories(${PROJECT_SOURCE_DIR}/src/inc_internal) add_subdirectory(ip_matcher/IntervalIndex) -add_library(adapter-static adapter_hs/adapter_hs.cpp bool_matcher/bool_matcher.cpp +add_library(adapter-static bool_matcher/bool_matcher.cpp expr_matcher/expr_matcher.cpp + expr_matcher/adapter_hs/adapter_hs.cpp expr_matcher/adapter_rs/adapter_rs.cpp fqdn_engine/fqdn_engine.cpp ip_matcher/ip_matcher.cpp ip_matcher/ipv4_match.cpp ip_matcher/ipv6_match.cpp flag_matcher/flag_matcher.cpp interval_matcher/cgranges.c interval_matcher/interval_matcher.cpp) -target_link_libraries(adapter-static hyperscan_static hyperscan_runtime_static interval_index_static)
\ No newline at end of file +target_link_libraries(adapter-static hyperscan_static hyperscan_runtime_static rulescan_static interval_index_static)
\ No newline at end of file diff --git a/scanner/adapter_hs/adapter_hs.h b/scanner/adapter_hs/adapter_hs.h deleted file mode 100644 index 672650a..0000000 --- a/scanner/adapter_hs/adapter_hs.h +++ /dev/null @@ -1,134 +0,0 @@ -/* -********************************************************************************************** -* File: adapter_hs.h -* Description: wrapper for raw hyperscan -* Authors: Liu WenTan <[email protected]> -* Date: 2022-10-31 -* Copyright: (c) 2018-2022 Geedge Networks, Inc. All rights reserved. -*********************************************************************************************** -*/ - -#ifndef _ADAPTER_HS_H_ -#define _ADAPTER_HS_H_ - -#ifdef __cplusplus -extern "C" -{ -#endif - -#include <stddef.h> -#include <stdint.h> - -#include "log/log.h" - -#define MAX_EXPR_PATTERN_NUM 8 - -struct adapter_hs; - -/* match method */ -enum hs_match_mode { - HS_MATCH_MODE_INVALID = -1, - HS_MATCH_MODE_EXACTLY = 1, /* scan data must match pattern exactly */ - HS_MATCH_MODE_PREFIX, /* pattern must in the head of scan_data */ - HS_MATCH_MODE_SUFFIX, /* pattern must in the end of scan_data */ - HS_MATCH_MODE_SUB /* pattern must in the range[l_offset, r_offset] of scan_data */ -}; - -enum hs_pattern_type { - HS_PATTERN_TYPE_STR = 0, /* pure literal string */ - HS_PATTERN_TYPE_REG /* regex expression */ -}; - -enum hs_case_sensitive { - HS_CASE_SENSITIVE = 0, - HS_CASE_INSENSITIVE -}; - -struct hs_scan_result { - long long rule_id; - void *user_tag; -}; - -struct hs_pattern { - enum hs_case_sensitive case_sensitive; - enum hs_match_mode match_mode; - enum hs_pattern_type pattern_type; - - int is_hexbin; /* 1(yes) 0(no) */ - - /* - * just match in scan_data's range of [start_offset, end_offset], -1 means no limits - * for example: - * [-1, end_offset] means the pattern must in scan_data's [0 ~ start_offset] - * [start_offset, -1] means the pattern must in scan_data's [start_offset ~ data_end] - */ - int start_offset; - int end_offset; - - /* start pointer of pattern */ - char *pat; - /* pattern length */ - size_t pat_len; -}; - -/* logic AND expression, such as (pattern1 & pattern2) */ -struct expr_rule { - long long expr_id; - size_t n_patterns; - struct hs_pattern patterns[MAX_EXPR_PATTERN_NUM]; - void *user_tag; -}; - -int adapter_hs_verify_regex_expression(const char *regex_expr, - struct log_handle *logger); -/** - * @brief new adapter_hs instance - * - * @param rules: logic AND expression's array - * @param n_rule: the number of logic AND expression's array - * @param nr_worker_threads: the number of scan threads which will call adapter_hs_scan() - * - * @retval the pointer to adapter_hs instance -*/ -struct adapter_hs *adapter_hs_new(struct expr_rule *rules, size_t n_rule, - size_t n_worker_thread, struct log_handle *logger); - -/** - * @brief scan input data to match logic AND expression, return all matched expr_id - * - * @param instance: adapter_hs instance obtained by adapter_hs_new() - * @param thread_id: the thread_id of caller - * @param data: data to be scanned - * @param data_len: the length of data to be scanned - * @param results: the array of expr_id - * @param n_results: number of elements in array of expr_id -*/ -int adapter_hs_scan(struct adapter_hs *hs_instance, int thread_id, - const char *data, size_t data_len, - struct hs_scan_result *results, - size_t n_result, size_t *n_hit_result); - -/** - * @brief destroy adapter_hs instance - * - * @param instance: adapter_hs instance obtained by adapter_hs_new() -*/ -void adapter_hs_free(struct adapter_hs *instance); - -struct adapter_hs_stream; -/** - * @brief open adapter_hs stream after adapter_hs instance initialized for stream scan - * -*/ -struct adapter_hs_stream *adapter_hs_stream_open(struct adapter_hs *hs_instance, int thread_id); - -int adapter_hs_scan_stream(struct adapter_hs_stream *stream, const char *data, size_t data_len, - struct hs_scan_result *results, size_t n_result, size_t *n_hit_result); - -void adapter_hs_stream_close(struct adapter_hs_stream *stream); - -#ifdef __cplusplus -} -#endif - -#endif
\ No newline at end of file diff --git a/scanner/adapter_hs/adapter_hs.cpp b/scanner/expr_matcher/adapter_hs/adapter_hs.cpp index d201e11..4d57dcf 100644 --- a/scanner/adapter_hs/adapter_hs.cpp +++ b/scanner/expr_matcher/adapter_hs/adapter_hs.cpp @@ -1,10 +1,10 @@ /* ********************************************************************************************** -* File: adapter_hs.cpp -* Description: -* Authors: Liu WenTan <[email protected]> +* File: adapter_hs.c +* Description: +* Authors: Liu wentan <[email protected]> * Date: 2022-10-31 -* Copyright: (c) 2018-2022 Geedge Networks, Inc. All rights reserved. +* Copyright: (c) Since 2022 Geedge Networks, Ltd. All rights reserved. *********************************************************************************************** */ @@ -20,9 +20,8 @@ #include "uthash/utarray.h" #include "uthash/uthash.h" #include "maat_utils.h" -#include "../bool_matcher/bool_matcher.h" +#include "../../bool_matcher/bool_matcher.h" -#define MAX_OFFSET_NUM 1024 #define MAX_HIT_PATTERN_NUM 512 pid_t hs_gettid() @@ -41,6 +40,7 @@ static const char *hs_module_name_str(const char *name) #define MODULE_ADAPTER_HS hs_module_name_str("maat.adapter_hs") struct adpt_hs_compile_data { + enum expr_pattern_type pat_type; unsigned int *ids; unsigned int *flags; char **patterns; @@ -56,7 +56,6 @@ struct adapter_hs_scratch { struct adapter_hs_stream { int thread_id; - size_t n_expr; hs_stream_t *literal_stream; hs_stream_t *regex_stream; struct adapter_hs_runtime *ref_hs_rt; @@ -91,7 +90,7 @@ struct pattern_offset { struct pattern_attribute { long long pattern_id; - enum hs_match_mode match_mode; + enum expr_match_mode match_mode; struct pattern_offset offset; }; @@ -137,12 +136,12 @@ static int _hs_alloc_scratch(hs_database_t *db, hs_scratch_t **scratches, static int adpt_hs_alloc_scratch(struct adapter_hs_runtime *hs_rt, size_t n_worker_thread, - enum hs_pattern_type pattern_type, + enum expr_pattern_type pattern_type, struct log_handle *logger) { int ret = 0; - if (pattern_type == HS_PATTERN_TYPE_STR) { + if (pattern_type == EXPR_PATTERN_TYPE_STR) { hs_rt->scratch->literal_scratches = ALLOC(hs_scratch_t *, n_worker_thread); ret = _hs_alloc_scratch(hs_rt->literal_db, hs_rt->scratch->literal_scratches, n_worker_thread, logger); @@ -200,7 +199,7 @@ static int adpt_hs_build_database(struct adapter_hs_runtime *hs_rt, if (regex_cd != NULL) { err = hs_compile_multi((const char *const *)regex_cd->patterns, regex_cd->flags, regex_cd->ids, regex_cd->n_patterns, - HS_MODE_STREAM | HS_MODE_SOM_HORIZON_SMALL, + HS_MODE_STREAM, NULL, &hs_rt->regex_db, &compile_err); if (err != HS_SUCCESS) { if (compile_err) { @@ -215,9 +214,11 @@ static int adpt_hs_build_database(struct adapter_hs_runtime *hs_rt, return 0; } -static struct adpt_hs_compile_data *adpt_hs_compile_data_new(size_t n_patterns) +static struct adpt_hs_compile_data * +adpt_hs_compile_data_new(enum expr_pattern_type pat_type, size_t n_patterns) { struct adpt_hs_compile_data *hs_cd = ALLOC(struct adpt_hs_compile_data, 1); + hs_cd->pat_type = pat_type; hs_cd->patterns = ALLOC(char *, n_patterns); hs_cd->pattern_lens = ALLOC(size_t, n_patterns); hs_cd->n_patterns = n_patterns; @@ -263,8 +264,11 @@ static void populate_compile_data(struct adpt_hs_compile_data *compile_data, compile_data->ids[index] = pattern_id; /* set flags */ - compile_data->flags[index] |= HS_FLAG_SOM_LEFTMOST; - if (case_sensitive == HS_CASE_INSENSITIVE) { + if (compile_data->pat_type == EXPR_PATTERN_TYPE_STR) { + compile_data->flags[index] |= HS_FLAG_SOM_LEFTMOST; + } + + if (case_sensitive == EXPR_CASE_INSENSITIVE) { compile_data->flags[index] |= HS_FLAG_CASELESS; } @@ -295,14 +299,14 @@ static struct bool_expr *bool_exprs_new(struct expr_rule *rules, size_t n_rule, pattern_attr[pattern_index].pattern_id = pattern_index; pattern_attr[pattern_index].match_mode = rules[i].patterns[j].match_mode; - if (pattern_attr[pattern_index].match_mode == HS_MATCH_MODE_SUB || - pattern_attr[pattern_index].match_mode == HS_MATCH_MODE_EXACTLY) { + if (pattern_attr[pattern_index].match_mode == EXPR_MATCH_MODE_SUB || + pattern_attr[pattern_index].match_mode == EXPR_MATCH_MODE_EXACTLY) { pattern_attr[pattern_index].offset.start = rules[i].patterns[j].start_offset; pattern_attr[pattern_index].offset.end = rules[i].patterns[j].end_offset; } /* literal pattern */ - if (rules[i].patterns[j].pattern_type == HS_PATTERN_TYPE_STR) { + if (rules[i].patterns[j].type == EXPR_PATTERN_TYPE_STR) { populate_compile_data(literal_cd, literal_index, pattern_index, rules[i].patterns[j].pat, rules[i].patterns[j].pat_len, rules[i].patterns[j].case_sensitive); @@ -321,7 +325,7 @@ static struct bool_expr *bool_exprs_new(struct expr_rule *rules, size_t n_rule, bool_exprs[i].expr_id = rules[i].expr_id; bool_exprs[i].item_num = rules[i].n_patterns; - bool_exprs[i].user_tag = rules[i].user_tag; + bool_exprs[i].user_tag = rules[i].tag; } *n_pattern = pattern_index; @@ -345,81 +349,43 @@ static int verify_regex_expression(const char *regex_str, struct log_handle *log FREE(info); hs_free_compile_error(error); - return -1; + return 0; } if (info != NULL) { FREE(info); } - return 0; + return 1; } int adapter_hs_verify_regex_expression(const char *regex_expr, struct log_handle *logger) { if (NULL == regex_expr) { - return -1; + return 0; } return verify_regex_expression(regex_expr, logger); } -struct adapter_hs *adapter_hs_new(struct expr_rule *rules, size_t n_rule, - size_t n_worker_thread, struct log_handle *logger) +void *adapter_hs_new(struct expr_rule *rules, size_t n_rule, + size_t n_literal_pattern, size_t n_regex_pattern, + size_t n_worker_thread, struct log_handle *logger) { - if (0 == n_worker_thread || NULL == rules || 0 == n_rule) { - log_error(logger, MODULE_ADAPTER_HS, - "[%s:%d] input parameters illegal!", __FUNCTION__, __LINE__); - return NULL; - } - /* get the sum of pattern */ - size_t i = 0, j = 0; - size_t literal_pattern_num = 0; - size_t regex_pattern_num = 0; - for (i = 0; i < n_rule; i++) { - if (rules[i].n_patterns > MAX_EXPR_PATTERN_NUM) { - log_error(logger, MODULE_ADAPTER_HS, - "[%s:%d] the number of patterns in one expression " - "should less than %d", __FUNCTION__, __LINE__, - MAX_EXPR_PATTERN_NUM); - return NULL; - } - - for (j = 0; j < rules[i].n_patterns; j++) { - /* pat_len should not 0 */ - if (0 == rules[i].patterns[j].pat_len) { - log_error(logger, MODULE_ADAPTER_HS, - "[%s:%d] expr pattern length should not 0", - __FUNCTION__, __LINE__); - return NULL; - } - - if (rules[i].patterns[j].pattern_type == HS_PATTERN_TYPE_STR) { - literal_pattern_num++; - } else { - regex_pattern_num++; - } - } - } - - if (0 == literal_pattern_num && 0 == regex_pattern_num) { - log_error(logger, MODULE_ADAPTER_HS, - "[%s:%d] exprs has no valid pattern", __FUNCTION__, __LINE__); - return NULL; - } - + size_t i = 0; struct adpt_hs_compile_data *literal_cd = NULL; struct adpt_hs_compile_data *regex_cd = NULL; - if (literal_pattern_num > 0) { - literal_cd = adpt_hs_compile_data_new(literal_pattern_num); + + if (n_literal_pattern > 0) { + literal_cd = adpt_hs_compile_data_new(EXPR_PATTERN_TYPE_STR, n_literal_pattern); } - if (regex_pattern_num > 0) { - regex_cd = adpt_hs_compile_data_new(regex_pattern_num); + if (n_regex_pattern > 0) { + regex_cd = adpt_hs_compile_data_new(EXPR_PATTERN_TYPE_REG, n_regex_pattern); } - size_t pattern_cnt = literal_pattern_num + regex_pattern_num; + size_t pattern_cnt = n_literal_pattern + n_regex_pattern; struct adapter_hs *hs_inst = ALLOC(struct adapter_hs, 1); hs_inst->hs_attr = ALLOC(struct pattern_attribute, pattern_cnt); hs_inst->logger = logger; @@ -478,21 +444,21 @@ struct adapter_hs *adapter_hs_new(struct expr_rule *rules, size_t n_rule, n_worker_thread); for (i = 0; i < n_worker_thread; i++) { hs_inst->hs_rt->scratch->bool_match_buffs[i] = ALLOC(struct bool_expr_match, - hs_inst->n_expr); + MAX_HIT_EXPR_NUM); } /* literal and regex scratch can't reuse */ - if (literal_pattern_num > 0) { + if (n_literal_pattern > 0) { ret = adpt_hs_alloc_scratch(hs_inst->hs_rt, n_worker_thread, - HS_PATTERN_TYPE_STR, logger); + EXPR_PATTERN_TYPE_STR, logger); if (ret < 0) { goto error; } } - if (regex_pattern_num > 0) { + if (n_regex_pattern > 0) { ret = adpt_hs_alloc_scratch(hs_inst->hs_rt, n_worker_thread, - HS_PATTERN_TYPE_REG, logger); + EXPR_PATTERN_TYPE_REG, logger); if (ret < 0) { goto error; } @@ -500,7 +466,7 @@ struct adapter_hs *adapter_hs_new(struct expr_rule *rules, size_t n_rule, hs_inst->hs_rt->streams = ALLOC(struct adapter_hs_stream *, n_worker_thread); for (i = 0; i < n_worker_thread; i++) { - hs_inst->hs_rt->streams[i] = adapter_hs_stream_open(hs_inst, i); + hs_inst->hs_rt->streams[i] = (struct adapter_hs_stream *)adapter_hs_stream_open(hs_inst, i); } return hs_inst; @@ -509,13 +475,15 @@ error: return NULL; } -void adapter_hs_free(struct adapter_hs *hs_inst) +void adapter_hs_free(void *hs_instance) { - if (NULL == hs_inst) { + if (NULL == hs_instance) { return; } + struct adapter_hs *hs_inst = (struct adapter_hs *)hs_instance; size_t i = 0; + if (hs_inst->hs_rt != NULL) { if (hs_inst->hs_rt->literal_db != NULL) { hs_free_database(hs_inst->hs_rt->literal_db); @@ -625,12 +593,12 @@ static int matched_event_cb(unsigned int id, unsigned long long from, int ret = 0; struct pattern_attribute pat_attr = matched_pat->ref_hs_attr[id]; switch (pat_attr.match_mode) { - case HS_MATCH_MODE_EXACTLY: + case EXPR_MATCH_MODE_EXACTLY: if (0 == from && matched_pat->scan_data_len == to) { ret = 1; } break; - case HS_MATCH_MODE_SUB: + case EXPR_MATCH_MODE_SUB: if (pat_attr.offset.start == -1 && pat_attr.offset.end == -1) { ret = 1; @@ -656,12 +624,12 @@ static int matched_event_cb(unsigned int id, unsigned long long from, ret = 1; } break; - case HS_MATCH_MODE_PREFIX: + case EXPR_MATCH_MODE_PREFIX: if (0 == from) { ret = 1; } break; - case HS_MATCH_MODE_SUFFIX: + case EXPR_MATCH_MODE_SUFFIX: if (to == matched_pat->scan_data_len) { ret = 1; } @@ -678,43 +646,42 @@ static int matched_event_cb(unsigned int id, unsigned long long from, return 0; } -UT_icd ut_pattern_id_icd = {sizeof(unsigned long long), NULL, NULL, NULL}; -struct adapter_hs_stream * -adapter_hs_stream_open(struct adapter_hs *hs_instance, int thread_id) +UT_icd ut_hs_pattern_id_icd = {sizeof(unsigned long long), NULL, NULL, NULL}; +void *adapter_hs_stream_open(void *hs_instance, int thread_id) { if (NULL == hs_instance || thread_id < 0) { return NULL; } + struct adapter_hs *hs_inst = (struct adapter_hs *)hs_instance; struct adapter_hs_stream *hs_stream = ALLOC(struct adapter_hs_stream, 1); hs_error_t err; - hs_stream->logger = hs_instance->logger; + hs_stream->logger = hs_inst->logger; hs_stream->thread_id = thread_id; - hs_stream->n_expr = hs_instance->n_expr; - hs_stream->ref_hs_rt = hs_instance->hs_rt; + hs_stream->ref_hs_rt = hs_inst->hs_rt; hs_stream->matched_pat = ALLOC(struct matched_pattern, 1); - hs_stream->matched_pat->ref_hs_attr = hs_instance->hs_attr; - hs_stream->matched_pat->n_patterns = hs_instance->n_patterns; - utarray_new(hs_stream->matched_pat->pattern_ids, &ut_pattern_id_icd); + hs_stream->matched_pat->ref_hs_attr = hs_inst->hs_attr; + hs_stream->matched_pat->n_patterns = hs_inst->n_patterns; + utarray_new(hs_stream->matched_pat->pattern_ids, &ut_hs_pattern_id_icd); utarray_reserve(hs_stream->matched_pat->pattern_ids, MAX_HIT_PATTERN_NUM); int err_count = 0; - if (hs_instance->hs_rt->literal_db != NULL) { - err = hs_open_stream(hs_instance->hs_rt->literal_db, 0, + if (hs_inst->hs_rt->literal_db != NULL) { + err = hs_open_stream(hs_inst->hs_rt->literal_db, 0, &hs_stream->literal_stream); if (err != HS_SUCCESS) { - log_error(hs_instance->logger, MODULE_ADAPTER_HS, + log_error(hs_inst->logger, MODULE_ADAPTER_HS, "hs_open_stream failed, hs err:%d", err); err_count++; } } - if (hs_instance->hs_rt->regex_db != NULL) { - err = hs_open_stream(hs_instance->hs_rt->regex_db, 0, + if (hs_inst->hs_rt->regex_db != NULL) { + err = hs_open_stream(hs_inst->hs_rt->regex_db, 0, &hs_stream->regex_stream); if (err != HS_SUCCESS) { - log_error(hs_instance->logger, MODULE_ADAPTER_HS, + log_error(hs_inst->logger, MODULE_ADAPTER_HS, "hs_open_stream failed, hs err:%d", err); err_count++; } @@ -740,36 +707,37 @@ error: return NULL; } -void adapter_hs_stream_close(struct adapter_hs_stream *hs_stream) +void adapter_hs_stream_close(void *hs_stream) { if (NULL == hs_stream) { return; } - if (hs_stream->ref_hs_rt != NULL) { - if (hs_stream->literal_stream != NULL) { - hs_close_stream(hs_stream->literal_stream, NULL, NULL, NULL); - hs_stream->literal_stream = NULL; + struct adapter_hs_stream *stream = (struct adapter_hs_stream *)hs_stream; + if (stream->ref_hs_rt != NULL) { + if (stream->literal_stream != NULL) { + hs_close_stream(stream->literal_stream, NULL, NULL, NULL); + stream->literal_stream = NULL; } - if (hs_stream->regex_stream != NULL) { - hs_close_stream(hs_stream->regex_stream, NULL, NULL, NULL); - hs_stream->regex_stream = NULL; + if (stream->regex_stream != NULL) { + hs_close_stream(stream->regex_stream, NULL, NULL, NULL); + stream->regex_stream = NULL; } } - /* hs_stream->hs_rt point to hs_instance->hs_rt which will call free + /* stream->hs_rt point to hs_instance->hs_rt which will call free same as hs_attr */ - hs_stream->ref_hs_rt = NULL; - hs_stream->matched_pat->ref_hs_attr = NULL; + stream->ref_hs_rt = NULL; + stream->matched_pat->ref_hs_attr = NULL; - if (hs_stream->matched_pat->pattern_ids != NULL) { - utarray_free(hs_stream->matched_pat->pattern_ids); - hs_stream->matched_pat->pattern_ids = NULL; + if (stream->matched_pat->pattern_ids != NULL) { + utarray_free(stream->matched_pat->pattern_ids); + stream->matched_pat->pattern_ids = NULL; } - FREE(hs_stream->matched_pat); - FREE(hs_stream); + FREE(stream->matched_pat); + FREE(stream); } static void adapter_hs_stream_reset(struct adapter_hs_stream *hs_stream) @@ -794,9 +762,9 @@ static void adapter_hs_stream_reset(struct adapter_hs_stream *hs_stream) utarray_clear(hs_stream->matched_pat->pattern_ids); } -int adapter_hs_scan_stream(struct adapter_hs_stream *hs_stream, const char *data, - size_t data_len, struct hs_scan_result *results, - size_t n_result, size_t *n_hit_result) +int adapter_hs_scan_stream(void *hs_stream, const char *data, size_t data_len, + struct expr_scan_result *results, size_t n_result, + size_t *n_hit_result) { hs_error_t err; @@ -816,36 +784,37 @@ int adapter_hs_scan_stream(struct adapter_hs_stream *hs_stream, const char *data */ int err_count = 0; - int thread_id = hs_stream->thread_id; - struct adapter_hs_scratch *scratch = hs_stream->ref_hs_rt->scratch; - hs_stream->matched_pat->scan_data_len = data_len; + struct adapter_hs_stream *stream = (struct adapter_hs_stream *)hs_stream; + int thread_id = stream->thread_id; + struct adapter_hs_scratch *scratch = stream->ref_hs_rt->scratch; + stream->matched_pat->scan_data_len = data_len; int err_scratch_flag = 0; - if (hs_stream->literal_stream != NULL) { + if (stream->literal_stream != NULL) { if (scratch->literal_scratches != NULL) { - err = hs_scan_stream(hs_stream->literal_stream, data, data_len, + err = hs_scan_stream(stream->literal_stream, data, data_len, 0, scratch->literal_scratches[thread_id], - matched_event_cb, hs_stream->matched_pat); + matched_event_cb, stream->matched_pat); if (err != HS_SUCCESS) { err_count++; } } else { - log_error(hs_stream->logger, MODULE_ADAPTER_HS, + log_error(stream->logger, MODULE_ADAPTER_HS, "literal_scratches is null, thread_id:%d", thread_id); err_scratch_flag++; } } - if (hs_stream->regex_stream != NULL) { + if (stream->regex_stream != NULL) { if (scratch->regex_scratches != NULL) { - err = hs_scan_stream(hs_stream->regex_stream, data, data_len, + err = hs_scan_stream(stream->regex_stream, data, data_len, 0, scratch->regex_scratches[thread_id], - matched_event_cb, hs_stream->matched_pat); + matched_event_cb, stream->matched_pat); if (err != HS_SUCCESS) { err_count++; } } else { - log_error(hs_stream->logger, MODULE_ADAPTER_HS, + log_error(stream->logger, MODULE_ADAPTER_HS, "regex_scratches is null, thread_id:%d", thread_id); err_scratch_flag++; } @@ -859,7 +828,7 @@ int adapter_hs_scan_stream(struct adapter_hs_stream *hs_stream, const char *data return -1; } - size_t n_pattern_id = utarray_len(hs_stream->matched_pat->pattern_ids); + size_t n_pattern_id = utarray_len(stream->matched_pat->pattern_ids); if (0 == n_pattern_id) { *n_hit_result = 0; return 0; @@ -868,13 +837,13 @@ int adapter_hs_scan_stream(struct adapter_hs_stream *hs_stream, const char *data unsigned long long pattern_ids[n_pattern_id]; for (size_t i = 0; i < n_pattern_id; i++) { - pattern_ids[i] = *(unsigned long long *)utarray_eltptr(hs_stream->matched_pat->pattern_ids, i); + pattern_ids[i] = *(unsigned long long *)utarray_eltptr(stream->matched_pat->pattern_ids, i); } int ret = 0; struct bool_expr_match *bool_matcher_results = scratch->bool_match_buffs[thread_id]; - int bool_matcher_ret = bool_matcher_match(hs_stream->ref_hs_rt->bm, pattern_ids, n_pattern_id, - bool_matcher_results, hs_stream->n_expr); + int bool_matcher_ret = bool_matcher_match(stream->ref_hs_rt->bm, pattern_ids, n_pattern_id, + bool_matcher_results, MAX_HIT_EXPR_NUM); if (bool_matcher_ret < 0) { ret = -1; goto next; @@ -891,22 +860,21 @@ int adapter_hs_scan_stream(struct adapter_hs_stream *hs_stream, const char *data *n_hit_result = bool_matcher_ret; next: - utarray_clear(hs_stream->matched_pat->pattern_ids); + utarray_clear(stream->matched_pat->pattern_ids); return ret; } -int adapter_hs_scan(struct adapter_hs *hs_instance, int thread_id, - const char *data, size_t data_len, - struct hs_scan_result *results, - size_t n_result, size_t *n_hit_result) +int adapter_hs_scan(void *hs_instance, int thread_id, const char *data, size_t data_len, + struct expr_scan_result *results, size_t n_result, size_t *n_hit_result) { if (NULL == hs_instance || NULL == data || (0 == data_len) || NULL == results || 0 == n_result || NULL == n_hit_result) { return -1; } - struct adapter_hs_stream *hs_stream = hs_instance->hs_rt->streams[thread_id]; + struct adapter_hs *hs_inst = (struct adapter_hs *)hs_instance; + struct adapter_hs_stream *hs_stream = hs_inst->hs_rt->streams[thread_id]; assert(hs_stream != NULL); adapter_hs_stream_reset(hs_stream); diff --git a/scanner/expr_matcher/adapter_hs/adapter_hs.h b/scanner/expr_matcher/adapter_hs/adapter_hs.h new file mode 100644 index 0000000..c9de7d2 --- /dev/null +++ b/scanner/expr_matcher/adapter_hs/adapter_hs.h @@ -0,0 +1,75 @@ +/* +********************************************************************************************** +* File: adapter_hs.h +* Description: +* Authors: Liu wentan <[email protected]> +* Date: 2022-10-31 +* Copyright: (c) Since 2022 Geedge Networks, Ltd. All rights reserved. +*********************************************************************************************** +*/ + +#ifndef _ADAPTER_HS_H_ +#define _ADAPTER_HS_H_ + +#ifdef __cplusplus +extern "C" +{ +#endif + +#include <stddef.h> +#include <stdint.h> + +#include "log/log.h" +#include "../expr_matcher.h" + +int adapter_hs_verify_regex_expression(const char *regex_expr, struct log_handle *logger); +/** + * @brief new adapter_hs instance + * + * @param rules: logic AND expression's array + * @param n_rule: the number of logic AND expression's array + * @param nr_worker_threads: the number of scan threads which will call adapter_hs_scan() + * + * @retval the pointer to adapter_hs instance +*/ +void *adapter_hs_new(struct expr_rule *rules, size_t n_rule, + size_t n_literal_pattern, size_t n_regex_pattern, + size_t n_worker_thread, struct log_handle *logger); + +/** + * @brief scan input data to match logic AND expression, return all matched expr_id + * + * @param instance: adapter_hs instance obtained by adapter_hs_new() + * @param thread_id: the thread_id of caller + * @param data: data to be scanned + * @param data_len: the length of data to be scanned + * @param results: the array of expr_id + * @param n_results: number of elements in array of expr_id +*/ +int adapter_hs_scan(void *hs_instance, int thread_id, const char *data, size_t data_len, + struct expr_scan_result *results, size_t n_result, size_t *n_hit_result); + +/** + * @brief destroy adapter_hs instance + * + * @param instance: adapter_hs instance obtained by adapter_hs_new() +*/ +void adapter_hs_free(void *instance); + +/** + * @brief open adapter_hs stream after adapter_hs instance initialized for stream scan + * +*/ +void *adapter_hs_stream_open(void *hs_instance, int thread_id); + +int adapter_hs_scan_stream(void *stream, const char *data, size_t data_len, + struct expr_scan_result *results, size_t n_result, + size_t *n_hit_result); + +void adapter_hs_stream_close(void *stream); + +#ifdef __cplusplus +} +#endif + +#endif
\ No newline at end of file diff --git a/scanner/expr_matcher/adapter_rs/adapter_rs.cpp b/scanner/expr_matcher/adapter_rs/adapter_rs.cpp new file mode 100644 index 0000000..1459ab1 --- /dev/null +++ b/scanner/expr_matcher/adapter_rs/adapter_rs.cpp @@ -0,0 +1,708 @@ +/* +********************************************************************************************** +* File: adapter_rs.cpp +* Description: +* Authors: Liu wentan <[email protected]> +* Date: 2022-10-31 +* Copyright: (c) Since 2022 Geedge Networks, Ltd. All rights reserved. +*********************************************************************************************** +*/ + +#include <stdint.h> +#include <stdio.h> +#include <stddef.h> +#include <assert.h> +#include <unistd.h> +#include <sys/syscall.h> + +#include "rulescan.h" +#include "adapter_rs.h" +#include "uthash/utarray.h" +#include "uthash/uthash.h" +#include "maat_utils.h" +#include "../../bool_matcher/bool_matcher.h" + +#define MAX_HIT_PATTERN_NUM 512 + +pid_t rs_gettid() +{ + return syscall(SYS_gettid); +} + +static const char *rs_module_name_str(const char *name) +{ + static __thread char module[64]; + snprintf(module, sizeof(module), "%s(%d)", name, rs_gettid()); + + return module; +} + +#define MODULE_ADAPTER_RS rs_module_name_str("maat.adapter_rs") + +struct adpt_rs_compile_data { + struct scan_pattern *patterns; + size_t n_patterns; +}; + +struct adapter_rs_stream { + int thread_id; + size_t offset; /* current stream offset */ + rs_stream_t *literal_stream; + rs_stream_t *regex_stream; + struct adapter_rs_runtime *ref_rs_rt; + + struct log_handle *logger; +}; + +/* adapter_rs runtime */ +struct adapter_rs_runtime { + rs_database_t *literal_db; + rs_database_t *regex_db; + + struct bool_expr_match **bool_match_buffs; /* per thread */ + struct adapter_rs_stream **streams; /* per thread */ + struct matched_pattern **matched_pats; /* per thread */ + struct bool_matcher *bm; +}; + +/* adapter_rs instance */ +struct adapter_rs { + size_t n_worker_thread; + size_t n_expr; + size_t n_patterns; + struct adapter_rs_runtime *rs_rt; + struct pattern_attribute *rs_attr; + struct log_handle *logger; +}; + +struct pattern_offset { + long long start; + long long end; +}; + +struct pattern_attribute { + long long pattern_id; + enum expr_match_mode match_mode; + struct pattern_offset offset; + size_t pattern_len; +}; + +struct matched_pattern { + UT_array *pattern_ids; + size_t n_patterns; + struct pattern_attribute *ref_rs_attr; +}; + +int adapter_rs_verify_regex_expression(const char *regex_expr, + struct log_handle *logger) +{ + int ret = rs_verify_regex(regex_expr); + if (ret == 0) { + log_error(logger, MODULE_ADAPTER_RS, + "[%s:%d] illegal regex expression: \"%s\"", + __FUNCTION__, __LINE__, regex_expr); + } + + return ret; +} +/** + * @brief build rs database for literal string and regex expression respectively + * + * @retval 0(success) -1(failed) +*/ +static int adpt_rs_build_database(struct adapter_rs_runtime *rs_rt, + size_t n_worker_thread, + struct adpt_rs_compile_data *literal_cd, + struct adpt_rs_compile_data *regex_cd, + struct log_handle *logger) +{ + if (NULL == rs_rt) { + return -1; + } + + int ret = 0; + if (literal_cd != NULL) { + ret = rs_compile_lit(literal_cd->patterns, literal_cd->n_patterns, + &rs_rt->literal_db); + if (ret < 0) { + log_error(logger, MODULE_ADAPTER_RS, "[%s:%d] compile error", + __FUNCTION__, __LINE__); + return -1; + } + } + + if (regex_cd != NULL) { + size_t n_failed_pats = 0; + ret = rs_compile_regex(regex_cd->patterns, regex_cd->n_patterns, + n_worker_thread, &rs_rt->regex_db, &n_failed_pats); + if (ret < 0) { + log_error(logger, MODULE_ADAPTER_RS, "[%s:%d] compile error", + __FUNCTION__, __LINE__); + return -1; + } + } + + return 0; +} + +static struct adpt_rs_compile_data *adpt_rs_compile_data_new(size_t n_patterns) +{ + struct adpt_rs_compile_data *rs_cd = ALLOC(struct adpt_rs_compile_data, 1); + rs_cd->patterns = ALLOC(struct scan_pattern, n_patterns); + rs_cd->n_patterns = n_patterns; + + return rs_cd; +} + +static void adpt_rs_compile_data_free(struct adpt_rs_compile_data *rs_cd) +{ + if (NULL == rs_cd) { + return; + } + + if (rs_cd->patterns != NULL) { + for (size_t i = 0; i < rs_cd->n_patterns; i++) { + if (rs_cd->patterns[i].pattern != NULL) { + FREE(rs_cd->patterns[i].pattern); + } + } + + FREE(rs_cd->patterns); + } + + FREE(rs_cd); +} + +static void populate_compile_data(struct adpt_rs_compile_data *compile_data, + size_t index, long long pattern_id, char *pat, + size_t pat_len, int case_sensitive) +{ + compile_data->patterns[index].id = pattern_id; + compile_data->patterns[index].case_sensitive = case_sensitive; + compile_data->patterns[index].pattern = ALLOC(char, pat_len + 1); + memcpy(compile_data->patterns[index].pattern, pat, pat_len); + compile_data->patterns[index].pattern_len = pat_len; +} + +static struct bool_expr *bool_exprs_new(struct expr_rule *rules, size_t n_rule, + struct pattern_attribute *pattern_attr, + struct adpt_rs_compile_data *literal_cd, + struct adpt_rs_compile_data *regex_cd, + size_t *n_pattern) +{ + long long pattern_idx = 0; + size_t literal_idx = 0; + size_t regex_idx = 0; + + struct bool_expr *bool_exprs = ALLOC(struct bool_expr, n_rule); + + /* populate adpt_rs_compile_data and bool_expr */ + for (size_t i = 0; i < n_rule; i++) { + + for (size_t j = 0; j < rules[i].n_patterns; j++) { + pattern_attr[pattern_idx].pattern_id = pattern_idx; + pattern_attr[pattern_idx].match_mode = rules[i].patterns[j].match_mode; + pattern_attr[pattern_idx].pattern_len = rules[i].patterns[j].pat_len; + + if (pattern_attr[pattern_idx].match_mode == EXPR_MATCH_MODE_SUB || + pattern_attr[pattern_idx].match_mode == EXPR_MATCH_MODE_EXACTLY) { + pattern_attr[pattern_idx].offset.start = rules[i].patterns[j].start_offset; + pattern_attr[pattern_idx].offset.end = rules[i].patterns[j].end_offset; + } + + /* literal pattern */ + if (rules[i].patterns[j].type == EXPR_PATTERN_TYPE_STR) { + populate_compile_data(literal_cd, literal_idx, pattern_idx, + rules[i].patterns[j].pat, rules[i].patterns[j].pat_len, + rules[i].patterns[j].case_sensitive); + literal_idx++; + } else { + /* regex pattern */ + populate_compile_data(regex_cd, regex_idx, pattern_idx, + rules[i].patterns[j].pat, rules[i].patterns[j].pat_len, + rules[i].patterns[j].case_sensitive); + regex_idx++; + } + + bool_exprs[i].items[j].item_id = pattern_idx++; + bool_exprs[i].items[j].not_flag = 0; + } + + bool_exprs[i].expr_id = rules[i].expr_id; + bool_exprs[i].item_num = rules[i].n_patterns; + bool_exprs[i].user_tag = rules[i].tag; + } + + *n_pattern = pattern_idx; + + return bool_exprs; +} + +UT_icd ut_rs_pattern_id_icd = {sizeof(unsigned long long), NULL, NULL, NULL}; +void *adapter_rs_new(struct expr_rule *rules, size_t n_rule, + size_t n_literal_pattern, size_t n_regex_pattern, + size_t n_worker_thread, struct log_handle *logger) +{ + /* get the sum of pattern */ + size_t i = 0; + struct adpt_rs_compile_data *literal_cd = NULL; + struct adpt_rs_compile_data *regex_cd = NULL; + + if (n_literal_pattern > 0) { + literal_cd = adpt_rs_compile_data_new(n_literal_pattern); + } + + if (n_regex_pattern > 0) { + regex_cd = adpt_rs_compile_data_new(n_regex_pattern); + } + + size_t pattern_cnt = n_literal_pattern + n_regex_pattern; + struct adapter_rs *rs_inst = ALLOC(struct adapter_rs, 1); + rs_inst->rs_attr = ALLOC(struct pattern_attribute, pattern_cnt); + rs_inst->logger = logger; + rs_inst->n_worker_thread = n_worker_thread; + rs_inst->n_expr = n_rule; + + struct bool_expr *bool_exprs = bool_exprs_new(rules, n_rule, rs_inst->rs_attr, + literal_cd, regex_cd, &pattern_cnt); + if (NULL == bool_exprs) { + return NULL; + } + rs_inst->n_patterns = pattern_cnt; + + /* create bool matcher */ + size_t mem_size = 0; + int rs_ret = 0; + + rs_inst->rs_rt = ALLOC(struct adapter_rs_runtime, 1); + + //rs_rt->bm + rs_inst->rs_rt->bm = bool_matcher_new(bool_exprs, n_rule, &mem_size); + if (rs_inst->rs_rt->bm != NULL) { + log_info(logger, MODULE_ADAPTER_RS, + "Adapter_rs module: build bool matcher of %zu expressions" + " with %zu bytes memory", n_rule, mem_size); + } else { + log_error(logger, MODULE_ADAPTER_RS, + "[%s:%d] Adapter_rs module: build bool matcher failed", + __FUNCTION__, __LINE__); + + rs_ret = -1; + } + FREE(bool_exprs); + + /* build rs database rs_rt->literal_db & rs_rt->regex_db */ + int ret = adpt_rs_build_database(rs_inst->rs_rt, n_worker_thread, + literal_cd, regex_cd, logger); + if (ret < 0) { + rs_ret = -1; + } + + if (literal_cd != NULL) { + adpt_rs_compile_data_free(literal_cd); + } + + if (regex_cd != NULL) { + adpt_rs_compile_data_free(regex_cd); + } + + if (rs_ret < 0) { + goto error; + } + + /* alloc scratch */ + rs_inst->rs_rt->bool_match_buffs = ALLOC(struct bool_expr_match *, n_worker_thread); + for (i = 0; i < n_worker_thread; i++) { + rs_inst->rs_rt->bool_match_buffs[i] = ALLOC(struct bool_expr_match, MAX_HIT_EXPR_NUM); + } + + rs_inst->rs_rt->streams = ALLOC(struct adapter_rs_stream *, n_worker_thread); + for (i = 0; i < n_worker_thread; i++) { + rs_inst->rs_rt->streams[i] = (struct adapter_rs_stream *)adapter_rs_stream_open(rs_inst, i); + } + + rs_inst->rs_rt->matched_pats = ALLOC(struct matched_pattern *, n_worker_thread); + for (i = 0; i < n_worker_thread; i++) { + rs_inst->rs_rt->matched_pats[i] = ALLOC(struct matched_pattern, 1); + rs_inst->rs_rt->matched_pats[i]->ref_rs_attr = rs_inst->rs_attr; + rs_inst->rs_rt->matched_pats[i]->n_patterns = rs_inst->n_patterns; + utarray_new(rs_inst->rs_rt->matched_pats[i]->pattern_ids, &ut_rs_pattern_id_icd); + utarray_reserve(rs_inst->rs_rt->matched_pats[i]->pattern_ids, MAX_HIT_PATTERN_NUM); + } + + return rs_inst; +error: + adapter_rs_free(rs_inst); + return NULL; +} + +void adapter_rs_free(void *rs_instance) +{ + if (NULL == rs_instance) { + return; + } + + size_t i = 0; + struct adapter_rs *rs_inst = (struct adapter_rs *)rs_instance; + + if (rs_inst->rs_rt != NULL) { + if (rs_inst->rs_rt->literal_db != NULL) { + rs_free_database(rs_inst->rs_rt->literal_db); + rs_inst->rs_rt->literal_db = NULL; + } + + if (rs_inst->rs_rt->regex_db != NULL) { + rs_free_database(rs_inst->rs_rt->regex_db); + rs_inst->rs_rt->regex_db = NULL; + } + + if (rs_inst->rs_rt->bool_match_buffs != NULL) { + for (i = 0; i < rs_inst->n_worker_thread; i++) { + if (rs_inst->rs_rt->bool_match_buffs[i] != NULL) { + FREE(rs_inst->rs_rt->bool_match_buffs[i]); + } + } + + FREE(rs_inst->rs_rt->bool_match_buffs); + } + + if (rs_inst->rs_rt->bm != NULL) { + bool_matcher_free(rs_inst->rs_rt->bm); + rs_inst->rs_rt->bm = NULL; + } + + if (rs_inst->rs_rt->streams != NULL) { + for (i = 0; i < rs_inst->n_worker_thread; i++) { + if (rs_inst->rs_rt->streams[i] != NULL) { + adapter_rs_stream_close(rs_inst->rs_rt->streams[i]); + rs_inst->rs_rt->streams[i] = NULL; + } + } + FREE(rs_inst->rs_rt->streams); + } + + if (rs_inst->rs_rt->matched_pats != NULL) { + for (i = 0; i < rs_inst->n_worker_thread; i++) { + if (rs_inst->rs_rt->matched_pats[i] != NULL) { + utarray_free(rs_inst->rs_rt->matched_pats[i]->pattern_ids); + FREE(rs_inst->rs_rt->matched_pats[i]); + } + } + FREE(rs_inst->rs_rt->matched_pats); + } + + FREE(rs_inst->rs_rt); + } + + if (rs_inst->rs_attr != NULL) { + FREE(rs_inst->rs_attr); + } + + FREE(rs_inst); +} + +static inline int compare_pattern_id(const void *a, const void *b) +{ + long long ret = *(const unsigned long long *)a - *(const unsigned long long *)b; + if (ret == 0) { + return 0; + } else if(ret < 0) { + return -1; + } else { + return 1; + } +} + +/** + * @param id: pattern id +*/ +static int matched_event_cb(unsigned int id, int pos_offset, int from, int to, + size_t data_len, void *ctx) +{ + // put id in set + unsigned long long pattern_id = id; + struct matched_pattern *matched_pat = (struct matched_pattern *)ctx; + + if (pattern_id > matched_pat->n_patterns || id < 0) { + return 0; + } + + if (utarray_len(matched_pat->pattern_ids) >= MAX_HIT_PATTERN_NUM) { + return 0; + } + + // duplicate pattern_id + if (utarray_find(matched_pat->pattern_ids, &pattern_id, compare_pattern_id)) { + return 0; + } + + int ret = 0; + struct pattern_attribute pat_attr = matched_pat->ref_rs_attr[id]; + + switch (pat_attr.match_mode) { + case EXPR_MATCH_MODE_EXACTLY: + if (0 == (from + pos_offset) && (int)data_len == (to + pos_offset)) { + ret = 1; + } + break; + case EXPR_MATCH_MODE_SUB: + if (pat_attr.offset.start == -1 && + pat_attr.offset.end == -1) { + ret = 1; + break; + } + + if (pat_attr.offset.start == -1) { + if ((long long)(to + pos_offset - 1) <= pat_attr.offset.end) { + ret = 1; + break; + } + } + + if (pat_attr.offset.end == -1) { + if ((long long)(from + pos_offset) >= pat_attr.offset.start) { + ret = 1; + break; + } + } + + if ((long long)(from + pos_offset) >= pat_attr.offset.start && + (long long)(to + pos_offset - 1) <= pat_attr.offset.end) { + ret = 1; + } + break; + case EXPR_MATCH_MODE_PREFIX: + if (0 == (from + pos_offset)) { + ret = 1; + } + break; + case EXPR_MATCH_MODE_SUFFIX: + if ((to + pos_offset) == (int)data_len) { + ret = 1; + } + break; + default: + break; + } + + if (1 == ret) { + utarray_push_back(matched_pat->pattern_ids, &pattern_id); + utarray_sort(matched_pat->pattern_ids, compare_pattern_id); + } + + return 0; +} + +void *adapter_rs_stream_open(void *rs_instance, int thread_id) +{ + if (NULL == rs_instance || thread_id < 0) { + return NULL; + } + + struct adapter_rs *rs_inst = (struct adapter_rs *)rs_instance; + struct adapter_rs_stream *rs_stream = ALLOC(struct adapter_rs_stream, 1); + + rs_stream->logger = rs_inst->logger; + rs_stream->thread_id = thread_id; + rs_stream->ref_rs_rt = rs_inst->rs_rt; + + int err_count = 0; + if (rs_inst->rs_rt->literal_db != NULL) { + rs_stream->literal_stream = rs_open_stream(rs_inst->rs_rt->literal_db, 0, 128); + if (NULL == rs_stream->literal_stream) { + log_error(rs_inst->logger, MODULE_ADAPTER_RS, "rs_open_stream failed"); + err_count++; + } + } + + if (rs_inst->rs_rt->regex_db != NULL) { + rs_stream->regex_stream = rs_open_stream(rs_inst->rs_rt->regex_db, 0, 128); + if (NULL == rs_stream->regex_stream) { + log_error(rs_inst->logger, MODULE_ADAPTER_RS, "rs_open_stream failed"); + err_count++; + } + } + + if (err_count > 0) { + goto error; + } + + return rs_stream; +error: + if (rs_stream->literal_stream != NULL) { + rs_close_stream(rs_stream->literal_stream); + rs_stream->literal_stream = NULL; + } + + if (rs_stream->regex_stream != NULL) { + rs_close_stream(rs_stream->regex_stream); + rs_stream->regex_stream = NULL; + } + + FREE(rs_stream); + return NULL; +} + +void adapter_rs_stream_close(void *rs_stream) +{ + if (NULL == rs_stream) { + return; + } + + struct adapter_rs_stream *stream = (struct adapter_rs_stream *)rs_stream; + if (stream->ref_rs_rt != NULL) { + if (stream->literal_stream != NULL) { + rs_close_stream(stream->literal_stream); + stream->literal_stream = NULL; + } + + if (stream->regex_stream != NULL) { + rs_close_stream(stream->regex_stream); + stream->regex_stream = NULL; + } + } + + /* rs_stream->rs_rt point to rs_instance->rs_rt which will call free + same as rs_attr */ + stream->ref_rs_rt = NULL; + FREE(stream); +} + +int adapter_rs_scan_stream(void *rs_stream, const char *data, size_t data_len, + struct expr_scan_result *results, size_t n_result, + size_t *n_hit_result) +{ + if (NULL == rs_stream || NULL == data || 0 == data_len || + NULL == results || 0 == n_result || NULL == n_hit_result) { + return -1; + } + + int ret = 0, err_count = 0; + struct adapter_rs_stream *stream = (struct adapter_rs_stream *)rs_stream; + int thread_id = stream->thread_id; + struct adapter_rs_runtime *rs_rt = stream->ref_rs_rt; + struct matched_pattern *matched_pat = rs_rt->matched_pats[thread_id]; + + if (stream->literal_stream != NULL) { + ret = rs_scan_stream(stream->literal_stream, data, data_len, + matched_event_cb, matched_pat); + if (ret < 0) { + err_count++; + } + } + + if (stream->regex_stream != NULL) { + ret = rs_scan_stream(stream->regex_stream, data, data_len, + matched_event_cb, matched_pat); + if (ret < 0) { + err_count++; + } + } + + if (err_count == 2) { + return -1; + } + + size_t n_pattern_id = utarray_len(matched_pat->pattern_ids); + if (0 == n_pattern_id) { + *n_hit_result = 0; + return 0; + } + + unsigned long long pattern_ids[n_pattern_id]; + + for (size_t i = 0; i < n_pattern_id; i++) { + pattern_ids[i] = *(unsigned long long *)utarray_eltptr(matched_pat->pattern_ids, i); + } + + struct bool_expr_match *bool_matcher_results = rs_rt->bool_match_buffs[thread_id]; + int bool_matcher_ret = bool_matcher_match(rs_rt->bm, pattern_ids, n_pattern_id, + bool_matcher_results, MAX_HIT_EXPR_NUM); + if (bool_matcher_ret < 0) { + ret = -1; + goto next; + } + + if (bool_matcher_ret > (int)n_result) { + bool_matcher_ret = n_result; + } + + for (int index = 0; index < bool_matcher_ret; index++) { + results[index].rule_id = bool_matcher_results[index].expr_id; + results[index].user_tag = bool_matcher_results[index].user_tag; + } + *n_hit_result = bool_matcher_ret; + +next: + utarray_clear(matched_pat->pattern_ids); + + return ret; +} + +int adapter_rs_scan(void *rs_instance, int thread_id, const char *data, size_t data_len, + struct expr_scan_result *results, size_t n_result, size_t *n_hit_result) +{ + if (NULL == rs_instance || NULL == data || (0 == data_len) || + NULL == results || 0 == n_result || NULL == n_hit_result) { + return -1; + } + + int ret = 0, err_count = 0; + struct adapter_rs *rs_inst = (struct adapter_rs *)rs_instance; + struct adapter_rs_runtime *rs_rt = rs_inst->rs_rt; + struct matched_pattern *matched_pat = rs_rt->matched_pats[thread_id]; + + if (rs_rt->literal_db != NULL) { + ret = rs_scan(rs_rt->literal_db, thread_id, data, data_len, + 0, matched_event_cb, matched_pat); + if (ret < 0) { + err_count++; + } + } + + if (rs_rt->regex_db != NULL) { + ret = rs_scan(rs_rt->regex_db, thread_id, data, data_len, + 0, matched_event_cb, matched_pat); + if (ret < 0) { + err_count++; + } + } + + if (err_count == 2) { + return -1; + } + + size_t n_pattern_id = utarray_len(matched_pat->pattern_ids); + if (0 == n_pattern_id) { + *n_hit_result = 0; + return 0; + } + + unsigned long long pattern_ids[n_pattern_id]; + for (size_t i = 0; i < n_pattern_id; i++) { + pattern_ids[i] = *(unsigned long long *)utarray_eltptr(matched_pat->pattern_ids, i); + } + + struct bool_expr_match *bool_matcher_results = rs_rt->bool_match_buffs[thread_id]; + int bool_matcher_ret = bool_matcher_match(rs_rt->bm, pattern_ids, n_pattern_id, + bool_matcher_results, MAX_HIT_EXPR_NUM); + if (bool_matcher_ret < 0) { + ret = -1; + goto next; + } + + if (bool_matcher_ret > (int)n_result) { + bool_matcher_ret = n_result; + } + + for (int index = 0; index < bool_matcher_ret; index++) { + results[index].rule_id = bool_matcher_results[index].expr_id; + results[index].user_tag = bool_matcher_results[index].user_tag; + } + *n_hit_result = bool_matcher_ret; + +next: + utarray_clear(matched_pat->pattern_ids); + + return ret; +}
\ No newline at end of file diff --git a/scanner/expr_matcher/adapter_rs/adapter_rs.h b/scanner/expr_matcher/adapter_rs/adapter_rs.h new file mode 100644 index 0000000..c43e553 --- /dev/null +++ b/scanner/expr_matcher/adapter_rs/adapter_rs.h @@ -0,0 +1,78 @@ +/* +********************************************************************************************** +* File: adapter_rs.h +* Description: +* Authors: Liu wentan <[email protected]> +* Date: 2023-06-30 +* Copyright: (c) Since 2022 Geedge Networks, Ltd. All rights reserved. +*********************************************************************************************** +*/ + +#ifndef _ADAPTER_RS_H_ +#define _ADAPTER_RS_H_ + +#ifdef __cplusplus +extern "C" +{ +#endif + +#include <stddef.h> + +#include "log/log.h" + +#include "../expr_matcher.h" + +int adapter_rs_verify_regex_expression(const char *regex_expr, + struct log_handle *logger); + +/** + * @brief new adapter_rs instance + * + * @param rules: logic AND expression's array + * @param n_rule: the number of logic AND expression's array + * @param n_worker_threads: the number of scan threads which will call adapter_rs_scan() + * + * @retval the pointer to adapter_rs instance +*/ +void *adapter_rs_new(struct expr_rule *rules, size_t n_rule, + size_t n_literal_pattern, size_t n_regex_pattern, + size_t n_worker_thread, struct log_handle *logger); + +void adapter_rs_free(void *rs_instance); + +/** + * @brief scan input data to match logic AND expression, return all matched expr_id + * + * @param rs_instance: adapter_rs instance obtained by adapter_rs_new() + * @param thread_id: the thread_id of caller + * @param scan_data: data to be scanned + * @param data_len: the length of data to be scanned + * @param result_array: the array to store hit expr_id which allocated by caller + * @param n_result_array: number of elements in array of expr_id +*/ +int adapter_rs_scan(void *rs_instance, int thread_id, + const char *scan_data, size_t data_len, + struct expr_scan_result *result_array, + size_t n_result_array, size_t *n_hit_results); + +/** + * @brief + */ +void *adapter_rs_stream_open(void *rs_instance, int thread_id); + +/** + * @brief + */ +int adapter_rs_scan_stream(void *rs_stream, const char *scan_data, + size_t data_len, struct expr_scan_result *result_array, + size_t n_result_array, size_t *n_hit_results); +/** + * @brief + */ +void adapter_rs_stream_close(void *rs_stream); + +#ifdef __cplusplus +} +#endif + +#endif
\ No newline at end of file diff --git a/scanner/expr_matcher/expr_matcher.cpp b/scanner/expr_matcher/expr_matcher.cpp new file mode 100644 index 0000000..3a37383 --- /dev/null +++ b/scanner/expr_matcher/expr_matcher.cpp @@ -0,0 +1,235 @@ +/* +********************************************************************************************** +* File: expr_matcher.cpp +* Description: +* Authors: Liu wentan <[email protected]> +* Date: 2023-06-30 +* Copyright: (c) Since 2023 Geedge Networks, Ltd. All rights reserved. +*********************************************************************************************** +*/ + +#include <unistd.h> +#include <assert.h> +#include <sys/syscall.h> + +#include "log/log.h" +#include "expr_matcher.h" +#include "maat_utils.h" +#include "adapter_hs/adapter_hs.h" +#include "adapter_rs/adapter_rs.h" + +pid_t expr_matcher_gettid() +{ + return syscall(SYS_gettid); +} + +static const char *expr_matcher_module_name_str(const char *name) +{ + static __thread char module[64]; + snprintf(module, sizeof(module), "%s(%d)", name, expr_matcher_gettid()); + + return module; +} + +#define MODULE_EXPR_MATCHER expr_matcher_module_name_str("maat.expr_matcher") + +struct expr_matcher { + enum expr_engine_type engine_type; + void *engine; + struct log_handle *logger; +}; + +struct expr_matcher_stream { + enum expr_engine_type engine_type; + void *handle; +}; + +struct expr_engine_operations { + enum expr_engine_type type; + void *(*engine_new)(struct expr_rule *rules, size_t n_rule, + size_t n_literal_pattern, size_t n_regex_pattern, + size_t n_worker_thread, struct log_handle *logger); + void (*engine_free)(void *engine); + int (*engine_scan)(void *engine, int thread_id, const char *scan_data, + size_t data_len, struct expr_scan_result *result_array, + size_t n_result_array, size_t *n_hit_result); + void *(*engine_stream_open)(void *engine, int thread_id); + void (*engine_stream_close)(void *stream); + int (*engine_scan_stream)(void *stream, const char *scan_data, size_t data_len, + struct expr_scan_result *result_array, size_t n_result_array, + size_t *n_hit_result); +}; + +struct expr_engine_operations expr_engine_ops[EXPR_ENGINE_TYPE_MAX] = { + { + .type = EXPR_ENGINE_TYPE_HS, + .engine_new = adapter_hs_new, + .engine_free = adapter_hs_free, + .engine_scan = adapter_hs_scan, + .engine_stream_open = adapter_hs_stream_open, + .engine_stream_close = adapter_hs_stream_close, + .engine_scan_stream = adapter_hs_scan_stream + }, + { + .type = EXPR_ENGINE_TYPE_RS, + .engine_new = adapter_rs_new, + .engine_free = adapter_rs_free, + .engine_scan = adapter_rs_scan, + .engine_stream_open = adapter_rs_stream_open, + .engine_stream_close = adapter_rs_stream_close, + .engine_scan_stream = adapter_rs_scan_stream + } +}; + +int expr_matcher_verify_regex_expression(const char *regex_expr, + struct log_handle *logger) +{ + int ret = adapter_hs_verify_regex_expression(regex_expr, logger); + if (ret == 0) { + return 0; + } + + return adapter_rs_verify_regex_expression(regex_expr, logger); +} + +struct expr_matcher * +expr_matcher_new(struct expr_rule *rules, size_t n_rule, enum expr_engine_type engine_type, + size_t n_worker_thread, struct log_handle *logger) +{ + if (NULL == rules || 0 == n_rule || 0 == n_worker_thread || + (engine_type != EXPR_ENGINE_TYPE_HS && engine_type != EXPR_ENGINE_TYPE_RS)) { + log_error(logger, MODULE_EXPR_MATCHER, "[%s:%d]engine type:%d is illegal", + __FUNCTION__, __LINE__, engine_type); + return NULL; + } + + size_t i = 0, j = 0; + size_t literal_pat_num = 0; + size_t regex_pat_num = 0; + + for (i = 0; i < n_rule; i++) { + if (rules[i].n_patterns > MAX_EXPR_PATTERN_NUM) { + log_error(logger, MODULE_EXPR_MATCHER, + "[%s:%d] the number of patterns in one expression should less than" + " %d", __FUNCTION__, __LINE__, MAX_EXPR_PATTERN_NUM); + return NULL; + } + + for (j = 0; j < rules[i].n_patterns; j++) { + /* pat_len should not 0 */ + if (0 == rules[i].patterns[j].pat_len) { + log_error(logger, MODULE_EXPR_MATCHER, + "[%s:%d] expr pattern length should not 0", + __FUNCTION__, __LINE__); + return NULL; + } + + if (rules[i].patterns[j].type == EXPR_PATTERN_TYPE_STR) { + literal_pat_num++; + } else { + regex_pat_num++; + } + } + } + + if (0 == literal_pat_num && 0 == regex_pat_num) { + log_error(logger, MODULE_EXPR_MATCHER, + "[%s:%d] exprs has no valid pattern", __FUNCTION__, __LINE__); + return NULL; + } + + void *engine = expr_engine_ops[engine_type].engine_new(rules, n_rule, literal_pat_num, + regex_pat_num, n_worker_thread, + logger); + if (NULL == engine) { + log_error(logger, MODULE_EXPR_MATCHER, + "[%s:%d]expr_matcher engine_new failed.", __FUNCTION__, __LINE__); + return NULL; + } + + struct expr_matcher *matcher = ALLOC(struct expr_matcher, 1); + matcher->engine_type = engine_type; + matcher->engine = engine; + matcher->logger = logger; + + return matcher; +} + +void expr_matcher_free(struct expr_matcher *matcher) +{ + if (NULL == matcher) { + return; + } + + if (matcher->engine != NULL) { + expr_engine_ops[matcher->engine_type].engine_free(matcher->engine); + matcher->engine = NULL; + } + + FREE(matcher); +} + +int expr_matcher_match(struct expr_matcher *matcher, int thread_id, const char *scan_data, + size_t data_len, struct expr_scan_result *result_array, + size_t n_result_array, size_t *n_hit_results) +{ + if (NULL == matcher || thread_id < 0 || NULL == scan_data || 0 == data_len + || NULL == result_array || 0 == n_result_array || NULL == n_hit_results) { + return -1; + } + + return expr_engine_ops[matcher->engine_type].engine_scan(matcher->engine, thread_id, + scan_data, data_len, result_array, + n_result_array, n_hit_results); +} + +struct expr_matcher_stream * +expr_matcher_stream_open(struct expr_matcher *matcher, int thread_id) +{ + if (NULL == matcher || thread_id < 0) { + return NULL; + } + + void *s_handle = expr_engine_ops[matcher->engine_type].engine_stream_open(matcher->engine, + thread_id); + if (NULL == s_handle) { + log_error(matcher->logger, MODULE_EXPR_MATCHER, + "[%s:%d] expr_matcher engine_stream_open failed.", + __FUNCTION__, __LINE__); + return NULL; + } + + struct expr_matcher_stream *stream = ALLOC(struct expr_matcher_stream, 1); + stream->engine_type = matcher->engine_type; + stream->handle = s_handle; + + return stream; +} + +int expr_matcher_stream_match(struct expr_matcher_stream *stream, const char *scan_data, + size_t data_len, struct expr_scan_result *result_array, + size_t n_result_array, size_t *n_hit_results) +{ + if (NULL == stream || NULL == scan_data || 0 == data_len || NULL == result_array + || 0 == n_result_array || NULL == n_hit_results) { + return -1; + } + + return expr_engine_ops[stream->engine_type].engine_scan_stream(stream->handle, scan_data, + data_len, result_array, + n_result_array, n_hit_results); +} + +void expr_matcher_stream_close(struct expr_matcher_stream *stream) +{ + if (NULL == stream) { + return; + } + + if (stream->handle != NULL) { + expr_engine_ops[stream->engine_type].engine_stream_close(stream->handle); + stream->handle = NULL; + } + + FREE(stream); +}
\ No newline at end of file diff --git a/scanner/expr_matcher/expr_matcher.h b/scanner/expr_matcher/expr_matcher.h new file mode 100644 index 0000000..fb61854 --- /dev/null +++ b/scanner/expr_matcher/expr_matcher.h @@ -0,0 +1,134 @@ +/* +********************************************************************************************** +* File: expr_matcher.h +* Description: +* Authors: Liu wentan <[email protected]> +* Date: 2023-06-30 +* Copyright: (c) Since 2023 Geedge Networks, Ltd. All rights reserved. +*********************************************************************************************** +*/ + +#ifndef _EXPR_MATCHER_H_ +#define _EXPR_MATCHER_H_ + +#ifdef __cplusplus +extern "C" +{ +#endif + +#include <stddef.h> + +#include "log/log.h" + +#define MAX_EXPR_PATTERN_NUM 8 /* 每条与表达式最多由MAX_EXPR_ITEM_NUM个规则组成 */ +#define MAX_HIT_EXPR_NUM 1024 + +enum expr_engine_type { + EXPR_ENGINE_TYPE_HS = 0, /* default engine */ + EXPR_ENGINE_TYPE_RS, + EXPR_ENGINE_TYPE_MAX +}; + +enum expr_pattern_type { + EXPR_PATTERN_TYPE_STR = 0, /* pure literal string */ + EXPR_PATTERN_TYPE_REG = 1, /* regex expression */ +}; + +enum expr_case_sensitive { + EXPR_CASE_INSENSITIVE = 0, + EXPR_CASE_SENSITIVE +}; + +enum expr_match_mode { + EXPR_MATCH_MODE_INVALID = -1, + EXPR_MATCH_MODE_EXACTLY = 1, /* scan data must match pattern exactly */ + EXPR_MATCH_MODE_PREFIX, /* pattern must in the head of scan_data */ + EXPR_MATCH_MODE_SUFFIX, /* pattern must in the tail of scan_data */ + EXPR_MATCH_MODE_SUB /* pattern must in the range[l_offset, r_offset] of scan_data */ +}; + +struct expr_pattern { + enum expr_pattern_type type; + enum expr_match_mode match_mode; + enum expr_case_sensitive case_sensitive; + + /* + * just match in scan_data's range of [start_offset, end_offset], -1 means no limits + * for example: + * [-1, end_offset] means the pattern must in scan_data's [0 ~ start_offset] + * [start_offset, -1] means the pattern must in scan_data's [start_offset ~ data_end] + */ + int start_offset; + int end_offset; + + char *pat; + size_t pat_len; +}; + +struct expr_scan_result { + long long rule_id; + void *user_tag; +}; + +/* logic AND expression, such as (rule1 & rule2) */ +struct expr_rule { + long long expr_id; /* AND expression ID */ + size_t n_patterns; + struct expr_pattern patterns[MAX_EXPR_PATTERN_NUM]; + void *tag; /* user defined data, return with hit result */ +}; + +int expr_matcher_verify_regex_expression(const char *regex_expr, + struct log_handle *logger); + +/** + * @brief new expr matcher instance + * + * @param expr_array: logic AND expression's array + * @param n_expr_array: the number of logic AND expression's array + * @param n_worker_threads: the number of scan threads which will call adapter_rs_scan() + * +*/ +struct expr_matcher * +expr_matcher_new(struct expr_rule *rules, size_t n_rule, enum expr_engine_type type, + size_t n_worker_thread, struct log_handle *logger); + +void expr_matcher_free(struct expr_matcher *matcher); + +/** + * @brief scan input data to match logic AND expression, return all matched expr_id + * + * @param matcher: expr_matcher instance obtained by expr_matcher_new() + * @param thread_id: the thread_id of caller + * @param scan_data: data to be scanned + * @param data_len: the length of data to be scanned + * @param result_array: the array to store hit expr_id which allocated by caller + * @param n_result_array: number of elements in array of expr_id +*/ +int expr_matcher_match(struct expr_matcher *matcher, int thread_id, const char *scan_data, + size_t data_len, struct expr_scan_result *result_array, + size_t n_result_array, size_t *n_hit_results); + +/** + * @brief + */ +struct expr_matcher_stream * +expr_matcher_stream_open(struct expr_matcher *matcher, int thread_id); + +/** + * @brief + */ +int expr_matcher_stream_match(struct expr_matcher_stream *stream, const char *scan_data, + size_t data_len, struct expr_scan_result *result_array, + size_t n_result_array, size_t *n_hit_results); + +/** + * @brief + */ +void expr_matcher_stream_close(struct expr_matcher_stream *stream); + +#ifdef __cplusplus +} +#endif + +#endif
\ No newline at end of file |
