diff options
| author | liuwentan <[email protected]> | 2023-12-21 10:24:49 +0800 |
|---|---|---|
| committer | liuwentan <[email protected]> | 2023-12-21 10:24:49 +0800 |
| commit | 759f625cb178ada2751a9980062c4c9045a83675 (patch) | |
| tree | 4e33b3e87b7f316dd88f85860047363a16789d74 | |
| parent | 48af7e7aac84f673bf39a5679503bc891407a182 (diff) | |
| -rw-r--r-- | deps/bloom/bloom.c | 334 | ||||
| -rw-r--r-- | deps/bloom/bloom.h | 241 | ||||
| -rw-r--r-- | deps/bloom/murmurhash2.c | 64 | ||||
| -rw-r--r-- | deps/bloom/murmurhash2.h | 6 | ||||
| -rw-r--r-- | scanner/CMakeLists.txt | 14 | ||||
| -rw-r--r-- | scanner/expr_matcher/adapter_hs/adapter_hs.cpp | 41 | ||||
| -rw-r--r-- | scanner/expr_matcher/adapter_rs/adapter_rs.cpp | 49 | ||||
| -rw-r--r-- | src/maat_utils.c | 2 |
8 files changed, 742 insertions, 9 deletions
diff --git a/deps/bloom/bloom.c b/deps/bloom/bloom.c new file mode 100644 index 0000000..f460139 --- /dev/null +++ b/deps/bloom/bloom.c @@ -0,0 +1,334 @@ +/* + * Copyright (c) 2012-2022, Jyri J. Virkki + * All rights reserved. + * + * This file is under BSD license. See LICENSE file. + */ + +/* + * Refer to bloom.h for documentation on the public interfaces. + */ + +#include <assert.h> +#include <fcntl.h> +#include <math.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +#include "bloom.h" +#include "murmurhash2.h" + +#define MAKESTRING(n) STRING(n) +#define STRING(n) #n +#define BLOOM_MAGIC "libbloom2" + +#define BLOOM_VERSION_MAJOR 2 +#define BLOOM_VERSION_MINOR 0 + +inline static int test_bit_set_bit(unsigned char * buf, + unsigned long int bit, int set_bit) +{ + unsigned long int byte = bit >> 3; + unsigned char c = buf[byte]; // expensive memory access + unsigned char mask = 1 << (bit % 8ul); + + if (c & mask) { + return 1; + } else { + if (set_bit) { + buf[byte] = c | mask; + } + return 0; + } +} + + +static int bloom_check_add(struct bloom * bloom, + const void * buffer, int len, int add) +{ + if (bloom->ready == 0) { + printf("bloom at %p not initialized!\n", (void *)bloom); + return -1; + } + + unsigned char hits = 0; + unsigned int a = murmurhash2(buffer, len, 0x9747b28c); + unsigned int b = murmurhash2(buffer, len, a); + unsigned long int x; + unsigned long int i; + + for (i = 0; i < bloom->hashes; i++) { + x = (a + b*i) % bloom->bits; + if (test_bit_set_bit(bloom->bf, x, add)) { + hits++; + } else if (!add) { + // Don't care about the presence of all the bits. Just our own. + return 0; + } + } + + if (hits == bloom->hashes) { + return 1; // 1 == element already in (or collision) + } + + return 0; +} + + +// DEPRECATED - Please migrate to bloom_init2. +int bloom_init(struct bloom * bloom, int entries, double error) +{ + return bloom_init2(bloom, (unsigned int)entries, error); +} + + +int bloom_init2(struct bloom * bloom, unsigned int entries, double error) +{ + if (sizeof(unsigned long int) < 8) { + printf("error: libbloom will not function correctly because\n"); + printf("sizeof(unsigned long int) == %ld\n", sizeof(unsigned long int)); + exit(1); + } + + memset(bloom, 0, sizeof(struct bloom)); + + if (entries < 1000 || error <= 0 || error >= 1) { + return 1; + } + + bloom->entries = entries; + bloom->error = error; + + double num = -log(bloom->error); + double denom = 0.480453013918201; // ln(2)^2 + bloom->bpe = (num / denom); + + long double dentries = (long double)entries; + long double allbits = dentries * bloom->bpe; + bloom->bits = (unsigned long int)allbits; + + if (bloom->bits % 8) { + bloom->bytes = (bloom->bits / 8) + 1; + } else { + bloom->bytes = bloom->bits / 8; + } + + bloom->hashes = (unsigned char)ceil(0.693147180559945 * bloom->bpe); // ln(2) + + bloom->bf = (unsigned char *)calloc(bloom->bytes, sizeof(unsigned char)); + if (bloom->bf == NULL) { // LCOV_EXCL_START + return 1; + } // LCOV_EXCL_STOP + + bloom->ready = 1; + + bloom->major = BLOOM_VERSION_MAJOR; + bloom->minor = BLOOM_VERSION_MINOR; + + return 0; +} + + +int bloom_check(struct bloom * bloom, const void * buffer, int len) +{ + return bloom_check_add(bloom, buffer, len, 0); +} + + +int bloom_add(struct bloom * bloom, const void * buffer, int len) +{ + return bloom_check_add(bloom, buffer, len, 1); +} + + +void bloom_print(struct bloom * bloom) +{ + printf("bloom at %p\n", (void *)bloom); + if (!bloom->ready) { printf(" *** NOT READY ***\n"); } + printf(" ->version = %d.%d\n", bloom->major, bloom->minor); + printf(" ->entries = %u\n", bloom->entries); + printf(" ->error = %f\n", bloom->error); + printf(" ->bits = %lu\n", bloom->bits); + printf(" ->bits per elem = %f\n", bloom->bpe); + printf(" ->bytes = %lu", bloom->bytes); + unsigned int KB = bloom->bytes / 1024; + unsigned int MB = KB / 1024; + printf(" (%u KB, %u MB)\n", KB, MB); + printf(" ->hash functions = %d\n", bloom->hashes); +} + + +void bloom_free(struct bloom * bloom) +{ + if (bloom->ready) { + free(bloom->bf); + } + bloom->ready = 0; +} + + +int bloom_reset(struct bloom * bloom) +{ + if (!bloom->ready) return 1; + memset(bloom->bf, 0, bloom->bytes); + return 0; +} + + +int bloom_save(struct bloom * bloom, char * filename) +{ + if (filename == NULL || filename[0] == 0) { + return 1; + } + + int fd = open(filename, O_WRONLY | O_CREAT, 0644); + if (fd < 0) { + return 1; + } + + ssize_t out = write(fd, BLOOM_MAGIC, strlen(BLOOM_MAGIC)); + if (out != strlen(BLOOM_MAGIC)) { goto save_error; } // LCOV_EXCL_LINE + + uint16_t size = sizeof(struct bloom); + out = write(fd, &size, sizeof(uint16_t)); + if (out != sizeof(uint16_t)) { goto save_error; } // LCOV_EXCL_LINE + + out = write(fd, bloom, sizeof(struct bloom)); + if (out != sizeof(struct bloom)) { goto save_error; } // LCOV_EXCL_LINE + + out = write(fd, bloom->bf, bloom->bytes); + if (out != bloom->bytes) { goto save_error; } // LCOV_EXCL_LINE + + close(fd); + return 0; + // LCOV_EXCL_START + save_error: + close(fd); + return 1; + // LCOV_EXCL_STOP +} + + +int bloom_load(struct bloom * bloom, char * filename) +{ + int rv = 0; + + if (filename == NULL || filename[0] == 0) { return 1; } + if (bloom == NULL) { return 2; } + + memset(bloom, 0, sizeof(struct bloom)); + + int fd = open(filename, O_RDONLY); + if (fd < 0) { return 3; } + + char line[30]; + memset(line, 0, 30); + ssize_t in = read(fd, line, strlen(BLOOM_MAGIC)); + + if (in != strlen(BLOOM_MAGIC)) { + rv = 4; + goto load_error; + } + + if (strncmp(line, BLOOM_MAGIC, strlen(BLOOM_MAGIC))) { + rv = 5; + goto load_error; + } + + uint16_t size; + in = read(fd, &size, sizeof(uint16_t)); + if (in != sizeof(uint16_t)) { + rv = 6; + goto load_error; + } + + if (size != sizeof(struct bloom)) { + rv = 7; + goto load_error; + } + + in = read(fd, bloom, sizeof(struct bloom)); + if (in != sizeof(struct bloom)) { + rv = 8; + goto load_error; + } + + bloom->bf = NULL; + if (bloom->major != BLOOM_VERSION_MAJOR) { + rv = 9; + goto load_error; + } + + bloom->bf = (unsigned char *)malloc(bloom->bytes); + if (bloom->bf == NULL) { rv = 10; goto load_error; } // LCOV_EXCL_LINE + + in = read(fd, bloom->bf, bloom->bytes); + if (in != bloom->bytes) { + rv = 11; + free(bloom->bf); + bloom->bf = NULL; + goto load_error; + } + + close(fd); + return rv; + + load_error: + close(fd); + bloom->ready = 0; + return rv; +} + + +int bloom_merge(struct bloom * bloom_dest, struct bloom * bloom_src) +{ + if (bloom_dest->ready == 0) { + printf("bloom at %p not initialized!\n", (void *)bloom_dest); + return -1; + } + + if (bloom_src->ready == 0) { + printf("bloom at %p not initialized!\n", (void *)bloom_src); + return -1; + } + + if (bloom_dest->entries != bloom_src->entries) { + return 1; + } + + if (bloom_dest->error != bloom_src->error) { + return 1; + } + + if (bloom_dest->major != bloom_src->major) { + return 1; + } + + if (bloom_dest->minor != bloom_src->minor) { + return 1; + } + + // Not really possible if properly used but check anyway to avoid the + // possibility of buffer overruns. + if (bloom_dest->bytes != bloom_src->bytes) { + return 1; // LCOV_EXCL_LINE + } + + unsigned long int p; + for (p = 0; p < bloom_dest->bytes; p++) { + bloom_dest->bf[p] |= bloom_src->bf[p]; + } + + return 0; +} + + +const char * bloom_version() +{ + return MAKESTRING(BLOOM_VERSION); +} diff --git a/deps/bloom/bloom.h b/deps/bloom/bloom.h new file mode 100644 index 0000000..3d7b86b --- /dev/null +++ b/deps/bloom/bloom.h @@ -0,0 +1,241 @@ +/* + * Copyright (c) 2012-2022, Jyri J. Virkki + * All rights reserved. + * + * This file is under BSD license. See LICENSE file. + */ + +#ifndef _BLOOM_H +#define _BLOOM_H + +#ifdef __cplusplus +extern "C" { +#endif + + +#define NULL_BLOOM_FILTER { 0, 0, 0, 0, 0.0, 0, 0, 0, 0.0, NULL } + +#define ENTRIES_T unsigned int +#define BYTES_T unsigned long int +#define BITS_T unsigned long int + + +/** *************************************************************************** + * Structure to keep track of one bloom filter. Caller needs to + * allocate this and pass it to the functions below. First call for + * every struct must be to bloom_init(). + * + */ +struct bloom +{ + // These fields are part of the public interface of this structure. + // Client code may read these values if desired. Client code MUST NOT + // modify any of these. + unsigned int entries; + unsigned long int bits; + unsigned long int bytes; + unsigned char hashes; + double error; + + // Fields below are private to the implementation. These may go away or + // change incompatibly at any moment. Client code MUST NOT access or rely + // on these. + unsigned char ready; + unsigned char major; + unsigned char minor; + double bpe; + unsigned char * bf; +}; + + +/** *************************************************************************** + * Initialize the bloom filter for use. + * + * The filter is initialized with a bit field and number of hash functions + * according to the computations from the wikipedia entry: + * http://en.wikipedia.org/wiki/Bloom_filter + * + * Optimal number of bits is: + * bits = (entries * ln(error)) / ln(2)^2 + * + * Optimal number of hash functions is: + * hashes = bpe * ln(2) + * + * Parameters: + * ----------- + * bloom - Pointer to an allocated struct bloom (see above). + * entries - The expected number of entries which will be inserted. + * Must be at least 1000 (in practice, likely much larger). + * error - Probability of collision (as long as entries are not + * exceeded). + * + * Return: + * ------- + * 0 - on success + * 1 - on failure + * + */ +int bloom_init2(struct bloom * bloom, unsigned int entries, double error); + + +/** + * DEPRECATED. + * Kept for compatibility with libbloom v.1. To be removed in v3.0. + * + */ +int bloom_init(struct bloom * bloom, int entries, double error); + + +/** *************************************************************************** + * Check if the given element is in the bloom filter. Remember this may + * return false positive if a collision occurred. + * + * Parameters: + * ----------- + * bloom - Pointer to an allocated struct bloom (see above). + * buffer - Pointer to buffer containing element to check. + * len - Size of 'buffer'. + * + * Return: + * ------- + * 0 - element is not present + * 1 - element is present (or false positive due to collision) + * -1 - bloom not initialized + * + */ +int bloom_check(struct bloom * bloom, const void * buffer, int len); + + +/** *************************************************************************** + * Add the given element to the bloom filter. + * The return code indicates if the element (or a collision) was already in, + * so for the common check+add use case, no need to call check separately. + * + * Parameters: + * ----------- + * bloom - Pointer to an allocated struct bloom (see above). + * buffer - Pointer to buffer containing element to add. + * len - Size of 'buffer'. + * + * Return: + * ------- + * 0 - element was not present and was added + * 1 - element (or a collision) had already been added previously + * -1 - bloom not initialized + * + */ +int bloom_add(struct bloom * bloom, const void * buffer, int len); + + +/** *************************************************************************** + * Print (to stdout) info about this bloom filter. Debugging aid. + * + */ +void bloom_print(struct bloom * bloom); + + +/** *************************************************************************** + * Deallocate internal storage. + * + * Upon return, the bloom struct is no longer usable. You may call bloom_init + * again on the same struct to reinitialize it again. + * + * Parameters: + * ----------- + * bloom - Pointer to an allocated struct bloom (see above). + * + * Return: none + * + */ +void bloom_free(struct bloom * bloom); + + +/** *************************************************************************** + * Erase internal storage. + * + * Erases all elements. Upon return, the bloom struct returns to its initial + * (initialized) state. + * + * Parameters: + * ----------- + * bloom - Pointer to an allocated struct bloom (see above). + * + * Return: + * 0 - on success + * 1 - on failure + * + */ +int bloom_reset(struct bloom * bloom); + + +/** *************************************************************************** + * Save a bloom filter to a file. + * + * Parameters: + * ----------- + * bloom - Pointer to an allocated struct bloom (see above). + * filename - Create (or overwrite) bloom data to this file. + * + * Return: + * 0 - on success + * 1 - on failure + * + */ +int bloom_save(struct bloom * bloom, char * filename); + + +/** *************************************************************************** + * Load a bloom filter from a file. + * + * This functions loads a file previously saved with bloom_save(). + * + * Parameters: + * ----------- + * bloom - Pointer to an allocated struct bloom (see above). + * filename - Load bloom filter data from this file. + * + * Return: + * 0 - on success + * > 0 - on failure + * + */ +int bloom_load(struct bloom * bloom, char * filename); + + +/** *************************************************************************** + * Merge two compatible bloom filters. + * + * On success, bloom_dest will contain all elements of bloom_src in addition + * to its own. The bloom_src bloom filter is never modified. + * + * Both bloom_dest and bloom_src must be initialized and both must have + * identical parameters. + * + * Parameters: + * ----------- + * bloom_dest - will contain the merged elements from bloom_src + * bloom_src - its elements will be merged into bloom_dest + * + * Return: + * ------- + * 0 - on success + * 1 - incompatible bloom filters + * -1 - bloom not initialized + * + */ +int bloom_merge(struct bloom * bloom_dest, struct bloom * bloom_src); + + +/** *************************************************************************** + * Returns version string compiled into library. + * + * Return: version string + * + */ +const char * bloom_version(); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/deps/bloom/murmurhash2.c b/deps/bloom/murmurhash2.c new file mode 100644 index 0000000..21b306c --- /dev/null +++ b/deps/bloom/murmurhash2.c @@ -0,0 +1,64 @@ +//----------------------------------------------------------------------------- +// MurmurHash2, by Austin Appleby + +// Note - This code makes a few assumptions about how your machine behaves - + +// 1. We can read a 4-byte value from any address without crashing +// 2. sizeof(int) == 4 + +// And it has a few limitations - + +// 1. It will not work incrementally. +// 2. It will not produce the same results on little-endian and big-endian +// machines. + +unsigned int murmurhash2(const void * key, int len, const unsigned int seed) +{ + // 'm' and 'r' are mixing constants generated offline. + // They're not really 'magic', they just happen to work well. + + const unsigned int m = 0x5bd1e995; + const int r = 24; + + // Initialize the hash to a 'random' value + + unsigned int h = seed ^ len; + + // Mix 4 bytes at a time into the hash + + const unsigned char * data = (const unsigned char *)key; + + while(len >= 4) + { + unsigned int k = *(unsigned int *)data; + + k *= m; + k ^= k >> r; + k *= m; + + h *= m; + h ^= k; + + data += 4; + len -= 4; + } + + // Handle the last few bytes of the input array + + switch(len) + { + case 3: h ^= data[2] << 16; + case 2: h ^= data[1] << 8; + case 1: h ^= data[0]; + h *= m; + }; + + // Do a few final mixes of the hash to ensure the last few + // bytes are well-incorporated. + + h ^= h >> 13; + h *= m; + h ^= h >> 15; + + return h; +}
\ No newline at end of file diff --git a/deps/bloom/murmurhash2.h b/deps/bloom/murmurhash2.h new file mode 100644 index 0000000..04c0881 --- /dev/null +++ b/deps/bloom/murmurhash2.h @@ -0,0 +1,6 @@ +#ifndef _BLOOM_MURMURHASH2 +#define _BLOOM_MURMURHASH2 + +unsigned int murmurhash2(const void * key, int len, const unsigned int seed); + +#endif diff --git a/scanner/CMakeLists.txt b/scanner/CMakeLists.txt index dd373ba..e8c1a7b 100644 --- a/scanner/CMakeLists.txt +++ b/scanner/CMakeLists.txt @@ -7,9 +7,13 @@ include_directories(${PROJECT_SOURCE_DIR}/src/inc_internal) add_subdirectory(ip_matcher/IntervalIndex) -add_library(adapter-static bool_matcher/bool_matcher.cpp expr_matcher/expr_matcher.cpp - expr_matcher/adapter_hs/adapter_hs.cpp expr_matcher/adapter_rs/adapter_rs.cpp - fqdn_engine/fqdn_engine.cpp ip_matcher/ip_matcher.cpp ip_matcher/ipv4_match.cpp - ip_matcher/ipv6_match.cpp flag_matcher/flag_matcher.cpp interval_matcher/cgranges.c - interval_matcher/interval_matcher.cpp ipport_matcher/ipport_matcher.cpp) +set(SCANNER_SRC bool_matcher/bool_matcher.cpp expr_matcher/expr_matcher.cpp + expr_matcher/adapter_hs/adapter_hs.cpp expr_matcher/adapter_rs/adapter_rs.cpp + fqdn_engine/fqdn_engine.cpp ip_matcher/ip_matcher.cpp ip_matcher/ipv4_match.cpp + ip_matcher/ipv6_match.cpp flag_matcher/flag_matcher.cpp interval_matcher/cgranges.c + interval_matcher/interval_matcher.cpp ipport_matcher/ipport_matcher.cpp) + +set(LIB_SOURCE_FILES ${PROJECT_SOURCE_DIR}/deps/bloom/bloom.c ${PROJECT_SOURCE_DIR}/deps/bloom/murmurhash2.c) + +add_library(adapter-static ${SCANNER_SRC} ${LIB_SOURCE_FILES}) target_link_libraries(adapter-static hyperscan_static hyperscan_runtime_static rulescan_static interval_index_static)
\ No newline at end of file diff --git a/scanner/expr_matcher/adapter_hs/adapter_hs.cpp b/scanner/expr_matcher/adapter_hs/adapter_hs.cpp index 001d160..e4e723e 100644 --- a/scanner/expr_matcher/adapter_hs/adapter_hs.cpp +++ b/scanner/expr_matcher/adapter_hs/adapter_hs.cpp @@ -19,6 +19,7 @@ #include "adapter_hs.h" #include "uthash/utarray.h" #include "uthash/uthash.h" +#include "bloom/bloom.h" #include "maat_utils.h" #include "../../bool_matcher/bool_matcher.h" @@ -68,6 +69,7 @@ struct adapter_hs_runtime { hs_database_t *literal_db; hs_database_t *regex_db; + struct bloom **blooms; struct adapter_hs_scratch *scratch; struct adapter_hs_stream **streams; struct bool_matcher *bm; @@ -97,6 +99,7 @@ struct pattern_attribute { struct matched_pattern { UT_array *pattern_ids; size_t n_patterns; + struct bloom *ref_bloom; struct pattern_attribute *ref_hs_attr; size_t scan_data_len; }; @@ -438,6 +441,13 @@ void *adapter_hs_new(struct expr_rule *rules, size_t n_rule, goto error; } + /* alloc bloom filter */ + hs_inst->hs_rt->blooms = ALLOC(struct bloom *, n_worker_thread); + for (i = 0; i < n_worker_thread; i++) { + hs_inst->hs_rt->blooms[i] = ALLOC(struct bloom, 1); + bloom_init2(hs_inst->hs_rt->blooms[i], 1024, 0.001); + } + /* alloc scratch */ hs_inst->hs_rt->scratch = ALLOC(struct adapter_hs_scratch, 1); hs_inst->hs_rt->scratch->bool_match_buffs = ALLOC(struct bool_expr_match *, @@ -495,6 +505,16 @@ void adapter_hs_free(void *hs_instance) hs_inst->hs_rt->regex_db = NULL; } + if (hs_inst->hs_rt->blooms != NULL) { + for (i = 0; i < hs_inst->n_worker_thread; i++) { + if (hs_inst->hs_rt->blooms[i] != NULL) { + bloom_free(hs_inst->hs_rt->blooms[i]); + FREE(hs_inst->hs_rt->blooms[i]); + } + } + FREE(hs_inst->hs_rt->blooms); + } + if (hs_inst->hs_rt->scratch != NULL) { if (hs_inst->hs_rt->scratch->literal_scratches != NULL) { for (i = 0; i < hs_inst->n_worker_thread; i++) { @@ -581,6 +601,23 @@ static int matched_event_cb(unsigned int id, unsigned long long from, return 0; } + unsigned long long *tmp_pat_id = NULL; + if (utarray_len(matched_pat->pattern_ids) < (MAX_HIT_PATTERN_NUM / 64)) { + for (size_t i = 0; i < utarray_len(matched_pat->pattern_ids); i++) { + tmp_pat_id = (unsigned long long *)utarray_eltptr(matched_pat->pattern_ids, i); + if (*tmp_pat_id == pattern_id) { + return 0; + } + } + } else { + if (bloom_check(matched_pat->ref_bloom, (char *)&pattern_id, + sizeof(unsigned long long)) == 1) { + return 0; + } + bloom_add(matched_pat->ref_bloom, (char *)&pattern_id, + sizeof(unsigned long long)); + } + if (utarray_len(matched_pat->pattern_ids) >= MAX_HIT_PATTERN_NUM) { return 0; } @@ -655,6 +692,7 @@ void *adapter_hs_stream_open(void *hs_instance, int thread_id) hs_stream->thread_id = thread_id; hs_stream->ref_hs_rt = hs_inst->hs_rt; hs_stream->matched_pat = ALLOC(struct matched_pattern, 1); + hs_stream->matched_pat->ref_bloom = hs_inst->hs_rt->blooms[thread_id]; hs_stream->matched_pat->ref_hs_attr = hs_inst->hs_attr; hs_stream->matched_pat->n_patterns = hs_inst->n_patterns; utarray_new(hs_stream->matched_pat->pattern_ids, &ut_hs_pattern_id_icd); @@ -723,6 +761,7 @@ void adapter_hs_stream_close(void *hs_stream) /* stream->hs_rt point to hs_instance->hs_rt which will call free same as hs_attr */ stream->ref_hs_rt = NULL; + stream->matched_pat->ref_bloom = NULL; stream->matched_pat->ref_hs_attr = NULL; if (stream->matched_pat->pattern_ids != NULL) { @@ -862,6 +901,8 @@ int adapter_hs_scan_stream(void *hs_stream, const char *data, size_t data_len, } } + bloom_reset(stream->matched_pat->ref_bloom); + if (err_count == 2) { return -1; } diff --git a/scanner/expr_matcher/adapter_rs/adapter_rs.cpp b/scanner/expr_matcher/adapter_rs/adapter_rs.cpp index 04e05b2..110ab60 100644 --- a/scanner/expr_matcher/adapter_rs/adapter_rs.cpp +++ b/scanner/expr_matcher/adapter_rs/adapter_rs.cpp @@ -19,6 +19,7 @@ #include "adapter_rs.h" #include "uthash/utarray.h" #include "uthash/uthash.h" +#include "bloom/bloom.h" #include "maat_utils.h" #include "../../bool_matcher/bool_matcher.h" @@ -59,6 +60,7 @@ struct adapter_rs_runtime { rs_database_t *literal_db; rs_database_t *regex_db; + struct bloom **blooms; struct bool_expr_match **bool_match_buffs; /* per thread */ struct adapter_rs_stream **streams; /* per thread */ struct matched_pattern **matched_pats; /* per thread */ @@ -90,6 +92,7 @@ struct pattern_attribute { struct matched_pattern { UT_array *pattern_ids; size_t n_patterns; + struct bloom *ref_bloom; struct pattern_attribute *ref_rs_attr; }; @@ -310,6 +313,13 @@ void *adapter_rs_new(struct expr_rule *rules, size_t n_rule, goto error; } + /* alloc bloom filter */ + rs_inst->rs_rt->blooms = ALLOC(struct bloom *, n_worker_thread); + for (i = 0; i < n_worker_thread; i++) { + rs_inst->rs_rt->blooms[i] = ALLOC(struct bloom, 1); + bloom_init2(rs_inst->rs_rt->blooms[i], 1024, 0.001); + } + /* alloc scratch */ rs_inst->rs_rt->bool_match_buffs = ALLOC(struct bool_expr_match *, n_worker_thread); for (i = 0; i < n_worker_thread; i++) { @@ -326,8 +336,11 @@ void *adapter_rs_new(struct expr_rule *rules, size_t n_rule, rs_inst->rs_rt->matched_pats[i] = ALLOC(struct matched_pattern, 1); rs_inst->rs_rt->matched_pats[i]->ref_rs_attr = rs_inst->rs_attr; rs_inst->rs_rt->matched_pats[i]->n_patterns = rs_inst->n_patterns; - utarray_new(rs_inst->rs_rt->matched_pats[i]->pattern_ids, &ut_rs_pattern_id_icd); - utarray_reserve(rs_inst->rs_rt->matched_pats[i]->pattern_ids, MAX_HIT_PATTERN_NUM); + rs_inst->rs_rt->matched_pats[i]->ref_bloom = rs_inst->rs_rt->blooms[i]; + utarray_new(rs_inst->rs_rt->matched_pats[i]->pattern_ids, + &ut_rs_pattern_id_icd); + utarray_reserve(rs_inst->rs_rt->matched_pats[i]->pattern_ids, + MAX_HIT_PATTERN_NUM); } return rs_inst; @@ -356,6 +369,16 @@ void adapter_rs_free(void *rs_instance) rs_inst->rs_rt->regex_db = NULL; } + if (rs_inst->rs_rt->blooms != NULL) { + for (i = 0; i < rs_inst->n_worker_thread; i++) { + if (rs_inst->rs_rt->blooms[i] != NULL) { + bloom_free(rs_inst->rs_rt->blooms[i]); + FREE(rs_inst->rs_rt->blooms[i]); + } + } + FREE(rs_inst->rs_rt->blooms); + } + if (rs_inst->rs_rt->bool_match_buffs != NULL) { for (i = 0; i < rs_inst->n_worker_thread; i++) { if (rs_inst->rs_rt->bool_match_buffs[i] != NULL) { @@ -427,6 +450,23 @@ static int matched_event_cb(unsigned int id, int pos_offset, int from, int to, return 0; } + unsigned long long *tmp_pat_id = NULL; + if (utarray_len(matched_pat->pattern_ids) < (MAX_HIT_PATTERN_NUM / 64)) { + for (size_t i = 0; i < utarray_len(matched_pat->pattern_ids); i++) { + tmp_pat_id = (unsigned long long *)utarray_eltptr(matched_pat->pattern_ids, i); + if (*tmp_pat_id == pattern_id) { + return 0; + } + } + } else { + if (bloom_check(matched_pat->ref_bloom, (char *)&pattern_id, + sizeof(unsigned long long)) == 1) { + return 0; + } + bloom_add(matched_pat->ref_bloom, (char *)&pattern_id, + sizeof(unsigned long long)); + } + if (utarray_len(matched_pat->pattern_ids) >= MAX_HIT_PATTERN_NUM) { return 0; } @@ -640,7 +680,8 @@ int adapter_rs_scan_stream(void *rs_stream, const char *data, size_t data_len, err_count++; } } - + bloom_reset(matched_pat->ref_bloom); + if (err_count == 2) { return -1; } @@ -680,6 +721,8 @@ int adapter_rs_scan(void *rs_instance, int thread_id, const char *data, size_t d } } + bloom_reset(matched_pat->ref_bloom); + if (err_count == 2) { return -1; } diff --git a/src/maat_utils.c b/src/maat_utils.c index 9f91322..9b251c5 100644 --- a/src/maat_utils.c +++ b/src/maat_utils.c @@ -260,7 +260,7 @@ int system_cmd_gzip(const char *src_file, const char *dst_file) int system_cmd_encrypt(const char *src_file, const char *dst_file, const char *password) { char cmd[MAX_SYSTEM_CMD_LEN] = { 0 }; - snprintf(cmd, sizeof(cmd), "openssl enc -e -aes-256-cbc -k %s -p -nosalt -in %s -out %s -md md5", + snprintf(cmd, sizeof(cmd), "openssl enc -e -aes-256-cbc -k %s -nosalt -in %s -out %s -md md5", password, src_file, dst_file); return system(cmd); } |
