summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author liuwentan <[email protected]> 2023-12-21 10:24:49 +0800
committer liuwentan <[email protected]> 2023-12-21 10:24:49 +0800
commit 759f625cb178ada2751a9980062c4c9045a83675 (patch)
tree 4e33b3e87b7f316dd88f85860047363a16789d74
parent 48af7e7aac84f673bf39a5679503bc891407a182 (diff)
[PATCH] add bloom filter to optimize performance => OMPUB-1081 v4.0.49 dev-23.07
-rw-r--r-- deps/bloom/bloom.c 334
-rw-r--r-- deps/bloom/bloom.h 241
-rw-r--r-- deps/bloom/murmurhash2.c 64
-rw-r--r-- deps/bloom/murmurhash2.h 6
-rw-r--r-- scanner/CMakeLists.txt 14
-rw-r--r-- scanner/expr_matcher/adapter_hs/adapter_hs.cpp 41
-rw-r--r-- scanner/expr_matcher/adapter_rs/adapter_rs.cpp 49
-rw-r--r-- src/maat_utils.c 2
8 files changed, 742 insertions, 9 deletions
diff --git a/deps/bloom/bloom.c b/deps/bloom/bloom.c
new file mode 100644
index 0000000..f460139
--- /dev/null
+++ b/deps/bloom/bloom.c
@@ -0,0 +1,334 @@
+/*
+ * Copyright (c) 2012-2022, Jyri J. Virkki
+ * All rights reserved.
+ *
+ * This file is under BSD license. See LICENSE file.
+ */
+
+/*
+ * Refer to bloom.h for documentation on the public interfaces.
+ */
+
+#include <assert.h>
+#include <fcntl.h>
+#include <math.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "bloom.h"
+#include "murmurhash2.h"
+
+#define MAKESTRING(n) STRING(n)
+#define STRING(n) #n
+#define BLOOM_MAGIC "libbloom2"
+
+#define BLOOM_VERSION_MAJOR 2
+#define BLOOM_VERSION_MINOR 0
+
+/*
+ * Reports whether bit number `bit` of the bit array `buf` is set, and
+ * optionally sets it.
+ *
+ * Returns 1 if the bit was already set (the buffer is left untouched),
+ * otherwise 0. When the bit was clear and `set_bit` is non-zero, the bit
+ * is turned on before returning.
+ */
+inline static int test_bit_set_bit(unsigned char * buf,
+                                   unsigned long int bit, int set_bit)
+{
+  unsigned char * slot = &buf[bit >> 3];   // byte that holds the bit
+  const unsigned char old = *slot;         // expensive memory access
+  const unsigned char mask = 1 << (bit % 8ul);
+
+  if (old & mask) {
+    return 1;
+  }
+
+  if (set_bit) {
+    *slot = old | mask;
+  }
+  return 0;
+}
+
+
+/*
+ * Shared worker behind bloom_check() and bloom_add().
+ *
+ * Two murmurhash2 values of the element (the second seeded with the first)
+ * are combined as h1 + h2*k to pick one bit position per hash function.
+ *
+ * Returns -1 if the filter is not ready, 1 if every selected bit was
+ * already set (element present or collision), 0 otherwise. With `add`
+ * non-zero, every selected bit that was clear is set; with `add` zero the
+ * function stops at the first clear bit.
+ */
+static int bloom_check_add(struct bloom * bloom,
+                           const void * buffer, int len, int add)
+{
+  if (!bloom->ready) {
+    printf("bloom at %p not initialized!\n", (void *)bloom);
+    return -1;
+  }
+
+  const unsigned int h1 = murmurhash2(buffer, len, 0x9747b28c);
+  const unsigned int h2 = murmurhash2(buffer, len, h1);
+  unsigned char already_set = 0;
+
+  for (unsigned long int k = 0; k < bloom->hashes; k++) {
+    const unsigned long int pos = (h1 + h2 * k) % bloom->bits;
+    if (!test_bit_set_bit(bloom->bf, pos, add)) {
+      if (!add) {
+        // Query only: one clear bit is enough to answer "not present".
+        return 0;
+      }
+      continue;
+    }
+    already_set++;
+  }
+
+  // 1 == element already in (or collision)
+  return (already_set == bloom->hashes) ? 1 : 0;
+}
+
+
+// DEPRECATED - Please migrate to bloom_init2.
+//
+// Legacy initializer taking a signed entry count. Forwards to bloom_init2()
+// and returns its result (0 on success, 1 on failure).
+int bloom_init(struct bloom * bloom, int entries, double error)
+{
+  // A negative count would wrap to a huge unsigned value in the cast below
+  // and make bloom_init2() try to size an enormous filter. Clamp it to 0 so
+  // bloom_init2() zeroes the struct and rejects it as "too few entries".
+  if (entries < 0) {
+    entries = 0;
+  }
+  return bloom_init2(bloom, (unsigned int)entries, error);
+}
+
+
+/*
+ * Initializes `bloom` for `entries` expected elements at false-positive
+ * probability `error`.
+ *
+ * The struct is always zeroed first. Returns 1 (filter left not ready) when
+ * entries < 1000, error is outside (0, 1), the computed size is zero bits,
+ * or the bit array cannot be allocated. Returns 0 on success.
+ * Exits the process if unsigned long int is narrower than 8 bytes.
+ */
+int bloom_init2(struct bloom * bloom, unsigned int entries, double error)
+{
+  if (sizeof(unsigned long int) < 8) {
+    printf("error: libbloom will not function correctly because\n");
+    // sizeof yields size_t, which must be printed with %zu.
+    printf("sizeof(unsigned long int) == %zu\n", sizeof(unsigned long int));
+    exit(1);
+  }
+
+  memset(bloom, 0, sizeof(struct bloom));
+
+  if (entries < 1000 || error <= 0 || error >= 1) {
+    return 1;
+  }
+
+  bloom->entries = entries;
+  bloom->error = error;
+
+  double num = -log(bloom->error);
+  double denom = 0.480453013918201; // ln(2)^2
+  bloom->bpe = (num / denom);
+
+  long double dentries = (long double)entries;
+  long double allbits = dentries * bloom->bpe;
+  bloom->bits = (unsigned long int)allbits;
+
+  // An error very close to 1 makes entries * bpe truncate to zero bits.
+  // Lookups compute "% bloom->bits", so a zero-sized filter must be
+  // rejected here rather than marked ready.
+  if (bloom->bits == 0) {
+    return 1;
+  }
+
+  if (bloom->bits % 8) {
+    bloom->bytes = (bloom->bits / 8) + 1;
+  } else {
+    bloom->bytes = bloom->bits / 8;
+  }
+
+  bloom->hashes = (unsigned char)ceil(0.693147180559945 * bloom->bpe); // ln(2)
+
+  bloom->bf = (unsigned char *)calloc(bloom->bytes, sizeof(unsigned char));
+  if (bloom->bf == NULL) { // LCOV_EXCL_START
+    return 1;
+  } // LCOV_EXCL_STOP
+
+  bloom->ready = 1;
+
+  bloom->major = BLOOM_VERSION_MAJOR;
+  bloom->minor = BLOOM_VERSION_MINOR;
+
+  return 0;
+}
+
+
+// Membership query; never modifies the filter.
+// Returns 1 when every bit selected for the element is set (present, or a
+// false positive), 0 when at least one is clear, -1 when the filter is not
+// initialized.
+int bloom_check(struct bloom * bloom, const void * buffer, int len)
+{
+  const int add_mode = 0;  // look only, do not set bits
+  return bloom_check_add(bloom, buffer, len, add_mode);
+}
+
+
+// Inserts the element by setting all of its selected bits.
+// Returns 1 when all of them were already set (element or collision seen
+// before), 0 when at least one bit was newly set, -1 when the filter is not
+// initialized.
+int bloom_add(struct bloom * bloom, const void * buffer, int len)
+{
+  const int add_mode = 1;  // set any bit that is still clear
+  return bloom_check_add(bloom, buffer, len, add_mode);
+}
+
+
+// Debugging aid: dumps the filter's parameters to stdout.
+void bloom_print(struct bloom * bloom)
+{
+  // Sizes are narrowed to unsigned int for the %u conversions below.
+  const unsigned int kilobytes = bloom->bytes / 1024;
+  const unsigned int megabytes = kilobytes / 1024;
+
+  printf("bloom at %p\n", (void *)bloom);
+  if (bloom->ready == 0) {
+    printf(" *** NOT READY ***\n");
+  }
+  printf(" ->version = %d.%d\n", bloom->major, bloom->minor);
+  printf(" ->entries = %u\n", bloom->entries);
+  printf(" ->error = %f\n", bloom->error);
+  printf(" ->bits = %lu\n", bloom->bits);
+  printf(" ->bits per elem = %f\n", bloom->bpe);
+  printf(" ->bytes = %lu", bloom->bytes);
+  printf(" (%u KB, %u MB)\n", kilobytes, megabytes);
+  printf(" ->hash functions = %d\n", bloom->hashes);
+}
+
+
+// Releases the bit array of an initialized filter and marks the struct as
+// not ready. Does nothing but clear `ready` when the filter was never
+// initialized.
+void bloom_free(struct bloom * bloom)
+{
+  if (bloom->ready) {
+    free(bloom->bf);
+    // Do not leave a dangling pointer behind: bloom_save() does not check
+    // `ready` and would otherwise read from the freed buffer.
+    bloom->bf = NULL;
+  }
+  bloom->ready = 0;
+}
+
+
+// Clears every bit of an initialized filter so that it is empty again.
+// Returns 0 on success, 1 when the filter is not ready.
+int bloom_reset(struct bloom * bloom)
+{
+  if (bloom->ready) {
+    memset(bloom->bf, 0, bloom->bytes);
+    return 0;
+  }
+  return 1;
+}
+
+
+/*
+ * Writes the filter to `filename`, creating the file or replacing its
+ * previous contents.
+ *
+ * File layout: BLOOM_MAGIC, a uint16_t holding sizeof(struct bloom), the
+ * raw struct bloom, then the bit array (bloom->bytes bytes).
+ *
+ * Returns 0 on success, 1 on a NULL/empty filename, an open failure or a
+ * short/failed write.
+ */
+int bloom_save(struct bloom * bloom, char * filename)
+{
+  if (filename == NULL || filename[0] == 0) {
+    return 1;
+  }
+
+  // O_TRUNC: without it, overwriting a longer existing file would leave
+  // stale bytes after the newly written filter.
+  int fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC, 0644);
+  if (fd < 0) {
+    return 1;
+  }
+
+  const size_t magic_len = strlen(BLOOM_MAGIC);
+  ssize_t out = write(fd, BLOOM_MAGIC, magic_len);
+  if (out < 0 || (size_t)out != magic_len) { goto save_error; } // LCOV_EXCL_LINE
+
+  uint16_t size = sizeof(struct bloom);
+  out = write(fd, &size, sizeof(uint16_t));
+  if (out != sizeof(uint16_t)) { goto save_error; } // LCOV_EXCL_LINE
+
+  out = write(fd, bloom, sizeof(struct bloom));
+  if (out != sizeof(struct bloom)) { goto save_error; } // LCOV_EXCL_LINE
+
+  out = write(fd, bloom->bf, bloom->bytes);
+  if (out < 0 || (unsigned long int)out != bloom->bytes) { goto save_error; } // LCOV_EXCL_LINE
+
+  close(fd);
+  return 0;
+  // LCOV_EXCL_START
+ save_error:
+  close(fd);
+  return 1;
+  // LCOV_EXCL_STOP
+}
+
+
+/*
+ * Loads a filter from a file written by bloom_save(). `bloom` is zeroed
+ * first, and on any failure after that it is left with ready == 0.
+ *
+ * Returns 0 on success, otherwise:
+ *   1  NULL/empty filename        7  stored struct size mismatch
+ *   2  NULL bloom                 8  short read of the struct
+ *   3  cannot open file           9  major version mismatch
+ *   4  short read of the magic   10  bit array allocation failed
+ *   5  magic mismatch            11  short read of the bit array
+ *   6  short read of size field  12  inconsistent bits/bytes in the header
+ */
+int bloom_load(struct bloom * bloom, char * filename)
+{
+  int rv = 0;
+
+  if (filename == NULL || filename[0] == 0) { return 1; }
+  if (bloom == NULL) { return 2; }
+
+  memset(bloom, 0, sizeof(struct bloom));
+
+  int fd = open(filename, O_RDONLY);
+  if (fd < 0) { return 3; }
+
+  char line[30];
+  memset(line, 0, 30);
+  ssize_t in = read(fd, line, strlen(BLOOM_MAGIC));
+
+  if (in != strlen(BLOOM_MAGIC)) {
+    rv = 4;
+    goto load_error;
+  }
+
+  if (strncmp(line, BLOOM_MAGIC, strlen(BLOOM_MAGIC))) {
+    rv = 5;
+    goto load_error;
+  }
+
+  uint16_t size;
+  in = read(fd, &size, sizeof(uint16_t));
+  if (in != sizeof(uint16_t)) {
+    rv = 6;
+    goto load_error;
+  }
+
+  if (size != sizeof(struct bloom)) {
+    rv = 7;
+    goto load_error;
+  }
+
+  in = read(fd, bloom, sizeof(struct bloom));
+  if (in != sizeof(struct bloom)) {
+    rv = 8;
+    goto load_error;
+  }
+
+  // The pointer stored in the file belongs to the process that saved it.
+  bloom->bf = NULL;
+  if (bloom->major != BLOOM_VERSION_MAJOR) {
+    rv = 9;
+    goto load_error;
+  }
+
+  // The header comes from disk and may be corrupt. Lookups index the bit
+  // array with "hash % bits", so bytes must cover exactly `bits` bits and a
+  // ready filter must not have zero bits.
+  if (bloom->bytes != bloom->bits / 8 + (bloom->bits % 8 != 0) ||
+      (bloom->ready && bloom->bits == 0)) {
+    rv = 12;
+    goto load_error;
+  }
+
+  bloom->bf = (unsigned char *)malloc(bloom->bytes);
+  if (bloom->bf == NULL) { rv = 10; goto load_error; } // LCOV_EXCL_LINE
+
+  in = read(fd, bloom->bf, bloom->bytes);
+  if (in != bloom->bytes) {
+    rv = 11;
+    free(bloom->bf);
+    bloom->bf = NULL;
+    goto load_error;
+  }
+
+  close(fd);
+  return rv;
+
+ load_error:
+  close(fd);
+  bloom->ready = 0;
+  return rv;
+}
+
+
+/*
+ * ORs the bit array of bloom_src into bloom_dest; bloom_src is only read.
+ *
+ * Returns -1 when either filter is not ready (the destination is checked
+ * first), 1 when entries, error, version or byte size differ, 0 after a
+ * successful merge.
+ */
+int bloom_merge(struct bloom * bloom_dest, struct bloom * bloom_src)
+{
+  if (!bloom_dest->ready) {
+    printf("bloom at %p not initialized!\n", (void *)bloom_dest);
+    return -1;
+  }
+
+  if (!bloom_src->ready) {
+    printf("bloom at %p not initialized!\n", (void *)bloom_src);
+    return -1;
+  }
+
+  const int same_params =
+    bloom_dest->entries == bloom_src->entries &&
+    bloom_dest->error == bloom_src->error &&
+    bloom_dest->major == bloom_src->major &&
+    bloom_dest->minor == bloom_src->minor;
+  if (!same_params) {
+    return 1;
+  }
+
+  // Not really possible if properly used but check anyway to avoid the
+  // possibility of buffer overruns.
+  if (bloom_dest->bytes != bloom_src->bytes) {
+    return 1; // LCOV_EXCL_LINE
+  }
+
+  unsigned char * dst = bloom_dest->bf;
+  const unsigned char * src = bloom_src->bf;
+  const unsigned long int total = bloom_dest->bytes;
+  for (unsigned long int idx = 0; idx < total; idx++) {
+    dst[idx] |= src[idx];
+  }
+
+  return 0;
+}
+
+
+// Returns the library version as a static string.
+//
+// BLOOM_VERSION is expected to be supplied by the build; this file never
+// defines it. Without the fallback the stringification would yield the
+// literal text "BLOOM_VERSION", so the version is built from
+// BLOOM_VERSION_MAJOR/MINOR instead when the macro is absent.
+const char * bloom_version(void)
+{
+#ifdef BLOOM_VERSION
+  return MAKESTRING(BLOOM_VERSION);
+#else
+  return MAKESTRING(BLOOM_VERSION_MAJOR) "." MAKESTRING(BLOOM_VERSION_MINOR);
+#endif
+}
diff --git a/deps/bloom/bloom.h b/deps/bloom/bloom.h
new file mode 100644
index 0000000..3d7b86b
--- /dev/null
+++ b/deps/bloom/bloom.h
@@ -0,0 +1,241 @@
+/*
+ * Copyright (c) 2012-2022, Jyri J. Virkki
+ * All rights reserved.
+ *
+ * This file is under BSD license. See LICENSE file.
+ */
+
+#ifndef _BLOOM_H
+#define _BLOOM_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+#define NULL_BLOOM_FILTER { 0, 0, 0, 0, 0.0, 0, 0, 0, 0.0, NULL }
+
+#define ENTRIES_T unsigned int
+#define BYTES_T unsigned long int
+#define BITS_T unsigned long int
+
+
+/** ***************************************************************************
+ * Structure to keep track of one bloom filter. Caller needs to
+ * allocate this and pass it to the functions below. First call for
+ * every struct must be to bloom_init().
+ *
+ */
+// State of one bloom filter.
+// NOTE: do not reorder or resize fields. bloom_save() writes this struct
+// to disk verbatim and NULL_BLOOM_FILTER initializes it positionally.
+struct bloom
+{
+ // These fields are part of the public interface of this structure.
+ // Client code may read these values if desired. Client code MUST NOT
+ // modify any of these.
+ // Expected number of elements, as passed to bloom_init2().
+ unsigned int entries;
+ // Size of the filter in bits (entries * bpe, truncated).
+ unsigned long int bits;
+ // Size of the bit array in bytes (bits / 8, rounded up).
+ unsigned long int bytes;
+ // Number of bit positions probed per element (ceil(ln(2) * bpe)).
+ unsigned char hashes;
+ // Requested false-positive probability, as passed to bloom_init2().
+ double error;
+
+ // Fields below are private to the implementation. These may go away or
+ // change incompatibly at any moment. Client code MUST NOT access or rely
+ // on these.
+ // 1 once bloom_init2() has succeeded; cleared again by bloom_free().
+ unsigned char ready;
+ // Library version recorded by bloom_init2(); bloom_load() rejects files
+ // whose major version differs.
+ unsigned char major;
+ unsigned char minor;
+ // Bits per element: -ln(error) / ln(2)^2.
+ double bpe;
+ // The bit array itself (`bytes` bytes), allocated by bloom_init2() or
+ // bloom_load() and released by bloom_free().
+ unsigned char * bf;
+};
+
+
+/** ***************************************************************************
+ * Initialize the bloom filter for use.
+ *
+ * The filter is initialized with a bit field and number of hash functions
+ * according to the computations from the wikipedia entry:
+ * http://en.wikipedia.org/wiki/Bloom_filter
+ *
+ * Optimal number of bits is:
+ * bits = -(entries * ln(error)) / ln(2)^2
+ *
+ * Optimal number of hash functions is:
+ * hashes = bpe * ln(2)
+ *
+ * Parameters:
+ * -----------
+ * bloom - Pointer to an allocated struct bloom (see above).
+ * entries - The expected number of entries which will be inserted.
+ * Must be at least 1000 (in practice, likely much larger).
+ * error - Probability of collision (as long as entries are not
+ * exceeded).
+ *
+ * Return:
+ * -------
+ * 0 - on success
+ * 1 - on failure
+ *
+ */
+int bloom_init2(struct bloom * bloom, unsigned int entries, double error);
+
+
+/**
+ * DEPRECATED.
+ * Kept for compatibility with libbloom v.1. To be removed in v3.0.
+ *
+ */
+int bloom_init(struct bloom * bloom, int entries, double error);
+
+
+/** ***************************************************************************
+ * Check if the given element is in the bloom filter. Remember this may
+ * return false positive if a collision occurred.
+ *
+ * Parameters:
+ * -----------
+ * bloom - Pointer to an allocated struct bloom (see above).
+ * buffer - Pointer to buffer containing element to check.
+ * len - Size of 'buffer'.
+ *
+ * Return:
+ * -------
+ * 0 - element is not present
+ * 1 - element is present (or false positive due to collision)
+ * -1 - bloom not initialized
+ *
+ */
+int bloom_check(struct bloom * bloom, const void * buffer, int len);
+
+
+/** ***************************************************************************
+ * Add the given element to the bloom filter.
+ * The return code indicates if the element (or a collision) was already in,
+ * so for the common check+add use case, no need to call check separately.
+ *
+ * Parameters:
+ * -----------
+ * bloom - Pointer to an allocated struct bloom (see above).
+ * buffer - Pointer to buffer containing element to add.
+ * len - Size of 'buffer'.
+ *
+ * Return:
+ * -------
+ * 0 - element was not present and was added
+ * 1 - element (or a collision) had already been added previously
+ * -1 - bloom not initialized
+ *
+ */
+int bloom_add(struct bloom * bloom, const void * buffer, int len);
+
+
+/** ***************************************************************************
+ * Print (to stdout) info about this bloom filter. Debugging aid.
+ *
+ */
+void bloom_print(struct bloom * bloom);
+
+
+/** ***************************************************************************
+ * Deallocate internal storage.
+ *
+ * Upon return, the bloom struct is no longer usable. You may call bloom_init2
+ * on the same struct to reinitialize it.
+ *
+ * Parameters:
+ * -----------
+ * bloom - Pointer to an allocated struct bloom (see above).
+ *
+ * Return: none
+ *
+ */
+void bloom_free(struct bloom * bloom);
+
+
+/** ***************************************************************************
+ * Erase internal storage.
+ *
+ * Erases all elements. Upon return, the bloom struct returns to its initial
+ * (initialized) state.
+ *
+ * Parameters:
+ * -----------
+ * bloom - Pointer to an allocated struct bloom (see above).
+ *
+ * Return:
+ * 0 - on success
+ * 1 - on failure
+ *
+ */
+int bloom_reset(struct bloom * bloom);
+
+
+/** ***************************************************************************
+ * Save a bloom filter to a file.
+ *
+ * Parameters:
+ * -----------
+ * bloom - Pointer to an allocated struct bloom (see above).
+ * filename - Create (or overwrite) bloom data to this file.
+ *
+ * Return:
+ * 0 - on success
+ * 1 - on failure
+ *
+ */
+int bloom_save(struct bloom * bloom, char * filename);
+
+
+/** ***************************************************************************
+ * Load a bloom filter from a file.
+ *
+ * This function loads a file previously saved with bloom_save().
+ *
+ * Parameters:
+ * -----------
+ * bloom - Pointer to an allocated struct bloom (see above).
+ * filename - Load bloom filter data from this file.
+ *
+ * Return:
+ * 0 - on success
+ * > 0 - on failure
+ *
+ */
+int bloom_load(struct bloom * bloom, char * filename);
+
+
+/** ***************************************************************************
+ * Merge two compatible bloom filters.
+ *
+ * On success, bloom_dest will contain all elements of bloom_src in addition
+ * to its own. The bloom_src bloom filter is never modified.
+ *
+ * Both bloom_dest and bloom_src must be initialized and both must have
+ * identical parameters.
+ *
+ * Parameters:
+ * -----------
+ * bloom_dest - will contain the merged elements from bloom_src
+ * bloom_src - its elements will be merged into bloom_dest
+ *
+ * Return:
+ * -------
+ * 0 - on success
+ * 1 - incompatible bloom filters
+ * -1 - bloom not initialized
+ *
+ */
+int bloom_merge(struct bloom * bloom_dest, struct bloom * bloom_src);
+
+
+/** ***************************************************************************
+ * Returns version string compiled into library.
+ *
+ * Return: version string
+ *
+ */
+const char * bloom_version();
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/deps/bloom/murmurhash2.c b/deps/bloom/murmurhash2.c
new file mode 100644
index 0000000..21b306c
--- /dev/null
+++ b/deps/bloom/murmurhash2.c
@@ -0,0 +1,64 @@
+//-----------------------------------------------------------------------------
+// MurmurHash2, by Austin Appleby
+
+// Note - This code makes one assumption about how your machine behaves -
+
+// 1. sizeof(int) == 4
+
+// And it has a few limitations -
+
+// 1. It will not work incrementally.
+// 2. It will not produce the same results on little-endian and big-endian
+// machines.
+
+// Parameters:
+//   key  - bytes to hash (any alignment)
+//   len  - number of bytes in 'key'
+//   seed - initial hash state; different seeds give different hashes
+// Returns the 32-bit hash of the input.
+
+unsigned int murmurhash2(const void * key, int len, const unsigned int seed)
+{
+  // 'm' and 'r' are mixing constants generated offline.
+  // They're not really 'magic', they just happen to work well.
+
+  const unsigned int m = 0x5bd1e995;
+  const int r = 24;
+
+  // Initialize the hash to a 'random' value
+
+  unsigned int h = seed ^ len;
+
+  // Mix 4 bytes at a time into the hash
+
+  const unsigned char * data = (const unsigned char *)key;
+
+  while(len >= 4)
+  {
+    // Copy the next word byte by byte, in native byte order. This yields
+    // the same value as reading it through an unsigned int pointer, but
+    // stays well-defined for misaligned keys and for keys whose underlying
+    // object is not an unsigned int.
+    unsigned int k;
+    unsigned char * kb = (unsigned char *)&k;
+    kb[0] = data[0];
+    kb[1] = data[1];
+    kb[2] = data[2];
+    kb[3] = data[3];
+
+    k *= m;
+    k ^= k >> r;
+    k *= m;
+
+    h *= m;
+    h ^= k;
+
+    data += 4;
+    len -= 4;
+  }
+
+  // Handle the last few bytes of the input array
+
+  switch(len)
+  {
+  case 3: h ^= data[2] << 16;
+    /* fallthrough */
+  case 2: h ^= data[1] << 8;
+    /* fallthrough */
+  case 1: h ^= data[0];
+    h *= m;
+    break;
+  default:
+    break;
+  };
+
+  // Do a few final mixes of the hash to ensure the last few
+  // bytes are well-incorporated.
+
+  h ^= h >> 13;
+  h *= m;
+  h ^= h >> 15;
+
+  return h;
+} \ No newline at end of file
diff --git a/deps/bloom/murmurhash2.h b/deps/bloom/murmurhash2.h
new file mode 100644
index 0000000..04c0881
--- /dev/null
+++ b/deps/bloom/murmurhash2.h
@@ -0,0 +1,6 @@
+#ifndef _BLOOM_MURMURHASH2
+#define _BLOOM_MURMURHASH2
+
+unsigned int murmurhash2(const void * key, int len, const unsigned int seed);
+
+#endif
diff --git a/scanner/CMakeLists.txt b/scanner/CMakeLists.txt
index dd373ba..e8c1a7b 100644
--- a/scanner/CMakeLists.txt
+++ b/scanner/CMakeLists.txt
@@ -7,9 +7,13 @@ include_directories(${PROJECT_SOURCE_DIR}/src/inc_internal)
add_subdirectory(ip_matcher/IntervalIndex)
-add_library(adapter-static bool_matcher/bool_matcher.cpp expr_matcher/expr_matcher.cpp
- expr_matcher/adapter_hs/adapter_hs.cpp expr_matcher/adapter_rs/adapter_rs.cpp
- fqdn_engine/fqdn_engine.cpp ip_matcher/ip_matcher.cpp ip_matcher/ipv4_match.cpp
- ip_matcher/ipv6_match.cpp flag_matcher/flag_matcher.cpp interval_matcher/cgranges.c
- interval_matcher/interval_matcher.cpp ipport_matcher/ipport_matcher.cpp)
+set(SCANNER_SRC bool_matcher/bool_matcher.cpp expr_matcher/expr_matcher.cpp
+ expr_matcher/adapter_hs/adapter_hs.cpp expr_matcher/adapter_rs/adapter_rs.cpp
+ fqdn_engine/fqdn_engine.cpp ip_matcher/ip_matcher.cpp ip_matcher/ipv4_match.cpp
+ ip_matcher/ipv6_match.cpp flag_matcher/flag_matcher.cpp interval_matcher/cgranges.c
+ interval_matcher/interval_matcher.cpp ipport_matcher/ipport_matcher.cpp)
+
+set(LIB_SOURCE_FILES ${PROJECT_SOURCE_DIR}/deps/bloom/bloom.c ${PROJECT_SOURCE_DIR}/deps/bloom/murmurhash2.c)
+
+add_library(adapter-static ${SCANNER_SRC} ${LIB_SOURCE_FILES})
target_link_libraries(adapter-static hyperscan_static hyperscan_runtime_static rulescan_static interval_index_static) \ No newline at end of file
diff --git a/scanner/expr_matcher/adapter_hs/adapter_hs.cpp b/scanner/expr_matcher/adapter_hs/adapter_hs.cpp
index 001d160..e4e723e 100644
--- a/scanner/expr_matcher/adapter_hs/adapter_hs.cpp
+++ b/scanner/expr_matcher/adapter_hs/adapter_hs.cpp
@@ -19,6 +19,7 @@
#include "adapter_hs.h"
#include "uthash/utarray.h"
#include "uthash/uthash.h"
+#include "bloom/bloom.h"
#include "maat_utils.h"
#include "../../bool_matcher/bool_matcher.h"
@@ -68,6 +69,7 @@ struct adapter_hs_runtime {
hs_database_t *literal_db;
hs_database_t *regex_db;
+ struct bloom **blooms;
struct adapter_hs_scratch *scratch;
struct adapter_hs_stream **streams;
struct bool_matcher *bm;
@@ -97,6 +99,7 @@ struct pattern_attribute {
struct matched_pattern {
UT_array *pattern_ids;
size_t n_patterns;
+ struct bloom *ref_bloom;
struct pattern_attribute *ref_hs_attr;
size_t scan_data_len;
};
@@ -438,6 +441,13 @@ void *adapter_hs_new(struct expr_rule *rules, size_t n_rule,
goto error;
}
+ /* alloc bloom filter */
+ hs_inst->hs_rt->blooms = ALLOC(struct bloom *, n_worker_thread);
+ for (i = 0; i < n_worker_thread; i++) {
+ hs_inst->hs_rt->blooms[i] = ALLOC(struct bloom, 1);
+ bloom_init2(hs_inst->hs_rt->blooms[i], 1024, 0.001);
+ }
+
/* alloc scratch */
hs_inst->hs_rt->scratch = ALLOC(struct adapter_hs_scratch, 1);
hs_inst->hs_rt->scratch->bool_match_buffs = ALLOC(struct bool_expr_match *,
@@ -495,6 +505,16 @@ void adapter_hs_free(void *hs_instance)
hs_inst->hs_rt->regex_db = NULL;
}
+ if (hs_inst->hs_rt->blooms != NULL) {
+ for (i = 0; i < hs_inst->n_worker_thread; i++) {
+ if (hs_inst->hs_rt->blooms[i] != NULL) {
+ bloom_free(hs_inst->hs_rt->blooms[i]);
+ FREE(hs_inst->hs_rt->blooms[i]);
+ }
+ }
+ FREE(hs_inst->hs_rt->blooms);
+ }
+
if (hs_inst->hs_rt->scratch != NULL) {
if (hs_inst->hs_rt->scratch->literal_scratches != NULL) {
for (i = 0; i < hs_inst->n_worker_thread; i++) {
@@ -581,6 +601,23 @@ static int matched_event_cb(unsigned int id, unsigned long long from,
return 0;
}
+ unsigned long long *tmp_pat_id = NULL;
+ if (utarray_len(matched_pat->pattern_ids) < (MAX_HIT_PATTERN_NUM / 64)) {
+ for (size_t i = 0; i < utarray_len(matched_pat->pattern_ids); i++) {
+ tmp_pat_id = (unsigned long long *)utarray_eltptr(matched_pat->pattern_ids, i);
+ if (*tmp_pat_id == pattern_id) {
+ return 0;
+ }
+ }
+ } else {
+ if (bloom_check(matched_pat->ref_bloom, (char *)&pattern_id,
+ sizeof(unsigned long long)) == 1) {
+ return 0;
+ }
+ bloom_add(matched_pat->ref_bloom, (char *)&pattern_id,
+ sizeof(unsigned long long));
+ }
+
if (utarray_len(matched_pat->pattern_ids) >= MAX_HIT_PATTERN_NUM) {
return 0;
}
@@ -655,6 +692,7 @@ void *adapter_hs_stream_open(void *hs_instance, int thread_id)
hs_stream->thread_id = thread_id;
hs_stream->ref_hs_rt = hs_inst->hs_rt;
hs_stream->matched_pat = ALLOC(struct matched_pattern, 1);
+ hs_stream->matched_pat->ref_bloom = hs_inst->hs_rt->blooms[thread_id];
hs_stream->matched_pat->ref_hs_attr = hs_inst->hs_attr;
hs_stream->matched_pat->n_patterns = hs_inst->n_patterns;
utarray_new(hs_stream->matched_pat->pattern_ids, &ut_hs_pattern_id_icd);
@@ -723,6 +761,7 @@ void adapter_hs_stream_close(void *hs_stream)
/* stream->hs_rt point to hs_instance->hs_rt which will call free
same as hs_attr */
stream->ref_hs_rt = NULL;
+ stream->matched_pat->ref_bloom = NULL;
stream->matched_pat->ref_hs_attr = NULL;
if (stream->matched_pat->pattern_ids != NULL) {
@@ -862,6 +901,8 @@ int adapter_hs_scan_stream(void *hs_stream, const char *data, size_t data_len,
}
}
+ bloom_reset(stream->matched_pat->ref_bloom);
+
if (err_count == 2) {
return -1;
}
diff --git a/scanner/expr_matcher/adapter_rs/adapter_rs.cpp b/scanner/expr_matcher/adapter_rs/adapter_rs.cpp
index 04e05b2..110ab60 100644
--- a/scanner/expr_matcher/adapter_rs/adapter_rs.cpp
+++ b/scanner/expr_matcher/adapter_rs/adapter_rs.cpp
@@ -19,6 +19,7 @@
#include "adapter_rs.h"
#include "uthash/utarray.h"
#include "uthash/uthash.h"
+#include "bloom/bloom.h"
#include "maat_utils.h"
#include "../../bool_matcher/bool_matcher.h"
@@ -59,6 +60,7 @@ struct adapter_rs_runtime {
rs_database_t *literal_db;
rs_database_t *regex_db;
+ struct bloom **blooms;
struct bool_expr_match **bool_match_buffs; /* per thread */
struct adapter_rs_stream **streams; /* per thread */
struct matched_pattern **matched_pats; /* per thread */
@@ -90,6 +92,7 @@ struct pattern_attribute {
struct matched_pattern {
UT_array *pattern_ids;
size_t n_patterns;
+ struct bloom *ref_bloom;
struct pattern_attribute *ref_rs_attr;
};
@@ -310,6 +313,13 @@ void *adapter_rs_new(struct expr_rule *rules, size_t n_rule,
goto error;
}
+ /* alloc bloom filter */
+ rs_inst->rs_rt->blooms = ALLOC(struct bloom *, n_worker_thread);
+ for (i = 0; i < n_worker_thread; i++) {
+ rs_inst->rs_rt->blooms[i] = ALLOC(struct bloom, 1);
+ bloom_init2(rs_inst->rs_rt->blooms[i], 1024, 0.001);
+ }
+
/* alloc scratch */
rs_inst->rs_rt->bool_match_buffs = ALLOC(struct bool_expr_match *, n_worker_thread);
for (i = 0; i < n_worker_thread; i++) {
@@ -326,8 +336,11 @@ void *adapter_rs_new(struct expr_rule *rules, size_t n_rule,
rs_inst->rs_rt->matched_pats[i] = ALLOC(struct matched_pattern, 1);
rs_inst->rs_rt->matched_pats[i]->ref_rs_attr = rs_inst->rs_attr;
rs_inst->rs_rt->matched_pats[i]->n_patterns = rs_inst->n_patterns;
- utarray_new(rs_inst->rs_rt->matched_pats[i]->pattern_ids, &ut_rs_pattern_id_icd);
- utarray_reserve(rs_inst->rs_rt->matched_pats[i]->pattern_ids, MAX_HIT_PATTERN_NUM);
+ rs_inst->rs_rt->matched_pats[i]->ref_bloom = rs_inst->rs_rt->blooms[i];
+ utarray_new(rs_inst->rs_rt->matched_pats[i]->pattern_ids,
+ &ut_rs_pattern_id_icd);
+ utarray_reserve(rs_inst->rs_rt->matched_pats[i]->pattern_ids,
+ MAX_HIT_PATTERN_NUM);
}
return rs_inst;
@@ -356,6 +369,16 @@ void adapter_rs_free(void *rs_instance)
rs_inst->rs_rt->regex_db = NULL;
}
+ if (rs_inst->rs_rt->blooms != NULL) {
+ for (i = 0; i < rs_inst->n_worker_thread; i++) {
+ if (rs_inst->rs_rt->blooms[i] != NULL) {
+ bloom_free(rs_inst->rs_rt->blooms[i]);
+ FREE(rs_inst->rs_rt->blooms[i]);
+ }
+ }
+ FREE(rs_inst->rs_rt->blooms);
+ }
+
if (rs_inst->rs_rt->bool_match_buffs != NULL) {
for (i = 0; i < rs_inst->n_worker_thread; i++) {
if (rs_inst->rs_rt->bool_match_buffs[i] != NULL) {
@@ -427,6 +450,23 @@ static int matched_event_cb(unsigned int id, int pos_offset, int from, int to,
return 0;
}
+ unsigned long long *tmp_pat_id = NULL;
+ if (utarray_len(matched_pat->pattern_ids) < (MAX_HIT_PATTERN_NUM / 64)) {
+ for (size_t i = 0; i < utarray_len(matched_pat->pattern_ids); i++) {
+ tmp_pat_id = (unsigned long long *)utarray_eltptr(matched_pat->pattern_ids, i);
+ if (*tmp_pat_id == pattern_id) {
+ return 0;
+ }
+ }
+ } else {
+ if (bloom_check(matched_pat->ref_bloom, (char *)&pattern_id,
+ sizeof(unsigned long long)) == 1) {
+ return 0;
+ }
+ bloom_add(matched_pat->ref_bloom, (char *)&pattern_id,
+ sizeof(unsigned long long));
+ }
+
if (utarray_len(matched_pat->pattern_ids) >= MAX_HIT_PATTERN_NUM) {
return 0;
}
@@ -640,7 +680,8 @@ int adapter_rs_scan_stream(void *rs_stream, const char *data, size_t data_len,
err_count++;
}
}
-
+ bloom_reset(matched_pat->ref_bloom);
+
if (err_count == 2) {
return -1;
}
@@ -680,6 +721,8 @@ int adapter_rs_scan(void *rs_instance, int thread_id, const char *data, size_t d
}
}
+ bloom_reset(matched_pat->ref_bloom);
+
if (err_count == 2) {
return -1;
}
diff --git a/src/maat_utils.c b/src/maat_utils.c
index 9f91322..9b251c5 100644
--- a/src/maat_utils.c
+++ b/src/maat_utils.c
@@ -260,7 +260,7 @@ int system_cmd_gzip(const char *src_file, const char *dst_file)
int system_cmd_encrypt(const char *src_file, const char *dst_file, const char *password)
{
char cmd[MAX_SYSTEM_CMD_LEN] = { 0 };
- snprintf(cmd, sizeof(cmd), "openssl enc -e -aes-256-cbc -k %s -p -nosalt -in %s -out %s -md md5",
+ snprintf(cmd, sizeof(cmd), "openssl enc -e -aes-256-cbc -k %s -nosalt -in %s -out %s -md md5",
password, src_file, dst_file);
return system(cmd);
}