summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorchenzizhan <[email protected]>2024-06-25 14:52:28 +0800
committerchenzizhan <[email protected]>2024-06-25 14:52:28 +0800
commit381d3f32479a780833efdad6ba4c515067aa2127 (patch)
treeaa39420599b4a95efe0000d829bb673df01578a0
parenta04cb3a7a50913f842b100529c2a881daf73efd7 (diff)
performance: faster hash128, power by table, less calling of sorted_set_find_entry
-rw-r--r--CMakeLists.txt3
-rw-r--r--src/metrics/metric.c24
-rw-r--r--src/tags/heavy_keeper.c48
-rw-r--r--src/tags/my_ut_hash.c133
-rw-r--r--test/test_fuzz_test.cpp58
5 files changed, 232 insertions, 34 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4c2d6f3..2513d1e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,6 +1,7 @@
cmake_minimum_required(VERSION 2.8.8...3.10)
message("-- CMAKE_C_COMPILER: ${CMAKE_C_COMPILER_ID}")
+message("compiling in: ${CMAKE_BUILD_TYPE} mode")
set(lib_name fieldstat4)
@@ -65,7 +66,7 @@ if (CMAKE_CXX_CPPCHECK)
"--inline-suppr"
"--suppress=*:${PROJECT_SOURCE_DIR}/vendors/*"
"--suppress=*:${PROJECT_SOURCE_DIR}/test/utils.hpp"
- "--suppress=*:${PROJECT_SOURCE_DIR}/test/deps/*"
+ "--suppress=*:${PROJECT_SOURCE_DIR}/test/*"
"--suppress=*:${PROJECT_SOURCE_DIR}/test/unit_test_serialize.cpp"
"--suppress=*:${PROJECT_SOURCE_DIR}/src/logger/log_handle.c"
)
diff --git a/src/metrics/metric.c b/src/metrics/metric.c
index 95b7200..fc7ba28 100644
--- a/src/metrics/metric.c
+++ b/src/metrics/metric.c
@@ -82,7 +82,7 @@ struct metric {
/* -------------------------------------------------------------------------- */
/* id cell map */
/* -------------------------------------------------------------------------- */
-struct metric_measure_data *metric_find_one_cell(const struct metric *pthis, int id) {
+static inline struct metric_measure_data *metric_find_one_cell(const struct metric *pthis, int id) {
if (id >= pthis->max_n_array_item) {
return NULL;
}
@@ -694,10 +694,10 @@ void metric_delete_cell(struct metric *pthis, int cell_id)
/* metric operations */
/* -------------------------------------------------------------------------- */
-struct metric_measure_data *metric_find_or_new_cell(struct metric *pthis, int cell_id)
+static inline struct metric_measure_data *metric_find_or_new_cell(struct metric *pthis, int cell_id)
{
struct metric_measure_data *data = metric_find_one_cell(pthis, cell_id);
-
+
if (data == NULL) {
data = pthis->scheme->new(pthis->info->paras);
metric_add_one_cell(pthis, cell_id, data);
@@ -713,30 +713,16 @@ struct metric *metric_counter_new(const char *name)
return pthis;
}
-long long safe_add_longlong(long long a, long long b)
-{
- if (a > 0 && b > LLONG_MAX - a) {
- return LLONG_MAX;
- }
- if (a < 0 && b < LLONG_MIN - a) {
- return LLONG_MIN;
- }
- return a + b;
-}
-
void metric_counter_incrby(struct metric *pthis, int cell_id, long long value)
{
struct metric_measure_data *data = metric_find_or_new_cell(pthis, cell_id);
- struct metric_counter_or_gauge *counter = data->counter;
- counter->value = safe_add_longlong(counter->value, value);
+ data->counter->value += value;
}
int metric_counter_set(struct metric *pthis, int cell_id, long long value)
{
struct metric_measure_data *data = metric_find_or_new_cell(pthis, cell_id);
- struct metric_counter_or_gauge *counter = data->counter;
-
- counter->value = value;
+ data->counter->value = value;
return 0;
}
diff --git a/src/tags/heavy_keeper.c b/src/tags/heavy_keeper.c
index d2eaa2d..838edb2 100644
--- a/src/tags/heavy_keeper.c
+++ b/src/tags/heavy_keeper.c
@@ -116,23 +116,40 @@ void heavy_keeper_free(struct heavy_keeper *hk) {
/*********************************add and query*************************************/
/***********************************************************************************/
-bool if_need_to_decay(struct heavy_keeper *hk, struct Bucket *bucket, unsigned long long count) {
- if (bucket->count == 0) {
- return true;
- }
+const double DECAY_POW_TABLE[147] = { // 1.17 ^ exp, exp is in [0,146]
+ 1.0,0.8547008547009,0.7305135510264,0.6243705564328,0.5336500482332,0.456111152336,0.3898385917402,0.3331953775557,0.2847823739793,0.2434037384438,0.2080373832853,0.1778097293037,0.15197412761,
+ 0.1298924167607,0.1110191596245,0.0948881706192,0.0811010005293,0.0693170944695,0.0592453798884,0.0506370768277,0.0432795528442,0.036991070807,0.031616299835,0.0270224784915,0.0230961354628,0.0197402867204,
+ 0.0168720399319,0.0144205469504,0.0123252538037,0.0105344049605,0.0090037649235,0.0076955255756,0.0065773722868,0.0056216857153,0.0048048595857,0.0041067175946,0.0035100150381,0.0030000128531,0.0025641135497,
+ 0.0021915500424,0.0018731196944,0.0016009570038,0.0013683393194,0.0011695207859,0.0009995904153,0.0008543507823,0.0007302143438,0.0006241148238,0.0005334314733,0.0004559243362,0.0003896789198,0.0003330589058,
+ 0.0002846657315,0.000243304044,0.0002079521743,0.0001777369012,0.0001519118813,0.0001298392148,0.0001109736879,0.0000948493059,0.0000810677828,0.0000692887032,0.0000592211139,0.0000506163367,0.0000432618262,
+ 0.0000369759198,0.0000316033503,0.0000270114105,0.0000230866756,0.0000197322014,0.0000168651294,0.0000144146405,0.0000123202056,0.0000105300902,0.0000090000771,0.0000076923736,0.0000065746783,0.0000056193832,
+ 0.0000048028916,0.0000041050355,0.0000035085774,0.0000029987841,0.0000025630633,0.0000021906524,0.0000018723525,0.0000016003013,0.0000013677789,0.0000011690418,0.000000999181,0.0000008540009,0.0000007299153,
+ 0.0000006238592,0.000000533213,0.0000004557376,0.0000003895193,0.0000003329225,0.0000002845491,0.0000002432044,0.000000207867,0.0000001776641,0.0000001518497,0.000000129786,0.0000001109282,0.0000000948105,
+ 0.0000000810346,0.0000000692603,0.0000000591969,0.0000000505956,0.0000000432441,0.0000000369608,0.0000000315904,0.0000000270003,0.0000000230772,0.0000000197241,0.0000000168582,0.0000000144087,0.0000000123152,
+ 0.0000000105258,0.0000000089964,0.0000000076892,0.000000006572,0.0000000056171,0.0000000048009,0.0000000041034,0.0000000035071,0.0000000029976,0.000000002562,0.0000000021898,0.0000000018716,0.0000000015996,
+ 0.0000000013672,0.0000000011686,0.0000000009988,0.0000000008537,0.0000000007296,0.0000000006236,0.000000000533,0.0000000004556,0.0000000003894,0.0000000003328,0.0000000002844,0.0000000002431,0.0000000002078,
+ 0.0000000001776,0.0000000001518,0.0000000001297,0.0000000001109
+};
+
+bool if_need_to_decay(struct heavy_keeper *hk, const struct Bucket *bucket, unsigned long long count) {
if (count == 0) {
return false;
}
+ if (bucket->count < count) { // the exp is 0, so the decay rate is 1
+ return true;
+ }
- double exp = (double)bucket->count / count;
- if (exp > 250) { // decay_exp_rate ^ (-250) is very small, so regard the chance as 0.
+ unsigned long long exp = bucket->count / count;
+ if (exp > 146) { // 1.17 ^ 146 < 1 / RAND_MAX < rand_r / RAND_MAX, so there is no chance to decay
return false;
}
double r = (double)rand_r(&(hk->rand_state)) / (double)RAND_MAX;
- double b = hk->params.decay_exp_rate;
- double decay_rate = pow(b, -exp);
+ // double b = hk->params.decay_exp_rate;
+ // double decay_rate = pow(b, -exp);
+ // p->decay_exp_rate = 1.17 is fixed, search table to get result directly.
+ double decay_rate = DECAY_POW_TABLE[exp];
if (r < decay_rate) {
return true;
@@ -190,6 +207,11 @@ static int heavy_keeper_add_by_recording_popped_data(struct heavy_keeper *heavy_
struct sorted_set *summary = heavy_keeper->top_K_heap;
unsigned nMin = sorted_set_get_min_count(summary);
+ if (nMin == INVALID_COUNT) {
+ nMin = 0;
+ }
+ unsigned long long int old_cnt = sorted_set_get_count(summary, tag);
+ bool not_in_sorted_set = (old_cnt == INVALID_COUNT);
unsigned long long maxv = 0;
unsigned int FP = tag_hash_key_cal_hash_val(tag, FP_HASH_KEY);
@@ -201,7 +223,7 @@ static int heavy_keeper_add_by_recording_popped_data(struct heavy_keeper *heavy_
// The flows whose counts are both larger than nmin and not in min-heap must have the same xxhash value, and its FP stored in bucket represents another tag,
// In this case, the sketch won't be updated. This flow is expected to be taken into account in another array,
// where its FP is different from the one it should collided with, so that element flows won't be missed.
- if (bucket->count > nMin && sorted_set_get_cell_id(summary, tag) == -1) {
+ if (bucket->count > nMin && not_in_sorted_set) {
continue;
}
bucket->count = count_add(bucket->count, count);
@@ -221,8 +243,8 @@ static int heavy_keeper_add_by_recording_popped_data(struct heavy_keeper *heavy_
}
}
- unsigned long long int tmp_cnt = sorted_set_get_count(summary, tag);
- if (tmp_cnt == INVALID_COUNT) { // the flow is not in sorted set
+
+ if (not_in_sorted_set) {
if ((maxv - nMin <= count && maxv != nMin) || sorted_set_cardinality(summary) != heavy_keeper->K) {
assert(cell_id >= 0);
assert(tag != NULL);
@@ -243,8 +265,8 @@ static int heavy_keeper_add_by_recording_popped_data(struct heavy_keeper *heavy_
}
return -1; // the flow is not added to the sorted set
} else {
- if (maxv > tmp_cnt) {
- sorted_set_incrby(summary, tag, maxv - tmp_cnt);
+ if (maxv > old_cnt) {
+ sorted_set_incrby(summary, tag, maxv - old_cnt);
}
return 1; // no popped, but the tag definitely exists in the sorted set
}
diff --git a/src/tags/my_ut_hash.c b/src/tags/my_ut_hash.c
index 7963d2a..c2e6617 100644
--- a/src/tags/my_ut_hash.c
+++ b/src/tags/my_ut_hash.c
@@ -138,7 +138,7 @@ unsigned int cal_tag_hash_xxhash(const struct fieldstat_tag *tag, size_t n_tag,
return XXH32_digest(&state);
}
-XXH128_hash_t cal_tag_hash_xxhash128(const struct fieldstat_tag *tag, size_t n_tag, unsigned int seed)
+XXH128_hash_t cal_tag_hash_xxhash128_obsolete(const struct fieldstat_tag *tag, size_t n_tag, unsigned int seed)
{
XXH3_state_t state;
XXH3_128bits_reset_withSeed(&state, seed);
@@ -159,6 +159,137 @@ XXH128_hash_t cal_tag_hash_xxhash128(const struct fieldstat_tag *tag, size_t n_t
return ret;
}
+#define BIG_CONSTANT(x) (x##LLU)
+#define getblock(p, i) (p[i])
+#ifdef __GNUC__
+#define FORCE_INLINE __attribute__((always_inline)) inline
+#else
+#define FORCE_INLINE inline
+#endif
+static FORCE_INLINE uint64_t rotl64 ( uint64_t x, int8_t r )
+{
+ return (x << r) | (x >> (64 - r));
+}
+#define ROTL64(x,y) rotl64(x,y)
+#define C1 0x87c37b91114253d5LLU
+#define C2 0x4cf5ad432745937fLLU
+
+static FORCE_INLINE uint64_t fmix64 ( uint64_t k )
+{
+ k ^= k >> 33;
+ k *= BIG_CONSTANT(0xff51afd7ed558ccd);
+ k ^= k >> 33;
+ k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53);
+ k ^= k >> 33;
+
+ return k;
+}
+
+static FORCE_INLINE murmurhash_string(uint64_t *h1_io, uint64_t *h2_io, const char *key, const int len) {
+ uint64_t h1 = *h1_io;
+ uint64_t h2 = *h2_io;
+ const uint8_t * data = (const uint8_t*)key;
+ const int nblocks = len / 16;
+ int i;
+
+ // body
+ const uint64_t * blocks = (const uint64_t *)(data);
+
+ for(i = 0; i < nblocks; i++)
+ {
+ uint64_t k1 = getblock(blocks,i*2+0);
+ uint64_t k2 = getblock(blocks,i*2+1);
+
+ k1 *= C1; k1 = ROTL64(k1,31); k1 *= C2; h1 ^= k1;
+
+ h1 = ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729;
+
+ k2 *= C2; k2 = ROTL64(k2,33); k2 *= C1; h2 ^= k2;
+
+ h2 = ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5;
+ }
+
+ //----------
+ // tail
+
+ const uint8_t * tail = (const uint8_t*)(data + nblocks*16);
+
+ uint64_t k1 = 0;
+ uint64_t k2 = 0;
+
+ switch(len & 15)
+ {
+ case 15: k2 ^= (uint64_t)(tail[14]) << 48;
+ case 14: k2 ^= (uint64_t)(tail[13]) << 40;
+ case 13: k2 ^= (uint64_t)(tail[12]) << 32;
+ case 12: k2 ^= (uint64_t)(tail[11]) << 24;
+ case 11: k2 ^= (uint64_t)(tail[10]) << 16;
+ case 10: k2 ^= (uint64_t)(tail[ 9]) << 8;
+ case 9: k2 ^= (uint64_t)(tail[ 8]) << 0;
+ k2 *= C2; k2 = ROTL64(k2,33); k2 *= C1; h2 ^= k2;
+
+ case 8: k1 ^= (uint64_t)(tail[ 7]) << 56;
+ case 7: k1 ^= (uint64_t)(tail[ 6]) << 48;
+ case 6: k1 ^= (uint64_t)(tail[ 5]) << 40;
+ case 5: k1 ^= (uint64_t)(tail[ 4]) << 32;
+ case 4: k1 ^= (uint64_t)(tail[ 3]) << 24;
+ case 3: k1 ^= (uint64_t)(tail[ 2]) << 16;
+ case 2: k1 ^= (uint64_t)(tail[ 1]) << 8;
+ case 1: k1 ^= (uint64_t)(tail[ 0]) << 0;
+ k1 *= C1; k1 = ROTL64(k1,31); k1 *= C2; h1 ^= k1;
+ };
+
+ *h1_io = h1;
+ *h2_io = h2;
+}
+
+static FORCE_INLINE murmurhash_int64(uint64_t *h1_io, uint64_t h2, long long value) {
+ uint64_t h1 = *h1_io;
+ uint64_t k1 = value;
+
+ k1 *= C1; k1 = ROTL64(k1,31); k1 *= C2; h1 ^= k1;
+ h1 = ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729;
+
+ *h1_io = h1;
+}
+
+XXH128_hash_t my_murmurHash_128_tag(const struct fieldstat_tag *tag, size_t n_tag, unsigned int seed)
+{
+ uint64_t h1 = seed;
+ uint64_t h2 = seed;
+ struct fieldstat_tag *tmp_tag;
+
+ for (int i = 0; i < n_tag; i++) {
+ tmp_tag = tag + i;
+
+ murmurhash_string(&h1, &h2, tmp_tag->key, strlen(tmp_tag->key));
+ if (tmp_tag->type != TAG_CSTRING) {
+ murmurhash_int64(&h1, h2, tmp_tag->value_longlong); // sizeof(long long) == sizeof(double)
+ } else {
+ murmurhash_string(&h1, &h2, tmp_tag->value_str, strlen(tmp_tag->value_str));
+ }
+ }
+
+ // finalization
+ h1 += h2;
+ h2 += h1;
+
+ h1 = fmix64(h1);
+ h2 = fmix64(h2);
+
+ h1 += h2;
+ h2 += h1;
+
+ XXH128_hash_t ret;
+ ret.low64 = h1;
+ ret.high64 = h2;
+ return ret;
+}
+
+XXH128_hash_t cal_tag_hash_xxhash128(const struct fieldstat_tag *tag, size_t n_tag, unsigned int seed) {
+ return my_murmurHash_128_tag(tag, n_tag, seed);
+}
+
void tag_hash_key_init_with_fieldstat_tag(struct tag_hash_key *tag_key, const struct fieldstat_tag *src, size_t n_src, bool deep_copy)
{
tag_key->n_my_tag = n_src;
diff --git a/test/test_fuzz_test.cpp b/test/test_fuzz_test.cpp
index 46eabc0..32fbe6c 100644
--- a/test/test_fuzz_test.cpp
+++ b/test/test_fuzz_test.cpp
@@ -367,6 +367,64 @@ TEST(Fuzz_test, add_and_reset_with_randomly_generated_flows_and_randomly_chosen_
}
}
+
+// TEST(Fuzz_test, simple_one_for_perf)
+// {
+// const int CUBE_NUM = 5;
+// const int FLOW_NUM = 50000;
+// const int CELL_MAX = 50;
+// const int TEST_ROUND = 500000;
+// const int OUT_GAP = 10000;
+// struct fieldstat *master = fieldstat_new();
+
+// Fieldstat_tag_list_wrapper *shared_tags[CUBE_NUM];
+
+// // init cube
+// for (int i = 0; i < CUBE_NUM; i++) {
+// shared_tags[i] = new Fieldstat_tag_list_wrapper("shared_tag", i);
+// int cube_id = fieldstat_create_cube(master, shared_tags[i]->get_tag(), shared_tags[i]->get_tag_count(), SAMPLING_MODE_TOPK, CELL_MAX);
+// EXPECT_EQ(cube_id, i);
+// }
+// // init metric
+// fieldstat_register_counter(master, "topk");
+// // all the possible tags
+// Fieldstat_tag_list_wrapper *tag_list_wrapper[FLOW_NUM];
+// fill_with_elephant_flows(tag_list_wrapper, FLOW_NUM);
+// //all the possible operations
+// long long *rand_nums = new long long[TEST_ROUND];
+// for (int i = 0; i < TEST_ROUND; i++) {
+// rand_nums[i] = rand() % 1000;
+// }
+
+// struct fieldstat *instance = master;
+
+// clock_t start = clock();
+// int next_shared_tag_value = CUBE_NUM;
+// printf("press any key to start\n");
+// getchar();
+
+// for (int i = 0; i < TEST_ROUND; i++) {
+
+// const Fieldstat_tag_list_wrapper * tag = tag_list_wrapper[rand() % FLOW_NUM];
+// int cube_id = rand() % CUBE_NUM;
+
+// (void)fieldstat_counter_incrby(instance, cube_id, 0, tag->get_tag(), tag->get_tag_count(), rand_nums[i]);
+// }
+
+// clock_t end = clock();
+// printf("time: %lf\n", (double)(end - start) / CLOCKS_PER_SEC);
+
+// for (int i = 0; i < FLOW_NUM; i++) {
+// delete tag_list_wrapper[i];
+// }
+// for (int i = 0; i < CUBE_NUM; i++) {
+// delete shared_tags[i];
+// }
+// delete[] rand_nums;
+// fieldstat_free(master);
+// }
+
+
int main(int argc, char *argv[])
{
testing::InitGoogleTest(&argc, argv);