diff options
| author | chenzizhan <[email protected]> | 2024-06-25 14:52:28 +0800 |
|---|---|---|
| committer | chenzizhan <[email protected]> | 2024-06-25 14:52:28 +0800 |
| commit | 381d3f32479a780833efdad6ba4c515067aa2127 (patch) | |
| tree | aa39420599b4a95efe0000d829bb673df01578a0 | |
| parent | a04cb3a7a50913f842b100529c2a881daf73efd7 (diff) | |
performance: faster hash128, power by table, less calling of sorted_set_find_entry
| -rw-r--r-- | CMakeLists.txt | 3 | ||||
| -rw-r--r-- | src/metrics/metric.c | 24 | ||||
| -rw-r--r-- | src/tags/heavy_keeper.c | 48 | ||||
| -rw-r--r-- | src/tags/my_ut_hash.c | 133 | ||||
| -rw-r--r-- | test/test_fuzz_test.cpp | 58 |
5 files changed, 232 insertions, 34 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 4c2d6f3..2513d1e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,7 @@ cmake_minimum_required(VERSION 2.8.8...3.10)
message("-- CMAKE_C_COMPILER: ${CMAKE_C_COMPILER_ID}")
+message("compiling in: ${CMAKE_BUILD_TYPE} mode")
set(lib_name fieldstat4)
@@ -65,7 +66,7 @@ if (CMAKE_CXX_CPPCHECK) "--inline-suppr"
"--suppress=*:${PROJECT_SOURCE_DIR}/vendors/*"
"--suppress=*:${PROJECT_SOURCE_DIR}/test/utils.hpp"
- "--suppress=*:${PROJECT_SOURCE_DIR}/test/deps/*"
+ "--suppress=*:${PROJECT_SOURCE_DIR}/test/*"
"--suppress=*:${PROJECT_SOURCE_DIR}/test/unit_test_serialize.cpp"
"--suppress=*:${PROJECT_SOURCE_DIR}/src/logger/log_handle.c"
)
diff --git a/src/metrics/metric.c b/src/metrics/metric.c index 95b7200..fc7ba28 100644 --- a/src/metrics/metric.c +++ b/src/metrics/metric.c @@ -82,7 +82,7 @@ struct metric { /* -------------------------------------------------------------------------- */ /* id cell map */ /* -------------------------------------------------------------------------- */ -struct metric_measure_data *metric_find_one_cell(const struct metric *pthis, int id) { +static inline struct metric_measure_data *metric_find_one_cell(const struct metric *pthis, int id) { if (id >= pthis->max_n_array_item) { return NULL; } @@ -694,10 +694,10 @@ void metric_delete_cell(struct metric *pthis, int cell_id) /* metric operations */ /* -------------------------------------------------------------------------- */ -struct metric_measure_data *metric_find_or_new_cell(struct metric *pthis, int cell_id) +static inline struct metric_measure_data *metric_find_or_new_cell(struct metric *pthis, int cell_id) { struct metric_measure_data *data = metric_find_one_cell(pthis, cell_id); - + if (data == NULL) { data = pthis->scheme->new(pthis->info->paras); metric_add_one_cell(pthis, cell_id, data); @@ -713,30 +713,16 @@ struct metric *metric_counter_new(const char *name) return pthis; } -long long safe_add_longlong(long long a, long long b) -{ - if (a > 0 && b > LLONG_MAX - a) { - return LLONG_MAX; - } - if (a < 0 && b < LLONG_MIN - a) { - return LLONG_MIN; - } - return a + b; -} - void metric_counter_incrby(struct metric *pthis, int cell_id, long long value) { struct metric_measure_data *data = metric_find_or_new_cell(pthis, cell_id); - struct metric_counter_or_gauge *counter = data->counter; - counter->value = safe_add_longlong(counter->value, value); + data->counter->value += value; } int metric_counter_set(struct metric *pthis, int cell_id, long long value) { struct metric_measure_data *data = metric_find_or_new_cell(pthis, cell_id); - struct metric_counter_or_gauge *counter = data->counter; - - counter->value = value; + data->counter->value = value; return 0; } diff --git a/src/tags/heavy_keeper.c b/src/tags/heavy_keeper.c index d2eaa2d..838edb2 100644 --- a/src/tags/heavy_keeper.c +++ b/src/tags/heavy_keeper.c @@ -116,23 +116,40 @@ void heavy_keeper_free(struct heavy_keeper *hk) { /*********************************add and query*************************************/ /***********************************************************************************/ -bool if_need_to_decay(struct heavy_keeper *hk, struct Bucket *bucket, unsigned long long count) { - if (bucket->count == 0) { - return true; - } +const double DECAY_POW_TABLE[147] = { // 1.17 ^ exp, exp is in [0,146] + 1.0,0.8547008547009,0.7305135510264,0.6243705564328,0.5336500482332,0.456111152336,0.3898385917402,0.3331953775557,0.2847823739793,0.2434037384438,0.2080373832853,0.1778097293037,0.15197412761, + 0.1298924167607,0.1110191596245,0.0948881706192,0.0811010005293,0.0693170944695,0.0592453798884,0.0506370768277,0.0432795528442,0.036991070807,0.031616299835,0.0270224784915,0.0230961354628,0.0197402867204, + 0.0168720399319,0.0144205469504,0.0123252538037,0.0105344049605,0.0090037649235,0.0076955255756,0.0065773722868,0.0056216857153,0.0048048595857,0.0041067175946,0.0035100150381,0.0030000128531,0.0025641135497, + 0.0021915500424,0.0018731196944,0.0016009570038,0.0013683393194,0.0011695207859,0.0009995904153,0.0008543507823,0.0007302143438,0.0006241148238,0.0005334314733,0.0004559243362,0.0003896789198,0.0003330589058, + 0.0002846657315,0.000243304044,0.0002079521743,0.0001777369012,0.0001519118813,0.0001298392148,0.0001109736879,0.0000948493059,0.0000810677828,0.0000692887032,0.0000592211139,0.0000506163367,0.0000432618262, + 0.0000369759198,0.0000316033503,0.0000270114105,0.0000230866756,0.0000197322014,0.0000168651294,0.0000144146405,0.0000123202056,0.0000105300902,0.0000090000771,0.0000076923736,0.0000065746783,0.0000056193832, + 0.0000048028916,0.0000041050355,0.0000035085774,0.0000029987841,0.0000025630633,0.0000021906524,0.0000018723525,0.0000016003013,0.0000013677789,0.0000011690418,0.000000999181,0.0000008540009,0.0000007299153, + 0.0000006238592,0.000000533213,0.0000004557376,0.0000003895193,0.0000003329225,0.0000002845491,0.0000002432044,0.000000207867,0.0000001776641,0.0000001518497,0.000000129786,0.0000001109282,0.0000000948105, + 0.0000000810346,0.0000000692603,0.0000000591969,0.0000000505956,0.0000000432441,0.0000000369608,0.0000000315904,0.0000000270003,0.0000000230772,0.0000000197241,0.0000000168582,0.0000000144087,0.0000000123152, + 0.0000000105258,0.0000000089964,0.0000000076892,0.000000006572,0.0000000056171,0.0000000048009,0.0000000041034,0.0000000035071,0.0000000029976,0.000000002562,0.0000000021898,0.0000000018716,0.0000000015996, + 0.0000000013672,0.0000000011686,0.0000000009988,0.0000000008537,0.0000000007296,0.0000000006236,0.000000000533,0.0000000004556,0.0000000003894,0.0000000003328,0.0000000002844,0.0000000002431,0.0000000002078, + 0.0000000001776,0.0000000001518,0.0000000001297,0.0000000001109 +}; + +bool if_need_to_decay(struct heavy_keeper *hk, const struct Bucket *bucket, unsigned long long count) { if (count == 0) { return false; } + if (bucket->count < count) { // the exp is 0, so the decay rate is 1 + return true; + } - double exp = (double)bucket->count / count; - if (exp > 250) { // decay_exp_rate ^ (-250) is very small, so regard the chance as 0. + unsigned long long exp = bucket->count / count; + if (exp > 146) { // 1.17 ^ 146 < 1 / RAND_MAX < rand_r / RAND_MAX, so there is no chance to decay return false; } double r = (double)rand_r(&(hk->rand_state)) / (double)RAND_MAX; - double b = hk->params.decay_exp_rate; - double decay_rate = pow(b, -exp); + // double b = hk->params.decay_exp_rate; + // double decay_rate = pow(b, -exp); + // p->decay_exp_rate = 1.17 is fixed, search table to get result directly. + double decay_rate = DECAY_POW_TABLE[exp]; if (r < decay_rate) { return true; @@ -190,6 +207,11 @@ static int heavy_keeper_add_by_recording_popped_data(struct heavy_keeper *heavy_ struct sorted_set *summary = heavy_keeper->top_K_heap; unsigned nMin = sorted_set_get_min_count(summary); + if (nMin == INVALID_COUNT) { + nMin = 0; + } + unsigned long long int old_cnt = sorted_set_get_count(summary, tag); + bool not_in_sorted_set = (old_cnt == INVALID_COUNT); unsigned long long maxv = 0; unsigned int FP = tag_hash_key_cal_hash_val(tag, FP_HASH_KEY); @@ -201,7 +223,7 @@ static int heavy_keeper_add_by_recording_popped_data(struct heavy_keeper *heavy_ // The flows whose counts are both larger than nmin and not in min-heap must have the same xxhash value, and its FP stored in bucket represents another tag, // In this case, the sketch won't be updated. This flow is expected to be taken into account in another array, // where its FP is different from the one it should collided with, so that element flows won't be missed. - if (bucket->count > nMin && sorted_set_get_cell_id(summary, tag) == -1) { + if (bucket->count > nMin && not_in_sorted_set) { continue; } bucket->count = count_add(bucket->count, count); @@ -221,8 +243,8 @@ static int heavy_keeper_add_by_recording_popped_data(struct heavy_keeper *heavy_ } } - unsigned long long int tmp_cnt = sorted_set_get_count(summary, tag); - if (tmp_cnt == INVALID_COUNT) { // the flow is not in sorted set + + if (not_in_sorted_set) { if ((maxv - nMin <= count && maxv != nMin) || sorted_set_cardinality(summary) != heavy_keeper->K) { assert(cell_id >= 0); assert(tag != NULL); @@ -243,8 +265,8 @@ static int heavy_keeper_add_by_recording_popped_data(struct heavy_keeper *heavy_ } return -1; // the flow is not added to the sorted set } else { - if (maxv > tmp_cnt) { - sorted_set_incrby(summary, tag, maxv - tmp_cnt); + if (maxv > old_cnt) { + sorted_set_incrby(summary, tag, maxv - old_cnt); } return 1; // no popped, but the tag definitely exists in the sorted set } diff --git a/src/tags/my_ut_hash.c b/src/tags/my_ut_hash.c index 7963d2a..c2e6617 100644 --- a/src/tags/my_ut_hash.c +++ b/src/tags/my_ut_hash.c @@ -138,7 +138,7 @@ unsigned int cal_tag_hash_xxhash(const struct fieldstat_tag *tag, size_t n_tag, return XXH32_digest(&state); } -XXH128_hash_t cal_tag_hash_xxhash128(const struct fieldstat_tag *tag, size_t n_tag, unsigned int seed) +XXH128_hash_t cal_tag_hash_xxhash128_obsolete(const struct fieldstat_tag *tag, size_t n_tag, unsigned int seed) { XXH3_state_t state; XXH3_128bits_reset_withSeed(&state, seed); @@ -159,6 +159,137 @@ XXH128_hash_t cal_tag_hash_xxhash128(const struct fieldstat_tag *tag, size_t n_t return ret; } +#define BIG_CONSTANT(x) (x##LLU) +#define getblock(p, i) (p[i]) +#ifdef __GNUC__ +#define FORCE_INLINE __attribute__((always_inline)) inline +#else +#define FORCE_INLINE inline +#endif +static FORCE_INLINE uint64_t rotl64 ( uint64_t x, int8_t r ) +{ + return (x << r) | (x >> (64 - r)); +} +#define ROTL64(x,y) rotl64(x,y) +#define C1 0x87c37b91114253d5LLU +#define C2 0x4cf5ad432745937fLLU + +static FORCE_INLINE uint64_t fmix64 ( uint64_t k ) +{ + k ^= k >> 33; + k *= BIG_CONSTANT(0xff51afd7ed558ccd); + k ^= k >> 33; + k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53); + k ^= k >> 33; + + return k; +} + +static FORCE_INLINE murmurhash_string(uint64_t *h1_io, uint64_t *h2_io, const char *key, const int len) { + uint64_t h1 = *h1_io; + uint64_t h2 = *h2_io; + const uint8_t * data = (const uint8_t*)key; + const int nblocks = len / 16; + int i; + + // body + const uint64_t * blocks = (const uint64_t *)(data); + + for(i = 0; i < nblocks; i++) + { + uint64_t k1 = getblock(blocks,i*2+0); + uint64_t k2 = getblock(blocks,i*2+1); + + k1 *= C1; k1 = ROTL64(k1,31); k1 *= C2; h1 ^= k1; + + h1 = ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729; + + k2 *= C2; k2 = ROTL64(k2,33); k2 *= C1; h2 ^= k2; + + h2 = ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5; + } + + //---------- + // tail + + const uint8_t * tail = (const uint8_t*)(data + nblocks*16); + + uint64_t k1 = 0; + uint64_t k2 = 0; + + switch(len & 15) + { + case 15: k2 ^= (uint64_t)(tail[14]) << 48; + case 14: k2 ^= (uint64_t)(tail[13]) << 40; + case 13: k2 ^= (uint64_t)(tail[12]) << 32; + case 12: k2 ^= (uint64_t)(tail[11]) << 24; + case 11: k2 ^= (uint64_t)(tail[10]) << 16; + case 10: k2 ^= (uint64_t)(tail[ 9]) << 8; + case 9: k2 ^= (uint64_t)(tail[ 8]) << 0; + k2 *= C2; k2 = ROTL64(k2,33); k2 *= C1; h2 ^= k2; + + case 8: k1 ^= (uint64_t)(tail[ 7]) << 56; + case 7: k1 ^= (uint64_t)(tail[ 6]) << 48; + case 6: k1 ^= (uint64_t)(tail[ 5]) << 40; + case 5: k1 ^= (uint64_t)(tail[ 4]) << 32; + case 4: k1 ^= (uint64_t)(tail[ 3]) << 24; + case 3: k1 ^= (uint64_t)(tail[ 2]) << 16; + case 2: k1 ^= (uint64_t)(tail[ 1]) << 8; + case 1: k1 ^= (uint64_t)(tail[ 0]) << 0; + k1 *= C1; k1 = ROTL64(k1,31); k1 *= C2; h1 ^= k1; + }; + + *h1_io = h1; + *h2_io = h2; +} + +static FORCE_INLINE murmurhash_int64(uint64_t *h1_io, uint64_t h2, long long value) { + uint64_t h1 = *h1_io; + uint64_t k1 = value; + + k1 *= C1; k1 = ROTL64(k1,31); k1 *= C2; h1 ^= k1; + h1 = ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729; + + *h1_io = h1; +} + +XXH128_hash_t my_murmurHash_128_tag(const struct fieldstat_tag *tag, size_t n_tag, unsigned int seed) +{ + uint64_t h1 = seed; + uint64_t h2 = seed; + struct fieldstat_tag *tmp_tag; + + for (int i = 0; i < n_tag; i++) { + tmp_tag = tag + i; + + murmurhash_string(&h1, &h2, tmp_tag->key, strlen(tmp_tag->key)); + if (tmp_tag->type != TAG_CSTRING) { + murmurhash_int64(&h1, h2, tmp_tag->value_longlong); // sizeof(long long) == sizeof(double) + } else { + murmurhash_string(&h1, &h2, tmp_tag->value_str, strlen(tmp_tag->value_str)); + } + } + + // finalization + h1 += h2; + h2 += h1; + + h1 = fmix64(h1); + h2 = fmix64(h2); + + h1 += h2; + h2 += h1; + + XXH128_hash_t ret; + ret.low64 = h1; + ret.high64 = h2; + return ret; +} + +XXH128_hash_t cal_tag_hash_xxhash128(const struct fieldstat_tag *tag, size_t n_tag, unsigned int seed) { + return my_murmurHash_128_tag(tag, n_tag, seed); +} + void tag_hash_key_init_with_fieldstat_tag(struct tag_hash_key *tag_key, const struct fieldstat_tag *src, size_t n_src, bool deep_copy) { tag_key->n_my_tag = n_src; diff --git a/test/test_fuzz_test.cpp b/test/test_fuzz_test.cpp index 46eabc0..32fbe6c 100644 --- a/test/test_fuzz_test.cpp +++ b/test/test_fuzz_test.cpp @@ -367,6 +367,64 @@ TEST(Fuzz_test, add_and_reset_with_randomly_generated_flows_and_randomly_chosen_ } } + +// TEST(Fuzz_test, simple_one_for_perf) +// { +// const int CUBE_NUM = 5; +// const int FLOW_NUM = 50000; +// const int CELL_MAX = 50; +// const int TEST_ROUND = 500000; +// const int OUT_GAP = 10000; +// struct fieldstat *master = fieldstat_new(); + +// Fieldstat_tag_list_wrapper *shared_tags[CUBE_NUM]; + +// // init cube +// for (int i = 0; i < CUBE_NUM; i++) { +// shared_tags[i] = new Fieldstat_tag_list_wrapper("shared_tag", i); +// int cube_id = fieldstat_create_cube(master, shared_tags[i]->get_tag(), shared_tags[i]->get_tag_count(), SAMPLING_MODE_TOPK, CELL_MAX); +// EXPECT_EQ(cube_id, i); +// } +// // init metric +// fieldstat_register_counter(master, "topk"); +// // all the possible tags +// Fieldstat_tag_list_wrapper *tag_list_wrapper[FLOW_NUM]; +// fill_with_elephant_flows(tag_list_wrapper, FLOW_NUM); +// //all the possible operations +// long long *rand_nums = new long long[TEST_ROUND]; +// for (int i = 0; i < TEST_ROUND; i++) { +// rand_nums[i] = rand() % 1000; +// } + +// struct fieldstat *instance = master; + +// clock_t start = clock(); +// int next_shared_tag_value = CUBE_NUM; +// printf("press any key to start\n"); +// getchar(); + +// for (int i = 0; i < TEST_ROUND; i++) { + +// const Fieldstat_tag_list_wrapper * tag = tag_list_wrapper[rand() % FLOW_NUM]; +// int cube_id = rand() % CUBE_NUM; + +// (void)fieldstat_counter_incrby(instance, cube_id, 0, tag->get_tag(), tag->get_tag_count(), rand_nums[i]); +// } + +// clock_t end = clock(); +// printf("time: %lf\n", (double)(end - start) / CLOCKS_PER_SEC); + +// for (int i = 0; i < FLOW_NUM; i++) { +// delete tag_list_wrapper[i]; +// } +// for (int i = 0; i < CUBE_NUM; i++) { +// delete shared_tags[i]; +// } +// delete[] rand_nums; +// fieldstat_free(master); +// } + + int main(int argc, char *argv[]) { testing::InitGoogleTest(&argc, argv); |
