diff options
| author | chenzizhan <[email protected]> | 2024-06-25 15:38:52 +0800 |
|---|---|---|
| committer | chenzizhan <[email protected]> | 2024-06-25 15:38:52 +0800 |
| commit | 329eac31807814fc0d976c8abd8852d424bbae0d (patch) | |
| tree | 3c366077e9db4804bdb277ab584570c35eddf2e1 /src | |
| parent | 381d3f32479a780833efdad6ba4c515067aa2127 (diff) | |
use more h2 in murmurhash, especilly when tag is short. remove redundant bucket finding if.
Diffstat (limited to 'src')
| -rw-r--r-- | src/tags/heavy_keeper.c | 13 | ||||
| -rw-r--r-- | src/tags/my_ut_hash.c | 22 |
2 files changed, 16 insertions, 19 deletions
diff --git a/src/tags/heavy_keeper.c b/src/tags/heavy_keeper.c index 838edb2..f0f6fc5 100644 --- a/src/tags/heavy_keeper.c +++ b/src/tags/heavy_keeper.c @@ -157,14 +157,9 @@ bool if_need_to_decay(struct heavy_keeper *hk, const struct Bucket *bucket, unsi return false; } -struct Bucket *map_flow_id_hash_to_bucket(struct heavy_keeper *hk, int array_index, const struct tag_hash_key *tag, unsigned int already_hashed_flow_id) { +struct Bucket *map_flow_id_hash_to_bucket(struct heavy_keeper *hk, int array_index, const struct tag_hash_key *tag) { int w = hk->params.max_bucket_num; - int Hsh; - if (array_index == FP_HASH_KEY) { - Hsh = already_hashed_flow_id % w; - } else { - Hsh = tag_hash_key_cal_hash_val(tag, array_index) % w; - } + int Hsh = tag_hash_key_cal_hash_val(tag, array_index) % w; return &(hk->sketch[array_index * w + Hsh]); } @@ -216,7 +211,7 @@ static int heavy_keeper_add_by_recording_popped_data(struct heavy_keeper *heavy_ unsigned int FP = tag_hash_key_cal_hash_val(tag, FP_HASH_KEY); for (int j = 0; j < heavy_keeper->params.array_num; j++) { - struct Bucket *bucket = map_flow_id_hash_to_bucket(heavy_keeper, j, tag, FP); + struct Bucket *bucket = map_flow_id_hash_to_bucket(heavy_keeper, j, tag); if (bucket->finger_print == FP) { // If a flow is not in the min-heap, then the estimated flow size should be no larger than nmin. @@ -370,7 +365,7 @@ unsigned long long find_maxv_in_sketch(struct heavy_keeper *hk, const struct tag unsigned long long maxv = 0; for (int array_id = 0; array_id < hk->params.array_num; array_id++) { - bucket = map_flow_id_hash_to_bucket(hk, array_id, tag, tag_hash_key_cal_hash_val(tag, FP_HASH_KEY)); // hk->sketch is the merge of two. So just check one + bucket = map_flow_id_hash_to_bucket(hk, array_id, tag); // hk->sketch is the merge of two. So just check one if (bucket->finger_print == fp) { maxv = my_max(maxv, bucket->count); diff --git a/src/tags/my_ut_hash.c b/src/tags/my_ut_hash.c index c2e6617..8123456 100644 --- a/src/tags/my_ut_hash.c +++ b/src/tags/my_ut_hash.c @@ -140,7 +140,7 @@ unsigned int cal_tag_hash_xxhash(const struct fieldstat_tag *tag, size_t n_tag, XXH128_hash_t cal_tag_hash_xxhash128_obsolete(const struct fieldstat_tag *tag, size_t n_tag, unsigned int seed) { - XXH3_state_t state; + XXH3_state_t state = {0}; XXH3_128bits_reset_withSeed(&state, seed); for (int i = 0; i < n_tag; i++) @@ -185,7 +185,7 @@ static FORCE_INLINE uint64_t fmix64 ( uint64_t k ) return k; } -static FORCE_INLINE murmurhash_string(uint64_t *h1_io, uint64_t *h2_io, const char *key, const int len) { +static FORCE_INLINE void murmurhash_string(uint64_t *h1_io, uint64_t *h2_io, const char *key, const int len) { uint64_t h1 = *h1_io; uint64_t h2 = *h2_io; const uint8_t * data = (const uint8_t*)key; @@ -243,28 +243,30 @@ static FORCE_INLINE murmurhash_string(uint64_t *h1_io, uint64_t *h2_io, const ch *h2_io = h2; } -static FORCE_INLINE murmurhash_int64(uint64_t *h1_io, uint64_t h2, long long value) { - uint64_t h1 = *h1_io; - uint64_t k1 = value; +static FORCE_INLINE void murmurhash_int64(uint64_t *h2_io, uint64_t h1, long long value) { + // uint64_t h1 = *h1_io; + uint64_t h2 = *h2_io; + uint64_t k2 = value; - k1 *= C1; k1 = ROTL64(k1,31); k1 *= C2; h1 ^= k1; - h1 = ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729; + k2 *= C2; k2 = ROTL64(k2,33); k2 *= C1; h2 ^= k2; + h2 = ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5; - *h1_io = h1; + // *h1_io = h1; + *h2_io = h2; } XXH128_hash_t my_murmurHash_128_tag(const struct fieldstat_tag *tag, size_t n_tag, unsigned int seed) { uint64_t h1 = seed; uint64_t h2 = seed; - struct fieldstat_tag *tmp_tag; + const struct fieldstat_tag *tmp_tag; for (int i = 0; i < n_tag; i++) { tmp_tag = tag + i; murmurhash_string(&h1, &h2, tmp_tag->key, strlen(tmp_tag->key)); if (tmp_tag->type != TAG_CSTRING) { - murmurhash_int64(&h1, h2, tmp_tag->value_longlong); // sizeof(long long) == sizeof(double) + murmurhash_int64(&h2, h1, tmp_tag->value_longlong); // sizeof(long long) == sizeof(double) } else { murmurhash_string(&h1, &h2, tmp_tag->value_str, strlen(tmp_tag->value_str)); } |
