summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorchenzizhan <[email protected]>2024-06-25 15:38:52 +0800
committerchenzizhan <[email protected]>2024-06-25 15:38:52 +0800
commit329eac31807814fc0d976c8abd8852d424bbae0d (patch)
tree3c366077e9db4804bdb277ab584570c35eddf2e1 /src
parent381d3f32479a780833efdad6ba4c515067aa2127 (diff)
use more h2 in murmurhash, especilly when tag is short. remove redundant bucket finding if.
Diffstat (limited to 'src')
-rw-r--r--src/tags/heavy_keeper.c13
-rw-r--r--src/tags/my_ut_hash.c22
2 files changed, 16 insertions, 19 deletions
diff --git a/src/tags/heavy_keeper.c b/src/tags/heavy_keeper.c
index 838edb2..f0f6fc5 100644
--- a/src/tags/heavy_keeper.c
+++ b/src/tags/heavy_keeper.c
@@ -157,14 +157,9 @@ bool if_need_to_decay(struct heavy_keeper *hk, const struct Bucket *bucket, unsi
return false;
}
-struct Bucket *map_flow_id_hash_to_bucket(struct heavy_keeper *hk, int array_index, const struct tag_hash_key *tag, unsigned int already_hashed_flow_id) {
+struct Bucket *map_flow_id_hash_to_bucket(struct heavy_keeper *hk, int array_index, const struct tag_hash_key *tag) {
int w = hk->params.max_bucket_num;
- int Hsh;
- if (array_index == FP_HASH_KEY) {
- Hsh = already_hashed_flow_id % w;
- } else {
- Hsh = tag_hash_key_cal_hash_val(tag, array_index) % w;
- }
+ int Hsh = tag_hash_key_cal_hash_val(tag, array_index) % w;
return &(hk->sketch[array_index * w + Hsh]);
}
@@ -216,7 +211,7 @@ static int heavy_keeper_add_by_recording_popped_data(struct heavy_keeper *heavy_
unsigned int FP = tag_hash_key_cal_hash_val(tag, FP_HASH_KEY);
for (int j = 0; j < heavy_keeper->params.array_num; j++) {
- struct Bucket *bucket = map_flow_id_hash_to_bucket(heavy_keeper, j, tag, FP);
+ struct Bucket *bucket = map_flow_id_hash_to_bucket(heavy_keeper, j, tag);
if (bucket->finger_print == FP) {
// If a flow is not in the min-heap, then the estimated flow size should be no larger than nmin.
@@ -370,7 +365,7 @@ unsigned long long find_maxv_in_sketch(struct heavy_keeper *hk, const struct tag
unsigned long long maxv = 0;
for (int array_id = 0; array_id < hk->params.array_num; array_id++) {
- bucket = map_flow_id_hash_to_bucket(hk, array_id, tag, tag_hash_key_cal_hash_val(tag, FP_HASH_KEY)); // hk->sketch is the merge of two. So just check one
+ bucket = map_flow_id_hash_to_bucket(hk, array_id, tag); // hk->sketch is the merge of two. So just check one
if (bucket->finger_print == fp) {
maxv = my_max(maxv, bucket->count);
diff --git a/src/tags/my_ut_hash.c b/src/tags/my_ut_hash.c
index c2e6617..8123456 100644
--- a/src/tags/my_ut_hash.c
+++ b/src/tags/my_ut_hash.c
@@ -140,7 +140,7 @@ unsigned int cal_tag_hash_xxhash(const struct fieldstat_tag *tag, size_t n_tag,
XXH128_hash_t cal_tag_hash_xxhash128_obsolete(const struct fieldstat_tag *tag, size_t n_tag, unsigned int seed)
{
- XXH3_state_t state;
+ XXH3_state_t state = {0};
XXH3_128bits_reset_withSeed(&state, seed);
for (int i = 0; i < n_tag; i++)
@@ -185,7 +185,7 @@ static FORCE_INLINE uint64_t fmix64 ( uint64_t k )
return k;
}
-static FORCE_INLINE murmurhash_string(uint64_t *h1_io, uint64_t *h2_io, const char *key, const int len) {
+static FORCE_INLINE void murmurhash_string(uint64_t *h1_io, uint64_t *h2_io, const char *key, const int len) {
uint64_t h1 = *h1_io;
uint64_t h2 = *h2_io;
const uint8_t * data = (const uint8_t*)key;
@@ -243,28 +243,30 @@ static FORCE_INLINE murmurhash_string(uint64_t *h1_io, uint64_t *h2_io, const ch
*h2_io = h2;
}
-static FORCE_INLINE murmurhash_int64(uint64_t *h1_io, uint64_t h2, long long value) {
- uint64_t h1 = *h1_io;
- uint64_t k1 = value;
+static FORCE_INLINE void murmurhash_int64(uint64_t *h2_io, uint64_t h1, long long value) {
+ // uint64_t h1 = *h1_io;
+ uint64_t h2 = *h2_io;
+ uint64_t k2 = value;
- k1 *= C1; k1 = ROTL64(k1,31); k1 *= C2; h1 ^= k1;
- h1 = ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729;
+ k2 *= C2; k2 = ROTL64(k2,33); k2 *= C1; h2 ^= k2;
+ h2 = ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5;
- *h1_io = h1;
+ // *h1_io = h1;
+ *h2_io = h2;
}
XXH128_hash_t my_murmurHash_128_tag(const struct fieldstat_tag *tag, size_t n_tag, unsigned int seed)
{
uint64_t h1 = seed;
uint64_t h2 = seed;
- struct fieldstat_tag *tmp_tag;
+ const struct fieldstat_tag *tmp_tag;
for (int i = 0; i < n_tag; i++) {
tmp_tag = tag + i;
murmurhash_string(&h1, &h2, tmp_tag->key, strlen(tmp_tag->key));
if (tmp_tag->type != TAG_CSTRING) {
- murmurhash_int64(&h1, h2, tmp_tag->value_longlong); // sizeof(long long) == sizeof(double)
+ murmurhash_int64(&h2, h1, tmp_tag->value_longlong); // sizeof(long long) == sizeof(double)
} else {
murmurhash_string(&h1, &h2, tmp_tag->value_str, strlen(tmp_tag->value_str));
}