diff options
| author | chenzizhan <[email protected]> | 2023-11-22 18:27:03 +0800 |
|---|---|---|
| committer | chenzizhan <[email protected]> | 2023-11-22 18:27:03 +0800 |
| commit | 30437a0f2756671ba658311c0f179769f2b4e5ea (patch) | |
| tree | 9bc5d67473ba7028a3a2e6699e1dba3b9c30390c /src | |
| parent | d852831c317699ff9a2a3b98dd32364dbf43d249 (diff) | |
faster hashfind
Diffstat (limited to 'src')
| -rw-r--r-- | src/tags/heavy_keeper.c | 8 | ||||
| -rw-r--r-- | src/tags/my_ut_hash.c | 96 | ||||
| -rw-r--r-- | src/tags/my_ut_hash.h | 2 |
3 files changed, 30 insertions, 76 deletions
diff --git a/src/tags/heavy_keeper.c b/src/tags/heavy_keeper.c index 2903218..7dae95b 100644 --- a/src/tags/heavy_keeper.c +++ b/src/tags/heavy_keeper.c @@ -142,7 +142,7 @@ struct Bucket *map_flow_id_hash_to_bucket(struct heavy_keeper *hk, int array_ind if (array_index == FP_HASH_KEY) { Hsh = already_hashed_flow_id % w; } else { - Hsh = tag_hash_key_cal_fnv_hash(tag, array_index) % w; + Hsh = tag_hash_key_cal_hash_val(tag, array_index) % w; } return &(hk->sketch[array_index * w + Hsh]); @@ -183,7 +183,7 @@ static int heavy_keeper_add_by_recording_popped_data(struct heavy_keeper *heavy_ unsigned nMin = sorted_set_get_min_count(summary); unsigned maxv = 0; - unsigned int FP = tag_hash_key_cal_fnv_hash(tag, FP_HASH_KEY); + unsigned int FP = tag_hash_key_cal_hash_val(tag, FP_HASH_KEY); for (int j = 0; j < heavy_keeper->params.array_num; j++) { struct Bucket *bucket = map_flow_id_hash_to_bucket(heavy_keeper, j, tag, FP); @@ -330,11 +330,11 @@ static void heavy_keeper_merge_sketch(struct heavy_keeper *dest, const struct he unsigned find_maxv_in_sketch(struct heavy_keeper *hk, const struct tag_hash_key *tag) { struct Bucket *bucket; - unsigned fp = tag_hash_key_cal_fnv_hash(tag, FP_HASH_KEY); + unsigned fp = tag_hash_key_cal_hash_val(tag, FP_HASH_KEY); unsigned maxv = 0; for (int array_id = 0; array_id < hk->params.array_num; array_id++) { - bucket = map_flow_id_hash_to_bucket(hk, array_id, tag, tag_hash_key_cal_fnv_hash(tag, FP_HASH_KEY)); // hk->sketch is the merge of two. So just check one + bucket = map_flow_id_hash_to_bucket(hk, array_id, tag, tag_hash_key_cal_hash_val(tag, FP_HASH_KEY)); // hk->sketch is the merge of two. So just check one if (bucket->finger_print == fp) { maxv = my_max(maxv, bucket->count); diff --git a/src/tags/my_ut_hash.c b/src/tags/my_ut_hash.c index 13051bf..0e166c8 100644 --- a/src/tags/my_ut_hash.c +++ b/src/tags/my_ut_hash.c @@ -33,35 +33,27 @@ int tag_hash_key_cmp(const struct tag_hash_key *a, const struct tag_hash_key *b) if (a->n_my_tag != b->n_my_tag) { return 1; } - for (size_t i = 0; i < a->n_my_tag; i++) { - const struct fieldstat_tag *tag_a = &a->tags[i]; - const struct fieldstat_tag *tag_b = &b->tags[i]; - if (strcmp(tag_a->key, tag_b->key) != 0) { - return 1; - } - if (tag_a->type != tag_b->type) { - return 1; - } - switch (tag_a->type) { - case TAG_INTEGER: - if (tag_a->value_longlong != tag_b->value_longlong) { - return 1; - } - break; - case TAG_DOUBLE: - if (tag_a->value_double != tag_b->value_double) { - return 1; - } - break; - case TAG_CSTRING: - if (strcmp(tag_a->value_str, tag_b->value_str) != 0) { - return 1; - } - break; - default: - assert(0); - break; + size_t tmp_tag_n = a->n_my_tag; + const struct fieldstat_tag *tag_a = a->tags; + const struct fieldstat_tag *tag_b = b->tags; + for (size_t i = 0; i < tmp_tag_n; i++) { + if (tag_a->type == TAG_CSTRING) { + if (strcmp(tag_a->value_str, tag_b->value_str) != 0) { + return 1; + } + } else { + if (tag_a->value_longlong != tag_b->value_longlong) { // sizeof(long long) == sizeof(double) + return 1; + } } + + // its hardly possible that the key is different while the value is the same. What's more, the tags are in the same hash bucket. So it's not necessary to compare the key. + // if (strcmp(tag_a->key, tag_b->key) != 0) { + // return 1; + // } + + tag_a++; + tag_b++; } return 0; } @@ -157,35 +149,7 @@ unsigned int fnv_hash_calculator_get_hashv(const struct fnv_hash_calculator *cal return calculator->hashv; } -unsigned int cal_tag_hash(struct fieldstat_tag *tags, size_t n_tag, unsigned int seed) -{ - struct fnv_hash_calculator calculator; - fnv_hash_calculator_init(&calculator, seed); - - for (int i = 0; i < n_tag; i++) - { - fnv_hash_calculator_update(&calculator, tags[i].key, strlen(tags[i].key)); - switch (tags[i].type) - { - case TAG_INTEGER: - fnv_hash_calculator_update(&calculator, (char *)&tags[i].value_longlong, sizeof(long long)); - break; - case TAG_DOUBLE: - fnv_hash_calculator_update(&calculator, (char *)&tags[i].value_double, sizeof(double)); - break; - case TAG_CSTRING: - fnv_hash_calculator_update(&calculator, tags[i].value_str, strlen(tags[i].value_str)); - break; - default: - assert(0); - break; - } - } - - return fnv_hash_calculator_get_hashv(&calculator); -} - -unsigned int cal_tag_hash_xxhash(struct fieldstat_tag *tag, size_t n_tag, unsigned int seed) +unsigned int cal_tag_hash_xxhash(const struct fieldstat_tag *tag, size_t n_tag, unsigned int seed) { XXH32_state_t state; XXH32_reset(&state, seed); @@ -193,20 +157,11 @@ unsigned int cal_tag_hash_xxhash(struct fieldstat_tag *tag, size_t n_tag, unsign for (int i = 0; i < n_tag; i++) { XXH32_update(&state, (const xxh_u8 *)tag[i].key, strlen(tag[i].key)); - switch (tag[i].type) - { - case TAG_INTEGER: + + if (tag[i].type != TAG_CSTRING) { // sizeof(long long) == sizeof(double) XXH32_update(&state, (const xxh_u8 *)&tag[i].value_longlong, sizeof(long long)); - break; - case TAG_DOUBLE: - XXH32_update(&state, (const xxh_u8 *)&tag[i].value_double, sizeof(double)); - break; - case TAG_CSTRING: + } else { XXH32_update(&state, (const xxh_u8 *)tag[i].value_str, strlen(tag[i].value_str)); - break; - default: - assert(0); - break; } } @@ -250,7 +205,6 @@ void tag_hash_key_store(struct tag_hash_key *tag_key) tag_key->tags = new_tags; } - void tag_hash_key_convert_to_fieldstat_tag(const struct tag_hash_key *tag_key, struct fieldstat_tag **out, size_t *n_out) { size_t ret_n_tag = tag_key->n_my_tag; @@ -296,7 +250,7 @@ void tag_hash_key_free(struct tag_hash_key *tag_key) } // FNV-1a hash (http://www.isthe.com/chongo/tech/comp/fnv/) -unsigned tag_hash_key_cal_fnv_hash(const struct tag_hash_key *tag, unsigned seed) { +unsigned tag_hash_key_cal_hash_val(const struct tag_hash_key *tag, unsigned seed) { // return cal_tag_hash(tag->tags, tag->n_my_tag, seed); return cal_tag_hash_xxhash(tag->tags, tag->n_my_tag, seed); } diff --git a/src/tags/my_ut_hash.h b/src/tags/my_ut_hash.h index d641399..0cb0e49 100644 --- a/src/tags/my_ut_hash.h +++ b/src/tags/my_ut_hash.h @@ -15,7 +15,7 @@ struct tag_hash_key *tag_hash_key_construct_with_fieldstat_tag(const struct fiel void tag_hash_key_convert_to_fieldstat_tag(const struct tag_hash_key *tag_key, struct fieldstat_tag **out, size_t *n_out); struct tag_hash_key *tag_hash_key_copy(const struct tag_hash_key *src); // Deep copy void tag_hash_key_free(struct tag_hash_key *tag_key); -unsigned tag_hash_key_cal_fnv_hash(const struct tag_hash_key *tag, unsigned seed); +unsigned tag_hash_key_cal_hash_val(const struct tag_hash_key *tag, unsigned seed); void tag_hash_key_store(struct tag_hash_key *tag_key); // let the shallow copy of fieldstat_tag in tag_key to be deep copy, if they are shallow copy void fieldtag_list_free(struct fieldstat_tag *tags, size_t n_tags); |
