diff options
| author | chenzizhan <[email protected]> | 2023-07-28 16:12:16 +0800 |
|---|---|---|
| committer | chenzizhan <[email protected]> | 2023-07-28 16:12:16 +0800 |
| commit | 40770d053f0c8c478800f652e2e7dbb61e7f264e (patch) | |
| tree | 95cd32f976ceac9d32068b9a0c37939894b5fbf0 | |
| parent | c188b95d18280be7449356fd429a0e8635fc78a5 (diff) | |
test tag key hash
| -rw-r--r-- | test/CMakeLists.txt | 3 | ||||
| -rw-r--r-- | test/unit_test_cell_manager.cpp | 162 | ||||
| -rw-r--r-- | test/unit_test_fast_hash.cpp | 179 |
3 files changed, 136 insertions, 208 deletions
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 00c9896..d6f456a 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -51,5 +51,4 @@ add_unit_test(test_metric_hll) add_unit_test(test_performance)
add_unit_test(test_reset_and_delete_cube)
add_unit_test(test_serialize)
-# add_unit_test(unit_test_cell_manager)
-# add_unit_test(unit_test_fast_hash)
\ No newline at end of file +add_unit_test(unit_test_cell_manager)
\ No newline at end of file diff --git a/test/unit_test_cell_manager.cpp b/test/unit_test_cell_manager.cpp index 1b3656a..d5e6270 100644 --- a/test/unit_test_cell_manager.cpp +++ b/test/unit_test_cell_manager.cpp @@ -13,21 +13,22 @@ using namespace std; -struct tag_hash_key *gen_key(const char *key, int value) +struct tag_hash_key *test_gen_tag_key(const char *key, int value) { - const char *value_str = to_string(value).c_str(); struct fieldstat_tag tag = { .key = key, .type = TAG_CSTRING, - {.value_str = value_str}, + {.value_str = strdup(to_string(value).c_str())}, }; struct tag_hash_key *tag_key = tag_hash_key_construct_with_fieldstat_tag(&tag, 1); + free((void *)tag.value_str); + return tag_key; } -double cal_accuracy(vector<struct tag_hash_key *> &expected_keys, vector<struct tag_hash_key *> &test_result) { +double cal_accuracy_with_tags(vector<struct tag_hash_key *> &expected_keys, vector<struct tag_hash_key *> &test_result) { map<string, int> countMap; for (size_t i = 0; i < expected_keys.size(); i++) { std::string key = tag_hash_key_get_compound_key(expected_keys[i]); @@ -46,9 +47,8 @@ double cal_accuracy(vector<struct tag_hash_key *> &expected_keys, vector<struct myset.insert(countVector[i].first); min_in_max_count = countVector[i].second; } - int last_cnt = min_in_max_count; - while (last_cnt == min_in_max_count && i < countVector.size()) { - if (countVector[i].second != last_cnt) { + while (i < countVector.size()) { + if (countVector[i].second != min_in_max_count) { break; } myset.insert(countVector[i].first); @@ -66,7 +66,7 @@ double cal_accuracy(vector<struct tag_hash_key *> &expected_keys, vector<struct return accuracy; } -vector<tag_hash_key *> test_query_cell_manager_content(struct cell_manager *cm) +vector<tag_hash_key *> test_query_cell_manager_content(const struct cell_manager *cm) { int ret_len; const struct tag_hash_key **dump_ret = cell_manager_dump(cm, &ret_len); @@ -76,7 +76,6 @@ vector<tag_hash_key *> test_query_cell_manager_content(struct cell_manager *cm) continue; } test_result.push_back((struct tag_hash_key *)dump_ret[i]); - printf("query content, id: %d, key: %s\n", i, tag_hash_key_get_compound_key(dump_ret[i])); } return test_result; @@ -91,10 +90,10 @@ TEST(unit_test_cell_manager, topk_add_and_query_accuracy) for (int i = 0; i < TEST_ROUND; i++) { if (rand()) { - struct tag_hash_key *key = gen_key("key", rand() % 10); + struct tag_hash_key *key = test_gen_tag_key("key", rand() % 10); keys.push_back(key); } else { - struct tag_hash_key *key = gen_key("key", rand() % 1000); + struct tag_hash_key *key = test_gen_tag_key("key", rand() % 1000); keys.push_back(key); } } @@ -107,7 +106,7 @@ TEST(unit_test_cell_manager, topk_add_and_query_accuracy) vector<tag_hash_key *> test_result = test_query_cell_manager_content(cm); EXPECT_EQ(test_result.size(), 10); - double accuracy = cal_accuracy(keys, test_result); + double accuracy = cal_accuracy_with_tags(keys, test_result); EXPECT_NEAR(accuracy, 1.0, 0.01); cell_manager_free(cm); @@ -122,11 +121,11 @@ TEST(unit_test_cell_manager, merge_topk_given_K_large_enough) struct cell_manager *cm2 = cell_manager_new(SAMPLING_MODE_TOPK, 10); vector<struct tag_hash_key *> keys; - keys.push_back(gen_key("key_share", 1)); - keys.push_back(gen_key("key_1", 1)); - keys.push_back(gen_key("key_1", 2)); - keys.push_back(gen_key("key_share", 1)); - keys.push_back(gen_key("key_2", 1)); + keys.push_back(test_gen_tag_key("key_share", 1)); + keys.push_back(test_gen_tag_key("key_1", 1)); + keys.push_back(test_gen_tag_key("key_1", 2)); + keys.push_back(test_gen_tag_key("key_share", 1)); + keys.push_back(test_gen_tag_key("key_2", 1)); int pop_dummy; int exist_dummy; @@ -163,7 +162,7 @@ TEST(unit_test_cell_manager, merge_topk_given_K_large_enough) EXPECT_EQ(cell_id_old[1], cell_id_2[1]); // key_2 in cm2 auto test_result = test_query_cell_manager_content(cm1); - double accuracy = cal_accuracy(keys, test_result); + double accuracy = cal_accuracy_with_tags(keys, test_result); EXPECT_NEAR(accuracy, 1.0, 0.01); EXPECT_EQ(cell_manager_get_count_by_tag(cm1, keys[0]), 2); // key_share merged once @@ -185,9 +184,9 @@ TEST(unit_test_cell_manager, merge_topk_to_empty) struct cell_manager *cm2 = cell_manager_new(SAMPLING_MODE_TOPK, 10); vector<struct tag_hash_key *> keys; - keys.push_back(gen_key("key_1", 1)); - keys.push_back(gen_key("key_1", 1)); - keys.push_back(gen_key("key_1", 2)); + keys.push_back(test_gen_tag_key("key_1", 1)); + keys.push_back(test_gen_tag_key("key_1", 1)); + keys.push_back(test_gen_tag_key("key_1", 2)); int pop_dummy; int exist_dummy; @@ -236,15 +235,15 @@ TEST(unit_test_cell_manager, merge_topk_to_full_one) struct cell_manager *cm2 = cell_manager_new(SAMPLING_MODE_TOPK, 10); vector<struct tag_hash_key *> keys1; - keys1.push_back(gen_key("key_1", 1)); - keys1.push_back(gen_key("key_1", 2)); - keys1.push_back(gen_key("key_shared", 1)); + keys1.push_back(test_gen_tag_key("key_1", 1)); + keys1.push_back(test_gen_tag_key("key_1", 2)); + keys1.push_back(test_gen_tag_key("key_shared", 1)); vector<struct tag_hash_key *> keys2; for (int i = 0; i < 9; i++) { - keys2.push_back(gen_key("key_2", i)); + keys2.push_back(test_gen_tag_key("key_2", i)); } - keys2.push_back(gen_key("key_shared", 1)); + keys2.push_back(test_gen_tag_key("key_shared", 1)); int pop_dummy; int exist_dummy; @@ -279,7 +278,7 @@ TEST(unit_test_cell_manager, merge_topk_to_full_one) auto test_result = test_query_cell_manager_content(cm2); // join keys2 to keys1 keys1.insert(keys1.end(), std::make_move_iterator(keys2.begin()), std::make_move_iterator(keys2.end())); - double accuracy = cal_accuracy(keys1, test_result); + double accuracy = cal_accuracy_with_tags(keys1, test_result); EXPECT_NEAR(accuracy, 1.0, 0.01); free(cell_id_popped); @@ -293,6 +292,115 @@ TEST(unit_test_cell_manager, merge_topk_to_full_one) // all keys are moved to cm1, so no need to free keys2 } +void add_key_and_assert_find_result(struct cell_manager *cm, const struct tag_hash_key *key) +{ + int pop_dummy; + int exist_dummy; + int cell_id = cell_manager_add_cell_topk(cm, key, 1234, &pop_dummy, &exist_dummy); + EXPECT_EQ(cell_id, 0); + EXPECT_EQ(cell_manager_get_count_by_tag(cm, key), 1234); + const struct tag_hash_key *key_get = cell_manager_get_tag_by_cell_id(cm, cell_id); + EXPECT_STREQ(tag_hash_key_get_compound_key(key_get), tag_hash_key_get_compound_key(key)); +} + +void add_key_and_assert_find_result_comprehensive(struct cell_manager *cm, const struct tag_hash_key *key) +{ + int cell_id = cell_manager_add_cell(cm, key); + EXPECT_EQ(cell_id, 0); + const struct tag_hash_key *key_get = cell_manager_get_tag_by_cell_id(cm, cell_id); + EXPECT_STREQ(tag_hash_key_get_compound_key(key_get), tag_hash_key_get_compound_key(key)); +} + +TEST(unit_test_cell_manager, add_with_key_length_is_1_int_type_topk) +{ + struct cell_manager *cm = cell_manager_new(SAMPLING_MODE_TOPK, 10); + struct tag_hash_key *key = tag_hash_key_construct_with_fieldstat_tag(&TEST_TAG_INT, 1); + + add_key_and_assert_find_result(cm, key); + + cell_manager_free(cm); + tag_hash_key_free(key); +} + +TEST(unit_test_cell_manager, add_with_key_length_is_1_double_type_topk) +{ + struct tag_hash_key *key = tag_hash_key_construct_with_fieldstat_tag(&TEST_TAG_DOUBLE, 1); + struct cell_manager *cm = cell_manager_new(SAMPLING_MODE_TOPK, 10); + + add_key_and_assert_find_result(cm, key); + + cell_manager_free(cm); + tag_hash_key_free(key); +} + +TEST(unit_test_cell_manager, add_with_key_length_is_1_string_type_topk) +{ + struct tag_hash_key *key = tag_hash_key_construct_with_fieldstat_tag(&TEST_TAG_STRING, 1); + struct cell_manager *cm = cell_manager_new(SAMPLING_MODE_TOPK, 10); + + add_key_and_assert_find_result(cm, key); + + cell_manager_free(cm); + tag_hash_key_free(key); +} + +TEST(unit_test_cell_manager, add_with_key_length_is_3_of_diff_types_topk) +{ + const struct fieldstat_tag tags[3] = {TEST_TAG_INT, TEST_TAG_STRING, TEST_TAG_DOUBLE}; + struct tag_hash_key *key = tag_hash_key_construct_with_fieldstat_tag(tags, 3); + struct cell_manager *cm = cell_manager_new(SAMPLING_MODE_TOPK, 10); + + add_key_and_assert_find_result(cm, key); + + cell_manager_free(cm); + tag_hash_key_free(key); +} + +TEST(unit_test_cell_manager, add_with_key_length_is_1_int_type_comprehensive) +{ + struct cell_manager *cm = cell_manager_new(SAMPLING_MODE_COMPREHENSIVE, 10); + struct tag_hash_key *key = tag_hash_key_construct_with_fieldstat_tag(&TEST_TAG_INT, 1); + + add_key_and_assert_find_result_comprehensive(cm, key); + + cell_manager_free(cm); + tag_hash_key_free(key); +} + +TEST(unit_test_cell_manager, add_with_key_length_is_1_double_type_comprehensive) +{ + struct tag_hash_key *key = tag_hash_key_construct_with_fieldstat_tag(&TEST_TAG_DOUBLE, 1); + struct cell_manager *cm = cell_manager_new(SAMPLING_MODE_COMPREHENSIVE, 10); + + add_key_and_assert_find_result_comprehensive(cm, key); + + cell_manager_free(cm); + tag_hash_key_free(key); +} + +TEST(unit_test_cell_manager, add_with_key_length_is_1_string_type_comprehensive) +{ + struct tag_hash_key *key = tag_hash_key_construct_with_fieldstat_tag(&TEST_TAG_STRING, 1); + struct cell_manager *cm = cell_manager_new(SAMPLING_MODE_COMPREHENSIVE, 10); + + add_key_and_assert_find_result_comprehensive(cm, key); + + cell_manager_free(cm); + tag_hash_key_free(key); +} + +TEST(unit_test_cell_manager, add_with_key_length_is_3_of_diff_types_comprehensive) +{ + const struct fieldstat_tag tags[3] = {TEST_TAG_INT, TEST_TAG_STRING, TEST_TAG_DOUBLE}; + struct tag_hash_key *key = tag_hash_key_construct_with_fieldstat_tag(tags, 3); + struct cell_manager *cm = cell_manager_new(SAMPLING_MODE_COMPREHENSIVE, 10); + + add_key_and_assert_find_result_comprehensive(cm, key); + + cell_manager_free(cm); + tag_hash_key_free(key); +} + int main(int argc, char *argv[]) { diff --git a/test/unit_test_fast_hash.cpp b/test/unit_test_fast_hash.cpp deleted file mode 100644 index 50ba87d..0000000 --- a/test/unit_test_fast_hash.cpp +++ /dev/null @@ -1,179 +0,0 @@ - -#include <gtest/gtest.h> -#include <stdio.h> -#include <stdlib.h> -#include <time.h> -#include "fieldstat.h" -#include "utils.hpp" -#include "tags/my_ut_hash_inner.h" -#include "tags/my_ut_hash.h" - -struct testing_hash { - struct tag_hash_key *key; - int value; - UT_hash_handle hh; -}; - -TEST(unit_test_fasthash, simple_add_and_find_when_length_is_1_given_different_type_of_value) -{ - struct testing_hash *hdr = NULL; - struct testing_hash *s = NULL; - - struct tag_hash_key *keys[3]; - keys[0] = tag_hash_key_construct_with_fieldstat_tag(&TEST_TAG_INT, 1); - keys[1] = tag_hash_key_construct_with_fieldstat_tag(&TEST_TAG_DOUBLE, 1); - keys[2] = tag_hash_key_construct_with_fieldstat_tag(&TEST_TAG_STRING, 1); - - s = (struct testing_hash *)malloc(sizeof(struct testing_hash)); - s->key = keys[0]; - s->value = 1; - HASH_ADD_TAG(hdr, key, s); - s = (struct testing_hash *)malloc(sizeof(struct testing_hash)); - s->key = keys[1]; - s->value = 2; - HASH_ADD_TAG(hdr, key, s); - s = (struct testing_hash *)malloc(sizeof(struct testing_hash)); - s->key = keys[2]; - s->value = 3; - HASH_ADD_TAG(hdr, key, s); - printf("add finish \n"); - - struct testing_hash *find = NULL; - HASH_FIND_TAG(hdr, keys[0], find); - ASSERT_NE(find, nullptr); - EXPECT_EQ(find->value, 1); - HASH_DEL(hdr, find); - free(find); - - find = NULL; - HASH_FIND_TAG(hdr, keys[2], find); - ASSERT_NE(find, nullptr); - EXPECT_EQ(find->value, 3); - HASH_DEL(hdr, find); - free(find); - - find = NULL; - HASH_FIND_TAG(hdr, keys[1], find); - ASSERT_NE(find, nullptr); - EXPECT_EQ(find->value, 2); - HASH_DEL(hdr, find); - free(find); - - tag_hash_key_free(keys[0]); - tag_hash_key_free(keys[1]); - tag_hash_key_free(keys[2]); -} - -TEST(unit_test_fasthash, simple_add_and_find_when_length_is_3_of_different_types) -{ - struct testing_hash *hdr = NULL; - struct testing_hash *s = NULL; - s = (struct testing_hash *)malloc(sizeof(struct testing_hash)); - - const struct fieldstat_tag tags[3] = {TEST_TAG_INT, TEST_TAG_STRING, TEST_TAG_DOUBLE}; - - s->key = tag_hash_key_construct_with_fieldstat_tag(tags, 3); - s->value = 321; - HASH_ADD_TAG(hdr, key, s); - printf("add finish \n"); - - struct testing_hash *find = NULL; - HASH_FIND_TAG(hdr, s->key, find); - ASSERT_NE(find, nullptr); - ASSERT_EQ(find->value, 321); - - HASH_DEL(hdr, find); - - tag_hash_key_free(s->key); - free(s); -} - -TEST(unit_test_fasthash, test_multiple_hash_and_ensure_no_double_hashing) -{ - struct testing_hash *hdr = NULL; - struct testing_hash *s = NULL; - struct testing_hash *find[3]; - - s = (struct testing_hash *)malloc(sizeof(struct testing_hash)); - - const struct fieldstat_tag tags[3] = {TEST_TAG_INT, TEST_TAG_STRING, TEST_TAG_DOUBLE}; - - s->key = tag_hash_key_construct_with_fieldstat_tag(tags, 3); - s->value = 321; - printf("call hash add tag\n"); - HASH_ADD_TAG(hdr, key, s); - printf("add finish \n"); - - struct tag_hash_key *new_key = tag_hash_key_construct_with_fieldstat_tag(tags, 3); - clock_t start = clock(); - HASH_FIND_TAG(hdr, new_key, find[0]); - clock_t end = clock(); - clock_t duration = end - start; - printf("duration with new hash key: %ld \n", (long int)duration); - - start = clock(); - HASH_FIND_TAG(hdr, s->key, find[1]); - end = clock(); - clock_t duration2 = end - start; - printf("duration with old hash key: %ld \n", (long int)duration2); - - start = clock(); - HASH_FIND_TAG(hdr, new_key, find[2]); - end = clock(); - clock_t duration3 = end - start; - printf("duration with new hash key again: %ld \n", (long int)duration3); - - EXPECT_GE(duration, duration2); - EXPECT_GE(duration, duration3); - - EXPECT_EQ(find[0], find[1]); - EXPECT_EQ(find[0], find[2]); - - HASH_DEL(hdr, find[0]); - tag_hash_key_free(s->key); - free(s); - tag_hash_key_free(new_key); -} - -TEST(unit_test_fasthash, serialize_and_deserialize) -{ - const struct fieldstat_tag tags[3] = {TEST_TAG_INT, TEST_TAG_STRING, TEST_TAG_DOUBLE}; - struct tag_hash_key *new_key = tag_hash_key_construct_with_fieldstat_tag(tags, 3); - char *blob = NULL; - size_t blob_size = 0; - tag_hash_key_serialize(new_key, &blob, &blob_size); - struct tag_hash_key *deserialized_key = tag_hash_key_deserialize(blob, blob_size); - - struct fieldstat_tag *deserialized_tags = NULL; - size_t n_deserialized_tags = 0; - tag_hash_key_convert_to_fieldstat_tag(deserialized_key, &deserialized_tags, &n_deserialized_tags); - - EXPECT_EQ(n_deserialized_tags, 3); - // the keys are rearranged - EXPECT_STREQ(deserialized_tags[0].key, TEST_TAG_DOUBLE.key); - EXPECT_STREQ(deserialized_tags[1].key, TEST_TAG_INT.key); - EXPECT_STREQ(deserialized_tags[2].key, TEST_TAG_STRING.key); - EXPECT_EQ(deserialized_tags[0].value_double, TEST_TAG_DOUBLE.value_double); - EXPECT_EQ(deserialized_tags[1].value_longlong, TEST_TAG_INT.value_longlong); - EXPECT_STREQ(deserialized_tags[2].value_str, TEST_TAG_STRING.value_str); - - tag_hash_key_free(new_key); - tag_hash_key_free(deserialized_key); - free(blob); - - for (size_t i = 0; i < n_deserialized_tags; i++) - { - free((char *)deserialized_tags[i].key); - if (deserialized_tags[i].type == TAG_CSTRING) { - free((char *)deserialized_tags[i].value_str); - } - } - - free(deserialized_tags); -} - -int main(int argc, char *argv[]) -{ - testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -}
\ No newline at end of file |
