summary refs log tree commit diff
diff options
context:
space:
mode:
author chenzizhan <[email protected]> 2023-07-28 16:12:16 +0800
committer chenzizhan <[email protected]> 2023-07-28 16:12:16 +0800
commit 40770d053f0c8c478800f652e2e7dbb61e7f264e (patch)
tree 95cd32f976ceac9d32068b9a0c37939894b5fbf0
parent c188b95d18280be7449356fd429a0e8635fc78a5 (diff)
test tag key hash
-rw-r--r-- test/CMakeLists.txt 3
-rw-r--r-- test/unit_test_cell_manager.cpp 162
-rw-r--r-- test/unit_test_fast_hash.cpp 179
3 files changed, 136 insertions, 208 deletions
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 00c9896..d6f456a 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -51,5 +51,4 @@ add_unit_test(test_metric_hll)
add_unit_test(test_performance)
add_unit_test(test_reset_and_delete_cube)
add_unit_test(test_serialize)
-# add_unit_test(unit_test_cell_manager)
-# add_unit_test(unit_test_fast_hash) \ No newline at end of file
+add_unit_test(unit_test_cell_manager) \ No newline at end of file
diff --git a/test/unit_test_cell_manager.cpp b/test/unit_test_cell_manager.cpp
index 1b3656a..d5e6270 100644
--- a/test/unit_test_cell_manager.cpp
+++ b/test/unit_test_cell_manager.cpp
@@ -13,21 +13,22 @@
using namespace std;
-struct tag_hash_key *gen_key(const char *key, int value)
+struct tag_hash_key *test_gen_tag_key(const char *key, int value)
{
- const char *value_str = to_string(value).c_str();
struct fieldstat_tag tag = {
.key = key,
.type = TAG_CSTRING,
- {.value_str = value_str},
+ {.value_str = strdup(to_string(value).c_str())},
};
struct tag_hash_key *tag_key = tag_hash_key_construct_with_fieldstat_tag(&tag, 1);
+ free((void *)tag.value_str);
+
return tag_key;
}
-double cal_accuracy(vector<struct tag_hash_key *> &expected_keys, vector<struct tag_hash_key *> &test_result) {
+double cal_accuracy_with_tags(vector<struct tag_hash_key *> &expected_keys, vector<struct tag_hash_key *> &test_result) {
map<string, int> countMap;
for (size_t i = 0; i < expected_keys.size(); i++) {
std::string key = tag_hash_key_get_compound_key(expected_keys[i]);
@@ -46,9 +47,8 @@ double cal_accuracy(vector<struct tag_hash_key *> &expected_keys, vector<struct
myset.insert(countVector[i].first);
min_in_max_count = countVector[i].second;
}
- int last_cnt = min_in_max_count;
- while (last_cnt == min_in_max_count && i < countVector.size()) {
- if (countVector[i].second != last_cnt) {
+ while (i < countVector.size()) {
+ if (countVector[i].second != min_in_max_count) {
break;
}
myset.insert(countVector[i].first);
@@ -66,7 +66,7 @@ double cal_accuracy(vector<struct tag_hash_key *> &expected_keys, vector<struct
return accuracy;
}
-vector<tag_hash_key *> test_query_cell_manager_content(struct cell_manager *cm)
+vector<tag_hash_key *> test_query_cell_manager_content(const struct cell_manager *cm)
{
int ret_len;
const struct tag_hash_key **dump_ret = cell_manager_dump(cm, &ret_len);
@@ -76,7 +76,6 @@ vector<tag_hash_key *> test_query_cell_manager_content(struct cell_manager *cm)
continue;
}
test_result.push_back((struct tag_hash_key *)dump_ret[i]);
- printf("query content, id: %d, key: %s\n", i, tag_hash_key_get_compound_key(dump_ret[i]));
}
return test_result;
@@ -91,10 +90,10 @@ TEST(unit_test_cell_manager, topk_add_and_query_accuracy)
for (int i = 0; i < TEST_ROUND; i++)
{
if (rand()) {
- struct tag_hash_key *key = gen_key("key", rand() % 10);
+ struct tag_hash_key *key = test_gen_tag_key("key", rand() % 10);
keys.push_back(key);
} else {
- struct tag_hash_key *key = gen_key("key", rand() % 1000);
+ struct tag_hash_key *key = test_gen_tag_key("key", rand() % 1000);
keys.push_back(key);
}
}
@@ -107,7 +106,7 @@ TEST(unit_test_cell_manager, topk_add_and_query_accuracy)
vector<tag_hash_key *> test_result = test_query_cell_manager_content(cm);
EXPECT_EQ(test_result.size(), 10);
- double accuracy = cal_accuracy(keys, test_result);
+ double accuracy = cal_accuracy_with_tags(keys, test_result);
EXPECT_NEAR(accuracy, 1.0, 0.01);
cell_manager_free(cm);
@@ -122,11 +121,11 @@ TEST(unit_test_cell_manager, merge_topk_given_K_large_enough)
struct cell_manager *cm2 = cell_manager_new(SAMPLING_MODE_TOPK, 10);
vector<struct tag_hash_key *> keys;
- keys.push_back(gen_key("key_share", 1));
- keys.push_back(gen_key("key_1", 1));
- keys.push_back(gen_key("key_1", 2));
- keys.push_back(gen_key("key_share", 1));
- keys.push_back(gen_key("key_2", 1));
+ keys.push_back(test_gen_tag_key("key_share", 1));
+ keys.push_back(test_gen_tag_key("key_1", 1));
+ keys.push_back(test_gen_tag_key("key_1", 2));
+ keys.push_back(test_gen_tag_key("key_share", 1));
+ keys.push_back(test_gen_tag_key("key_2", 1));
int pop_dummy;
int exist_dummy;
@@ -163,7 +162,7 @@ TEST(unit_test_cell_manager, merge_topk_given_K_large_enough)
EXPECT_EQ(cell_id_old[1], cell_id_2[1]); // key_2 in cm2
auto test_result = test_query_cell_manager_content(cm1);
- double accuracy = cal_accuracy(keys, test_result);
+ double accuracy = cal_accuracy_with_tags(keys, test_result);
EXPECT_NEAR(accuracy, 1.0, 0.01);
EXPECT_EQ(cell_manager_get_count_by_tag(cm1, keys[0]), 2); // key_share merged once
@@ -185,9 +184,9 @@ TEST(unit_test_cell_manager, merge_topk_to_empty)
struct cell_manager *cm2 = cell_manager_new(SAMPLING_MODE_TOPK, 10);
vector<struct tag_hash_key *> keys;
- keys.push_back(gen_key("key_1", 1));
- keys.push_back(gen_key("key_1", 1));
- keys.push_back(gen_key("key_1", 2));
+ keys.push_back(test_gen_tag_key("key_1", 1));
+ keys.push_back(test_gen_tag_key("key_1", 1));
+ keys.push_back(test_gen_tag_key("key_1", 2));
int pop_dummy;
int exist_dummy;
@@ -236,15 +235,15 @@ TEST(unit_test_cell_manager, merge_topk_to_full_one)
struct cell_manager *cm2 = cell_manager_new(SAMPLING_MODE_TOPK, 10);
vector<struct tag_hash_key *> keys1;
- keys1.push_back(gen_key("key_1", 1));
- keys1.push_back(gen_key("key_1", 2));
- keys1.push_back(gen_key("key_shared", 1));
+ keys1.push_back(test_gen_tag_key("key_1", 1));
+ keys1.push_back(test_gen_tag_key("key_1", 2));
+ keys1.push_back(test_gen_tag_key("key_shared", 1));
vector<struct tag_hash_key *> keys2;
for (int i = 0; i < 9; i++) {
- keys2.push_back(gen_key("key_2", i));
+ keys2.push_back(test_gen_tag_key("key_2", i));
}
- keys2.push_back(gen_key("key_shared", 1));
+ keys2.push_back(test_gen_tag_key("key_shared", 1));
int pop_dummy;
int exist_dummy;
@@ -279,7 +278,7 @@ TEST(unit_test_cell_manager, merge_topk_to_full_one)
auto test_result = test_query_cell_manager_content(cm2);
// join keys2 to keys1
keys1.insert(keys1.end(), std::make_move_iterator(keys2.begin()), std::make_move_iterator(keys2.end()));
- double accuracy = cal_accuracy(keys1, test_result);
+ double accuracy = cal_accuracy_with_tags(keys1, test_result);
EXPECT_NEAR(accuracy, 1.0, 0.01);
free(cell_id_popped);
@@ -293,6 +292,115 @@ TEST(unit_test_cell_manager, merge_topk_to_full_one)
// all keys are moved to cm1, so no need to free keys2
}
+void add_key_and_assert_find_result(struct cell_manager *cm, const struct tag_hash_key *key)
+{
+ int pop_dummy;
+ int exist_dummy;
+ int cell_id = cell_manager_add_cell_topk(cm, key, 1234, &pop_dummy, &exist_dummy);
+ EXPECT_EQ(cell_id, 0);
+ EXPECT_EQ(cell_manager_get_count_by_tag(cm, key), 1234);
+ const struct tag_hash_key *key_get = cell_manager_get_tag_by_cell_id(cm, cell_id);
+ EXPECT_STREQ(tag_hash_key_get_compound_key(key_get), tag_hash_key_get_compound_key(key));
+}
+
+void add_key_and_assert_find_result_comprehensive(struct cell_manager *cm, const struct tag_hash_key *key)
+{
+ int cell_id = cell_manager_add_cell(cm, key);
+ EXPECT_EQ(cell_id, 0);
+ const struct tag_hash_key *key_get = cell_manager_get_tag_by_cell_id(cm, cell_id);
+ EXPECT_STREQ(tag_hash_key_get_compound_key(key_get), tag_hash_key_get_compound_key(key));
+}
+
+TEST(unit_test_cell_manager, add_with_key_length_is_1_int_type_topk)
+{
+ struct cell_manager *cm = cell_manager_new(SAMPLING_MODE_TOPK, 10);
+ struct tag_hash_key *key = tag_hash_key_construct_with_fieldstat_tag(&TEST_TAG_INT, 1);
+
+ add_key_and_assert_find_result(cm, key);
+
+ cell_manager_free(cm);
+ tag_hash_key_free(key);
+}
+
+TEST(unit_test_cell_manager, add_with_key_length_is_1_double_type_topk)
+{
+ struct tag_hash_key *key = tag_hash_key_construct_with_fieldstat_tag(&TEST_TAG_DOUBLE, 1);
+ struct cell_manager *cm = cell_manager_new(SAMPLING_MODE_TOPK, 10);
+
+ add_key_and_assert_find_result(cm, key);
+
+ cell_manager_free(cm);
+ tag_hash_key_free(key);
+}
+
+TEST(unit_test_cell_manager, add_with_key_length_is_1_string_type_topk)
+{
+ struct tag_hash_key *key = tag_hash_key_construct_with_fieldstat_tag(&TEST_TAG_STRING, 1);
+ struct cell_manager *cm = cell_manager_new(SAMPLING_MODE_TOPK, 10);
+
+ add_key_and_assert_find_result(cm, key);
+
+ cell_manager_free(cm);
+ tag_hash_key_free(key);
+}
+
+TEST(unit_test_cell_manager, add_with_key_length_is_3_of_diff_types_topk)
+{
+ const struct fieldstat_tag tags[3] = {TEST_TAG_INT, TEST_TAG_STRING, TEST_TAG_DOUBLE};
+ struct tag_hash_key *key = tag_hash_key_construct_with_fieldstat_tag(tags, 3);
+ struct cell_manager *cm = cell_manager_new(SAMPLING_MODE_TOPK, 10);
+
+ add_key_and_assert_find_result(cm, key);
+
+ cell_manager_free(cm);
+ tag_hash_key_free(key);
+}
+
+TEST(unit_test_cell_manager, add_with_key_length_is_1_int_type_comprehensive)
+{
+ struct cell_manager *cm = cell_manager_new(SAMPLING_MODE_COMPREHENSIVE, 10);
+ struct tag_hash_key *key = tag_hash_key_construct_with_fieldstat_tag(&TEST_TAG_INT, 1);
+
+ add_key_and_assert_find_result_comprehensive(cm, key);
+
+ cell_manager_free(cm);
+ tag_hash_key_free(key);
+}
+
+TEST(unit_test_cell_manager, add_with_key_length_is_1_double_type_comprehensive)
+{
+ struct tag_hash_key *key = tag_hash_key_construct_with_fieldstat_tag(&TEST_TAG_DOUBLE, 1);
+ struct cell_manager *cm = cell_manager_new(SAMPLING_MODE_COMPREHENSIVE, 10);
+
+ add_key_and_assert_find_result_comprehensive(cm, key);
+
+ cell_manager_free(cm);
+ tag_hash_key_free(key);
+}
+
+TEST(unit_test_cell_manager, add_with_key_length_is_1_string_type_comprehensive)
+{
+ struct tag_hash_key *key = tag_hash_key_construct_with_fieldstat_tag(&TEST_TAG_STRING, 1);
+ struct cell_manager *cm = cell_manager_new(SAMPLING_MODE_COMPREHENSIVE, 10);
+
+ add_key_and_assert_find_result_comprehensive(cm, key);
+
+ cell_manager_free(cm);
+ tag_hash_key_free(key);
+}
+
+TEST(unit_test_cell_manager, add_with_key_length_is_3_of_diff_types_comprehensive)
+{
+ const struct fieldstat_tag tags[3] = {TEST_TAG_INT, TEST_TAG_STRING, TEST_TAG_DOUBLE};
+ struct tag_hash_key *key = tag_hash_key_construct_with_fieldstat_tag(tags, 3);
+ struct cell_manager *cm = cell_manager_new(SAMPLING_MODE_COMPREHENSIVE, 10);
+
+ add_key_and_assert_find_result_comprehensive(cm, key);
+
+ cell_manager_free(cm);
+ tag_hash_key_free(key);
+}
+
int main(int argc, char *argv[])
{
diff --git a/test/unit_test_fast_hash.cpp b/test/unit_test_fast_hash.cpp
deleted file mode 100644
index 50ba87d..0000000
--- a/test/unit_test_fast_hash.cpp
+++ /dev/null
@@ -1,179 +0,0 @@
-
-#include <gtest/gtest.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <time.h>
-#include "fieldstat.h"
-#include "utils.hpp"
-#include "tags/my_ut_hash_inner.h"
-#include "tags/my_ut_hash.h"
-
-struct testing_hash {
- struct tag_hash_key *key;
- int value;
- UT_hash_handle hh;
-};
-
-TEST(unit_test_fasthash, simple_add_and_find_when_length_is_1_given_different_type_of_value)
-{
- struct testing_hash *hdr = NULL;
- struct testing_hash *s = NULL;
-
- struct tag_hash_key *keys[3];
- keys[0] = tag_hash_key_construct_with_fieldstat_tag(&TEST_TAG_INT, 1);
- keys[1] = tag_hash_key_construct_with_fieldstat_tag(&TEST_TAG_DOUBLE, 1);
- keys[2] = tag_hash_key_construct_with_fieldstat_tag(&TEST_TAG_STRING, 1);
-
- s = (struct testing_hash *)malloc(sizeof(struct testing_hash));
- s->key = keys[0];
- s->value = 1;
- HASH_ADD_TAG(hdr, key, s);
- s = (struct testing_hash *)malloc(sizeof(struct testing_hash));
- s->key = keys[1];
- s->value = 2;
- HASH_ADD_TAG(hdr, key, s);
- s = (struct testing_hash *)malloc(sizeof(struct testing_hash));
- s->key = keys[2];
- s->value = 3;
- HASH_ADD_TAG(hdr, key, s);
- printf("add finish \n");
-
- struct testing_hash *find = NULL;
- HASH_FIND_TAG(hdr, keys[0], find);
- ASSERT_NE(find, nullptr);
- EXPECT_EQ(find->value, 1);
- HASH_DEL(hdr, find);
- free(find);
-
- find = NULL;
- HASH_FIND_TAG(hdr, keys[2], find);
- ASSERT_NE(find, nullptr);
- EXPECT_EQ(find->value, 3);
- HASH_DEL(hdr, find);
- free(find);
-
- find = NULL;
- HASH_FIND_TAG(hdr, keys[1], find);
- ASSERT_NE(find, nullptr);
- EXPECT_EQ(find->value, 2);
- HASH_DEL(hdr, find);
- free(find);
-
- tag_hash_key_free(keys[0]);
- tag_hash_key_free(keys[1]);
- tag_hash_key_free(keys[2]);
-}
-
-TEST(unit_test_fasthash, simple_add_and_find_when_length_is_3_of_different_types)
-{
- struct testing_hash *hdr = NULL;
- struct testing_hash *s = NULL;
- s = (struct testing_hash *)malloc(sizeof(struct testing_hash));
-
- const struct fieldstat_tag tags[3] = {TEST_TAG_INT, TEST_TAG_STRING, TEST_TAG_DOUBLE};
-
- s->key = tag_hash_key_construct_with_fieldstat_tag(tags, 3);
- s->value = 321;
- HASH_ADD_TAG(hdr, key, s);
- printf("add finish \n");
-
- struct testing_hash *find = NULL;
- HASH_FIND_TAG(hdr, s->key, find);
- ASSERT_NE(find, nullptr);
- ASSERT_EQ(find->value, 321);
-
- HASH_DEL(hdr, find);
-
- tag_hash_key_free(s->key);
- free(s);
-}
-
-TEST(unit_test_fasthash, test_multiple_hash_and_ensure_no_double_hashing)
-{
- struct testing_hash *hdr = NULL;
- struct testing_hash *s = NULL;
- struct testing_hash *find[3];
-
- s = (struct testing_hash *)malloc(sizeof(struct testing_hash));
-
- const struct fieldstat_tag tags[3] = {TEST_TAG_INT, TEST_TAG_STRING, TEST_TAG_DOUBLE};
-
- s->key = tag_hash_key_construct_with_fieldstat_tag(tags, 3);
- s->value = 321;
- printf("call hash add tag\n");
- HASH_ADD_TAG(hdr, key, s);
- printf("add finish \n");
-
- struct tag_hash_key *new_key = tag_hash_key_construct_with_fieldstat_tag(tags, 3);
- clock_t start = clock();
- HASH_FIND_TAG(hdr, new_key, find[0]);
- clock_t end = clock();
- clock_t duration = end - start;
- printf("duration with new hash key: %ld \n", (long int)duration);
-
- start = clock();
- HASH_FIND_TAG(hdr, s->key, find[1]);
- end = clock();
- clock_t duration2 = end - start;
- printf("duration with old hash key: %ld \n", (long int)duration2);
-
- start = clock();
- HASH_FIND_TAG(hdr, new_key, find[2]);
- end = clock();
- clock_t duration3 = end - start;
- printf("duration with new hash key again: %ld \n", (long int)duration3);
-
- EXPECT_GE(duration, duration2);
- EXPECT_GE(duration, duration3);
-
- EXPECT_EQ(find[0], find[1]);
- EXPECT_EQ(find[0], find[2]);
-
- HASH_DEL(hdr, find[0]);
- tag_hash_key_free(s->key);
- free(s);
- tag_hash_key_free(new_key);
-}
-
-TEST(unit_test_fasthash, serialize_and_deserialize)
-{
- const struct fieldstat_tag tags[3] = {TEST_TAG_INT, TEST_TAG_STRING, TEST_TAG_DOUBLE};
- struct tag_hash_key *new_key = tag_hash_key_construct_with_fieldstat_tag(tags, 3);
- char *blob = NULL;
- size_t blob_size = 0;
- tag_hash_key_serialize(new_key, &blob, &blob_size);
- struct tag_hash_key *deserialized_key = tag_hash_key_deserialize(blob, blob_size);
-
- struct fieldstat_tag *deserialized_tags = NULL;
- size_t n_deserialized_tags = 0;
- tag_hash_key_convert_to_fieldstat_tag(deserialized_key, &deserialized_tags, &n_deserialized_tags);
-
- EXPECT_EQ(n_deserialized_tags, 3);
- // the keys are rearranged
- EXPECT_STREQ(deserialized_tags[0].key, TEST_TAG_DOUBLE.key);
- EXPECT_STREQ(deserialized_tags[1].key, TEST_TAG_INT.key);
- EXPECT_STREQ(deserialized_tags[2].key, TEST_TAG_STRING.key);
- EXPECT_EQ(deserialized_tags[0].value_double, TEST_TAG_DOUBLE.value_double);
- EXPECT_EQ(deserialized_tags[1].value_longlong, TEST_TAG_INT.value_longlong);
- EXPECT_STREQ(deserialized_tags[2].value_str, TEST_TAG_STRING.value_str);
-
- tag_hash_key_free(new_key);
- tag_hash_key_free(deserialized_key);
- free(blob);
-
- for (size_t i = 0; i < n_deserialized_tags; i++)
- {
- free((char *)deserialized_tags[i].key);
- if (deserialized_tags[i].type == TAG_CSTRING) {
- free((char *)deserialized_tags[i].value_str);
- }
- }
-
- free(deserialized_tags);
-}
-
-int main(int argc, char *argv[])
-{
- testing::InitGoogleTest(&argc, argv);
- return RUN_ALL_TESTS();
-} \ No newline at end of file