summary | refs | log | tree | commit | diff
path: root/test
diff options
context:
space:
mode:
Diffstat (limited to 'test')
-rw-r--r-- test/test_empty_tags.cpp 36
-rw-r--r-- test/test_fuzz_test.cpp 4
-rw-r--r-- test/test_merge.cpp 244
-rw-r--r-- test/test_metric_hll.cpp 68
-rw-r--r-- test/test_register_and_reset.cpp 209
-rw-r--r-- test/utils.cpp 50
-rw-r--r-- test/utils.hpp 2
7 files changed, 556 insertions, 57 deletions
diff --git a/test/test_empty_tags.cpp b/test/test_empty_tags.cpp
index 709ef8d..54e243a 100644
--- a/test/test_empty_tags.cpp
+++ b/test/test_empty_tags.cpp
@@ -94,6 +94,42 @@ TEST(test_empty_tag, merge_topk)
fieldstat_free(instance_src);
}
+TEST(test_empty_tag, merge_spreadsketch) // merges a spreadsketch cube whose cube tags and cell tags are both empty
+{
+ struct fieldstat *instance_src = fieldstat_new();
+ int cube_id = fieldstat_create_cube(instance_src, NULL, 0, SAMPLING_MODE_SPREADSKETCH, 1); // cube created with no shared tags
+ int metric_id = fieldstat_register_hll(instance_src, "metric", 4);
+ fieldstat_hll_add(instance_src, cube_id, metric_id, NULL, 0, "1", 1); // one cell with an empty tag list, holding item "1"
+ struct fieldstat *instance_dst = fieldstat_new();
+
+ fieldstat_merge(instance_dst, instance_src);
+ fieldstat_merge(instance_dst, instance_src); // same source merged twice; the estimate checked below is still ~1
+
+ int *ret_cube_id_arr = NULL;
+ int n_cube = 0;
+ fieldstat_get_cubes(instance_dst, &ret_cube_id_arr, &n_cube);
+ int ret_cell_id = ret_cube_id_arr[0]; // despite the name, this is a cube id taken from the cube id array
+ struct field_list *shared_tag = fieldstat_cube_get_tags(instance_dst, ret_cell_id);
+ EXPECT_EQ(shared_tag->n_field, 0);
+ EXPECT_TRUE(shared_tag->field == NULL);
+ fieldstat_tag_list_arr_free(shared_tag, 1);
+ free(ret_cube_id_arr);
+
+ struct field_list *tag_list = NULL;
+ size_t n_cell = 0;
+ fieldstat_cube_get_cells(instance_dst, cube_id, &tag_list, &n_cell); // reuses the source cube id on the destination; presumably merge keeps ids aligned - TODO confirm
+ EXPECT_EQ(n_cell, 1);
+ EXPECT_EQ(tag_list[0].n_field, 0);
+ EXPECT_TRUE(tag_list[0].field == NULL);
+ double value;
+ fieldstat_hll_get(instance_dst, cube_id, &tag_list[0], 0, &value); // metric id passed as literal 0
+ EXPECT_NEAR(value, 1, 0.4);
+ fieldstat_tag_list_arr_free(tag_list, n_cell);
+
+ fieldstat_free(instance_dst);
+ fieldstat_free(instance_src);
+}
+
TEST(test_empty_tag, export)
{
struct fieldstat *instance = test_empty_my_init();
diff --git a/test/test_fuzz_test.cpp b/test/test_fuzz_test.cpp
index 38f68bc..8d896b5 100644
--- a/test/test_fuzz_test.cpp
+++ b/test/test_fuzz_test.cpp
@@ -431,8 +431,8 @@ TEST(Fuzz_test, simple_one_for_perf)
int main(int argc, char *argv[])
{
testing::InitGoogleTest(&argc, argv);
- // testing::GTEST_FLAG(filter) = "Fuzz_test.many_instance_random_flow_unregister_calibrate_reset_fork_merge_topk";
- testing::GTEST_FLAG(filter) = "Fuzz_test.simple_one_for_perf";
+ // testing::GTEST_FLAG(filter) = "Fuzz_test.add_and_reset_with_randomly_generated_flows_and_randomly_chosen_metric";
+ testing::GTEST_FLAG(filter) = "-Fuzz_test.simple_one_for_perf"; // leading '-' is a negative pattern: run every test except the perf one
return RUN_ALL_TESTS();
} \ No newline at end of file
diff --git a/test/test_merge.cpp b/test/test_merge.cpp
index e51b90d..7a07baf 100644
--- a/test/test_merge.cpp
+++ b/test/test_merge.cpp
@@ -2,6 +2,7 @@
#include <gtest/gtest.h>
#include <set>
#include <unordered_map>
+#include <unordered_set>
#include "fieldstat.h"
#include "utils.hpp"
@@ -38,6 +39,13 @@ long long merge_test_fieldstat_counter_get(const struct fieldstat *instance, int
return ret;
}
+double merge_test_fieldstat_hll_get(const struct fieldstat *instance, int cube_id, int metric_id, const struct field_list *tag_list = &TEST_TAG_LIST_STRING) // test helper: HLL value of one cell; tag_list defaults to TEST_TAG_LIST_STRING
+{
+ double ret = 0; // returned unchanged if fieldstat_hll_get does not write to it; its return code is ignored
+ fieldstat_hll_get(instance, cube_id, tag_list, metric_id, &ret);
+ return ret;
+}
+
TEST(unit_test_merge, test_metric_name_mapping_by_adding_metric_to_dest)
{
struct fieldstat *instance = fieldstat_new();
@@ -362,7 +370,7 @@ TEST(unit_test_merge, new_too_many_cells_on_one_metric_given_source_cube_reset_a
fieldstat_tag_list_arr_free(tag_list, n_cell);
}
-struct fieldstat *test_push_flows(vector<Fieldstat_tag_list_wrapper *> &flows_in_test, int K, long long count = 1)
+struct fieldstat *topk_test_push_flows(vector<Fieldstat_tag_list_wrapper *> &flows_in_test, int K, long long count = 1)
{
struct fieldstat *instance = fieldstat_new();
int cube_id = fieldstat_create_cube(instance, &TEST_SHARED_TAG, 1, SAMPLING_MODE_TOPK, K);
@@ -377,9 +385,9 @@ TEST(unit_test_merge, merge_accuracy_test_with_K_large_enough_topk)
{
int K = 100;
vector<Fieldstat_tag_list_wrapper *> flows_in_src = test_gen_topk_flows(K, K);
- struct fieldstat *instance_src = test_push_flows(flows_in_src, K);
+ struct fieldstat *instance_src = topk_test_push_flows(flows_in_src, K);
vector<Fieldstat_tag_list_wrapper *> flows_in_dest = test_gen_topk_flows(K, K);
- struct fieldstat *instance_dest = test_push_flows(flows_in_dest, K);
+ struct fieldstat *instance_dest = topk_test_push_flows(flows_in_dest, K);
fieldstat_merge(instance_dest, instance_src);
struct field_list *tag_list = NULL;
@@ -406,17 +414,17 @@ TEST(unit_test_merge, merge_accuracy_test_with_K_large_enough_topk)
}
}
-TEST(unit_test_merge, merge_accuracy_test_gen_dest_full_all_inserted_given_src_flows_larger)
+TEST(unit_test_merge, merge_accuracy_test_gen_dest_full_all_inserted_given_src_flows_larger_topk)
{
int K = 1000;
vector<Fieldstat_tag_list_wrapper *> flows_in_src = test_gen_topk_flows(10000, K);
- struct fieldstat *instance_src = test_push_flows(flows_in_src, K, 1000); // 1000 times larger than dest 1
+ struct fieldstat *instance_src = topk_test_push_flows(flows_in_src, K, 1000); // 1000 times larger than dest 1
vector<Fieldstat_tag_list_wrapper *> flows_in_dest;
for (int i = 0; i < K; i++) {
Fieldstat_tag_list_wrapper *tmp = new Fieldstat_tag_list_wrapper("flows in dest", to_string(i).c_str());
flows_in_dest.push_back(tmp);
}
- struct fieldstat *instance_dest = test_push_flows(flows_in_dest, K, 1);
+ struct fieldstat *instance_dest = topk_test_push_flows(flows_in_dest, K, 1);
fieldstat_merge(instance_dest, instance_src);
@@ -444,13 +452,13 @@ TEST(unit_test_merge, merge_accuracy_test_gen_dest_full_all_inserted_given_src_f
}
}
-TEST(unit_test_merge, merge_accuracy_test_gen_dest_full_some_inserted_and_some_merged_and_some_fail_to_add)
+TEST(unit_test_merge, merge_accuracy_test_gen_dest_full_some_inserted_and_some_merged_and_some_fail_to_add_topk)
{
int K = 100;
vector<Fieldstat_tag_list_wrapper *> flows_in_src = test_gen_topk_flows(30000, K + 50); // let elephant flows in src and dest different
- struct fieldstat *instance_src = test_push_flows(flows_in_src, K);
+ struct fieldstat *instance_src = topk_test_push_flows(flows_in_src, K);
vector<Fieldstat_tag_list_wrapper *> flows_in_dest = test_gen_topk_flows(30000, K + 50);
- struct fieldstat *instance_dest = test_push_flows(flows_in_dest, K);
+ struct fieldstat *instance_dest = topk_test_push_flows(flows_in_dest, K);
fieldstat_merge(instance_dest, instance_src);
struct field_list *tag_list = NULL;
@@ -523,11 +531,227 @@ TEST(unit_test_merge, primary_metric_id_different)
fieldstat_free(instance_dst);
}
+TEST(unit_test_merge, new_cube_and_metric_to_empty_spreadsketch) { // merging into an empty instance must create the cube and the metric there
+ struct fieldstat *instance = fieldstat_new();
+ fieldstat_create_cube(instance, &TEST_TAG_INT, 1, SAMPLING_MODE_SPREADSKETCH, 10);
+ fieldstat_register_hll(instance, "metric", 6);
+
+ struct fieldstat *instance_dest = fieldstat_new();
+ fieldstat_merge(instance_dest, instance);
+
+ int *cube_id_dest = NULL; // initialised so a query that writes nothing cannot hand garbage to free()
+ int n_cube = 0; // initialised so the assertion below never reads an indeterminate value
+ fieldstat_get_cubes(instance_dest, &cube_id_dest, &n_cube);
+ EXPECT_TRUE(n_cube == 1);
+ EXPECT_STREQ(fieldstat_get_metric_name(instance_dest, 0), "metric");
+
+ free(cube_id_dest);
+ fieldstat_free(instance);
+ fieldstat_free(instance_dest);
+}
+
+TEST(unit_test_merge, new_cell_on_existing_cube_and_metric_spreadsketch) { // merge once with no cells, add a cell, merge again
+ struct fieldstat *instance = fieldstat_new();
+ int cube_id = fieldstat_create_cube(instance, &TEST_SHARED_TAG, 1, SAMPLING_MODE_SPREADSKETCH, 10);
+ int metric_id = fieldstat_register_hll(instance, "metric", 6);
+ struct fieldstat *instance_dest = fieldstat_new();
+ fieldstat_merge(instance_dest, instance); // first merge: cube and metric only, no cells yet
+
+ fieldstat_hll_add(instance, cube_id, metric_id, &TEST_TAG_STRING, 1, "1", 1);
+ fieldstat_hll_add(instance, cube_id, metric_id, &TEST_TAG_STRING, 1, "2", 1);
+ fieldstat_merge(instance_dest, instance); // second merge: the new cell arrives on the existing cube
+
+ int *cube_id_dest = NULL; // initialised so free() below is safe even if the query writes nothing
+ int n_cube = 0;
+ fieldstat_get_cubes(instance_dest, &cube_id_dest, &n_cube);
+ EXPECT_TRUE(n_cube == 1);
+ free(cube_id_dest);
+ EXPECT_STREQ(fieldstat_get_metric_name(instance_dest, 0), "metric");
+ double measure = merge_test_fieldstat_hll_get(instance, cube_id, metric_id); // keep the estimate as double: truncating to an integer turns e.g. 1.9 into 1 and fails the tolerance check
+ EXPECT_NEAR(measure, 2, 0.3); // NOTE(review): value and cells are read from the source instance, not instance_dest - confirm that is intended
+
+ struct field_list *tag_list = NULL;
+ size_t n_cell = 0;
+ fieldstat_cube_get_cells(instance, cube_id, &tag_list, &n_cell);
+ EXPECT_EQ(n_cell, 1);
+ EXPECT_EQ(tag_list->n_field, 1);
+ EXPECT_STREQ(tag_list->field[0].key, TEST_TAG_STRING.key);
+
+ fieldstat_free(instance);
+ fieldstat_free(instance_dest);
+ fieldstat_tag_list_arr_free(tag_list, n_cell);
+}
+
+TEST(unit_test_merge, merge_existing_cell_on_existing_cube_and_metric_spreadsketch) { // repeated merges of the same cell must not inflate its estimate
+ struct fieldstat *instance = fieldstat_new();
+ int cube_id = fieldstat_create_cube(instance, &TEST_SHARED_TAG, 1, SAMPLING_MODE_SPREADSKETCH, 10);
+ int metric_id = fieldstat_register_hll(instance, "metric", 6);
+ fieldstat_hll_add(instance, cube_id, metric_id, &TEST_TAG_STRING, 1, "1", 1);
+ struct fieldstat *instance_dest = fieldstat_new();
+
+ fieldstat_merge(instance_dest, instance);
+ fieldstat_merge(instance_dest, instance); // same content merged a second time
+ fieldstat_hll_add(instance, cube_id, metric_id, &TEST_TAG_STRING, 1, "2", 1); // second distinct item on the same cell
+ fieldstat_merge(instance_dest, instance);
+
+ struct field_list *tag_list = NULL;
+ size_t n_cell = 0;
+ fieldstat_cube_get_cells(instance_dest, cube_id, &tag_list, &n_cell);
+ EXPECT_EQ(n_cell, 1);
+ double value = merge_test_fieldstat_hll_get(instance_dest, cube_id, metric_id, &tag_list[0]);
+ EXPECT_NEAR(value, 2, 0.3); // two distinct items ("1" and "2") were added in total
+
+ fieldstat_free(instance);
+ fieldstat_free(instance_dest);
+ fieldstat_tag_list_arr_free(tag_list, n_cell);
+}
+
+TEST(unit_test_merge, new_too_many_cells_on_one_metric_given_source_cube_reset_and_get_different_cube_spreadsketch) { // dest cube (created with 2 cells) receives a third distinct cell from a reset source
+ struct fieldstat *instance = fieldstat_new();
+ int cube_id = fieldstat_create_cube(instance, &TEST_SHARED_TAG, 1, SAMPLING_MODE_SPREADSKETCH, 2);
+ int metric_id = fieldstat_register_hll(instance, "metric", 6);
+ fieldstat_hll_add(instance, cube_id, metric_id, &TEST_TAG_STRING, 1, "1", 1); // STRING cell: 1 distinct item
+ struct fieldstat *instance_dest = fieldstat_new();
+ fieldstat_merge(instance_dest, instance);
+
+ fieldstat_reset(instance);
+ fieldstat_hll_add(instance, cube_id, metric_id, &TEST_TAG_INT, 1, "21", 2); // INT cell: 2 distinct items
+ fieldstat_hll_add(instance, cube_id, metric_id, &TEST_TAG_INT, 1, "22", 2);
+ fieldstat_hll_add(instance, cube_id, metric_id, &TEST_TAG_DOUBLE, 1, "31", 2); // DOUBLE cell: 3 distinct items
+ fieldstat_hll_add(instance, cube_id, metric_id, &TEST_TAG_DOUBLE, 1, "32", 2);
+ fieldstat_hll_add(instance, cube_id, metric_id, &TEST_TAG_DOUBLE, 1, "33", 2);
+ fieldstat_merge(instance_dest, instance);
+
+ struct field_list *tag_list = NULL;
+ size_t n_cell = 0;
+ fieldstat_cube_get_cells(instance_dest, 0, &tag_list, &n_cell); // cube id and metric id are passed as literal 0 from here on
+ EXPECT_EQ(n_cell, 2);
+ EXPECT_NEAR(merge_test_fieldstat_hll_get(instance_dest, 0, 0, &tag_list[0]), 3, 0.3);
+ EXPECT_NEAR(merge_test_fieldstat_hll_get(instance_dest, 0, 0, &tag_list[1]), 2, 0.3);
+ EXPECT_STREQ(tag_list[0].field[0].key, TEST_TAG_DOUBLE.key); // expects DOUBLE (3 items) first, INT (2 items) second; the STRING cell is not among the 2 cells
+ EXPECT_STREQ(tag_list[1].field[0].key, TEST_TAG_INT.key);
+
+ fieldstat_free(instance);
+ fieldstat_free(instance_dest);
+ fieldstat_tag_list_arr_free(tag_list, n_cell);
+}
+
+TEST(unit_test_merge, gen_dest_full_all_src_inserted_given_src_flows_larger_spreadsketch) { // source holds far more flows than dest; after the merge the source cells should dominate
+ int K = 100;
+ SpreadSketchZipfGenerator flow_generator(1.0, K); // exactly the number of cells, so there will be almost all(in case of hash collision happen) cells added successfully
+ struct fieldstat *instance_src = fieldstat_new();
+ int cube_id = fieldstat_create_cube(instance_src, &TEST_SHARED_TAG, 1, SAMPLING_MODE_SPREADSKETCH, K);
+ int metric_id = fieldstat_register_hll(instance_src, "metric", 6);
+ struct fieldstat *instance_dest = fieldstat_fork(instance_src); // dest is forked from src before any data is added
+ const char dest_key[] = "key of dest";
+ const char src_key[] = "key of src";
+
+ std::unordered_map<std::string, std::unordered_set<std::string>> flow_cnt; // exact reference: dimension string -> set of distinct item strings
+ for (int i = 0; i < 500000; i++) { // add more, so the fanout of any flow to src instance is more than dest
+ Flow flow = flow_generator.next();
+ Fieldstat_tag_list_wrapper dimension = Fieldstat_tag_list_wrapper(src_key, flow.src_ip.c_str());
+ Fieldstat_tag_list_wrapper item = Fieldstat_tag_list_wrapper("dummy", flow.dst_ip.c_str());
+ fieldstat_hll_add_field(instance_src, cube_id, metric_id, dimension.get_tag(), dimension.get_tag_count(), item.get_tag(), item.get_tag_count());
+
+ flow_cnt[dimension.to_string()].insert(item.to_string());
+ }
+
+ for (int i = 0; i < 1000; i++) { // dest gets only 1000 flows, keyed by dest_key
+ Flow flow = flow_generator.next();
+ Fieldstat_tag_list_wrapper dimension = Fieldstat_tag_list_wrapper(dest_key, flow.src_ip.c_str());
+ Fieldstat_tag_list_wrapper item = Fieldstat_tag_list_wrapper("dummy", flow.dst_ip.c_str());
+ fieldstat_hll_add_field(instance_dest, cube_id, metric_id, dimension.get_tag(), dimension.get_tag_count(), item.get_tag(), item.get_tag_count());
+
+ flow_cnt[dimension.to_string()].insert(item.to_string());
+ }
+
+ fieldstat_merge(instance_dest, instance_src);
+
+ struct field_list *tag_list = NULL;
+ struct field_list *tag_list_src = NULL;
+ size_t n_cell = 0;
+ size_t n_cell_src = 0;
+ std::vector<struct Fieldstat_tag_list_wrapper *> test_result; // owning raw pointers, deleted at the end of the test
+ fieldstat_cube_get_cells(instance_dest, 0, &tag_list, &n_cell);
+ fieldstat_cube_get_cells(instance_src, 0, &tag_list_src, &n_cell_src);
+ for (size_t i = 0; i < n_cell; i++) {
+ test_result.push_back(new Fieldstat_tag_list_wrapper(&tag_list[i]));
+ }
+ std::unordered_map<std::string, int> expected_unique_cnt; // exact distinct-item count per dimension
+ for (auto &kv : flow_cnt) {
+ expected_unique_cnt[kv.first] = kv.second.size();
+ }
+
+ double recall = test_cal_topk_accuracy(test_result, expected_unique_cnt);
+ EXPECT_NEAR(recall, n_cell_src * 1.0 / n_cell, 0.0001); // the false positive is only generated because some cells in src are left because of hash collision
+
+ fieldstat_free(instance_src);
+ fieldstat_free(instance_dest);
+ fieldstat_tag_list_arr_free(tag_list, n_cell);
+ fieldstat_tag_list_arr_free(tag_list_src, n_cell_src);
+ for (size_t i = 0; i < test_result.size(); i++) {
+ delete test_result[i];
+ }
+}
+
+TEST(unit_test_merge, merge_accuracy_test_gen_dest_full_some_inserted_and_some_merged_and_some_fail_to_add_spreadsketch) { // src and dest share the "common" dimensions and each has private ones; merged recall must stay above 0.7
+ int K = 100;
+ SpreadSketchZipfGenerator flow_generator(1.0, K); // exactly the number of cells, so there will be almost all(in case of hash collision happen) cells added successfully
+ struct fieldstat *instance_src = fieldstat_new();
+ int cube_id = fieldstat_create_cube(instance_src, &TEST_SHARED_TAG, 1, SAMPLING_MODE_SPREADSKETCH, K);
+ int metric_id = fieldstat_register_hll(instance_src, "metric", 6);
+ struct fieldstat *instance_dest = fieldstat_fork(instance_src); // dest is forked from src before any data is added
+
+ std::unordered_map<std::string, std::unordered_set<std::string>> flow_cnt; // exact reference: dimension string -> set of distinct item strings
+ for (int i = 0; i < 100000; i++) { // source side: dimensions keyed "src" or "common"
+ Flow flow = flow_generator.next();
+ const char *use_key = rand()%2? "src":"common";
+ Fieldstat_tag_list_wrapper dimension = Fieldstat_tag_list_wrapper(use_key, flow.src_ip.c_str());
+ Fieldstat_tag_list_wrapper item = Fieldstat_tag_list_wrapper("dummy", flow.dst_ip.c_str());
+ fieldstat_hll_add_field(instance_src, cube_id, metric_id, dimension.get_tag(), dimension.get_tag_count(), item.get_tag(), item.get_tag_count());
+
+ flow_cnt[dimension.to_string()].insert(item.to_string());
+ }
+ for (int i = 0; i < 100000; i++) { // destination side: dimensions keyed "dest" or "common"
+ Flow flow = flow_generator.next();
+ const char *use_key = rand()%2? "dest":"common";
+ Fieldstat_tag_list_wrapper dimension = Fieldstat_tag_list_wrapper(use_key, flow.src_ip.c_str());
+ Fieldstat_tag_list_wrapper item = Fieldstat_tag_list_wrapper("dummy", flow.dst_ip.c_str());
+ fieldstat_hll_add_field(instance_dest, cube_id, metric_id, dimension.get_tag(), dimension.get_tag_count(), item.get_tag(), item.get_tag_count()); // must feed the destination; feeding the source again left dest empty before the merge
+
+ flow_cnt[dimension.to_string()].insert(item.to_string());
+ }
+
+ fieldstat_merge(instance_dest, instance_src);
+
+ struct field_list *tag_list = NULL;
+ size_t n_cell = 0;
+ std::vector<struct Fieldstat_tag_list_wrapper *> test_result; // owning raw pointers, deleted at the end of the test
+ fieldstat_cube_get_cells(instance_dest, 0, &tag_list, &n_cell);
+ for (size_t i = 0; i < n_cell; i++) {
+ test_result.push_back(new Fieldstat_tag_list_wrapper(&tag_list[i]));
+ }
+
+ std::unordered_map<std::string, int> expected_unique_cnt; // exact distinct-item count per dimension
+ for (auto &kv : flow_cnt) {
+ expected_unique_cnt[kv.first] = kv.second.size();
+ }
+ double recall = test_cal_topk_accuracy(test_result, expected_unique_cnt);
+ EXPECT_GT(recall, 0.7);
+ printf("merge_accuracy_test_gen_dest_full_some_inserted_and_some_merged_and_some_fail_to_add_spreadsketch recall is %lf\n", recall); // label now names this test
+
+ fieldstat_free(instance_src);
+ fieldstat_free(instance_dest);
+ fieldstat_tag_list_arr_free(tag_list, n_cell);
+ for (size_t i = 0; i < test_result.size(); i++) {
+ delete test_result[i];
+ }
+}
int main(int argc, char *argv[])
{
testing::InitGoogleTest(&argc, argv);
- // testing::GTEST_FLAG(filter) = "unit_test_merge.merge_existing_cell_on_existing_cube_and_metric_topk";
+ testing::GTEST_FLAG(filter) = "*spreadsketch";
return RUN_ALL_TESTS();
} \ No newline at end of file
diff --git a/test/test_metric_hll.cpp b/test/test_metric_hll.cpp
index 8e96266..7c38de2 100644
--- a/test/test_metric_hll.cpp
+++ b/test/test_metric_hll.cpp
@@ -1,4 +1,10 @@
#include <gtest/gtest.h>
+#include <string>
+#include <vector>
+#include <unordered_map>
+#include <unordered_set>
+#include <math.h>
+
#include "fieldstat.h"
#include "utils.hpp"
@@ -216,9 +222,71 @@ TEST(metric_test_hll, add_with_wrong_metric_id_expecting_fail)
fieldstat_free(instance);
}
+TEST(metric_test_hll, spread_sketch_add_and_test_accuracy)
+{
+ struct fieldstat *instance = fieldstat_new();
+ int K = 10;
+ fieldstat_create_cube(instance, &TEST_TAG_INT_collided, 1, SAMPLING_MODE_SPREADSKETCH, K);
+ fieldstat_register_hll(instance, "testss", 6);
+
+ int n_flows = 100000;
+ std::unordered_map<std::string, std::unordered_set<std::string>> flow_cnt;
+ SpreadSketchZipfGenerator generator(1.0, K * 10); // give much bigger distribution, so that we can test the accuracy
+ for (int i = 0; i < n_flows; i++)
+ {
+ Flow f = generator.next();
+ Fieldstat_tag_list_wrapper dimension("src ip", f.src_ip.c_str());
+ Fieldstat_tag_list_wrapper counted("dst ip", f.dst_ip.c_str());
+
+ fieldstat_hll_add_field(instance, 0, 0, dimension.get_tag(), dimension.get_tag_count(), counted.get_tag(), counted.get_tag_count());
+
+ flow_cnt[dimension.to_string()].insert(counted.to_string());
+ }
+
+ // recall
+ std::unordered_map<std::string, int> expected_unique_cnt;
+ std::vector<struct Fieldstat_tag_list_wrapper *> test_result;
+ for (auto &kv : flow_cnt) {
+ expected_unique_cnt[kv.first] = kv.second.size();
+ }
+
+ struct field_list *tag_list = NULL;
+ size_t n_cell = 0;
+ fieldstat_cube_get_cells(instance, 0, &tag_list, &n_cell);
+ EXPECT_EQ(n_cell, K);
+ for (size_t i = 0; i < n_cell; i++) {
+ Fieldstat_tag_list_wrapper tmp = Fieldstat_tag_list_wrapper(&tag_list[i]);
+ test_result.push_back(new Fieldstat_tag_list_wrapper(tmp));
+ }
+ double recall = test_cal_topk_accuracy(test_result, expected_unique_cnt);
+ printf("spread_sketch_add_and_test_accuracy recall: %f\n", recall);
+ EXPECT_GE(recall, 0.8);
+
+ // MRE
+ double mre = 0;
+ for (size_t i = 0; i < n_cell; i++) {
+ Fieldstat_tag_list_wrapper tmp = Fieldstat_tag_list_wrapper(&tag_list[i]);
+ double value_true = expected_unique_cnt[tmp.to_string()];
+ double value_est;
+ fieldstat_hll_get(instance, 0, &tag_list[i], 0, &value_est);
+ // printf("the estimated value for %s is %f, the true value is %f\n", tmp.to_string().c_str(), value_est, value_true);
+
+ mre += fabs(value_true - value_est) / value_true;
+ }
+ mre = mre / n_cell;
+ printf("topk_add_and_test_accuracy Mean ratio e: %f\n", mre);
+ EXPECT_LE(mre, 0.2);
+
+ fieldstat_tag_list_arr_free(tag_list, n_cell);
+ fieldstat_free(instance);
+ for (auto &ptr : test_result) {
+ delete ptr;
+ }
+}
int main(int argc, char *argv[])
{
testing::InitGoogleTest(&argc, argv);
+ // testing::GTEST_FLAG(filter) = "metric_test_hll.spread_sketch_add_and_test_accuracy";
return RUN_ALL_TESTS();
}
diff --git a/test/test_register_and_reset.cpp b/test/test_register_and_reset.cpp
index 2e295e8..d94e935 100644
--- a/test/test_register_and_reset.cpp
+++ b/test/test_register_and_reset.cpp
@@ -23,6 +23,12 @@ TEST(test_register, delete_comprehensive_cube_with_cells_and_metrics)
fieldstat_counter_incrby(instance, cube_id, metric_id, &TEST_TAG_INT, 1, 1);
fieldstat_destroy_cube(instance, cube_id);
+
+ struct field_list *tag_list = fieldstat_cube_get_tags(instance, cube_id);
+ EXPECT_EQ(tag_list, nullptr);
+ int cube_id_ret = fieldstat_find_cube(instance, &TEST_SHARED_TAG, 1);
+ EXPECT_EQ(cube_id_ret, FS_ERR_INVALID_KEY);
+
fieldstat_free(instance);
}
@@ -34,9 +40,34 @@ TEST(test_register, delete_topk_cube_with_cells_and_metrics)
fieldstat_counter_incrby(instance, cube_id, metric_id, &TEST_TAG_INT, 1, 1);
fieldstat_destroy_cube(instance, cube_id);
+ struct field_list *tag_list = fieldstat_cube_get_tags(instance, cube_id);
+ EXPECT_EQ(tag_list, nullptr);
+ int cube_id_ret = fieldstat_find_cube(instance, &TEST_SHARED_TAG, 1);
+ EXPECT_EQ(cube_id_ret, FS_ERR_INVALID_KEY);
+
fieldstat_free(instance);
}
+TEST(test_register, delete_spreadsketch_cube_with_cells_and_metrics) // destroying a populated spreadsketch cube must make it unfindable
+{
+ struct fieldstat *instance = fieldstat_new();
+ int cube_id = fieldstat_create_cube(instance, &TEST_SHARED_TAG, 1, SAMPLING_MODE_SPREADSKETCH, 10);
+ int metric_id1 = fieldstat_register_counter(instance, "counter");
+ int metric_primary = fieldstat_register_hll(instance, "hll_primary", 5);
+ fieldstat_cube_set_primary_metric(instance, cube_id, metric_primary); // the HLL metric is registered second, so it is selected as primary explicitly
+ fieldstat_counter_incrby(instance, cube_id, metric_id1, &TEST_TAG_INT, 1, 1);
+ fieldstat_hll_add_field(instance, cube_id, metric_primary, &TEST_TAG_INT, 1, &TEST_TAG_DOUBLE, 1);
+
+ fieldstat_destroy_cube(instance, cube_id);
+ struct field_list *tag_list = fieldstat_cube_get_tags(instance, cube_id); // queried by the id of the destroyed cube
+ EXPECT_EQ(tag_list, nullptr);
+ int cube_id_ret = fieldstat_find_cube(instance, &TEST_SHARED_TAG, 1); // and by its shared tags
+ EXPECT_EQ(cube_id_ret, FS_ERR_INVALID_KEY);
+
+ fieldstat_free(instance);
+}
+
+
int test_get_max_metric_id(const struct fieldstat *instance)
{
int *metric_id_out;
@@ -46,7 +77,7 @@ int test_get_max_metric_id(const struct fieldstat *instance)
return n_metric - 1;
}
-TEST(test_register, reset_and_try_to_query_cell)
+TEST(test_register, reset_and_try_to_query_cell_comprehensive)
{
struct fieldstat *instance = fieldstat_new();
int cube_id = fieldstat_create_cube(instance, &TEST_SHARED_TAG, 1, SAMPLING_MODE_COMPREHENSIVE, 10);
@@ -54,14 +85,56 @@ TEST(test_register, reset_and_try_to_query_cell)
fieldstat_counter_incrby(instance, cube_id, metric_id, &TEST_TAG_INT, 1, 1);
fieldstat_reset(instance);
- EXPECT_EQ(test_get_max_metric_id(instance), 0);
long long value;
EXPECT_EQ(fieldstat_counter_get(instance, cube_id, &TEST_TAG_LIST_INT, metric_id, &value), FS_ERR_INVALID_TAG);
+ size_t n_cell;
+ struct field_list *tag_list;
+ fieldstat_cube_get_cells(instance, cube_id, &tag_list, &n_cell);
+ EXPECT_EQ(n_cell, 0);
+
fieldstat_free(instance);
}
-TEST(test_register, reset_and_new_cell)
+TEST(test_register, reset_and_try_to_query_cell_topk) // after reset a topk cube must report no cells
+{
+ struct fieldstat *instance = fieldstat_new();
+ int cube_id = fieldstat_create_cube(instance, &TEST_SHARED_TAG, 1, SAMPLING_MODE_TOPK, 10);
+ int metric_id = fieldstat_register_counter(instance, "counter");
+ fieldstat_counter_incrby(instance, cube_id, metric_id, &TEST_TAG_INT, 1, 1);
+
+ fieldstat_reset(instance);
+ long long value;
+ EXPECT_EQ(fieldstat_counter_get(instance, cube_id, &TEST_TAG_LIST_INT, metric_id, &value), FS_ERR_INVALID_TAG); // the cell added before the reset can no longer be looked up
+
+ size_t n_cell = 0; // initialised so the assertion never reads an indeterminate value
+ struct field_list *tag_list = NULL; // initialised for the same reason; no cells are expected back
+ fieldstat_cube_get_cells(instance, cube_id, &tag_list, &n_cell);
+ EXPECT_EQ(n_cell, 0);
+
+ fieldstat_free(instance);
+}
+
+TEST(test_register, reset_and_try_to_query_cell_spreadsketch) // after reset a spreadsketch cube must report no cells
+{
+ struct fieldstat *instance = fieldstat_new();
+ int cube_id = fieldstat_create_cube(instance, &TEST_SHARED_TAG, 1, SAMPLING_MODE_SPREADSKETCH, 10);
+ int metric_id = fieldstat_register_hll(instance, "hll", 5);
+ fieldstat_hll_add(instance, cube_id, metric_id, &TEST_TAG_INT, 1, "12abc", 5);
+
+ fieldstat_reset(instance);
+ double value;
+ EXPECT_EQ(fieldstat_hll_get(instance, cube_id, &TEST_TAG_LIST_INT, metric_id, &value), FS_ERR_INVALID_TAG); // the cell added before the reset can no longer be looked up
+
+ size_t n_cell = 0; // initialised so the assertion never reads an indeterminate value
+ struct field_list *tag_list = NULL; // initialised for the same reason; no cells are expected back
+ fieldstat_cube_get_cells(instance, cube_id, &tag_list, &n_cell);
+ EXPECT_EQ(n_cell, 0);
+
+ fieldstat_free(instance);
+}
+
+TEST(test_register, reset_and_new_cell_comprehensive)
{
struct fieldstat *instance = fieldstat_new();
int cube_id = fieldstat_create_cube(instance, &TEST_SHARED_TAG, 1, SAMPLING_MODE_COMPREHENSIVE, 2);
@@ -78,6 +151,134 @@ TEST(test_register, reset_and_new_cell)
fieldstat_free(instance);
}
+TEST(test_register, reset_and_new_cell_topk) // a cell rejected while the topk cube is full must be accepted after reset
+{
+ struct fieldstat *instance = fieldstat_new();
+ int cube_id = fieldstat_create_cube(instance, &TEST_SHARED_TAG, 1, SAMPLING_MODE_TOPK, 1); // cube created with a cell limit of 1
+ int metric_id = fieldstat_register_counter(instance, "counter");
+ fieldstat_counter_incrby(instance, cube_id, metric_id, &TEST_TAG_INT, 1, 100);//100: bigger value
+ int ret = fieldstat_counter_incrby(instance, cube_id, metric_id, &TEST_TAG_DOUBLE, 1, 1); // smaller value (1 vs 100) on a different cell
+ EXPECT_EQ(ret, FS_ERR_TOO_MANY_CELLS);
+
+ fieldstat_reset(instance);
+ ret = fieldstat_counter_incrby(instance, cube_id, metric_id, &TEST_TAG_DOUBLE, 1, 1); // identical call is expected to succeed after the reset
+ EXPECT_EQ(ret, FS_OK);
+
+ fieldstat_free(instance);
+}
+
+TEST(test_register, reset_and_new_cell_spreadsketch) // a cell rejected while the spreadsketch cube is full must be accepted after reset
+{
+ struct fieldstat *instance = fieldstat_new();
+ int cube_id = fieldstat_create_cube(instance, &TEST_SHARED_TAG, 1, SAMPLING_MODE_SPREADSKETCH, 1);
+ int metric_id = fieldstat_register_hll(instance, "hll", 5);
+ // spread sketch will store more data than expected cell number 1. So loop for many cells first to trigger the error
+ struct field test_tag_long = TEST_TAG_INT; // local copy so the value can be changed per iteration
+ for (int i = 0; i < 10000; i++) {
+ test_tag_long.value_longlong = i; // a different tag value on every iteration, i.e. a different cell
+ fieldstat_hll_add(instance, cube_id, metric_id, &test_tag_long, 1, "12abc", 5);
+ }
+ int ret = fieldstat_hll_add(instance, cube_id, metric_id, &TEST_TAG_DOUBLE, 1, "12abc", 5);
+ EXPECT_EQ(ret, FS_ERR_TOO_MANY_CELLS);
+
+ fieldstat_reset(instance);
+ ret = fieldstat_hll_add(instance, cube_id, metric_id, &TEST_TAG_DOUBLE, 1, "12abc", 5); // identical call is expected to succeed after the reset
+ EXPECT_EQ(ret, FS_OK);
+
+ fieldstat_free(instance);
+}
+
+TEST(test_register, ensure_recovery_more_faster_comprehensive) { // refilling a comprehensive cube after reset is expected to take less CPU time than the first fill
+ struct fieldstat *instance = fieldstat_new();
+ int cell_num = 1000;
+ int cube_id = fieldstat_create_cube(instance, &TEST_SHARED_TAG, 1, SAMPLING_MODE_COMPREHENSIVE, cell_num);
+ int metric_id = fieldstat_register_counter(instance, "counter");
+ struct field test_tag_long = TEST_TAG_INT; // local copy so the value can be changed per iteration
+
+ clock_t start = clock();
+ for (int i = 0; i < cell_num; i++) { // first fill: cell_num distinct tag values
+ test_tag_long.value_longlong = i;
+ fieldstat_counter_incrby(instance, cube_id, metric_id, &test_tag_long, 1, 1);
+ }
+ clock_t end = clock();
+ clock_t duration_initialize = end - start;
+
+ fieldstat_reset(instance);
+
+ start = clock();
+ for (int i = 0; i < cell_num; i++) { // second fill: same tag values, after the reset
+ test_tag_long.value_longlong = i;
+ fieldstat_counter_incrby(instance, cube_id, metric_id, &test_tag_long, 1, 1);
+ }
+ end = clock();
+ clock_t duration_reset = end - start;
+
+ EXPECT_LT(duration_reset, duration_initialize); // NOTE(review): strict comparison of two short clock() intervals; may be sensitive to timer resolution and machine load - confirm stability
+
+ fieldstat_free(instance);
+}
+
+TEST(test_register, ensure_recovery_more_faster_topk) { // refilling a topk cube after reset is expected to take less CPU time than the first fill
+ struct fieldstat *instance = fieldstat_new();
+ int cell_num = 1000;
+ int cube_id = fieldstat_create_cube(instance, &TEST_SHARED_TAG, 1, SAMPLING_MODE_TOPK, cell_num);
+ int metric_id = fieldstat_register_counter(instance, "counter");
+ struct field test_tag_long = TEST_TAG_INT; // local copy so the value can be changed per iteration
+
+ clock_t start = clock();
+ for (int i = 0; i < cell_num; i++) { // first fill: cell_num distinct tag values
+ test_tag_long.value_longlong = i;
+ fieldstat_counter_incrby(instance, cube_id, metric_id, &test_tag_long, 1, 1);
+ }
+ clock_t end = clock();
+ clock_t duration_initialize = end - start;
+
+ fieldstat_reset(instance);
+
+ start = clock();
+ for (int i = 0; i < cell_num; i++) { // second fill: same tag values, after the reset
+ test_tag_long.value_longlong = i;
+ fieldstat_counter_incrby(instance, cube_id, metric_id, &test_tag_long, 1, 1);
+ }
+ end = clock();
+ clock_t duration_reset = end - start;
+
+ EXPECT_LT(duration_reset, duration_initialize); // NOTE(review): strict comparison of two short clock() intervals; may be sensitive to timer resolution and machine load - confirm stability
+
+ fieldstat_free(instance);
+}
+
+TEST(test_register, ensure_recovery_more_faster_spreadsketch) { // refilling a spreadsketch cube after reset is expected to take less CPU time than the first fill
+ struct fieldstat *instance = fieldstat_new();
+ int cell_num = 1000;
+ int cube_id = fieldstat_create_cube(instance, &TEST_SHARED_TAG, 1, SAMPLING_MODE_SPREADSKETCH, cell_num);
+ int metric_id = fieldstat_register_hll(instance, "hll", 5); // must be an HLL metric: the loops below call fieldstat_hll_add on it
+ struct field test_tag_long = TEST_TAG_INT; // local copy so the value can be changed per iteration
+
+ clock_t start = clock();
+ for (int i = 0; i < cell_num; i++) { // first fill: cell_num distinct tag values
+ test_tag_long.value_longlong = i;
+ fieldstat_hll_add(instance, cube_id, metric_id, &test_tag_long, 1, "1", 1);
+ }
+ clock_t end = clock();
+ clock_t duration_initialize = end - start;
+
+ fieldstat_reset(instance);
+
+ start = clock();
+ for (int i = 0; i < cell_num; i++) { // second fill: same tag values, after the reset
+ test_tag_long.value_longlong = i;
+ fieldstat_hll_add(instance, cube_id, metric_id, &test_tag_long, 1, "1", 1);
+ }
+ end = clock();
+ clock_t duration_reset = end - start;
+
+ printf("initialize: %ld, reset: %ld\n", duration_initialize, duration_reset);
+ EXPECT_LT(duration_reset, duration_initialize); // NOTE(review): strict comparison of two short clock() intervals; may be sensitive to timer resolution and machine load - confirm stability
+
+ fieldstat_free(instance);
+}
+
TEST(test_register, register_many_cubes)
{
struct fieldstat *instance = fieldstat_new();
@@ -109,7 +310,7 @@ TEST(test_register, add_many_tagged_cells)
int cube_id = fieldstat_create_cube(instance, &TEST_SHARED_TAG, 1, SAMPLING_MODE_COMPREHENSIVE, 10);
size_t n_field = 1000;
struct field test_tag_long[n_field];
- for (int i = 0; i < n_field; i++) {
+ for (size_t i = 0; i < n_field; i++) {
test_tag_long[i] = TEST_TAG_INT; // will trigger realloc
}
diff --git a/test/utils.cpp b/test/utils.cpp
index 20475c3..eb14f6f 100644
--- a/test/utils.cpp
+++ b/test/utils.cpp
@@ -343,40 +343,10 @@ int zipf(double alpha, int n)
return(zipf_value);
}
-
-// class SpreadSketchZipfGenerator {
-// private:
-// const int MAX_DATA = 1000000;
-// std::pair<std::string, std::string> *loadeds;
-// unsigned cursor;
-
-// public:
-// SpreadSketchZipfGenerator(double alpha, int n) {
-
-// }
-
-// struct Flow next() {
-// int r_cursor = cursor % MAX_DATA;
-// struct Flow flow;
-// flow.src_ip = loadeds[r_cursor].first;
-// flow.dst_ip = loadeds[r_cursor].second;
-
-// cursor++;
-
-// return flow;
-// }
-
-// ~SpreadSketchZipfGenerator() {
-// delete[] loadeds;
-// }
-
-// double _alpha;
-// int _n;
-// };
-
SpreadSketchZipfGenerator::SpreadSketchZipfGenerator(double alpha, int n) {
_alpha = alpha;
_n = n;
+ cursor = 0;
// generate data and write them to file
std::string filename = "zipf_" + std::to_string(alpha) + "_" + std::to_string(n) + ".txt";
@@ -411,28 +381,28 @@ SpreadSketchZipfGenerator::SpreadSketchZipfGenerator(double alpha, int n) {
return;
}
- loadeds = new std::pair<std::string, std::string>[MAX_DATA];
+ loadeds = new std::vector<std::pair<std::string, std::string>>;
std::string line;
- int i = 0;
- while (std::getline(file, line) && i < MAX_DATA) {
+ while (std::getline(file, line)) {
std::istringstream iss(line);
std::string src_ip, dst_ip;
iss >> src_ip >> dst_ip;
- loadeds[i] = std::make_pair(src_ip, dst_ip);
- i++;
+ loadeds->push_back(std::make_pair(src_ip, dst_ip));
}
file.close();
+
}
SpreadSketchZipfGenerator::~SpreadSketchZipfGenerator() {
- delete[] loadeds;
+ delete loadeds;
}
struct Flow SpreadSketchZipfGenerator::next() {
- int r_cursor = cursor % MAX_DATA;
+ int r_cursor = cursor % loadeds->size();
struct Flow flow;
- flow.src_ip = loadeds[r_cursor].first;
- flow.dst_ip = loadeds[r_cursor].second;
+
+ flow.src_ip = loadeds->at(r_cursor).first;
+ flow.dst_ip = loadeds->at(r_cursor).second;
cursor++;
diff --git a/test/utils.hpp b/test/utils.hpp
index 84f5e09..ce73db0 100644
--- a/test/utils.hpp
+++ b/test/utils.hpp
@@ -55,7 +55,7 @@ struct Flow {
class SpreadSketchZipfGenerator {
private:
const int MAX_DATA = 1000000;
- std::pair<std::string, std::string> *loadeds;
+ std::vector<std::pair<std::string, std::string>> *loadeds;
unsigned cursor;
public: