diff options
Diffstat (limited to 'test/test_merge.cpp')
| -rw-r--r-- | test/test_merge.cpp | 538 |
1 files changed, 365 insertions, 173 deletions
diff --git a/test/test_merge.cpp b/test/test_merge.cpp index a37dff9..46583d1 100644 --- a/test/test_merge.cpp +++ b/test/test_merge.cpp @@ -2,6 +2,7 @@ #include <gtest/gtest.h> #include <set> #include <unordered_map> +#include <unordered_set> #include "fieldstat.h" #include "utils.hpp" @@ -31,64 +32,37 @@ double test_cal_accuracy_given_expected_key(vector<struct Fieldstat_tag_list_wra return test_cal_topk_accuracy(test_result, countMap); } -long long merge_test_fieldstat_counter_get(const struct fieldstat *instance, int cube_id, int metric_id, const struct fieldstat_tag_list *tag_list = &TEST_TAG_LIST_STRING) +long long merge_test_fieldstat_counter_get(const struct fieldstat *instance, int cube_id, int metric_id, const struct field_list *tag_list = &TEST_FIELD_LIST_STRING) { long long ret = 0; - fieldstat_counter_get(instance, cube_id, metric_id, tag_list, &ret); + fieldstat_counter_get(instance, cube_id, tag_list, metric_id, &ret); return ret; } -TEST(unit_test_merge, test_metric_name_mapping_with_new_metric_on_existing_cube) +double merge_test_fieldstat_hll_get(const struct fieldstat *instance, int cube_id, int metric_id, const struct field_list *tag_list = &TEST_FIELD_LIST_STRING) { - struct fieldstat *instance = fieldstat_new(); - - int cube_id1 = fieldstat_create_cube(instance, &TEST_SHARED_TAG, 1, SAMPLING_MODE_COMPREHENSIVE, 10); - int metric_id_1_0 = fieldstat_register_counter(instance, "metric_name cube1 cube2"); - int metric_id_1_1 = fieldstat_register_counter(instance, "shared name"); - fieldstat_counter_incrby(instance, cube_id1, metric_id_1_0, &TEST_TAG_STRING, 1, 1); - fieldstat_counter_incrby(instance, cube_id1, metric_id_1_1, &TEST_TAG_STRING, 1, 2); - int cube_id2 = fieldstat_create_cube(instance, &TEST_TAG_INT, 1, SAMPLING_MODE_COMPREHENSIVE, 10); - fieldstat_counter_incrby(instance, cube_id2, metric_id_1_0, &TEST_TAG_STRING, 1, 3); - - struct fieldstat *instance_dest = fieldstat_new(); - int cube_id_dest = fieldstat_create_cube(instance_dest, 
&TEST_SHARED_TAG, 1, SAMPLING_MODE_COMPREHENSIVE, 10); - (void)fieldstat_register_counter(instance_dest, "shared name"); - // shared name metric is not operated on cube_id_dest - - EXPECT_EQ(fieldstat_merge(instance_dest, instance), FS_OK); - - int *cube_id; - int n_cube; - fieldstat_get_cubes(instance_dest, &cube_id, &n_cube); - EXPECT_TRUE(n_cube == 2); - EXPECT_TRUE(cube_id[0] == cube_id_dest); - - int *metric_ids; - size_t n_metrics; - fieldstat_get_metrics_used_by_cube(instance_dest, cube_id_dest, &metric_ids, &n_metrics); - EXPECT_EQ(n_metrics, 2); - EXPECT_STREQ(fieldstat_get_metric_name(instance_dest, metric_ids[0]), "shared name"); - EXPECT_STREQ(fieldstat_get_metric_name(instance_dest, metric_ids[1]), "metric_name cube1 cube2"); - - EXPECT_EQ(merge_test_fieldstat_counter_get(instance_dest, cube_id_dest, 0), 2); // shared name - EXPECT_EQ(merge_test_fieldstat_counter_get(instance_dest, cube_id_dest, 1), 1); // metric_name cube1 cube2 on cube1 - EXPECT_EQ(merge_test_fieldstat_counter_get(instance_dest, cube_id[1], 1), 3); // metric_name cube1 cube2 on cube2 + double ret = 0; + fieldstat_hll_get(instance, cube_id, tag_list, metric_id, &ret); + return ret; +} - fieldstat_free(instance); - fieldstat_free(instance_dest); - free(cube_id); - free(metric_ids); +int test_fieldstat_cube_create(struct fieldstat *instance, const struct field *dimensions, size_t n_dimensions, enum sampling_mode mode, int k, int primary_metric_id=0) +{ + assert(mode == SAMPLING_MODE_COMPREHENSIVE); + int ret = fieldstat_cube_create(instance, dimensions, n_dimensions); + fieldstat_cube_set_sampling(instance, ret, mode, k, primary_metric_id); + return ret; } TEST(unit_test_merge, cube_shared_tag_mapping_with_new_cube) { struct fieldstat *instance = fieldstat_new(); - (void)fieldstat_create_cube(instance, &TEST_TAG_DOUBLE, 1, SAMPLING_MODE_COMPREHENSIVE, 10); - int cube_id2 = fieldstat_create_cube(instance, &TEST_SHARED_TAG, 1, SAMPLING_MODE_COMPREHENSIVE, 10); - int metric_id = 
fieldstat_register_counter(instance, "metric in cube 2"); - fieldstat_counter_incrby(instance, cube_id2, metric_id, &TEST_TAG_STRING, 1, 1); + (void)test_fieldstat_cube_create(instance, &TEST_FIELD_DOUBLE, 1, SAMPLING_MODE_COMPREHENSIVE, 10); + int cube_id2 = test_fieldstat_cube_create(instance, &TEST_SHARED_TAG, 1, SAMPLING_MODE_COMPREHENSIVE, 10); + int metric_id = fieldstat_register_counter(instance,cube_id2,"metric in cube 2"); + fieldstat_counter_incrby(instance, cube_id2, metric_id, &TEST_FIELD_STRING, 1, 1); struct fieldstat *instance_dest = fieldstat_new(); - int cube_id_dest = fieldstat_create_cube(instance_dest, &TEST_SHARED_TAG, 1, SAMPLING_MODE_COMPREHENSIVE, 10); + int cube_id_dest = test_fieldstat_cube_create(instance_dest, &TEST_SHARED_TAG, 1, SAMPLING_MODE_COMPREHENSIVE, 10); fieldstat_merge(instance_dest, instance); @@ -121,8 +95,8 @@ TEST(unit_test_merge, empty_instance) TEST(unit_test_merge, new_cube_and_metric_to_empty_comprehensive) { struct fieldstat *instance = fieldstat_new(); - fieldstat_create_cube(instance, &TEST_TAG_INT, 1, SAMPLING_MODE_COMPREHENSIVE, 10); - fieldstat_register_counter(instance, "metric_name"); + test_fieldstat_cube_create(instance, &TEST_FIELD_INT, 1, SAMPLING_MODE_COMPREHENSIVE, 10); + fieldstat_register_counter(instance, 0, "metric_name"); struct fieldstat *instance_dest = fieldstat_new(); @@ -132,7 +106,7 @@ TEST(unit_test_merge, new_cube_and_metric_to_empty_comprehensive) int n_cube; fieldstat_get_cubes(instance_dest, &cube_id_dest, &n_cube); EXPECT_TRUE(n_cube == 1); - EXPECT_STREQ(fieldstat_get_metric_name(instance_dest, 0), "metric_name"); + EXPECT_STREQ(fieldstat_get_metric_name(instance_dest, cube_id_dest[0], 0), "metric_name"); fieldstat_free(instance); fieldstat_free(instance_dest); @@ -142,47 +116,47 @@ TEST(unit_test_merge, new_cube_and_metric_to_empty_comprehensive) TEST(unit_test_merge, new_cell_on_existing_cube_and_metric_comprehensive) { struct fieldstat *instance = fieldstat_new(); - int cube_id = 
fieldstat_create_cube(instance, &TEST_SHARED_TAG, 1, SAMPLING_MODE_COMPREHENSIVE, 10); - int metric_id = fieldstat_register_counter(instance, "metric_name"); + int cube_id = test_fieldstat_cube_create(instance, &TEST_SHARED_TAG, 1, SAMPLING_MODE_COMPREHENSIVE, 10); + int metric_id = fieldstat_register_counter(instance, 0, "metric_name"); struct fieldstat *instance_dest = fieldstat_new(); fieldstat_merge(instance_dest, instance); - fieldstat_counter_incrby(instance, cube_id, metric_id, &TEST_TAG_STRING, 1, 10086); + fieldstat_counter_incrby(instance, cube_id, metric_id, &TEST_FIELD_STRING, 1, 10086); fieldstat_merge(instance_dest, instance); int *cube_id_dest; int n_cube; fieldstat_get_cubes(instance_dest, &cube_id_dest, &n_cube); EXPECT_TRUE(n_cube == 1); + EXPECT_STREQ(fieldstat_get_metric_name(instance_dest, cube_id_dest[0], 0), "metric_name"); free(cube_id_dest); - EXPECT_STREQ(fieldstat_get_metric_name(instance_dest, 0), "metric_name"); long long measure = merge_test_fieldstat_counter_get(instance, cube_id, metric_id); EXPECT_EQ(measure, 10086); - struct fieldstat_tag_list *tag_list = NULL; + struct field_list *tag_list = NULL; size_t n_cell = 0; - fieldstat_get_cells_used_by_metric(instance, cube_id, metric_id, &tag_list, &n_cell); + fieldstat_cube_get_cells(instance, cube_id, &tag_list, &n_cell); EXPECT_EQ(n_cell, 1); - EXPECT_EQ(tag_list->n_tag, 1); - EXPECT_STREQ(tag_list->tag[0].key, TEST_TAG_STRING.key); + EXPECT_EQ(tag_list->n_field, 1); + EXPECT_STREQ(tag_list->field[0].key, TEST_FIELD_STRING.key); fieldstat_free(instance); fieldstat_free(instance_dest); - fieldstat_tag_list_arr_free(tag_list, n_cell); + fieldstat_field_list_arr_free(tag_list, n_cell); } TEST(unit_test_merge, merge_existing_cell_on_existing_cube_and_metric_comprehensive) { struct fieldstat *instance = fieldstat_new(); - int cube_id = fieldstat_create_cube(instance, &TEST_SHARED_TAG, 1, SAMPLING_MODE_COMPREHENSIVE, 10); - int metric_id = fieldstat_register_counter(instance, 
"metric_name"); - fieldstat_counter_incrby(instance, cube_id, metric_id, &TEST_TAG_STRING, 1, 5); + int cube_id = test_fieldstat_cube_create(instance, &TEST_SHARED_TAG, 1, SAMPLING_MODE_COMPREHENSIVE, 10); + int metric_id = fieldstat_register_counter(instance, cube_id, "metric_name"); + fieldstat_counter_incrby(instance, cube_id, metric_id, &TEST_FIELD_STRING, 1, 5); struct fieldstat *instance_dest = fieldstat_new(); fieldstat_merge(instance_dest, instance); fieldstat_merge(instance_dest, instance); - EXPECT_STREQ(fieldstat_get_metric_name(instance_dest, 0), "metric_name"); + EXPECT_STREQ(fieldstat_get_metric_name(instance_dest, cube_id, 0), "metric_name"); long long measure = merge_test_fieldstat_counter_get(instance_dest, cube_id, metric_id); EXPECT_EQ(measure, 10); @@ -193,61 +167,61 @@ TEST(unit_test_merge, merge_existing_cell_on_existing_cube_and_metric_comprehens TEST(unit_test_merge, new_too_many_cells_on_one_metric_given_source_cube_reset_and_get_different_cube_comprehensive) { struct fieldstat *instance = fieldstat_new(); - int cube_id = fieldstat_create_cube(instance, &TEST_SHARED_TAG, 1, SAMPLING_MODE_COMPREHENSIVE, 2); // limit is 2 - int metric_id = fieldstat_register_counter(instance, "metric name"); - fieldstat_counter_incrby(instance, cube_id, metric_id, &TEST_TAG_STRING, 1, 1); + int cube_id = test_fieldstat_cube_create(instance, &TEST_SHARED_TAG, 1, SAMPLING_MODE_COMPREHENSIVE, 2); // limit is 2 + int metric_id = fieldstat_register_counter(instance, cube_id, "metric name"); + fieldstat_counter_incrby(instance, cube_id, metric_id, &TEST_FIELD_STRING, 1, 1); struct fieldstat *instance_dest = fieldstat_new(); fieldstat_merge(instance_dest, instance); fieldstat_reset(instance); - fieldstat_counter_incrby(instance, cube_id, metric_id, &TEST_TAG_INT, 1, 2); // 2nd cell - fieldstat_counter_incrby(instance, cube_id, metric_id, &TEST_TAG_DOUBLE, 1, 3); // 3rd cell, exceeding the limit 2 + fieldstat_counter_incrby(instance, cube_id, metric_id, 
&TEST_FIELD_INT, 1, 2); // 2nd cell + fieldstat_counter_incrby(instance, cube_id, metric_id, &TEST_FIELD_DOUBLE, 1, 3); // 3rd cell, exceeding the limit 2 fieldstat_merge(instance_dest, instance); - struct fieldstat_tag_list *tag_list = NULL; + struct field_list *tag_list = NULL; size_t n_cell = 0; - fieldstat_get_cells_used_by_metric(instance_dest, 0, 0, &tag_list, &n_cell); + fieldstat_cube_get_cells(instance_dest, 0, &tag_list, &n_cell); EXPECT_EQ(n_cell, 2); EXPECT_EQ(merge_test_fieldstat_counter_get(instance_dest, 0, 0, &tag_list[0]), 1); EXPECT_EQ(merge_test_fieldstat_counter_get(instance_dest, 0, 0, &tag_list[1]), 2); fieldstat_free(instance); fieldstat_free(instance_dest); - fieldstat_tag_list_arr_free(tag_list, n_cell); + fieldstat_field_list_arr_free(tag_list, n_cell); } TEST(unit_test_merge, new_too_many_cells_on_multiple_metric_given_source_cube_reset_and_get_different_cube_comprehensive) { struct fieldstat *instance = fieldstat_new(); - int cube_id = fieldstat_create_cube(instance, &TEST_SHARED_TAG, 1, SAMPLING_MODE_COMPREHENSIVE, 2); - int metric_id1 = fieldstat_register_counter(instance, "metric name1"); - int metric_id2 = fieldstat_register_counter(instance, "metric name2"); - fieldstat_counter_incrby(instance, cube_id, metric_id1, &TEST_TAG_STRING, 1, 1); // 1st cell on metric name1 + int cube_id = test_fieldstat_cube_create(instance, &TEST_SHARED_TAG, 1, SAMPLING_MODE_COMPREHENSIVE, 2); + int metric_id1 = fieldstat_register_counter(instance, cube_id, "metric name1"); + int metric_id2 = fieldstat_register_counter(instance, cube_id, "metric name2"); + fieldstat_counter_incrby(instance, cube_id, metric_id1, &TEST_FIELD_STRING, 1, 1); // 1st cell on metric name1 struct fieldstat *instance_dest = fieldstat_new(); fieldstat_merge(instance_dest, instance); fieldstat_reset(instance); - int metric_id3 = fieldstat_register_counter(instance, "metric name3"); - fieldstat_counter_incrby(instance, cube_id, metric_id3, &TEST_TAG_INT, 1, 2); // 2nd cell on metric 
name3, this is a metric dest dont have - fieldstat_counter_incrby(instance, cube_id, metric_id2, &TEST_TAG_DOUBLE, 1, 3); // 3nd cell on metric name2 + int metric_id3 = fieldstat_register_counter(instance, cube_id, "metric name3"); + fieldstat_counter_incrby(instance, cube_id, metric_id3, &TEST_FIELD_INT, 1, 2); // 2nd cell on metric name3, this is a metric dest dont have + fieldstat_counter_incrby(instance, cube_id, metric_id2, &TEST_FIELD_DOUBLE, 1, 3); // 3nd cell on metric name2 fieldstat_merge(instance_dest, instance); - struct fieldstat_tag_list *tag_list = NULL; - size_t n_cell = 0; - fieldstat_get_cells_used_by_metric(instance_dest, 0, metric_id1, &tag_list, &n_cell); - EXPECT_EQ(n_cell, 1); - EXPECT_STREQ(tag_list->tag[0].key, TEST_TAG_STRING.key); - fieldstat_tag_list_arr_free(tag_list, n_cell); - - fieldstat_get_cells_used_by_metric(instance_dest, 0, metric_id2, &tag_list, &n_cell); // 3nd cell failed to merge - EXPECT_EQ(n_cell, 0); + int *metric_ids = NULL; + size_t n_metrics = 0; + fieldstat_get_metric_in_cell(instance_dest, 0, &TEST_FIELD_LIST_STRING, &metric_ids, &n_metrics); + EXPECT_EQ(n_metrics, 1); + EXPECT_STREQ(fieldstat_get_metric_name(instance_dest, 0, metric_ids[0]), "metric name1"); + free(metric_ids); - fieldstat_get_cells_used_by_metric(instance_dest, 0, metric_id3, &tag_list, &n_cell); - EXPECT_EQ(n_cell, 1); - EXPECT_STREQ(tag_list->tag[0].key, TEST_TAG_INT.key); - fieldstat_tag_list_arr_free(tag_list, n_cell); + fieldstat_get_metric_in_cell(instance_dest, 0, &TEST_FIELD_LIST_INT, &metric_ids, &n_metrics); + EXPECT_EQ(n_metrics, 1); + EXPECT_STREQ(fieldstat_get_metric_name(instance_dest, 0, metric_ids[0]), "metric name3"); + free(metric_ids); + // 3nd cell failed to merge, because max sampling is 2 + fieldstat_get_metric_in_cell(instance_dest, 0, &TEST_FIELD_LIST_DOUBLE, &metric_ids, &n_metrics); + EXPECT_EQ(n_metrics, 0); fieldstat_free(instance); fieldstat_free(instance_dest); @@ -256,8 +230,9 @@ TEST(unit_test_merge, 
new_too_many_cells_on_multiple_metric_given_source_cube_re TEST(unit_test_merge, new_cube_and_metric_to_empty_topk) { struct fieldstat *instance = fieldstat_new(); - fieldstat_create_cube(instance, &TEST_TAG_INT, 1, SAMPLING_MODE_TOPK, 10); - fieldstat_register_counter(instance, "metric_name"); + fieldstat_cube_create(instance, &TEST_FIELD_INT, 1); + fieldstat_register_counter(instance, 0, "metric_name"); + fieldstat_cube_set_sampling(instance, 0, SAMPLING_MODE_TOPK, 10, 0); struct fieldstat *instance_dest = fieldstat_new(); @@ -267,7 +242,7 @@ TEST(unit_test_merge, new_cube_and_metric_to_empty_topk) int n_cube; fieldstat_get_cubes(instance_dest, &cube_id_dest, &n_cube); EXPECT_TRUE(n_cube == 1); - EXPECT_STREQ(fieldstat_get_metric_name(instance_dest, 0), "metric_name"); + EXPECT_STREQ(fieldstat_get_metric_name(instance_dest, cube_id_dest[0], 0), "metric_name"); fieldstat_free(instance); fieldstat_free(instance_dest); @@ -277,41 +252,43 @@ TEST(unit_test_merge, new_cube_and_metric_to_empty_topk) TEST(unit_test_merge, new_cell_on_existing_cube_and_metric_topk) { struct fieldstat *instance = fieldstat_new(); - int cube_id = fieldstat_create_cube(instance, &TEST_SHARED_TAG, 1, SAMPLING_MODE_TOPK, 10); - int metric_id = fieldstat_register_counter(instance, "metric_name"); + int cube_id = fieldstat_cube_create(instance, &TEST_SHARED_TAG, 1); + int metric_id = fieldstat_register_counter(instance, cube_id, "metric_name"); + fieldstat_cube_set_sampling(instance, cube_id, SAMPLING_MODE_TOPK, 10, 0); struct fieldstat *instance_dest = fieldstat_new(); fieldstat_merge(instance_dest, instance); - fieldstat_counter_incrby(instance, cube_id, metric_id, &TEST_TAG_STRING, 1, 10086); + fieldstat_counter_incrby(instance, cube_id, metric_id, &TEST_FIELD_STRING, 1, 10086); fieldstat_merge(instance_dest, instance); int *cube_id_dest; int n_cube; fieldstat_get_cubes(instance_dest, &cube_id_dest, &n_cube); EXPECT_TRUE(n_cube == 1); + 
EXPECT_STREQ(fieldstat_get_metric_name(instance_dest,cube_id_dest[0], 0), "metric_name"); free(cube_id_dest); - EXPECT_STREQ(fieldstat_get_metric_name(instance_dest, 0), "metric_name"); long long measure = merge_test_fieldstat_counter_get(instance, cube_id, metric_id); EXPECT_EQ(measure, 10086); - struct fieldstat_tag_list *tag_list = NULL; + struct field_list *tag_list = NULL; size_t n_cell = 0; - fieldstat_get_cells_used_by_metric(instance, cube_id, metric_id, &tag_list, &n_cell); + fieldstat_cube_get_cells(instance, cube_id, &tag_list, &n_cell); EXPECT_EQ(n_cell, 1); - EXPECT_EQ(tag_list->n_tag, 1); - EXPECT_STREQ(tag_list->tag[0].key, TEST_TAG_STRING.key); + EXPECT_EQ(tag_list->n_field, 1); + EXPECT_STREQ(tag_list->field[0].key, TEST_FIELD_STRING.key); fieldstat_free(instance); fieldstat_free(instance_dest); - fieldstat_tag_list_arr_free(tag_list, n_cell); + fieldstat_field_list_arr_free(tag_list, n_cell); } TEST(unit_test_merge, merge_existing_cell_on_existing_cube_and_metric_topk) { struct fieldstat *instance = fieldstat_new(); - int cube_id = fieldstat_create_cube(instance, &TEST_SHARED_TAG, 1, SAMPLING_MODE_TOPK, 10); - int metric_id = fieldstat_register_counter(instance, "metric_name"); - fieldstat_counter_incrby(instance, cube_id, metric_id, &TEST_TAG_STRING, 1, 5); + int cube_id = fieldstat_cube_create(instance, &TEST_SHARED_TAG, 1); + int metric_id = fieldstat_register_counter(instance, cube_id, "metric_name"); + fieldstat_cube_set_sampling(instance, cube_id, SAMPLING_MODE_TOPK, 10, 0); + fieldstat_counter_incrby(instance, cube_id, metric_id, &TEST_FIELD_STRING, 1, 5); struct fieldstat *instance_dest = fieldstat_new(); fieldstat_merge(instance_dest, instance); @@ -322,17 +299,17 @@ TEST(unit_test_merge, merge_existing_cell_on_existing_cube_and_metric_topk) fieldstat_get_cubes(instance_dest, &cube_id_dest, &n_cube); EXPECT_TRUE(n_cube == 1); int ret_cube_id = cube_id_dest[0]; + EXPECT_STREQ(fieldstat_get_metric_name(instance_dest,cube_id_dest[0], 0), 
"metric_name"); free(cube_id_dest); - EXPECT_STREQ(fieldstat_get_metric_name(instance_dest, 0), "metric_name"); - struct fieldstat_tag_list *tag_list = NULL; + struct field_list *tag_list = NULL; size_t n_cell = 0; - fieldstat_get_cells_used_by_metric(instance_dest, ret_cube_id, 0, &tag_list, &n_cell); + fieldstat_cube_get_cells(instance_dest, ret_cube_id, &tag_list, &n_cell); EXPECT_EQ(n_cell, 1); long long measure = merge_test_fieldstat_counter_get(instance_dest, cube_id, metric_id, &tag_list[0]); EXPECT_EQ(measure, 10); - fieldstat_tag_list_arr_free(tag_list, n_cell); + fieldstat_field_list_arr_free(tag_list, n_cell); fieldstat_free(instance); fieldstat_free(instance_dest); } @@ -340,36 +317,38 @@ TEST(unit_test_merge, merge_existing_cell_on_existing_cube_and_metric_topk) TEST(unit_test_merge, new_too_many_cells_on_one_metric_given_source_cube_reset_and_get_different_cube_topk) { struct fieldstat *instance = fieldstat_new(); - int cube_id = fieldstat_create_cube(instance, &TEST_SHARED_TAG, 1, SAMPLING_MODE_TOPK, 2); - int metric_id = fieldstat_register_counter(instance, "metric name"); - fieldstat_counter_incrby(instance, cube_id, metric_id, &TEST_TAG_STRING, 1, 1); + int cube_id = fieldstat_cube_create(instance, &TEST_SHARED_TAG, 1); + int metric_id = fieldstat_register_counter(instance, cube_id, "metric name"); + fieldstat_cube_set_sampling(instance, cube_id, SAMPLING_MODE_TOPK, 2, 0); + fieldstat_counter_incrby(instance, cube_id, metric_id, &TEST_FIELD_STRING, 1, 1); struct fieldstat *instance_dest = fieldstat_new(); fieldstat_merge(instance_dest, instance); fieldstat_reset(instance); - fieldstat_counter_incrby(instance, cube_id, metric_id, &TEST_TAG_INT, 1, 2); // 2nd cell - fieldstat_counter_incrby(instance, cube_id, metric_id, &TEST_TAG_DOUBLE, 1, 3); // 3rd cell,bigger than the others, so keep it + fieldstat_counter_incrby(instance, cube_id, metric_id, &TEST_FIELD_INT, 1, 2); // 2nd cell + fieldstat_counter_incrby(instance, cube_id, metric_id, 
&TEST_FIELD_DOUBLE, 1, 3); // 3rd cell,bigger than the others, so keep it fieldstat_merge(instance_dest, instance); - struct fieldstat_tag_list *tag_list = NULL; + struct field_list *tag_list = NULL; size_t n_cell = 0; - fieldstat_get_cells_used_by_metric(instance_dest, 0, 0, &tag_list, &n_cell); + fieldstat_cube_get_cells(instance_dest, 0, &tag_list, &n_cell); EXPECT_EQ(n_cell, 2); EXPECT_EQ(merge_test_fieldstat_counter_get(instance_dest, 0, 0, &tag_list[0]), 3); EXPECT_EQ(merge_test_fieldstat_counter_get(instance_dest, 0, 0, &tag_list[1]), 2); fieldstat_free(instance); fieldstat_free(instance_dest); - fieldstat_tag_list_arr_free(tag_list, n_cell); + fieldstat_field_list_arr_free(tag_list, n_cell); } -struct fieldstat *test_push_flows(vector<Fieldstat_tag_list_wrapper *> &flows_in_test, int K, long long count = 1) +struct fieldstat *topk_test_push_flows(vector<Fieldstat_tag_list_wrapper *> &flows_in_test, int K, long long count = 1) { struct fieldstat *instance = fieldstat_new(); - int cube_id = fieldstat_create_cube(instance, &TEST_SHARED_TAG, 1, SAMPLING_MODE_TOPK, K); - int metric_id = fieldstat_register_counter(instance, "metric name"); + int cube_id = fieldstat_cube_create(instance, &TEST_SHARED_TAG, 1); + int metric_id = fieldstat_register_counter(instance, cube_id, "metric name"); + fieldstat_cube_set_sampling(instance, cube_id, SAMPLING_MODE_TOPK, K, 0); for (size_t i = 0; i < flows_in_test.size(); i++) { fieldstat_counter_incrby(instance, cube_id, metric_id, flows_in_test[i]->get_tag(), flows_in_test[i]->get_tag_count(), count); } @@ -380,14 +359,14 @@ TEST(unit_test_merge, merge_accuracy_test_with_K_large_enough_topk) { int K = 100; vector<Fieldstat_tag_list_wrapper *> flows_in_src = test_gen_topk_flows(K, K); - struct fieldstat *instance_src = test_push_flows(flows_in_src, K); + struct fieldstat *instance_src = topk_test_push_flows(flows_in_src, K); vector<Fieldstat_tag_list_wrapper *> flows_in_dest = test_gen_topk_flows(K, K); - struct fieldstat 
*instance_dest = test_push_flows(flows_in_dest, K); + struct fieldstat *instance_dest = topk_test_push_flows(flows_in_dest, K); fieldstat_merge(instance_dest, instance_src); - struct fieldstat_tag_list *tag_list = NULL; + struct field_list *tag_list = NULL; size_t n_cell = 0; - fieldstat_get_cells_used_by_metric(instance_dest, 0, 0, &tag_list, &n_cell); + fieldstat_cube_get_cells(instance_dest, 0, &tag_list, &n_cell); vector<Fieldstat_tag_list_wrapper *> flows_in_merged; for (size_t i = 0; i < n_cell; i++) { @@ -400,7 +379,7 @@ TEST(unit_test_merge, merge_accuracy_test_with_K_large_enough_topk) fieldstat_free(instance_src); fieldstat_free(instance_dest); - fieldstat_tag_list_arr_free(tag_list, n_cell); + fieldstat_field_list_arr_free(tag_list, n_cell); for (size_t i = 0; i < flows_in_merged.size(); i++) { delete flows_in_merged[i]; } @@ -409,23 +388,23 @@ TEST(unit_test_merge, merge_accuracy_test_with_K_large_enough_topk) } } -TEST(unit_test_merge, merge_accuracy_test_gen_dest_full_all_inserted_given_src_flows_larger) +TEST(unit_test_merge, merge_accuracy_test_gen_dest_full_all_inserted_given_src_flows_larger_topk) { int K = 1000; vector<Fieldstat_tag_list_wrapper *> flows_in_src = test_gen_topk_flows(10000, K); - struct fieldstat *instance_src = test_push_flows(flows_in_src, K, 1000); // 1000 times larger than dest 1 + struct fieldstat *instance_src = topk_test_push_flows(flows_in_src, K, 1000); // 1000 times larger than dest 1 vector<Fieldstat_tag_list_wrapper *> flows_in_dest; for (int i = 0; i < K; i++) { Fieldstat_tag_list_wrapper *tmp = new Fieldstat_tag_list_wrapper("flows in dest", to_string(i).c_str()); flows_in_dest.push_back(tmp); } - struct fieldstat *instance_dest = test_push_flows(flows_in_dest, K, 1); + struct fieldstat *instance_dest = topk_test_push_flows(flows_in_dest, K, 1); fieldstat_merge(instance_dest, instance_src); - struct fieldstat_tag_list *tag_list = NULL; + struct field_list *tag_list = NULL; size_t n_cell = 0; - 
fieldstat_get_cells_used_by_metric(instance_dest, 0, 0, &tag_list, &n_cell); + fieldstat_cube_get_cells(instance_dest, 0, &tag_list, &n_cell); vector<Fieldstat_tag_list_wrapper *> flows_in_merged; for (size_t i = 0; i < n_cell; i++) { flows_in_merged.push_back(new Fieldstat_tag_list_wrapper(&tag_list[i])); @@ -438,7 +417,7 @@ TEST(unit_test_merge, merge_accuracy_test_gen_dest_full_all_inserted_given_src_f fieldstat_free(instance_src); fieldstat_free(instance_dest); - fieldstat_tag_list_arr_free(tag_list, n_cell); + fieldstat_field_list_arr_free(tag_list, n_cell); for (size_t i = 0; i < flows_in_merged.size(); i++) { delete flows_in_merged[i]; } @@ -447,18 +426,18 @@ TEST(unit_test_merge, merge_accuracy_test_gen_dest_full_all_inserted_given_src_f } } -TEST(unit_test_merge, merge_accuracy_test_gen_dest_full_some_inserted_and_some_merged_and_some_fail_to_add) +TEST(unit_test_merge, merge_accuracy_test_gen_dest_full_some_inserted_and_some_merged_and_some_fail_to_add_topk) { int K = 100; - vector<Fieldstat_tag_list_wrapper *> flows_in_src = test_gen_topk_flows(10000, K + 50); // let elephant flows in src and dest different - struct fieldstat *instance_src = test_push_flows(flows_in_src, K); - vector<Fieldstat_tag_list_wrapper *> flows_in_dest = test_gen_topk_flows(10000, K + 50); - struct fieldstat *instance_dest = test_push_flows(flows_in_dest, K); + vector<Fieldstat_tag_list_wrapper *> flows_in_src = test_gen_topk_flows(30000, K + 50); // let elephant flows in src and dest different + struct fieldstat *instance_src = topk_test_push_flows(flows_in_src, K); + vector<Fieldstat_tag_list_wrapper *> flows_in_dest = test_gen_topk_flows(30000, K + 50); + struct fieldstat *instance_dest = topk_test_push_flows(flows_in_dest, K); fieldstat_merge(instance_dest, instance_src); - struct fieldstat_tag_list *tag_list = NULL; + struct field_list *tag_list = NULL; size_t n_cell = 0; - fieldstat_get_cells_used_by_metric(instance_dest, 0, 0, &tag_list, &n_cell); + 
fieldstat_cube_get_cells(instance_dest, 0, &tag_list, &n_cell); vector<Fieldstat_tag_list_wrapper *> flows_in_merged; for (size_t i = 0; i < n_cell; i++) { flows_in_merged.push_back(new Fieldstat_tag_list_wrapper(&tag_list[i])); @@ -466,12 +445,12 @@ TEST(unit_test_merge, merge_accuracy_test_gen_dest_full_some_inserted_and_some_m flows_in_dest.insert(flows_in_dest.end(), std::make_move_iterator(flows_in_src.begin()), std::make_move_iterator(flows_in_src.end())); double accuracy = test_cal_accuracy_given_expected_key(flows_in_dest, flows_in_merged); - EXPECT_GE(accuracy, 0.87); // by heavy keeper benchmark, with K = 100, merging result should be about 0.96, for adding the flows will also cause some inaccuracy, so here we set 0.93 + EXPECT_GE(accuracy, 0.87); printf("merge_accuracy_test_gen_dest_full_some_inserted_and_some_merged_and_some_fail_to_add accuracy is %lf\n", accuracy); fieldstat_free(instance_src); fieldstat_free(instance_dest); - fieldstat_tag_list_arr_free(tag_list, n_cell); + fieldstat_field_list_arr_free(tag_list, n_cell); for (size_t i = 0; i < flows_in_merged.size(); i++) { delete flows_in_merged[i]; } @@ -483,23 +462,24 @@ TEST(unit_test_merge, merge_accuracy_test_gen_dest_full_some_inserted_and_some_m TEST(unit_test_merge, primary_metric_has_no_value) { struct fieldstat *instance = fieldstat_new(); - int cube_id = fieldstat_create_cube(instance, &TEST_SHARED_TAG, 1, SAMPLING_MODE_TOPK, 2); - int metric_primary = fieldstat_register_counter(instance, "primary"); - int metric_operated = fieldstat_register_counter(instance, "operated"); - fieldstat_counter_incrby(instance, cube_id, metric_operated, &TEST_TAG_STRING, 1, 1); + int cube_id = fieldstat_cube_create(instance, &TEST_SHARED_TAG, 1); + int metric_primary = fieldstat_register_counter(instance, cube_id, "primary"); + int metric_operated = fieldstat_register_counter(instance, cube_id, "operated"); + fieldstat_cube_set_sampling(instance, cube_id, SAMPLING_MODE_TOPK, 2, metric_primary); + 
fieldstat_counter_incrby(instance, cube_id, metric_operated, &TEST_FIELD_STRING, 1, 1); struct fieldstat *instance_dest = fieldstat_new(); fieldstat_merge(instance_dest, instance); fieldstat_merge(instance_dest, instance); - struct fieldstat_tag_list *tag_list = NULL; + struct field_list *tag_list = NULL; size_t n_cell = 0; - fieldstat_get_cells_used_by_metric(instance_dest, 0, metric_primary, &tag_list, &n_cell); - EXPECT_EQ(n_cell, 0); - - fieldstat_get_cells_used_by_metric(instance_dest, 0, metric_operated, &tag_list, &n_cell); + fieldstat_cube_get_cells(instance_dest, 0, &tag_list, &n_cell); EXPECT_EQ(n_cell, 1); + EXPECT_STREQ(tag_list[0].field[0].key, TEST_FIELD_STRING.key); + EXPECT_EQ(merge_test_fieldstat_counter_get(instance_dest, 0, metric_operated, &tag_list[0]), 2); - fieldstat_tag_list_arr_free(tag_list, n_cell); + EXPECT_EQ(merge_test_fieldstat_counter_get(instance_dest, 0, metric_primary, &tag_list[0]), 0); + fieldstat_field_list_arr_free(tag_list, n_cell); fieldstat_free(instance); fieldstat_free(instance_dest); @@ -508,42 +488,254 @@ TEST(unit_test_merge, primary_metric_has_no_value) TEST(unit_test_merge, primary_metric_id_different) { struct fieldstat *instance = fieldstat_new(); - int cube_id = fieldstat_create_cube(instance, &TEST_SHARED_TAG, 1, SAMPLING_MODE_TOPK, 2); - int metric_primary = fieldstat_register_counter(instance, "primary"); - int metric_2 = fieldstat_register_counter(instance, "2"); - fieldstat_counter_incrby(instance, cube_id, metric_primary, &TEST_TAG_STRING, 1, 100); - fieldstat_counter_incrby(instance, cube_id, metric_2, &TEST_TAG_STRING, 1, 1); + int cube_id = fieldstat_cube_create(instance, &TEST_SHARED_TAG, 1); + int metric_primary = fieldstat_register_counter(instance, cube_id, "primary"); + int metric_2 = fieldstat_register_counter(instance, cube_id, "2"); + fieldstat_cube_set_sampling(instance, cube_id, SAMPLING_MODE_TOPK, 2, metric_primary); + + fieldstat_counter_incrby(instance, cube_id, metric_primary, 
&TEST_FIELD_STRING, 1, 100);
+    fieldstat_counter_incrby(instance, cube_id, metric_2, &TEST_FIELD_STRING, 1, 1);
     struct fieldstat *instance_dst = fieldstat_new();
-    int cube_id_dst = fieldstat_create_cube(instance_dst, &TEST_SHARED_TAG, 1, SAMPLING_MODE_TOPK, 2);
-    fieldstat_register_counter(instance_dst, "2");
-    int metric_primary_dst = fieldstat_register_counter(instance_dst, "primary");
-    fieldstat_cube_set_primary_metric(instance_dst, cube_id_dst, metric_primary_dst);
+    int cube_id_dst = fieldstat_cube_create(instance_dst, &TEST_SHARED_TAG, 1);
+    fieldstat_register_counter(instance_dst, cube_id_dst, "2");
+    int metric_primary_dst = fieldstat_register_counter(instance_dst, cube_id_dst, "primary");
+    fieldstat_cube_set_sampling(instance_dst, cube_id_dst, SAMPLING_MODE_TOPK, 2, metric_primary_dst);
+
+    EXPECT_EQ(fieldstat_merge(instance_dst, instance), FS_ERR_DIFFERENT_CONFIGURATION_FOR_SAME_CUBE);
+
+    fieldstat_free(instance);
+    fieldstat_free(instance_dst);
+}
+
+// Merging into an empty destination must create the source's cube and its HLL metric there.
+TEST(unit_test_merge, new_cube_and_metric_to_empty_spreadsketch) {
+    struct fieldstat *instance = fieldstat_new();
+    fieldstat_cube_create(instance, &TEST_FIELD_INT, 1);
+    fieldstat_register_hll(instance, 0, "metric", 6);
+    fieldstat_cube_set_sampling(instance, 0, SAMPLING_MODE_TOP_CARDINALITY, 10, 0);
+
+    struct fieldstat *instance_dest = fieldstat_new();
+    fieldstat_merge(instance_dest, instance);
+
+    // Initialised so the dereference and free() below stay defined even if the getter leaves them untouched.
+    int *cube_id_dest = NULL;
+    int n_cube = 0;
+    fieldstat_get_cubes(instance_dest, &cube_id_dest, &n_cube);
+    EXPECT_TRUE(n_cube == 1);
+    EXPECT_STREQ(fieldstat_get_metric_name(instance_dest, cube_id_dest[0], 0), "metric");
+
+    free(cube_id_dest);
+    fieldstat_free(instance);
+    fieldstat_free(instance_dest);
+}
+
+// A destination that already holds the cube and metric (from a first merge) must
+// receive the cell that is added to the source afterwards.
+TEST(unit_test_merge, new_cell_on_existing_cube_and_metric_spreadsketch) {
+    struct fieldstat *instance = fieldstat_new();
+    int cube_id = fieldstat_cube_create(instance, &TEST_SHARED_TAG, 1);
+    int metric_id = fieldstat_register_hll(instance, cube_id, "metric", 6);
+    fieldstat_cube_set_sampling(instance, cube_id, SAMPLING_MODE_TOP_CARDINALITY, 10, 0);
+    struct fieldstat *instance_dest = fieldstat_new();
+    fieldstat_merge(instance_dest, instance);
+
+    fieldstat_hll_add(instance, cube_id, metric_id, &TEST_FIELD_STRING, 1, "1", 1);
+    fieldstat_hll_add(instance, cube_id, metric_id, &TEST_FIELD_STRING, 1, "2", 1);
+    fieldstat_merge(instance_dest, instance);
-    fieldstat_merge(instance_dst, instance);
+    // Initialised so the dereference and free() below stay defined even if the getter leaves them untouched.
+    int *cube_id_dest = NULL;
+    int n_cube = 0;
+    fieldstat_get_cubes(instance_dest, &cube_id_dest, &n_cube);
+    EXPECT_TRUE(n_cube == 1);
+    EXPECT_STREQ(fieldstat_get_metric_name(instance_dest, cube_id_dest[0], 0), "metric");
+    // The HLL estimate is fractional: keep it as a double so EXPECT_NEAR compares the
+    // untruncated value, and read it from the destination, which is what the merge filled.
+    double measure = merge_test_fieldstat_hll_get(instance_dest, cube_id_dest[0], metric_id);
+    EXPECT_NEAR(measure, 2, 0.3);
-    struct fieldstat_tag_list *tag_list = NULL;
+    struct field_list *tag_list = NULL;
     size_t n_cell = 0;
-    fieldstat_get_cells_used_by_metric(instance_dst, 0, metric_primary, &tag_list, &n_cell);
+    fieldstat_cube_get_cells(instance_dest, cube_id_dest[0], &tag_list, &n_cell);
     EXPECT_EQ(n_cell, 1);
-    int *metric_ids;
-    size_t n_metrics;
-    fieldstat_get_metrics(instance_dst, &metric_ids, &n_metrics);
-    EXPECT_EQ(n_metrics, 2);
-    EXPECT_STREQ(fieldstat_get_metric_name(instance_dst, metric_ids[0]), "2");
-    EXPECT_STREQ(fieldstat_get_metric_name(instance_dst, metric_ids[1]), "primary");
-
-    EXPECT_EQ(merge_test_fieldstat_counter_get(instance_dst, 0, metric_ids[1], &tag_list[0]), 100);
-    EXPECT_EQ(merge_test_fieldstat_counter_get(instance_dst, 0, metric_ids[0], &tag_list[0]), 1);
-    fieldstat_tag_list_arr_free(tag_list, n_cell);
-    free(metric_ids);
+    EXPECT_EQ(tag_list->n_field, 1);
+    EXPECT_STREQ(tag_list->field[0].key, TEST_FIELD_STRING.key);
+
+    free(cube_id_dest);
     fieldstat_free(instance);
-    fieldstat_free(instance_dst);
+    fieldstat_free(instance_dest);
+    fieldstat_field_list_arr_free(tag_list, n_cell);
 }
+// Merging the same source cell several times must not inflate its estimate: the
+// destination ends up with a single cell whose estimate is about 2.
+TEST(unit_test_merge, merge_existing_cell_on_existing_cube_and_metric_spreadsketch) {
+    struct fieldstat *instance = fieldstat_new();
+    int cube_id = fieldstat_cube_create(instance, &TEST_SHARED_TAG, 1);
+    int metric_id = fieldstat_register_hll(instance, cube_id, "metric", 6);
+    fieldstat_cube_set_sampling(instance, cube_id, SAMPLING_MODE_TOP_CARDINALITY, 10, 0);
+    fieldstat_hll_add(instance, cube_id, metric_id, &TEST_FIELD_STRING, 1, "1", 1);
+    struct fieldstat *instance_dest = fieldstat_new();
+
+    fieldstat_merge(instance_dest, instance);
+    fieldstat_merge(instance_dest, instance);
+    fieldstat_hll_add(instance, cube_id, metric_id, &TEST_FIELD_STRING, 1, "2", 1);
+    fieldstat_merge(instance_dest, instance);
+
+    struct field_list *tag_list = NULL;
+    size_t n_cell = 0;
+    fieldstat_cube_get_cells(instance_dest, cube_id, &tag_list, &n_cell);
+    EXPECT_EQ(n_cell, 1);
+    double value = merge_test_fieldstat_hll_get(instance_dest, cube_id, metric_id, &tag_list[0]);
+    EXPECT_NEAR(value, 2, 0.3);
+
+    fieldstat_free(instance);
+    fieldstat_free(instance_dest);
+    fieldstat_field_list_arr_free(tag_list, n_cell);
+}
+
+// With the sampling size set to 2, the source is reset and refilled with two other
+// cells; the test expects the destination to end up with exactly those two cells,
+// the one with estimate ~3 listed before the one with estimate ~2.
+TEST(unit_test_merge, new_too_many_cells_on_one_metric_given_source_cube_reset_and_get_different_cube_spreadsketch) {
+    struct fieldstat *instance = fieldstat_new();
+    int cube_id = fieldstat_cube_create(instance, &TEST_SHARED_TAG, 1);
+    int metric_id = fieldstat_register_hll(instance, cube_id, "metric", 6);
+    fieldstat_cube_set_sampling(instance, cube_id, SAMPLING_MODE_TOP_CARDINALITY, 2, 0);
+    fieldstat_hll_add(instance, cube_id, metric_id, &TEST_FIELD_STRING, 1, "1", 1);
+    struct fieldstat *instance_dest = fieldstat_new();
+    fieldstat_merge(instance_dest, instance);
+
+    fieldstat_reset(instance);
+    fieldstat_hll_add(instance, cube_id, metric_id, &TEST_FIELD_INT, 1, "21", 2);
+    fieldstat_hll_add(instance, cube_id, metric_id, &TEST_FIELD_INT, 1, "22", 2);
+    fieldstat_hll_add(instance, cube_id, metric_id, &TEST_FIELD_DOUBLE, 1, "31", 2);
+    fieldstat_hll_add(instance, cube_id, metric_id, &TEST_FIELD_DOUBLE, 1, "32", 2);
+    fieldstat_hll_add(instance, cube_id, metric_id, &TEST_FIELD_DOUBLE, 1, "33", 2);
+    fieldstat_merge(instance_dest, instance);
+
+    struct field_list *tag_list = NULL;
+    size_t n_cell = 0;
+    fieldstat_cube_get_cells(instance_dest, 0, &tag_list, &n_cell);
+    EXPECT_EQ(n_cell, 2);
+    EXPECT_NEAR(merge_test_fieldstat_hll_get(instance_dest, 0, 0, &tag_list[0]), 3, 0.3);
+    EXPECT_NEAR(merge_test_fieldstat_hll_get(instance_dest, 0, 0, &tag_list[1]), 2, 0.3);
+    EXPECT_STREQ(tag_list[0].field[0].key, TEST_FIELD_DOUBLE.key);
+    EXPECT_STREQ(tag_list[1].field[0].key, TEST_FIELD_INT.key);
+
+    fieldstat_free(instance);
+    fieldstat_free(instance_dest);
+    fieldstat_field_list_arr_free(tag_list, n_cell);
+}
+
+
+// Source and destination are filled under different dimension keys, the source with
+// far more items; after the merge, the recall of the destination's cells is compared
+// with n_cell_src / n_cell.
+TEST(unit_test_merge, gen_dest_full_all_src_inserted_given_src_flows_larger_spreadsketch) {
+    int K = 100;
+    SpreadSketchZipfGenerator flow_generator(1.0, K); // exactly the number of cells, so there will be almost all(in case of hash collision happen) cells added successfully
+    struct fieldstat *instance_src = fieldstat_new();
+    int cube_id = fieldstat_cube_create(instance_src, &TEST_SHARED_TAG, 1);
+    int metric_id = fieldstat_register_hll(instance_src, cube_id, "metric", 6);
+    fieldstat_cube_set_sampling(instance_src, cube_id, SAMPLING_MODE_TOP_CARDINALITY, K, 0);
+    struct fieldstat *instance_dest = fieldstat_fork(instance_src);
+    const char dest_key[] = "key of dest";
+    const char src_key[] = "key of src";
+
+    std::unordered_map<std::string, std::unordered_set<std::string>> flow_cnt;
+    for (int i = 0; i < 500000; i++) { // add more, so the fanout of any flow to src instance is more than dest
+        Flow flow = flow_generator.next();
+        Fieldstat_tag_list_wrapper dimension = Fieldstat_tag_list_wrapper(src_key, flow.src_ip.c_str());
+        Fieldstat_tag_list_wrapper item = Fieldstat_tag_list_wrapper("dummy", flow.dst_ip.c_str());
+        fieldstat_hll_add_field(instance_src, cube_id, metric_id, dimension.get_tag(), dimension.get_tag_count(), item.get_tag(), item.get_tag_count());
+
+        flow_cnt[dimension.to_string()].insert(item.to_string());
+    }
+
+    for (int i = 0; i < 1000; i++) {
+        Flow flow = flow_generator.next();
+        Fieldstat_tag_list_wrapper dimension = Fieldstat_tag_list_wrapper(dest_key, flow.src_ip.c_str());
+        Fieldstat_tag_list_wrapper item = Fieldstat_tag_list_wrapper("dummy", flow.dst_ip.c_str());
+        fieldstat_hll_add_field(instance_dest, cube_id, metric_id, dimension.get_tag(), dimension.get_tag_count(), item.get_tag(), item.get_tag_count());
+
+        flow_cnt[dimension.to_string()].insert(item.to_string());
+    }
+
+    fieldstat_merge(instance_dest, instance_src);
+
+    struct field_list *tag_list = NULL;
+    struct field_list *tag_list_src = NULL;
+    size_t n_cell = 0;
+    size_t n_cell_src = 0;
+    std::vector<struct Fieldstat_tag_list_wrapper *> test_result;
+    fieldstat_cube_get_cells(instance_dest, 0, &tag_list, &n_cell);
+    fieldstat_cube_get_cells(instance_src, 0, &tag_list_src, &n_cell_src);
+    for (size_t i = 0; i < n_cell; i++) {
+        test_result.push_back(new Fieldstat_tag_list_wrapper(&tag_list[i]));
+    }
+    // Ground truth: number of distinct items seen per dimension.
+    std::unordered_map<std::string, int> expected_unique_cnt;
+    for (auto &kv : flow_cnt) {
+        expected_unique_cnt[kv.first] = kv.second.size();
+    }
+
+    double recall = test_cal_topk_accuracy(test_result, expected_unique_cnt);
+    EXPECT_NEAR(recall, n_cell_src * 1.0 / n_cell, 0.0001); // the false positive is only generated because some cells in src are left because of hash collision
+
+    fieldstat_free(instance_src);
+    fieldstat_free(instance_dest);
+    fieldstat_field_list_arr_free(tag_list, n_cell);
+    fieldstat_field_list_arr_free(tag_list_src, n_cell_src);
+    for (size_t i = 0; i < test_result.size(); i++) {
+        delete test_result[i];
+    }
+}
+
+// Source and destination each receive items under a private key ("src" / "dest") and
+// under a shared key ("common"); the recall of the merged destination must reach 0.7.
+TEST(unit_test_merge, merge_accuracy_test_gen_dest_full_some_inserted_and_some_merged_and_some_fail_to_add_spreadsketch) {
+    int K = 10;
+    SpreadSketchZipfGenerator flow_generator(1.0, K * 10);
+    struct fieldstat *instance_src = fieldstat_new();
+    int cube_id = fieldstat_cube_create(instance_src, &TEST_SHARED_TAG, 1);
+    int metric_id = fieldstat_register_hll(instance_src, cube_id, "metric", 6);
+    fieldstat_cube_set_sampling(instance_src, cube_id, SAMPLING_MODE_TOP_CARDINALITY, K, 0);
+    struct fieldstat *instance_dest = fieldstat_fork(instance_src);
+
+    std::unordered_map<std::string, std::unordered_set<std::string>> flow_cnt;
+    for (int i = 0; i < 100000; i++) {
+        Flow flow = flow_generator.next();
+        const char *use_key = rand()%2? "src":"common";
+        Fieldstat_tag_list_wrapper dimension = Fieldstat_tag_list_wrapper(use_key, flow.src_ip.c_str());
+        Fieldstat_tag_list_wrapper item = Fieldstat_tag_list_wrapper("dummy", flow.dst_ip.c_str());
+        fieldstat_hll_add_field(instance_src, cube_id, metric_id, dimension.get_tag(), dimension.get_tag_count(), item.get_tag(), item.get_tag_count());
+
+        flow_cnt[dimension.to_string()].insert(item.to_string());
+    }
+    for (int i = 0; i < 100000; i++) {
+        Flow flow = flow_generator.next();
+        const char *use_key = rand()%2? "dest":"common";
+        Fieldstat_tag_list_wrapper dimension = Fieldstat_tag_list_wrapper(use_key, flow.src_ip.c_str());
+        Fieldstat_tag_list_wrapper item = Fieldstat_tag_list_wrapper("dummy", flow.dst_ip.c_str());
+        // The "dest"/"common" items belong in the destination; otherwise it is still empty when merged.
+        fieldstat_hll_add_field(instance_dest, cube_id, metric_id, dimension.get_tag(), dimension.get_tag_count(), item.get_tag(), item.get_tag_count());
+
+        flow_cnt[dimension.to_string()].insert(item.to_string());
+    }
+
+    fieldstat_merge(instance_dest, instance_src);
+
+    struct field_list *tag_list = NULL;
+    size_t n_cell = 0;
+    std::vector<struct Fieldstat_tag_list_wrapper *> test_result;
+    fieldstat_cube_get_cells(instance_dest, 0, &tag_list, &n_cell);
+    for (size_t i = 0; i < n_cell; i++) {
+        test_result.push_back(new Fieldstat_tag_list_wrapper(&tag_list[i]));
+    }
+
+    // Ground truth: number of distinct items seen per dimension, across both instances.
+    std::unordered_map<std::string, int> expected_unique_cnt;
+    for (auto &kv : flow_cnt) {
+        expected_unique_cnt[kv.first] = kv.second.size();
+    }
+    double recall = test_cal_topk_accuracy(test_result, expected_unique_cnt);
+    EXPECT_GE(recall, 0.7);
+    printf("merge_accuracy_test_gen_dest_full_some_inserted_and_some_merged_and_some_fail_to_add_spreadsketch recall is %lf\n", recall);
+
+    fieldstat_free(instance_src);
+    fieldstat_free(instance_dest);
+    fieldstat_field_list_arr_free(tag_list, n_cell);
+    for (size_t i = 0; i < test_result.size(); i++) {
+        delete test_result[i];
+    }
+}
 int main(int argc, char *argv[])
 {
     testing::InitGoogleTest(&argc, argv);
+    // testing::GTEST_FLAG(filter) = "*gen_dest_full_all_src_inserted_given_src_flows_larger_spreadsketch";
+
     return RUN_ALL_TESTS();
 }
\ No newline at end of file |
