summaryrefslogtreecommitdiff
path: root/test/test_merge.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'test/test_merge.cpp')
-rw-r--r--test/test_merge.cpp538
1 files changed, 365 insertions, 173 deletions
diff --git a/test/test_merge.cpp b/test/test_merge.cpp
index a37dff9..46583d1 100644
--- a/test/test_merge.cpp
+++ b/test/test_merge.cpp
@@ -2,6 +2,7 @@
#include <gtest/gtest.h>
#include <set>
#include <unordered_map>
+#include <unordered_set>
#include "fieldstat.h"
#include "utils.hpp"
@@ -31,64 +32,37 @@ double test_cal_accuracy_given_expected_key(vector<struct Fieldstat_tag_list_wra
return test_cal_topk_accuracy(test_result, countMap);
}
// Test helper: reads one counter through fieldstat_counter_get and returns it by value; tag_list defaults to &TEST_FIELD_LIST_STRING in the new version.
-long long merge_test_fieldstat_counter_get(const struct fieldstat *instance, int cube_id, int metric_id, const struct fieldstat_tag_list *tag_list = &TEST_TAG_LIST_STRING)
+long long merge_test_fieldstat_counter_get(const struct fieldstat *instance, int cube_id, int metric_id, const struct field_list *tag_list = &TEST_FIELD_LIST_STRING)
{
long long ret = 0;
- fieldstat_counter_get(instance, cube_id, metric_id, tag_list, &ret);
+ fieldstat_counter_get(instance, cube_id, tag_list, metric_id, &ret); // new argument order: tag_list now precedes metric_id; the call's return code is not checked
return ret; // still the initial 0 unless fieldstat_counter_get wrote a value
}
// This hunk removes the old test_metric_name_mapping_with_new_metric_on_existing_cube test and adds an HLL read helper in its place.
-TEST(unit_test_merge, test_metric_name_mapping_with_new_metric_on_existing_cube)
+double merge_test_fieldstat_hll_get(const struct fieldstat *instance, int cube_id, int metric_id, const struct field_list *tag_list = &TEST_FIELD_LIST_STRING)
{
- struct fieldstat *instance = fieldstat_new();
-
- int cube_id1 = fieldstat_create_cube(instance, &TEST_SHARED_TAG, 1, SAMPLING_MODE_COMPREHENSIVE, 10);
- int metric_id_1_0 = fieldstat_register_counter(instance, "metric_name cube1 cube2");
- int metric_id_1_1 = fieldstat_register_counter(instance, "shared name");
- fieldstat_counter_incrby(instance, cube_id1, metric_id_1_0, &TEST_TAG_STRING, 1, 1);
- fieldstat_counter_incrby(instance, cube_id1, metric_id_1_1, &TEST_TAG_STRING, 1, 2);
- int cube_id2 = fieldstat_create_cube(instance, &TEST_TAG_INT, 1, SAMPLING_MODE_COMPREHENSIVE, 10);
- fieldstat_counter_incrby(instance, cube_id2, metric_id_1_0, &TEST_TAG_STRING, 1, 3);
-
- struct fieldstat *instance_dest = fieldstat_new();
- int cube_id_dest = fieldstat_create_cube(instance_dest, &TEST_SHARED_TAG, 1, SAMPLING_MODE_COMPREHENSIVE, 10);
- (void)fieldstat_register_counter(instance_dest, "shared name");
- // shared name metric is not operated on cube_id_dest
-
- EXPECT_EQ(fieldstat_merge(instance_dest, instance), FS_OK);
-
- int *cube_id;
- int n_cube;
- fieldstat_get_cubes(instance_dest, &cube_id, &n_cube);
- EXPECT_TRUE(n_cube == 2);
- EXPECT_TRUE(cube_id[0] == cube_id_dest);
-
- int *metric_ids;
- size_t n_metrics;
- fieldstat_get_metrics_used_by_cube(instance_dest, cube_id_dest, &metric_ids, &n_metrics);
- EXPECT_EQ(n_metrics, 2);
- EXPECT_STREQ(fieldstat_get_metric_name(instance_dest, metric_ids[0]), "shared name");
- EXPECT_STREQ(fieldstat_get_metric_name(instance_dest, metric_ids[1]), "metric_name cube1 cube2");
-
- EXPECT_EQ(merge_test_fieldstat_counter_get(instance_dest, cube_id_dest, 0), 2); // shared name
- EXPECT_EQ(merge_test_fieldstat_counter_get(instance_dest, cube_id_dest, 1), 1); // metric_name cube1 cube2 on cube1
- EXPECT_EQ(merge_test_fieldstat_counter_get(instance_dest, cube_id[1], 1), 3); // metric_name cube1 cube2 on cube2
+ double ret = 0;
+ fieldstat_hll_get(instance, cube_id, tag_list, metric_id, &ret); // return code is not checked
+ return ret; // still the initial 0 unless fieldstat_hll_get wrote a value
+}
- fieldstat_free(instance);
- fieldstat_free(instance_dest);
- free(cube_id);
- free(metric_ids);
// Test helper: creates a cube and then applies its sampling configuration, mirroring the argument list of the removed fieldstat_create_cube call sites.
+int test_fieldstat_cube_create(struct fieldstat *instance, const struct field *dimensions, size_t n_dimensions, enum sampling_mode mode, int k, int primary_metric_id=0)
+{
+ assert(mode == SAMPLING_MODE_COMPREHENSIVE); // this wrapper is only meant for comprehensive cubes; any other mode trips the assert (when asserts are enabled)
+ int ret = fieldstat_cube_create(instance, dimensions, n_dimensions);
+ fieldstat_cube_set_sampling(instance, ret, mode, k, primary_metric_id); // ret is used as the cube id without being checked for failure first
+ return ret; // value returned by fieldstat_cube_create, used above as the cube id
}
TEST(unit_test_merge, cube_shared_tag_mapping_with_new_cube)
{
struct fieldstat *instance = fieldstat_new();
- (void)fieldstat_create_cube(instance, &TEST_TAG_DOUBLE, 1, SAMPLING_MODE_COMPREHENSIVE, 10);
- int cube_id2 = fieldstat_create_cube(instance, &TEST_SHARED_TAG, 1, SAMPLING_MODE_COMPREHENSIVE, 10);
- int metric_id = fieldstat_register_counter(instance, "metric in cube 2");
- fieldstat_counter_incrby(instance, cube_id2, metric_id, &TEST_TAG_STRING, 1, 1);
+ (void)test_fieldstat_cube_create(instance, &TEST_FIELD_DOUBLE, 1, SAMPLING_MODE_COMPREHENSIVE, 10);
+ int cube_id2 = test_fieldstat_cube_create(instance, &TEST_SHARED_TAG, 1, SAMPLING_MODE_COMPREHENSIVE, 10);
+ int metric_id = fieldstat_register_counter(instance,cube_id2,"metric in cube 2");
+ fieldstat_counter_incrby(instance, cube_id2, metric_id, &TEST_FIELD_STRING, 1, 1);
struct fieldstat *instance_dest = fieldstat_new();
- int cube_id_dest = fieldstat_create_cube(instance_dest, &TEST_SHARED_TAG, 1, SAMPLING_MODE_COMPREHENSIVE, 10);
+ int cube_id_dest = test_fieldstat_cube_create(instance_dest, &TEST_SHARED_TAG, 1, SAMPLING_MODE_COMPREHENSIVE, 10);
fieldstat_merge(instance_dest, instance);
@@ -121,8 +95,8 @@ TEST(unit_test_merge, empty_instance)
TEST(unit_test_merge, new_cube_and_metric_to_empty_comprehensive)
{
struct fieldstat *instance = fieldstat_new();
- fieldstat_create_cube(instance, &TEST_TAG_INT, 1, SAMPLING_MODE_COMPREHENSIVE, 10);
- fieldstat_register_counter(instance, "metric_name");
+ test_fieldstat_cube_create(instance, &TEST_FIELD_INT, 1, SAMPLING_MODE_COMPREHENSIVE, 10);
+ fieldstat_register_counter(instance, 0, "metric_name");
struct fieldstat *instance_dest = fieldstat_new();
@@ -132,7 +106,7 @@ TEST(unit_test_merge, new_cube_and_metric_to_empty_comprehensive)
int n_cube;
fieldstat_get_cubes(instance_dest, &cube_id_dest, &n_cube);
EXPECT_TRUE(n_cube == 1);
- EXPECT_STREQ(fieldstat_get_metric_name(instance_dest, 0), "metric_name");
+ EXPECT_STREQ(fieldstat_get_metric_name(instance_dest, cube_id_dest[0], 0), "metric_name");
fieldstat_free(instance);
fieldstat_free(instance_dest);
@@ -142,47 +116,47 @@ TEST(unit_test_merge, new_cube_and_metric_to_empty_comprehensive)
// Merges an empty comprehensive cube into dest, then adds one cell to the source and merges again;
// checks that dest still has exactly one cube whose metric 0 is named "metric_name".
TEST(unit_test_merge, new_cell_on_existing_cube_and_metric_comprehensive)
{
struct fieldstat *instance = fieldstat_new();
- int cube_id = fieldstat_create_cube(instance, &TEST_SHARED_TAG, 1, SAMPLING_MODE_COMPREHENSIVE, 10);
- int metric_id = fieldstat_register_counter(instance, "metric_name");
+ int cube_id = test_fieldstat_cube_create(instance, &TEST_SHARED_TAG, 1, SAMPLING_MODE_COMPREHENSIVE, 10);
+ int metric_id = fieldstat_register_counter(instance, cube_id, "metric_name"); // register on the cube just created instead of a hard-coded cube id 0
struct fieldstat *instance_dest = fieldstat_new();
fieldstat_merge(instance_dest, instance);
- fieldstat_counter_incrby(instance, cube_id, metric_id, &TEST_TAG_STRING, 1, 10086);
+ fieldstat_counter_incrby(instance, cube_id, metric_id, &TEST_FIELD_STRING, 1, 10086);
fieldstat_merge(instance_dest, instance);
int *cube_id_dest;
int n_cube;
fieldstat_get_cubes(instance_dest, &cube_id_dest, &n_cube);
EXPECT_TRUE(n_cube == 1);
+ EXPECT_STREQ(fieldstat_get_metric_name(instance_dest, cube_id_dest[0], 0), "metric_name"); // moved above free(): the lookup now needs cube_id_dest[0]
free(cube_id_dest);
- EXPECT_STREQ(fieldstat_get_metric_name(instance_dest, 0), "metric_name");
long long measure = merge_test_fieldstat_counter_get(instance, cube_id, metric_id); // NOTE(review): reads the source instance, not instance_dest - confirm this is intended
EXPECT_EQ(measure, 10086);
- struct fieldstat_tag_list *tag_list = NULL;
+ struct field_list *tag_list = NULL;
size_t n_cell = 0;
- fieldstat_get_cells_used_by_metric(instance, cube_id, metric_id, &tag_list, &n_cell);
+ fieldstat_cube_get_cells(instance, cube_id, &tag_list, &n_cell); // cells are now listed per cube rather than per metric
EXPECT_EQ(n_cell, 1);
- EXPECT_EQ(tag_list->n_tag, 1);
- EXPECT_STREQ(tag_list->tag[0].key, TEST_TAG_STRING.key);
+ EXPECT_EQ(tag_list->n_field, 1);
+ EXPECT_STREQ(tag_list->field[0].key, TEST_FIELD_STRING.key);
fieldstat_free(instance);
fieldstat_free(instance_dest);
- fieldstat_tag_list_arr_free(tag_list, n_cell);
+ fieldstat_field_list_arr_free(tag_list, n_cell);
}
TEST(unit_test_merge, merge_existing_cell_on_existing_cube_and_metric_comprehensive)
{
struct fieldstat *instance = fieldstat_new();
- int cube_id = fieldstat_create_cube(instance, &TEST_SHARED_TAG, 1, SAMPLING_MODE_COMPREHENSIVE, 10);
- int metric_id = fieldstat_register_counter(instance, "metric_name");
- fieldstat_counter_incrby(instance, cube_id, metric_id, &TEST_TAG_STRING, 1, 5);
+ int cube_id = test_fieldstat_cube_create(instance, &TEST_SHARED_TAG, 1, SAMPLING_MODE_COMPREHENSIVE, 10);
+ int metric_id = fieldstat_register_counter(instance, cube_id, "metric_name");
+ fieldstat_counter_incrby(instance, cube_id, metric_id, &TEST_FIELD_STRING, 1, 5);
struct fieldstat *instance_dest = fieldstat_new();
fieldstat_merge(instance_dest, instance);
fieldstat_merge(instance_dest, instance);
- EXPECT_STREQ(fieldstat_get_metric_name(instance_dest, 0), "metric_name");
+ EXPECT_STREQ(fieldstat_get_metric_name(instance_dest, cube_id, 0), "metric_name");
long long measure = merge_test_fieldstat_counter_get(instance_dest, cube_id, metric_id);
EXPECT_EQ(measure, 10);
@@ -193,61 +167,61 @@ TEST(unit_test_merge, merge_existing_cell_on_existing_cube_and_metric_comprehens
// Comprehensive cube with a cell limit of 2: merges one cell into dest, resets the source, adds two different cells and merges again.
TEST(unit_test_merge, new_too_many_cells_on_one_metric_given_source_cube_reset_and_get_different_cube_comprehensive)
{
struct fieldstat *instance = fieldstat_new();
- int cube_id = fieldstat_create_cube(instance, &TEST_SHARED_TAG, 1, SAMPLING_MODE_COMPREHENSIVE, 2); // limit is 2
- int metric_id = fieldstat_register_counter(instance, "metric name");
- fieldstat_counter_incrby(instance, cube_id, metric_id, &TEST_TAG_STRING, 1, 1);
+ int cube_id = test_fieldstat_cube_create(instance, &TEST_SHARED_TAG, 1, SAMPLING_MODE_COMPREHENSIVE, 2); // limit is 2
+ int metric_id = fieldstat_register_counter(instance, cube_id, "metric name");
+ fieldstat_counter_incrby(instance, cube_id, metric_id, &TEST_FIELD_STRING, 1, 1);
struct fieldstat *instance_dest = fieldstat_new();
fieldstat_merge(instance_dest, instance);
fieldstat_reset(instance);
- fieldstat_counter_incrby(instance, cube_id, metric_id, &TEST_TAG_INT, 1, 2); // 2nd cell
- fieldstat_counter_incrby(instance, cube_id, metric_id, &TEST_TAG_DOUBLE, 1, 3); // 3rd cell, exceeding the limit 2
+ fieldstat_counter_incrby(instance, cube_id, metric_id, &TEST_FIELD_INT, 1, 2); // 2nd cell
+ fieldstat_counter_incrby(instance, cube_id, metric_id, &TEST_FIELD_DOUBLE, 1, 3); // 3rd cell, exceeding the limit 2
fieldstat_merge(instance_dest, instance);
- struct fieldstat_tag_list *tag_list = NULL;
+ struct field_list *tag_list = NULL;
size_t n_cell = 0;
- fieldstat_get_cells_used_by_metric(instance_dest, 0, 0, &tag_list, &n_cell);
+ fieldstat_cube_get_cells(instance_dest, 0, &tag_list, &n_cell); // dest cube id is hard-coded as 0 here and in the checks below
EXPECT_EQ(n_cell, 2); // dest is expected to hold only two cells
EXPECT_EQ(merge_test_fieldstat_counter_get(instance_dest, 0, 0, &tag_list[0]), 1); // value pushed before the reset
EXPECT_EQ(merge_test_fieldstat_counter_get(instance_dest, 0, 0, &tag_list[1]), 2); // value of the "2nd cell" pushed after the reset
fieldstat_free(instance);
fieldstat_free(instance_dest);
- fieldstat_tag_list_arr_free(tag_list, n_cell);
+ fieldstat_field_list_arr_free(tag_list, n_cell);
}
TEST(unit_test_merge, new_too_many_cells_on_multiple_metric_given_source_cube_reset_and_get_different_cube_comprehensive)
{
struct fieldstat *instance = fieldstat_new();
- int cube_id = fieldstat_create_cube(instance, &TEST_SHARED_TAG, 1, SAMPLING_MODE_COMPREHENSIVE, 2);
- int metric_id1 = fieldstat_register_counter(instance, "metric name1");
- int metric_id2 = fieldstat_register_counter(instance, "metric name2");
- fieldstat_counter_incrby(instance, cube_id, metric_id1, &TEST_TAG_STRING, 1, 1); // 1st cell on metric name1
+ int cube_id = test_fieldstat_cube_create(instance, &TEST_SHARED_TAG, 1, SAMPLING_MODE_COMPREHENSIVE, 2);
+ int metric_id1 = fieldstat_register_counter(instance, cube_id, "metric name1");
+ int metric_id2 = fieldstat_register_counter(instance, cube_id, "metric name2");
+ fieldstat_counter_incrby(instance, cube_id, metric_id1, &TEST_FIELD_STRING, 1, 1); // 1st cell on metric name1
struct fieldstat *instance_dest = fieldstat_new();
fieldstat_merge(instance_dest, instance);
fieldstat_reset(instance);
- int metric_id3 = fieldstat_register_counter(instance, "metric name3");
- fieldstat_counter_incrby(instance, cube_id, metric_id3, &TEST_TAG_INT, 1, 2); // 2nd cell on metric name3, this is a metric dest dont have
- fieldstat_counter_incrby(instance, cube_id, metric_id2, &TEST_TAG_DOUBLE, 1, 3); // 3nd cell on metric name2
+ int metric_id3 = fieldstat_register_counter(instance, cube_id, "metric name3");
+ fieldstat_counter_incrby(instance, cube_id, metric_id3, &TEST_FIELD_INT, 1, 2); // 2nd cell on metric name3, this is a metric dest dont have
+ fieldstat_counter_incrby(instance, cube_id, metric_id2, &TEST_FIELD_DOUBLE, 1, 3); // 3nd cell on metric name2
fieldstat_merge(instance_dest, instance);
- struct fieldstat_tag_list *tag_list = NULL;
- size_t n_cell = 0;
- fieldstat_get_cells_used_by_metric(instance_dest, 0, metric_id1, &tag_list, &n_cell);
- EXPECT_EQ(n_cell, 1);
- EXPECT_STREQ(tag_list->tag[0].key, TEST_TAG_STRING.key);
- fieldstat_tag_list_arr_free(tag_list, n_cell);
-
- fieldstat_get_cells_used_by_metric(instance_dest, 0, metric_id2, &tag_list, &n_cell); // 3nd cell failed to merge
- EXPECT_EQ(n_cell, 0);
+ int *metric_ids = NULL;
+ size_t n_metrics = 0;
+ fieldstat_get_metric_in_cell(instance_dest, 0, &TEST_FIELD_LIST_STRING, &metric_ids, &n_metrics);
+ EXPECT_EQ(n_metrics, 1);
+ EXPECT_STREQ(fieldstat_get_metric_name(instance_dest, 0, metric_ids[0]), "metric name1");
+ free(metric_ids);
- fieldstat_get_cells_used_by_metric(instance_dest, 0, metric_id3, &tag_list, &n_cell);
- EXPECT_EQ(n_cell, 1);
- EXPECT_STREQ(tag_list->tag[0].key, TEST_TAG_INT.key);
- fieldstat_tag_list_arr_free(tag_list, n_cell);
+ fieldstat_get_metric_in_cell(instance_dest, 0, &TEST_FIELD_LIST_INT, &metric_ids, &n_metrics);
+ EXPECT_EQ(n_metrics, 1);
+ EXPECT_STREQ(fieldstat_get_metric_name(instance_dest, 0, metric_ids[0]), "metric name3");
+ free(metric_ids);
+ // 3rd cell failed to merge, because max sampling is 2
+ fieldstat_get_metric_in_cell(instance_dest, 0, &TEST_FIELD_LIST_DOUBLE, &metric_ids, &n_metrics);
+ EXPECT_EQ(n_metrics, 0);
fieldstat_free(instance);
fieldstat_free(instance_dest);
@@ -256,8 +230,9 @@ TEST(unit_test_merge, new_too_many_cells_on_multiple_metric_given_source_cube_re
TEST(unit_test_merge, new_cube_and_metric_to_empty_topk)
{
struct fieldstat *instance = fieldstat_new();
- fieldstat_create_cube(instance, &TEST_TAG_INT, 1, SAMPLING_MODE_TOPK, 10);
- fieldstat_register_counter(instance, "metric_name");
+ fieldstat_cube_create(instance, &TEST_FIELD_INT, 1);
+ fieldstat_register_counter(instance, 0, "metric_name");
+ fieldstat_cube_set_sampling(instance, 0, SAMPLING_MODE_TOPK, 10, 0);
struct fieldstat *instance_dest = fieldstat_new();
@@ -267,7 +242,7 @@ TEST(unit_test_merge, new_cube_and_metric_to_empty_topk)
int n_cube;
fieldstat_get_cubes(instance_dest, &cube_id_dest, &n_cube);
EXPECT_TRUE(n_cube == 1);
- EXPECT_STREQ(fieldstat_get_metric_name(instance_dest, 0), "metric_name");
+ EXPECT_STREQ(fieldstat_get_metric_name(instance_dest, cube_id_dest[0], 0), "metric_name");
fieldstat_free(instance);
fieldstat_free(instance_dest);
@@ -277,41 +252,43 @@ TEST(unit_test_merge, new_cube_and_metric_to_empty_topk)
// Top-K variant: merges an empty cube into dest, then adds one cell to the source and merges again; checks dest has one cube whose metric 0 is "metric_name".
TEST(unit_test_merge, new_cell_on_existing_cube_and_metric_topk)
{
struct fieldstat *instance = fieldstat_new();
- int cube_id = fieldstat_create_cube(instance, &TEST_SHARED_TAG, 1, SAMPLING_MODE_TOPK, 10);
- int metric_id = fieldstat_register_counter(instance, "metric_name");
+ int cube_id = fieldstat_cube_create(instance, &TEST_SHARED_TAG, 1);
+ int metric_id = fieldstat_register_counter(instance, cube_id, "metric_name");
+ fieldstat_cube_set_sampling(instance, cube_id, SAMPLING_MODE_TOPK, 10, 0); // sampling is configured after the metric is registered; the last argument (0) is the primary metric id
struct fieldstat *instance_dest = fieldstat_new();
fieldstat_merge(instance_dest, instance);
- fieldstat_counter_incrby(instance, cube_id, metric_id, &TEST_TAG_STRING, 1, 10086);
+ fieldstat_counter_incrby(instance, cube_id, metric_id, &TEST_FIELD_STRING, 1, 10086);
fieldstat_merge(instance_dest, instance);
int *cube_id_dest;
int n_cube;
fieldstat_get_cubes(instance_dest, &cube_id_dest, &n_cube);
EXPECT_TRUE(n_cube == 1);
+ EXPECT_STREQ(fieldstat_get_metric_name(instance_dest,cube_id_dest[0], 0), "metric_name"); // moved above free(): the lookup now needs cube_id_dest[0]
free(cube_id_dest);
- EXPECT_STREQ(fieldstat_get_metric_name(instance_dest, 0), "metric_name");
long long measure = merge_test_fieldstat_counter_get(instance, cube_id, metric_id); // NOTE(review): reads the source instance, not instance_dest - confirm this is intended
EXPECT_EQ(measure, 10086);
- struct fieldstat_tag_list *tag_list = NULL;
+ struct field_list *tag_list = NULL;
size_t n_cell = 0;
- fieldstat_get_cells_used_by_metric(instance, cube_id, metric_id, &tag_list, &n_cell);
+ fieldstat_cube_get_cells(instance, cube_id, &tag_list, &n_cell); // cells are now listed per cube rather than per metric
EXPECT_EQ(n_cell, 1);
- EXPECT_EQ(tag_list->n_tag, 1);
- EXPECT_STREQ(tag_list->tag[0].key, TEST_TAG_STRING.key);
+ EXPECT_EQ(tag_list->n_field, 1);
+ EXPECT_STREQ(tag_list->field[0].key, TEST_FIELD_STRING.key);
fieldstat_free(instance);
fieldstat_free(instance_dest);
- fieldstat_tag_list_arr_free(tag_list, n_cell);
+ fieldstat_field_list_arr_free(tag_list, n_cell);
}
TEST(unit_test_merge, merge_existing_cell_on_existing_cube_and_metric_topk)
{
struct fieldstat *instance = fieldstat_new();
- int cube_id = fieldstat_create_cube(instance, &TEST_SHARED_TAG, 1, SAMPLING_MODE_TOPK, 10);
- int metric_id = fieldstat_register_counter(instance, "metric_name");
- fieldstat_counter_incrby(instance, cube_id, metric_id, &TEST_TAG_STRING, 1, 5);
+ int cube_id = fieldstat_cube_create(instance, &TEST_SHARED_TAG, 1);
+ int metric_id = fieldstat_register_counter(instance, cube_id, "metric_name");
+ fieldstat_cube_set_sampling(instance, cube_id, SAMPLING_MODE_TOPK, 10, 0);
+ fieldstat_counter_incrby(instance, cube_id, metric_id, &TEST_FIELD_STRING, 1, 5);
struct fieldstat *instance_dest = fieldstat_new();
fieldstat_merge(instance_dest, instance);
@@ -322,17 +299,17 @@ TEST(unit_test_merge, merge_existing_cell_on_existing_cube_and_metric_topk)
fieldstat_get_cubes(instance_dest, &cube_id_dest, &n_cube);
EXPECT_TRUE(n_cube == 1);
int ret_cube_id = cube_id_dest[0];
+ EXPECT_STREQ(fieldstat_get_metric_name(instance_dest,cube_id_dest[0], 0), "metric_name");
free(cube_id_dest);
- EXPECT_STREQ(fieldstat_get_metric_name(instance_dest, 0), "metric_name");
- struct fieldstat_tag_list *tag_list = NULL;
+ struct field_list *tag_list = NULL;
size_t n_cell = 0;
- fieldstat_get_cells_used_by_metric(instance_dest, ret_cube_id, 0, &tag_list, &n_cell);
+ fieldstat_cube_get_cells(instance_dest, ret_cube_id, &tag_list, &n_cell);
EXPECT_EQ(n_cell, 1);
long long measure = merge_test_fieldstat_counter_get(instance_dest, cube_id, metric_id, &tag_list[0]);
EXPECT_EQ(measure, 10);
- fieldstat_tag_list_arr_free(tag_list, n_cell);
+ fieldstat_field_list_arr_free(tag_list, n_cell);
fieldstat_free(instance);
fieldstat_free(instance_dest);
}
@@ -340,36 +317,38 @@ TEST(unit_test_merge, merge_existing_cell_on_existing_cube_and_metric_topk)
// Top-K cube with K = 2: merges one cell (count 1) into dest, resets the source, adds cells with counts 2 and 3 and merges again.
TEST(unit_test_merge, new_too_many_cells_on_one_metric_given_source_cube_reset_and_get_different_cube_topk)
{
struct fieldstat *instance = fieldstat_new();
- int cube_id = fieldstat_create_cube(instance, &TEST_SHARED_TAG, 1, SAMPLING_MODE_TOPK, 2);
- int metric_id = fieldstat_register_counter(instance, "metric name");
- fieldstat_counter_incrby(instance, cube_id, metric_id, &TEST_TAG_STRING, 1, 1);
+ int cube_id = fieldstat_cube_create(instance, &TEST_SHARED_TAG, 1);
+ int metric_id = fieldstat_register_counter(instance, cube_id, "metric name");
+ fieldstat_cube_set_sampling(instance, cube_id, SAMPLING_MODE_TOPK, 2, 0); // sampling is configured after the metric is registered; the last argument (0) is the primary metric id
+ fieldstat_counter_incrby(instance, cube_id, metric_id, &TEST_FIELD_STRING, 1, 1);
struct fieldstat *instance_dest = fieldstat_new();
fieldstat_merge(instance_dest, instance);
fieldstat_reset(instance);
- fieldstat_counter_incrby(instance, cube_id, metric_id, &TEST_TAG_INT, 1, 2); // 2nd cell
- fieldstat_counter_incrby(instance, cube_id, metric_id, &TEST_TAG_DOUBLE, 1, 3); // 3rd cell,bigger than the others, so keep it
+ fieldstat_counter_incrby(instance, cube_id, metric_id, &TEST_FIELD_INT, 1, 2); // 2nd cell
+ fieldstat_counter_incrby(instance, cube_id, metric_id, &TEST_FIELD_DOUBLE, 1, 3); // 3rd cell,bigger than the others, so keep it
fieldstat_merge(instance_dest, instance);
- struct fieldstat_tag_list *tag_list = NULL;
+ struct field_list *tag_list = NULL;
size_t n_cell = 0;
- fieldstat_get_cells_used_by_metric(instance_dest, 0, 0, &tag_list, &n_cell);
+ fieldstat_cube_get_cells(instance_dest, 0, &tag_list, &n_cell); // dest cube id is hard-coded as 0 here and in the checks below
EXPECT_EQ(n_cell, 2); // dest is expected to hold only two cells
EXPECT_EQ(merge_test_fieldstat_counter_get(instance_dest, 0, 0, &tag_list[0]), 3); // the two largest counts (3 and 2) are expected, in this order
EXPECT_EQ(merge_test_fieldstat_counter_get(instance_dest, 0, 0, &tag_list[1]), 2);
fieldstat_free(instance);
fieldstat_free(instance_dest);
- fieldstat_tag_list_arr_free(tag_list, n_cell);
+ fieldstat_field_list_arr_free(tag_list, n_cell);
}
-struct fieldstat *test_push_flows(vector<Fieldstat_tag_list_wrapper *> &flows_in_test, int K, long long count = 1)
+struct fieldstat *topk_test_push_flows(vector<Fieldstat_tag_list_wrapper *> &flows_in_test, int K, long long count = 1)
{
struct fieldstat *instance = fieldstat_new();
- int cube_id = fieldstat_create_cube(instance, &TEST_SHARED_TAG, 1, SAMPLING_MODE_TOPK, K);
- int metric_id = fieldstat_register_counter(instance, "metric name");
+ int cube_id = fieldstat_cube_create(instance, &TEST_SHARED_TAG, 1);
+ int metric_id = fieldstat_register_counter(instance, cube_id, "metric name");
+ fieldstat_cube_set_sampling(instance, cube_id, SAMPLING_MODE_TOPK, K, 0);
for (size_t i = 0; i < flows_in_test.size(); i++) {
fieldstat_counter_incrby(instance, cube_id, metric_id, flows_in_test[i]->get_tag(), flows_in_test[i]->get_tag_count(), count);
}
@@ -380,14 +359,14 @@ TEST(unit_test_merge, merge_accuracy_test_with_K_large_enough_topk)
{
int K = 100;
vector<Fieldstat_tag_list_wrapper *> flows_in_src = test_gen_topk_flows(K, K);
- struct fieldstat *instance_src = test_push_flows(flows_in_src, K);
+ struct fieldstat *instance_src = topk_test_push_flows(flows_in_src, K);
vector<Fieldstat_tag_list_wrapper *> flows_in_dest = test_gen_topk_flows(K, K);
- struct fieldstat *instance_dest = test_push_flows(flows_in_dest, K);
+ struct fieldstat *instance_dest = topk_test_push_flows(flows_in_dest, K);
fieldstat_merge(instance_dest, instance_src);
- struct fieldstat_tag_list *tag_list = NULL;
+ struct field_list *tag_list = NULL;
size_t n_cell = 0;
- fieldstat_get_cells_used_by_metric(instance_dest, 0, 0, &tag_list, &n_cell);
+ fieldstat_cube_get_cells(instance_dest, 0, &tag_list, &n_cell);
vector<Fieldstat_tag_list_wrapper *> flows_in_merged;
for (size_t i = 0; i < n_cell; i++) {
@@ -400,7 +379,7 @@ TEST(unit_test_merge, merge_accuracy_test_with_K_large_enough_topk)
fieldstat_free(instance_src);
fieldstat_free(instance_dest);
- fieldstat_tag_list_arr_free(tag_list, n_cell);
+ fieldstat_field_list_arr_free(tag_list, n_cell);
for (size_t i = 0; i < flows_in_merged.size(); i++) {
delete flows_in_merged[i];
}
@@ -409,23 +388,23 @@ TEST(unit_test_merge, merge_accuracy_test_with_K_large_enough_topk)
}
}
-TEST(unit_test_merge, merge_accuracy_test_gen_dest_full_all_inserted_given_src_flows_larger)
+TEST(unit_test_merge, merge_accuracy_test_gen_dest_full_all_inserted_given_src_flows_larger_topk)
{
int K = 1000;
vector<Fieldstat_tag_list_wrapper *> flows_in_src = test_gen_topk_flows(10000, K);
- struct fieldstat *instance_src = test_push_flows(flows_in_src, K, 1000); // 1000 times larger than dest 1
+ struct fieldstat *instance_src = topk_test_push_flows(flows_in_src, K, 1000); // 1000 times larger than dest 1
vector<Fieldstat_tag_list_wrapper *> flows_in_dest;
for (int i = 0; i < K; i++) {
Fieldstat_tag_list_wrapper *tmp = new Fieldstat_tag_list_wrapper("flows in dest", to_string(i).c_str());
flows_in_dest.push_back(tmp);
}
- struct fieldstat *instance_dest = test_push_flows(flows_in_dest, K, 1);
+ struct fieldstat *instance_dest = topk_test_push_flows(flows_in_dest, K, 1);
fieldstat_merge(instance_dest, instance_src);
- struct fieldstat_tag_list *tag_list = NULL;
+ struct field_list *tag_list = NULL;
size_t n_cell = 0;
- fieldstat_get_cells_used_by_metric(instance_dest, 0, 0, &tag_list, &n_cell);
+ fieldstat_cube_get_cells(instance_dest, 0, &tag_list, &n_cell);
vector<Fieldstat_tag_list_wrapper *> flows_in_merged;
for (size_t i = 0; i < n_cell; i++) {
flows_in_merged.push_back(new Fieldstat_tag_list_wrapper(&tag_list[i]));
@@ -438,7 +417,7 @@ TEST(unit_test_merge, merge_accuracy_test_gen_dest_full_all_inserted_given_src_f
fieldstat_free(instance_src);
fieldstat_free(instance_dest);
- fieldstat_tag_list_arr_free(tag_list, n_cell);
+ fieldstat_field_list_arr_free(tag_list, n_cell);
for (size_t i = 0; i < flows_in_merged.size(); i++) {
delete flows_in_merged[i];
}
@@ -447,18 +426,18 @@ TEST(unit_test_merge, merge_accuracy_test_gen_dest_full_all_inserted_given_src_f
}
}
-TEST(unit_test_merge, merge_accuracy_test_gen_dest_full_some_inserted_and_some_merged_and_some_fail_to_add)
+TEST(unit_test_merge, merge_accuracy_test_gen_dest_full_some_inserted_and_some_merged_and_some_fail_to_add_topk)
{
int K = 100;
- vector<Fieldstat_tag_list_wrapper *> flows_in_src = test_gen_topk_flows(10000, K + 50); // let elephant flows in src and dest different
- struct fieldstat *instance_src = test_push_flows(flows_in_src, K);
- vector<Fieldstat_tag_list_wrapper *> flows_in_dest = test_gen_topk_flows(10000, K + 50);
- struct fieldstat *instance_dest = test_push_flows(flows_in_dest, K);
+ vector<Fieldstat_tag_list_wrapper *> flows_in_src = test_gen_topk_flows(30000, K + 50); // let elephant flows in src and dest different
+ struct fieldstat *instance_src = topk_test_push_flows(flows_in_src, K);
+ vector<Fieldstat_tag_list_wrapper *> flows_in_dest = test_gen_topk_flows(30000, K + 50);
+ struct fieldstat *instance_dest = topk_test_push_flows(flows_in_dest, K);
fieldstat_merge(instance_dest, instance_src);
- struct fieldstat_tag_list *tag_list = NULL;
+ struct field_list *tag_list = NULL;
size_t n_cell = 0;
- fieldstat_get_cells_used_by_metric(instance_dest, 0, 0, &tag_list, &n_cell);
+ fieldstat_cube_get_cells(instance_dest, 0, &tag_list, &n_cell);
vector<Fieldstat_tag_list_wrapper *> flows_in_merged;
for (size_t i = 0; i < n_cell; i++) {
flows_in_merged.push_back(new Fieldstat_tag_list_wrapper(&tag_list[i]));
@@ -466,12 +445,12 @@ TEST(unit_test_merge, merge_accuracy_test_gen_dest_full_some_inserted_and_some_m
flows_in_dest.insert(flows_in_dest.end(), std::make_move_iterator(flows_in_src.begin()), std::make_move_iterator(flows_in_src.end()));
double accuracy = test_cal_accuracy_given_expected_key(flows_in_dest, flows_in_merged);
- EXPECT_GE(accuracy, 0.87); // by heavy keeper benchmark, with K = 100, merging result should be about 0.96, for adding the flows will also cause some inaccuracy, so here we set 0.93
+ EXPECT_GE(accuracy, 0.87);
printf("merge_accuracy_test_gen_dest_full_some_inserted_and_some_merged_and_some_fail_to_add accuracy is %lf\n", accuracy);
fieldstat_free(instance_src);
fieldstat_free(instance_dest);
- fieldstat_tag_list_arr_free(tag_list, n_cell);
+ fieldstat_field_list_arr_free(tag_list, n_cell);
for (size_t i = 0; i < flows_in_merged.size(); i++) {
delete flows_in_merged[i];
}
@@ -483,23 +462,24 @@ TEST(unit_test_merge, merge_accuracy_test_gen_dest_full_some_inserted_and_some_m
TEST(unit_test_merge, primary_metric_has_no_value)
{
struct fieldstat *instance = fieldstat_new();
- int cube_id = fieldstat_create_cube(instance, &TEST_SHARED_TAG, 1, SAMPLING_MODE_TOPK, 2);
- int metric_primary = fieldstat_register_counter(instance, "primary");
- int metric_operated = fieldstat_register_counter(instance, "operated");
- fieldstat_counter_incrby(instance, cube_id, metric_operated, &TEST_TAG_STRING, 1, 1);
+ int cube_id = fieldstat_cube_create(instance, &TEST_SHARED_TAG, 1);
+ int metric_primary = fieldstat_register_counter(instance, cube_id, "primary");
+ int metric_operated = fieldstat_register_counter(instance, cube_id, "operated");
+ fieldstat_cube_set_sampling(instance, cube_id, SAMPLING_MODE_TOPK, 2, metric_primary);
+ fieldstat_counter_incrby(instance, cube_id, metric_operated, &TEST_FIELD_STRING, 1, 1);
struct fieldstat *instance_dest = fieldstat_new();
fieldstat_merge(instance_dest, instance);
fieldstat_merge(instance_dest, instance);
- struct fieldstat_tag_list *tag_list = NULL;
+ struct field_list *tag_list = NULL;
size_t n_cell = 0;
- fieldstat_get_cells_used_by_metric(instance_dest, 0, metric_primary, &tag_list, &n_cell);
- EXPECT_EQ(n_cell, 0);
-
- fieldstat_get_cells_used_by_metric(instance_dest, 0, metric_operated, &tag_list, &n_cell);
+ fieldstat_cube_get_cells(instance_dest, 0, &tag_list, &n_cell);
EXPECT_EQ(n_cell, 1);
+ EXPECT_STREQ(tag_list[0].field[0].key, TEST_FIELD_STRING.key);
+
EXPECT_EQ(merge_test_fieldstat_counter_get(instance_dest, 0, metric_operated, &tag_list[0]), 2);
- fieldstat_tag_list_arr_free(tag_list, n_cell);
+ EXPECT_EQ(merge_test_fieldstat_counter_get(instance_dest, 0, metric_primary, &tag_list[0]), 0);
+ fieldstat_field_list_arr_free(tag_list, n_cell);
fieldstat_free(instance);
fieldstat_free(instance_dest);
@@ -508,42 +488,254 @@ TEST(unit_test_merge, primary_metric_has_no_value)
// Source and dest register the same two metric names in opposite order and each mark "primary" as the primary metric; the merge is expected to be rejected.
TEST(unit_test_merge, primary_metric_id_different)
{
struct fieldstat *instance = fieldstat_new();
- int cube_id = fieldstat_create_cube(instance, &TEST_SHARED_TAG, 1, SAMPLING_MODE_TOPK, 2);
- int metric_primary = fieldstat_register_counter(instance, "primary");
- int metric_2 = fieldstat_register_counter(instance, "2");
- fieldstat_counter_incrby(instance, cube_id, metric_primary, &TEST_TAG_STRING, 1, 100);
- fieldstat_counter_incrby(instance, cube_id, metric_2, &TEST_TAG_STRING, 1, 1);
+ int cube_id = fieldstat_cube_create(instance, &TEST_SHARED_TAG, 1);
+ int metric_primary = fieldstat_register_counter(instance, cube_id, "primary"); // source registers "primary" first
+ int metric_2 = fieldstat_register_counter(instance, cube_id, "2");
+ fieldstat_cube_set_sampling(instance, cube_id, SAMPLING_MODE_TOPK, 2, metric_primary);
+
+ fieldstat_counter_incrby(instance, cube_id, metric_primary, &TEST_FIELD_STRING, 1, 100);
+ fieldstat_counter_incrby(instance, cube_id, metric_2, &TEST_FIELD_STRING, 1, 1);
struct fieldstat *instance_dst = fieldstat_new();
- int cube_id_dst = fieldstat_create_cube(instance_dst, &TEST_SHARED_TAG, 1, SAMPLING_MODE_TOPK, 2);
- fieldstat_register_counter(instance_dst, "2");
- int metric_primary_dst = fieldstat_register_counter(instance_dst, "primary");
- fieldstat_cube_set_primary_metric(instance_dst, cube_id_dst, metric_primary_dst);
+ int cube_id_dst = fieldstat_cube_create(instance_dst, &TEST_SHARED_TAG, 1);
+ fieldstat_register_counter(instance_dst, cube_id_dst, "2"); // dest registers "2" first, so "primary" presumably gets a different id than in the source - TODO confirm ids follow registration order
+ int metric_primary_dst = fieldstat_register_counter(instance_dst, cube_id_dst, "primary");
+ fieldstat_cube_set_sampling(instance_dst, cube_id_dst, SAMPLING_MODE_TOPK, 2, metric_primary_dst); // replaces the removed fieldstat_cube_set_primary_metric call
+
+ EXPECT_EQ(fieldstat_merge(instance_dst, instance), FS_ERR_DIFFERENT_CONFIGURATION_FOR_SAME_CUBE); // the merge must report the configuration mismatch
+
+ fieldstat_free(instance);
+ fieldstat_free(instance_dst);
+}
+
+TEST(unit_test_merge, new_cube_and_metric_to_empty_spreadsketch) { // merging into an empty instance should bring over the cube and its metric name
+ struct fieldstat *instance = fieldstat_new();
+ int cube_id = fieldstat_cube_create(instance, &TEST_FIELD_INT, 1); // use the returned ids instead of assuming 0, as the sibling tests do
+ int metric_id = fieldstat_register_hll(instance, cube_id, "metric", 6);
+ fieldstat_cube_set_sampling(instance, cube_id, SAMPLING_MODE_TOP_CARDINALITY, 10, metric_id);
+
+ struct fieldstat *instance_dest = fieldstat_new();
+ fieldstat_merge(instance_dest, instance);
+
+ int *cube_id_dest = NULL; // out-params start in a known state so a lookup that writes nothing cannot leave garbage to read or free
+ int n_cube = 0;
+ fieldstat_get_cubes(instance_dest, &cube_id_dest, &n_cube);
+ ASSERT_EQ(n_cube, 1); // fatal on purpose: the next line indexes cube_id_dest[0]
+ EXPECT_STREQ(fieldstat_get_metric_name(instance_dest, cube_id_dest[0], metric_id), "metric");
+
+ free(cube_id_dest);
+ fieldstat_free(instance);
+ fieldstat_free(instance_dest);
+}
+
+TEST(unit_test_merge, new_cell_on_existing_cube_and_metric_spreadsketch) { // a cell added to the source after a first merge must reach the destination on the next merge
+ struct fieldstat *instance = fieldstat_new();
+ int cube_id = fieldstat_cube_create(instance, &TEST_SHARED_TAG, 1);
+ int metric_id = fieldstat_register_hll(instance, cube_id, "metric", 6);
+ fieldstat_cube_set_sampling(instance, cube_id, SAMPLING_MODE_TOP_CARDINALITY, 10, 0);
+ struct fieldstat *instance_dest = fieldstat_new();
+ fieldstat_merge(instance_dest, instance); // first merge happens before any item has been added to the source
+
+ fieldstat_hll_add(instance, cube_id, metric_id, &TEST_FIELD_STRING, 1, "1", 1);
+ fieldstat_hll_add(instance, cube_id, metric_id, &TEST_FIELD_STRING, 1, "2", 1);
+ fieldstat_merge(instance_dest, instance);
- fieldstat_merge(instance_dst, instance);
+ int *cube_id_dest = NULL; // out-params start in a known state so a lookup that writes nothing cannot leave garbage to read or free
+ int n_cube = 0;
+ fieldstat_get_cubes(instance_dest, &cube_id_dest, &n_cube);
+ ASSERT_EQ(n_cube, 1); // fatal on purpose: the next line indexes cube_id_dest[0]
+ EXPECT_STREQ(fieldstat_get_metric_name(instance_dest, cube_id_dest[0], 0), "metric");
+ double measure = merge_test_fieldstat_hll_get(instance_dest, cube_id, metric_id); // keep the estimate as double (no truncation) and read it from the merge target
+ EXPECT_NEAR(measure, 2, 0.3);
- struct fieldstat_tag_list *tag_list = NULL;
+ struct field_list *tag_list = NULL;
size_t n_cell = 0;
- fieldstat_get_cells_used_by_metric(instance_dst, 0, metric_primary, &tag_list, &n_cell);
+ fieldstat_cube_get_cells(instance_dest, cube_id, &tag_list, &n_cell);
EXPECT_EQ(n_cell, 1);
- int *metric_ids;
- size_t n_metrics;
- fieldstat_get_metrics(instance_dst, &metric_ids, &n_metrics);
- EXPECT_EQ(n_metrics, 2);
- EXPECT_STREQ(fieldstat_get_metric_name(instance_dst, metric_ids[0]), "2");
- EXPECT_STREQ(fieldstat_get_metric_name(instance_dst, metric_ids[1]), "primary");
-
- EXPECT_EQ(merge_test_fieldstat_counter_get(instance_dst, 0, metric_ids[1], &tag_list[0]), 100);
- EXPECT_EQ(merge_test_fieldstat_counter_get(instance_dst, 0, metric_ids[0], &tag_list[0]), 1);
- fieldstat_tag_list_arr_free(tag_list, n_cell);
- free(metric_ids);
+ EXPECT_EQ(tag_list->n_field, 1);
+ EXPECT_STREQ(tag_list->field[0].key, TEST_FIELD_STRING.key);
+
+ free(cube_id_dest);
fieldstat_free(instance);
- fieldstat_free(instance_dst);
+ fieldstat_free(instance_dest);
+ fieldstat_field_list_arr_free(tag_list, n_cell);
}
+TEST(unit_test_merge, merge_existing_cell_on_existing_cube_and_metric_spreadsketch) {
+ // Merging the same cell repeatedly, with one extra HLL item added in between, should leave a single cell estimating about 2.
+ struct fieldstat *src = fieldstat_new();
+ struct fieldstat *dst = fieldstat_new();
+ const int cube = fieldstat_cube_create(src, &TEST_SHARED_TAG, 1);
+ const int hll = fieldstat_register_hll(src, cube, "metric", 6);
+ fieldstat_cube_set_sampling(src, cube, SAMPLING_MODE_TOP_CARDINALITY, 10, 0);
+ fieldstat_hll_add(src, cube, hll, &TEST_FIELD_STRING, 1, "1", 1);
+ // Two merges of the unchanged source, then a third one after item "2" is added.
+ fieldstat_merge(dst, src);
+ fieldstat_merge(dst, src);
+ fieldstat_hll_add(src, cube, hll, &TEST_FIELD_STRING, 1, "2", 1);
+ fieldstat_merge(dst, src);
+
+ size_t cell_count = 0;
+ struct field_list *cells = NULL;
+ fieldstat_cube_get_cells(dst, cube, &cells, &cell_count);
+ EXPECT_EQ(cell_count, 1);
+ EXPECT_NEAR(merge_test_fieldstat_hll_get(dst, cube, hll, cells), 2, 0.3);
+
+ fieldstat_free(src);
+ fieldstat_free(dst);
+ fieldstat_field_list_arr_free(cells, cell_count);
+}
+
+TEST(unit_test_merge, new_too_many_cells_on_one_metric_given_source_cube_reset_and_get_different_cube_spreadsketch) {
+ // Destination first receives one cell; the source is then reset and refilled with two other cells before the second merge.
+ struct fieldstat *src = fieldstat_new();
+ struct fieldstat *dst = fieldstat_new();
+ const int cube = fieldstat_cube_create(src, &TEST_SHARED_TAG, 1);
+ const int hll = fieldstat_register_hll(src, cube, "metric", 6);
+ fieldstat_cube_set_sampling(src, cube, SAMPLING_MODE_TOP_CARDINALITY, 2, 0);
+ fieldstat_hll_add(src, cube, hll, &TEST_FIELD_STRING, 1, "1", 1);
+ fieldstat_merge(dst, src);
+ // After the reset: two items go to the INT cell and three items go to the DOUBLE cell.
+ fieldstat_reset(src);
+ fieldstat_hll_add(src, cube, hll, &TEST_FIELD_INT, 1, "21", 2);
+ fieldstat_hll_add(src, cube, hll, &TEST_FIELD_INT, 1, "22", 2);
+ fieldstat_hll_add(src, cube, hll, &TEST_FIELD_DOUBLE, 1, "31", 2);
+ fieldstat_hll_add(src, cube, hll, &TEST_FIELD_DOUBLE, 1, "32", 2);
+ fieldstat_hll_add(src, cube, hll, &TEST_FIELD_DOUBLE, 1, "33", 2);
+ fieldstat_merge(dst, src);
+ size_t cell_count = 0;
+ struct field_list *cells = NULL;
+ fieldstat_cube_get_cells(dst, 0, &cells, &cell_count);
+ EXPECT_EQ(cell_count, 2);
+ EXPECT_STREQ(cells[0].field[0].key, TEST_FIELD_DOUBLE.key);
+ EXPECT_NEAR(merge_test_fieldstat_hll_get(dst, 0, 0, cells), 3, 0.3);
+ EXPECT_STREQ(cells[1].field[0].key, TEST_FIELD_INT.key);
+ EXPECT_NEAR(merge_test_fieldstat_hll_get(dst, 0, 0, cells + 1), 2, 0.3);
+
+ fieldstat_free(src);
+ fieldstat_free(dst);
+ fieldstat_field_list_arr_free(cells, cell_count);
+}
+
+
+TEST(unit_test_merge, gen_dest_full_all_src_inserted_given_src_flows_larger_spreadsketch) {
+ const int cell_limit = 100;
+ SpreadSketchZipfGenerator zipf(1.0, cell_limit); // generator argument equals the cell limit, so nearly every key should get a cell (hash collisions aside)
+ struct fieldstat *src = fieldstat_new();
+ const int cube = fieldstat_cube_create(src, &TEST_SHARED_TAG, 1);
+ const int hll = fieldstat_register_hll(src, cube, "metric", 6);
+ fieldstat_cube_set_sampling(src, cube, SAMPLING_MODE_TOP_CARDINALITY, cell_limit, 0);
+ struct fieldstat *dst = fieldstat_fork(src);
+ const char *const dst_field_key = "key of dest";
+ const char *const src_field_key = "key of src";
+
+ std::unordered_map<std::string, std::unordered_set<std::string>> seen_items; // dimension -> exact set of distinct items
+ for (int remaining = 500000; remaining > 0; --remaining) { // many more flows than the destination gets, so src fanouts exceed dest fanouts
+ Flow sample = zipf.next();
+ Fieldstat_tag_list_wrapper dimension(src_field_key, sample.src_ip.c_str());
+ Fieldstat_tag_list_wrapper item("dummy", sample.dst_ip.c_str());
+ fieldstat_hll_add_field(src, cube, hll, dimension.get_tag(), dimension.get_tag_count(), item.get_tag(), item.get_tag_count());
+
+ seen_items[dimension.to_string()].insert(item.to_string());
+ }
+
+ for (int remaining = 1000; remaining > 0; --remaining) {
+ Flow sample = zipf.next();
+ Fieldstat_tag_list_wrapper dimension(dst_field_key, sample.src_ip.c_str());
+ Fieldstat_tag_list_wrapper item("dummy", sample.dst_ip.c_str());
+ fieldstat_hll_add_field(dst, cube, hll, dimension.get_tag(), dimension.get_tag_count(), item.get_tag(), item.get_tag_count());
+
+ seen_items[dimension.to_string()].insert(item.to_string());
+ }
+
+ fieldstat_merge(dst, src);
+
+ size_t dst_cell_count = 0;
+ size_t src_cell_count = 0;
+ struct field_list *dst_cells = NULL;
+ struct field_list *src_cells = NULL;
+ std::vector<struct Fieldstat_tag_list_wrapper *> reported;
+ fieldstat_cube_get_cells(dst, 0, &dst_cells, &dst_cell_count);
+ fieldstat_cube_get_cells(src, 0, &src_cells, &src_cell_count);
+ for (struct field_list *cell = dst_cells; cell != dst_cells + dst_cell_count; ++cell) {
+ reported.push_back(new Fieldstat_tag_list_wrapper(cell));
+ }
+ std::unordered_map<std::string, int> expected_fanout;
+ for (const auto &entry : seen_items) {
+ expected_fanout[entry.first] = entry.second.size();
+ }
+
+ const double recall = test_cal_topk_accuracy(reported, expected_fanout);
+ EXPECT_NEAR(recall, static_cast<double>(src_cell_count) / dst_cell_count, 0.0001); // false positives are expected only from src cells affected by hash collisions
+
+ fieldstat_free(src);
+ fieldstat_free(dst);
+ fieldstat_field_list_arr_free(dst_cells, dst_cell_count);
+ fieldstat_field_list_arr_free(src_cells, src_cell_count);
+ for (Fieldstat_tag_list_wrapper *wrapper : reported) {
+ delete wrapper;
+ }
+}
+
+TEST(unit_test_merge, merge_accuracy_test_gen_dest_full_some_inserted_and_some_merged_and_some_fail_to_add_spreadsketch) {
+ int K = 10;
+ SpreadSketchZipfGenerator flow_generator(1.0, K * 10);
+ struct fieldstat *instance_src = fieldstat_new();
+ int cube_id = fieldstat_cube_create(instance_src, &TEST_SHARED_TAG, 1);
+ int metric_id = fieldstat_register_hll(instance_src, cube_id, "metric", 6);
+ fieldstat_cube_set_sampling(instance_src, cube_id, SAMPLING_MODE_TOP_CARDINALITY, K, 0);
+ struct fieldstat *instance_dest = fieldstat_fork(instance_src);
+ // Source is fed "src"/"common" keys and destination "dest"/"common" keys; recall of the merged cells is checked against exact distinct counts.
+ std::unordered_map<std::string, std::unordered_set<std::string>> flow_cnt;
+ for (int i = 0; i < 100000; i++) {
+ Flow flow = flow_generator.next();
+ const char *use_key = rand()%2? "src":"common";
+ Fieldstat_tag_list_wrapper dimension = Fieldstat_tag_list_wrapper(use_key, flow.src_ip.c_str());
+ Fieldstat_tag_list_wrapper item = Fieldstat_tag_list_wrapper("dummy", flow.dst_ip.c_str());
+ fieldstat_hll_add_field(instance_src, cube_id, metric_id, dimension.get_tag(), dimension.get_tag_count(), item.get_tag(), item.get_tag_count());
+
+ flow_cnt[dimension.to_string()].insert(item.to_string());
+ }
+ for (int i = 0; i < 100000; i++) {
+ Flow flow = flow_generator.next();
+ const char *use_key = rand()%2? "dest":"common";
+ Fieldstat_tag_list_wrapper dimension = Fieldstat_tag_list_wrapper(use_key, flow.src_ip.c_str());
+ Fieldstat_tag_list_wrapper item = Fieldstat_tag_list_wrapper("dummy", flow.dst_ip.c_str());
+ fieldstat_hll_add_field(instance_dest, cube_id, metric_id, dimension.get_tag(), dimension.get_tag_count(), item.get_tag(), item.get_tag_count()); // must target the destination, otherwise it is still empty when the source is merged in
+
+ flow_cnt[dimension.to_string()].insert(item.to_string());
+ }
+
+ fieldstat_merge(instance_dest, instance_src);
+
+ struct field_list *tag_list = NULL;
+ size_t n_cell = 0;
+ std::vector<struct Fieldstat_tag_list_wrapper *> test_result;
+ fieldstat_cube_get_cells(instance_dest, 0, &tag_list, &n_cell);
+ for (size_t i = 0; i < n_cell; i++) {
+ test_result.push_back(new Fieldstat_tag_list_wrapper(&tag_list[i]));
+ }
+
+ std::unordered_map<std::string, int> expected_unique_cnt; // exact number of distinct items seen per dimension
+ for (auto &kv : flow_cnt) {
+ expected_unique_cnt[kv.first] = kv.second.size();
+ }
+ double recall = test_cal_topk_accuracy(test_result, expected_unique_cnt);
+ EXPECT_GE(recall, 0.7);
+ printf("merge_accuracy_test_gen_dest_full_some_inserted_and_some_merged_and_some_fail_to_add_spreadsketch recall is %lf\n", recall);
+
+ fieldstat_free(instance_src);
+ fieldstat_free(instance_dest);
+ fieldstat_field_list_arr_free(tag_list, n_cell);
+ for (size_t i = 0; i < test_result.size(); i++) {
+ delete test_result[i];
+ }
+}
int main(int argc, char *argv[])
{
testing::InitGoogleTest(&argc, argv);
+ // Debug aid (uncomment to run only one test): testing::GTEST_FLAG(filter) = "*gen_dest_full_all_src_inserted_given_src_flows_larger_spreadsketch";
+
return RUN_ALL_TESTS();
} \ No newline at end of file