summary | refs | log | tree | commit | diff
path: root/test/test_metric_hll.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'test/test_metric_hll.cpp')
-rw-r--r-- test/test_metric_hll.cpp | 68
1 files changed, 68 insertions, 0 deletions
diff --git a/test/test_metric_hll.cpp b/test/test_metric_hll.cpp
index 8e96266..7c38de2 100644
--- a/test/test_metric_hll.cpp
+++ b/test/test_metric_hll.cpp
@@ -1,4 +1,10 @@
#include <gtest/gtest.h>
+#include <string>
+#include <vector>
+#include <unordered_map>
+#include <unordered_set>
+#include <math.h>
+
#include "fieldstat.h"
#include "utils.hpp"
@@ -216,9 +222,71 @@ TEST(metric_test_hll, add_with_wrong_metric_id_expecting_fail)
fieldstat_free(instance);
}
+/*
+ * Accuracy test for an HLL metric in a cube created with SAMPLING_MODE_SPREADSKETCH.
+ *
+ * Feeds n_flows flows produced by SpreadSketchZipfGenerator into the cube, using
+ * "src ip" as the cell dimension and "dst ip" as the counted item, while keeping
+ * an exact per-source set of distinct destinations as ground truth. It then checks:
+ *   - the cube reports exactly K cells;
+ *   - the recall computed by test_cal_topk_accuracy against the ground truth is >= 0.8;
+ *   - the mean relative error of the HLL estimates over the reported cells is <= 0.2.
+ */
+TEST(metric_test_hll, spread_sketch_add_and_test_accuracy)
+{
+    struct fieldstat *instance = fieldstat_new();
+    int K = 10;
+    // NOTE(review): the calls below address cube 0 and metric 0, i.e. they assume these two
+    // registrations return id 0 — confirm against the fieldstat API.
+    fieldstat_create_cube(instance, &TEST_TAG_INT_collided, 1, SAMPLING_MODE_SPREADSKETCH, K);
+    fieldstat_register_hll(instance, "testss", 6);
+
+    int n_flows = 100000;
+    std::unordered_map<std::string, std::unordered_set<std::string>> flow_cnt;
+    SpreadSketchZipfGenerator generator(1.0, K * 10); // give much bigger distribution, so that we can test the accuracy
+    for (int i = 0; i < n_flows; i++) {
+        Flow f = generator.next();
+        Fieldstat_tag_list_wrapper dimension("src ip", f.src_ip.c_str());
+        Fieldstat_tag_list_wrapper counted("dst ip", f.dst_ip.c_str());
+
+        fieldstat_hll_add_field(instance, 0, 0, dimension.get_tag(), dimension.get_tag_count(), counted.get_tag(), counted.get_tag_count());
+
+        // Exact ground truth: the set of distinct counted items seen for each dimension.
+        flow_cnt[dimension.to_string()].insert(counted.to_string());
+    }
+
+    // recall
+    std::unordered_map<std::string, int> expected_unique_cnt;
+    for (const auto &kv : flow_cnt) {
+        expected_unique_cnt[kv.first] = static_cast<int>(kv.second.size());
+    }
+
+    struct field_list *tag_list = NULL;
+    size_t n_cell = 0;
+    fieldstat_cube_get_cells(instance, 0, &tag_list, &n_cell);
+    // Cast so both operands are size_t; avoids a signed/unsigned comparison.
+    EXPECT_EQ(n_cell, static_cast<size_t>(K));
+
+    // test_cal_topk_accuracy takes raw pointers; they are released at the end of the test.
+    std::vector<struct Fieldstat_tag_list_wrapper *> test_result;
+    test_result.reserve(n_cell);
+    for (size_t i = 0; i < n_cell; i++) {
+        test_result.push_back(new Fieldstat_tag_list_wrapper(&tag_list[i]));
+    }
+    double recall = test_cal_topk_accuracy(test_result, expected_unique_cnt);
+    printf("spread_sketch_add_and_test_accuracy recall: %f\n", recall);
+    EXPECT_GE(recall, 0.8);
+
+    // MRE
+    double relative_error_sum = 0;
+    size_t n_scored = 0;
+    for (size_t i = 0; i < n_cell; i++) {
+        Fieldstat_tag_list_wrapper tmp = Fieldstat_tag_list_wrapper(&tag_list[i]);
+        const std::string key = tmp.to_string();
+
+        // Look the cell up without operator[]: a cell missing from the ground truth would
+        // otherwise be default-inserted as 0 and turn the relative error into inf/NaN.
+        auto truth = expected_unique_cnt.find(key);
+        if (truth == expected_unique_cnt.end() || truth->second <= 0) {
+            ADD_FAILURE() << "reported cell " << key << " has no ground-truth count";
+            continue;
+        }
+        double value_true = truth->second;
+
+        // Initialized so the value is defined even if the getter does not write it.
+        double value_est = 0;
+        fieldstat_hll_get(instance, 0, &tag_list[i], 0, &value_est);
+        // printf("the estimated value for %s is %f, the true value is %f\n", key.c_str(), value_est, value_true);
+
+        relative_error_sum += fabs(value_true - value_est) / value_true;
+        n_scored++;
+    }
+    // With no scorable cell the mean is reported as 0 instead of NaN; that situation is
+    // already flagged by the cell-count check or by the ADD_FAILURE above.
+    double mre = n_scored > 0 ? relative_error_sum / n_scored : 0;
+    printf("spread_sketch_add_and_test_accuracy Mean ratio e: %f\n", mre);
+    EXPECT_LE(mre, 0.2);
+
+    fieldstat_tag_list_arr_free(tag_list, n_cell);
+    fieldstat_free(instance);
+    for (auto &ptr : test_result) {
+        delete ptr;
+    }
+}
int main(int argc, char *argv[])
{
testing::InitGoogleTest(&argc, argv);
+ // testing::GTEST_FLAG(filter) = "metric_test_hll.spread_sketch_add_and_test_accuracy";
return RUN_ALL_TESTS();
}