diff options
| author | chenzizhan <[email protected]> | 2024-07-11 16:14:09 +0800 |
|---|---|---|
| committer | chenzizhan <[email protected]> | 2024-07-11 16:14:09 +0800 |
| commit | 5dc3d8a96bb203abc1ee050cd0c884f2ab989dba (patch) | |
| tree | 38f5bc67522843c1cca51a30e413e4f8f9d1834e /test/test_metric_hll.cpp | |
| parent | 677f337e195e3b9b6e416109df8d51c14da2791b (diff) | |
spread sketch merge, reset
Diffstat (limited to 'test/test_metric_hll.cpp')
| -rw-r--r-- | test/test_metric_hll.cpp | 68 |
1 files changed, 68 insertions, 0 deletions
diff --git a/test/test_metric_hll.cpp b/test/test_metric_hll.cpp index 8e96266..7c38de2 100644 --- a/test/test_metric_hll.cpp +++ b/test/test_metric_hll.cpp @@ -1,4 +1,10 @@ #include <gtest/gtest.h> +#include <string> +#include <vector> +#include <unordered_map> +#include <unordered_set> +#include <math.h> + #include "fieldstat.h" #include "utils.hpp" @@ -216,9 +222,71 @@ TEST(metric_test_hll, add_with_wrong_metric_id_expecting_fail) fieldstat_free(instance); } +TEST(metric_test_hll, spread_sketch_add_and_test_accuracy) +{ + struct fieldstat *instance = fieldstat_new(); + int K = 10; + fieldstat_create_cube(instance, &TEST_TAG_INT_collided, 1, SAMPLING_MODE_SPREADSKETCH, K); + fieldstat_register_hll(instance, "testss", 6); + + int n_flows = 100000; + std::unordered_map<std::string, std::unordered_set<std::string>> flow_cnt; + SpreadSketchZipfGenerator generator(1.0, K * 10); // give much bigger distribution, so that we can test the accuracy + for (int i = 0; i < n_flows; i++) + { + Flow f = generator.next(); + Fieldstat_tag_list_wrapper dimension("src ip", f.src_ip.c_str()); + Fieldstat_tag_list_wrapper counted("dst ip", f.dst_ip.c_str()); + + fieldstat_hll_add_field(instance, 0, 0, dimension.get_tag(), dimension.get_tag_count(), counted.get_tag(), counted.get_tag_count()); + + flow_cnt[dimension.to_string()].insert(counted.to_string()); + } + + // recall + std::unordered_map<std::string, int> expected_unique_cnt; + std::vector<struct Fieldstat_tag_list_wrapper *> test_result; + for (auto &kv : flow_cnt) { + expected_unique_cnt[kv.first] = kv.second.size(); + } + + struct field_list *tag_list = NULL; + size_t n_cell = 0; + fieldstat_cube_get_cells(instance, 0, &tag_list, &n_cell); + EXPECT_EQ(n_cell, K); + for (size_t i = 0; i < n_cell; i++) { + Fieldstat_tag_list_wrapper tmp = Fieldstat_tag_list_wrapper(&tag_list[i]); + test_result.push_back(new Fieldstat_tag_list_wrapper(tmp)); + } + double recall = test_cal_topk_accuracy(test_result, expected_unique_cnt); + printf("spread_sketch_add_and_test_accuracy recall: %f\n", recall); + EXPECT_GE(recall, 0.8); + + // MRE + double mre = 0; + for (size_t i = 0; i < n_cell; i++) { + Fieldstat_tag_list_wrapper tmp = Fieldstat_tag_list_wrapper(&tag_list[i]); + double value_true = expected_unique_cnt[tmp.to_string()]; + double value_est; + fieldstat_hll_get(instance, 0, &tag_list[i], 0, &value_est); + // printf("the estimated value for %s is %f, the true value is %f\n", tmp.to_string().c_str(), value_est, value_true); + + mre += fabs(value_true - value_est) / value_true; + } + mre = mre / n_cell; + printf("topk_add_and_test_accuracy Mean ratio e: %f\n", mre); + EXPECT_LE(mre, 0.2); + + fieldstat_tag_list_arr_free(tag_list, n_cell); + fieldstat_free(instance); + for (auto &ptr : test_result) { + delete ptr; + } +} int main(int argc, char *argv[]) { testing::InitGoogleTest(&argc, argv); + // testing::GTEST_FLAG(filter) = "metric_test_hll.spread_sketch_add_and_test_accuracy"; return RUN_ALL_TESTS(); } |
