diff options
| author | chenzizhan <[email protected]> | 2024-07-16 14:44:58 +0800 |
|---|---|---|
| committer | chenzizhan <[email protected]> | 2024-07-16 14:44:58 +0800 |
| commit | a47ec2b6babe917447909afb777917d48957cd2a (patch) | |
| tree | d577e80d2cdcecb5b297936b3005f949dbe1652c | |
| parent | 482d4ce120cfa93105991c5edaa3105848a188fe (diff) | |
accuracy test; fix a bug in spread_sketch_reset
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | include/fieldstat/fieldstat.h | 4 |
| -rw-r--r-- | src/cells/spread_sketch.c | 6 |
| -rw-r--r-- | src/cells/spread_sketch.h | 7 |
| -rw-r--r-- | src/cube.c | 29 |
| -rw-r--r-- | test/test_fuzz_test.cpp | 44 |
| -rw-r--r-- | test/test_register_and_reset.cpp | 2 |
6 files changed, 78 insertions, 14 deletions
diff --git a/include/fieldstat/fieldstat.h b/include/fieldstat/fieldstat.h index dc3db94..6ab1764 100644 --- a/include/fieldstat/fieldstat.h +++ b/include/fieldstat/fieldstat.h @@ -68,6 +68,10 @@ int fieldstat_calibrate(const struct fieldstat *master, struct fieldstat *replic * @return cube id, if success; otherwise, return FS_ERR_NULL_HANDLER, or FS_ERR_INVALID_PARAM when (max_n_cell == 0 && mode == TOPK). return FS_ERR_INVALID_KEY when the cube_dimensions is not unique. */ int fieldstat_create_cube(struct fieldstat *instance, const struct field *cube_dimensions, size_t n_dimension, enum sampling_mode mode, size_t max_n_cell); +// todo: 重命名为fieldstat_cube_create + +//todo: create cube 接口变化 +// int fieldstat_cube_set_sampling(struct fieldstat *instance, int cube_id, enum sampling_mode mode, int max_n_cell, int primary_metric_id); /* @brief Change the topk cube primary metric id. When fieldstat_counter_add or fieldstat_counter_set are called on the primary metric, the topk record of such cell will be updated. diff --git a/src/cells/spread_sketch.c b/src/cells/spread_sketch.c index a05d2bd..4486c4c 100644 --- a/src/cells/spread_sketch.c +++ b/src/cells/spread_sketch.c @@ -13,6 +13,11 @@ // todo:把primary metric 记到sketch 里,且使用特殊的st Hyperloglog // todo: topk 也是这样 +/* +1. 修改set primary metric 为 set sampling +2. 增加一个list key 和一堆根据key 查base64之类的接口 +3. 
serialize 不用实现带exdata的情况 +*/ struct entry { int ref_count; @@ -336,6 +341,7 @@ void spread_sketch_reset(struct spread_sketch *ss) { memset(ss->level_cnt, 0, sizeof(ss->level_cnt)); ss->level_cnt[0] = ss->depth * ss->width; + ss->level_min = 0; } void spread_sketch_set_exdata_schema(struct spread_sketch *ss, exdata_new_cb new_fn, exdata_free_cb free_fn, exdata_merge_cb merge_fn, exdata_reset_cb reset_fn, exdata_copy_cb copy_fn) { diff --git a/src/cells/spread_sketch.h b/src/cells/spread_sketch.h index 9717238..64f30f7 100644 --- a/src/cells/spread_sketch.h +++ b/src/cells/spread_sketch.h @@ -20,6 +20,8 @@ void spread_sketch_free(struct spread_sketch *ss); void spread_sketch_reset(struct spread_sketch *ss); int spread_sketch_add(struct spread_sketch *ss, const char *key, size_t key_length, uint64_t item_hash, void *arg); +// void spread_sketch_add(struct spread_sketch *ss, const char *key, size_t key_length, const char* item, size_t item_len, void *arg); +// TODO: 增加add_hash 接口 void spread_sketch_set_exdata_schema(struct spread_sketch *ss, exdata_new_cb new_fn, exdata_free_cb free_fn, exdata_merge_cb merge_fn, exdata_reset_cb reset_fn, exdata_copy_cb copy_fn); void *spread_sketch_get0_exdata(const struct spread_sketch *ss, const char *key, size_t key_len); @@ -28,6 +30,11 @@ void *spread_sketch_get0_exdata(const struct spread_sketch *ss, const char *key, int spread_sketch_get_count(const struct spread_sketch *ss); size_t spread_sketch_list(const struct spread_sketch *ss, void **exdatas, size_t n_exdatas); +// 一系列对于spread sketch的查询 +// size_t spread_sketch_list_keys(const struct spread_sketch *ss, const char *keys[], size_t n_keys); +// const uint32_t spread_sketch_query_register(ss, const char *key, size_t key_len); + +// size_t spread_sketch_list(const struct spread_sketch *ss, void **exdatas, size_t n_exdatas); void spread_sketch_merge(struct spread_sketch *dest, const struct spread_sketch *src); @@ -137,6 +137,28 @@ struct cube_manager *cube_manager_new() { return 
pthis; } +void print_field_array(const struct field *fields, size_t n_field) +{ + printf("dimension with length %zu \n", n_field); + for (size_t i = 0; i < n_field; i++) { + printf("%s: ", fields[i].key); + switch (fields[i].type) + { + case FIELD_VALUE_INTEGER: + printf("%lld\n", fields[i].value_longlong); + break; + case FIELD_VALUE_DOUBLE: + printf("%lf\n", fields[i].value_double); + break; + case FIELD_VALUE_CSTRING: + printf("%s\n", fields[i].value_str); + break; + default: + break; + } + } +} + static void field_array_to_key(const struct field fields[], size_t n_tags, char **out_key, size_t *out_key_size) { if (n_tags == 0) { @@ -303,13 +325,6 @@ void cube_manager_calibrate(struct cube_manager *pthis, const struct cube_manage } } - // for (int i = 0; i < pthis->cube_cnt; i++) { - // if (pthis->cube[i] == NULL) { - // continue; - // } - // metric_manifest_manager_free(pthis->cube[i]->manifest_manager); - // pthis->cube[i]->manifest_manager = metric_manifest_manager_copy(master->cube[i]->manifest_manager); - // } } struct cube_manager *cube_manager_fork(const struct cube_manager *src) diff --git a/test/test_fuzz_test.cpp b/test/test_fuzz_test.cpp index e9cd3b3..69e1451 100644 --- a/test/test_fuzz_test.cpp +++ b/test/test_fuzz_test.cpp @@ -315,6 +315,20 @@ TEST(Fuzz_test, many_instance_random_flow_unregister_calibrate_reset_fork_merge_ EXPECT_GE(accuracy, 0.95); // printf("topk accuracy: %lf\n", accuracy); + // mre + double mre = 0; + for (size_t j = 0; j < cell_num; j++) { + Fieldstat_tag_list_wrapper cell_dimension = Fieldstat_tag_list_wrapper(&fields[j]); + long long value_true = count_map[Fieldstat_tag_list_wrapper(shared_tag_out).to_string()][cell_dimension.to_string()]; + long long value_est; + fieldstat_counter_get(instance_in_focus, cube_ids[i], &fields[j], 0, &value_est); + + mre += (double)(abs(value_true - value_est)) / (double)value_true; + } + mre = mre / cell_num; + // printf("topk_add_and_test_accuracy Mean ratio e: %f\n", mre); + EXPECT_LE(mre, 
0.25); + for (size_t j = 0; j < cell_num; j++) { delete test_result[j]; } @@ -421,6 +435,7 @@ TEST(Fuzz_test, many_instance_random_flow_unregister_calibrate_reset_fork_merge_ int cube_num; struct fieldstat *instance_in_focus = dest; fieldstat_get_cubes(instance_in_focus, &cube_ids, &cube_num); + double sum_accuracy = 0; for (int i = 0; i < cube_num; i++) { struct field_list *shared_tag_out = fieldstat_cube_get_tags(instance_in_focus, cube_ids[i]); @@ -433,9 +448,23 @@ TEST(Fuzz_test, many_instance_random_flow_unregister_calibrate_reset_fork_merge_ test_result.push_back(new Fieldstat_tag_list_wrapper(&cells[j])); } - double accuracy = test_cal_topk_accuracy(test_result, count_map[Fieldstat_tag_list_wrapper(shared_tag_out).to_string()]); + Fieldstat_tag_list_wrapper cube_dimension = Fieldstat_tag_list_wrapper(shared_tag_out); + double accuracy = test_cal_topk_accuracy(test_result, count_map[cube_dimension.to_string()]); // printf("spreadsketch accuracy: %lf\n", accuracy); - EXPECT_GE(accuracy, 0.7); + sum_accuracy += accuracy; + + // MRE + double mre = 0; + for (size_t j = 0; j < cell_num; j++) { + Fieldstat_tag_list_wrapper cell_dimension = Fieldstat_tag_list_wrapper(&cells[j]); + double value_true = count_map[cube_dimension.to_string()][cell_dimension.to_string()]; + double value_est; + fieldstat_hll_get(instance_in_focus, cube_ids[i], &cells[j], 0, &value_est); + + mre += fabs(value_true - value_est) / value_true; + } + mre = mre / cell_num; + EXPECT_LE(mre, 0.25); for (size_t j = 0; j < cell_num; j++) { delete test_result[j]; @@ -444,6 +473,9 @@ TEST(Fuzz_test, many_instance_random_flow_unregister_calibrate_reset_fork_merge_ fieldstat_tag_list_arr_free(cells, cell_num); fieldstat_tag_list_arr_free(shared_tag_out, 1); } + double mean_accuracy = sum_accuracy / cube_num; + EXPECT_GE(mean_accuracy, 0.7); + free(cube_ids); fieldstat_free(master); @@ -502,7 +534,7 @@ TEST(Fuzz_test, add_and_reset_with_randomly_generated_flows_and_randomly_chosen_ } -TEST(Fuzz_test, 
simple_one_for_perf_topk) +TEST(perf, simple_one_for_perf_topk) { const int CUBE_NUM = 5; const int FLOW_NUM = 50000; @@ -557,7 +589,7 @@ TEST(Fuzz_test, simple_one_for_perf_topk) fieldstat_free(master); } -TEST(Fuzz_test, simple_one_for_perf_spreadsketch) +TEST(perf, simple_one_for_perf_spreadsketch) { const int CELL_MAX = 100; const int TEST_ROUND = 500000; @@ -592,8 +624,8 @@ TEST(Fuzz_test, simple_one_for_perf_spreadsketch) int main(int argc, char *argv[]) { testing::InitGoogleTest(&argc, argv); - // testing::GTEST_FLAG(filter) = "*spreadsketch"; - testing::GTEST_FLAG(filter) = "-Fuzz_test.simple_one_for_perf*"; + // testing::GTEST_FLAG(filter) = "*many_instance_random_flow_unregister_calibrate_reset_fork_merge_spreadsketch"; + testing::GTEST_FLAG(filter) = "-perf.*"; return RUN_ALL_TESTS(); }
\ No newline at end of file diff --git a/test/test_register_and_reset.cpp b/test/test_register_and_reset.cpp index 94b7888..a09bda0 100644 --- a/test/test_register_and_reset.cpp +++ b/test/test_register_and_reset.cpp @@ -768,6 +768,6 @@ TEST(calibrate, delete_first_cube) int main(int argc, char *argv[]) { testing::InitGoogleTest(&argc, argv); - // testing::GTEST_FLAG(filter) = "test_register.ensure_recovery_more_faster_spreadsketch"; + testing::GTEST_FLAG(filter) = "test_register.reset_and_new_cell_spreadsketch"; return RUN_ALL_TESTS(); }
\ No newline at end of file
