summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorchenzizhan <[email protected]>2024-07-16 14:44:58 +0800
committerchenzizhan <[email protected]>2024-07-16 14:44:58 +0800
commita47ec2b6babe917447909afb777917d48957cd2a (patch)
treed577e80d2cdcecb5b297936b3005f949dbe1652c
parent482d4ce120cfa93105991c5edaa3105848a188fe (diff)
accuracy test; fix a bug in spread_sketch_reset
-rw-r--r--include/fieldstat/fieldstat.h4
-rw-r--r--src/cells/spread_sketch.c6
-rw-r--r--src/cells/spread_sketch.h7
-rw-r--r--src/cube.c29
-rw-r--r--test/test_fuzz_test.cpp44
-rw-r--r--test/test_register_and_reset.cpp2
6 files changed, 78 insertions, 14 deletions
diff --git a/include/fieldstat/fieldstat.h b/include/fieldstat/fieldstat.h
index dc3db94..6ab1764 100644
--- a/include/fieldstat/fieldstat.h
+++ b/include/fieldstat/fieldstat.h
@@ -68,6 +68,10 @@ int fieldstat_calibrate(const struct fieldstat *master, struct fieldstat *replic
* @return cube id, if success; otherwise, return FS_ERR_NULL_HANDLER, or FS_ERR_INVALID_PARAM when (max_n_cell == 0 && mode == TOPK). return FS_ERR_INVALID_KEY when the cube_dimensions is not unique.
*/
int fieldstat_create_cube(struct fieldstat *instance, const struct field *cube_dimensions, size_t n_dimension, enum sampling_mode mode, size_t max_n_cell);
+// TODO: rename to fieldstat_cube_create
+
+// TODO: the create-cube interface will change
+// int fieldstat_cube_set_sampling(struct fieldstat *instance, int cube_id, enum sampling_mode mode, int max_n_cell, int primary_metric_id);
/*
@brief Change the topk cube primary metric id. When fieldstat_counter_add or fieldstat_counter_set are called on the primary metric, the topk record of such cell will be updated.
diff --git a/src/cells/spread_sketch.c b/src/cells/spread_sketch.c
index a05d2bd..4486c4c 100644
--- a/src/cells/spread_sketch.c
+++ b/src/cells/spread_sketch.c
@@ -13,6 +13,11 @@
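// TODO: record the primary metric in the sketch, and use the special st Hyperloglog
// TODO: same for topk
+/*
+1. Change "set primary metric" to "set sampling"
+2. Add a list-keys API and a set of APIs that look up base64 etc. by key
+3. serialize does not need to handle the case with exdata
+*/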
struct entry {
int ref_count;
@@ -336,6 +341,7 @@ void spread_sketch_reset(struct spread_sketch *ss) {
memset(ss->level_cnt, 0, sizeof(ss->level_cnt));
ss->level_cnt[0] = ss->depth * ss->width;
+ ss->level_min = 0;
}
void spread_sketch_set_exdata_schema(struct spread_sketch *ss, exdata_new_cb new_fn, exdata_free_cb free_fn, exdata_merge_cb merge_fn, exdata_reset_cb reset_fn, exdata_copy_cb copy_fn) {
diff --git a/src/cells/spread_sketch.h b/src/cells/spread_sketch.h
index 9717238..64f30f7 100644
--- a/src/cells/spread_sketch.h
+++ b/src/cells/spread_sketch.h
@@ -20,6 +20,8 @@ void spread_sketch_free(struct spread_sketch *ss);
void spread_sketch_reset(struct spread_sketch *ss);
int spread_sketch_add(struct spread_sketch *ss, const char *key, size_t key_length, uint64_t item_hash, void *arg);
+// void spread_sketch_add(struct spread_sketch *ss, const char *key, size_t key_length, const char* item, size_t item_len, void *arg);
+// TODO: add an add_hash interface
void spread_sketch_set_exdata_schema(struct spread_sketch *ss, exdata_new_cb new_fn, exdata_free_cb free_fn, exdata_merge_cb merge_fn, exdata_reset_cb reset_fn, exdata_copy_cb copy_fn);
void *spread_sketch_get0_exdata(const struct spread_sketch *ss, const char *key, size_t key_len);
@@ -28,6 +30,11 @@ void *spread_sketch_get0_exdata(const struct spread_sketch *ss, const char *key,
int spread_sketch_get_count(const struct spread_sketch *ss);
size_t spread_sketch_list(const struct spread_sketch *ss, void **exdatas, size_t n_exdatas);
+// A series of queries on the spread sketch
+// size_t spread_sketch_list_keys(const struct spread_sketch *ss, const char *keys[], size_t n_keys);
+// const uint32_t spread_sketch_query_register(ss, const char *key, size_t key_len);
+
+// size_t spread_sketch_list(const struct spread_sketch *ss, void **exdatas, size_t n_exdatas);
void spread_sketch_merge(struct spread_sketch *dest, const struct spread_sketch *src);
diff --git a/src/cube.c b/src/cube.c
index efba3a7..6069cdd 100644
--- a/src/cube.c
+++ b/src/cube.c
@@ -137,6 +137,28 @@ struct cube_manager *cube_manager_new() {
return pthis;
}
+/*
+ * Debug helper: print a field array to stdout.
+ *
+ * Prints a header line with the array length, then one "key: value" line per
+ * field, formatted according to the field's type.
+ *
+ * @param fields  array of fields to print; when NULL only the header line is printed.
+ * @param n_field number of entries in fields.
+ */
+void print_field_array(const struct field *fields, size_t n_field)
+{
+    printf("dimension with length %zu \n", n_field);
+    if (fields == NULL) {
+        return;
+    }
+    for (size_t i = 0; i < n_field; i++) {
+        printf("%s: ", fields[i].key);
+        switch (fields[i].type)
+        {
+        case FIELD_VALUE_INTEGER:
+            printf("%lld\n", fields[i].value_longlong);
+            break;
+        case FIELD_VALUE_DOUBLE:
+            printf("%lf\n", fields[i].value_double);
+            break;
+        case FIELD_VALUE_CSTRING:
+            printf("%s\n", fields[i].value_str);
+            break;
+        default:
+            // Still terminate the line so the next key does not run onto this one.
+            printf("(unknown type)\n");
+            break;
+        }
+    }
+}
+
static void field_array_to_key(const struct field fields[], size_t n_tags, char **out_key, size_t *out_key_size)
{
if (n_tags == 0) {
@@ -303,13 +325,6 @@ void cube_manager_calibrate(struct cube_manager *pthis, const struct cube_manage
}
}
- // for (int i = 0; i < pthis->cube_cnt; i++) {
- // if (pthis->cube[i] == NULL) {
- // continue;
- // }
- // metric_manifest_manager_free(pthis->cube[i]->manifest_manager);
- // pthis->cube[i]->manifest_manager = metric_manifest_manager_copy(master->cube[i]->manifest_manager);
- // }
}
struct cube_manager *cube_manager_fork(const struct cube_manager *src)
diff --git a/test/test_fuzz_test.cpp b/test/test_fuzz_test.cpp
index e9cd3b3..69e1451 100644
--- a/test/test_fuzz_test.cpp
+++ b/test/test_fuzz_test.cpp
@@ -315,6 +315,20 @@ TEST(Fuzz_test, many_instance_random_flow_unregister_calibrate_reset_fork_merge_
EXPECT_GE(accuracy, 0.95);
// printf("topk accuracy: %lf\n", accuracy);
+ // mre
+ double mre = 0;
+ for (size_t j = 0; j < cell_num; j++) {
+ Fieldstat_tag_list_wrapper cell_dimension = Fieldstat_tag_list_wrapper(&fields[j]);
+ long long value_true = count_map[Fieldstat_tag_list_wrapper(shared_tag_out).to_string()][cell_dimension.to_string()];
+ long long value_est;
+ fieldstat_counter_get(instance_in_focus, cube_ids[i], &fields[j], 0, &value_est);
+
+ mre += (double)(abs(value_true - value_est)) / (double)value_true;
+ }
+ mre = mre / cell_num;
+ // printf("topk_add_and_test_accuracy Mean ratio e: %f\n", mre);
+ EXPECT_LE(mre, 0.25);
+
for (size_t j = 0; j < cell_num; j++) {
delete test_result[j];
}
@@ -421,6 +435,7 @@ TEST(Fuzz_test, many_instance_random_flow_unregister_calibrate_reset_fork_merge_
int cube_num;
struct fieldstat *instance_in_focus = dest;
fieldstat_get_cubes(instance_in_focus, &cube_ids, &cube_num);
+ double sum_accuracy = 0;
for (int i = 0; i < cube_num; i++) {
struct field_list *shared_tag_out = fieldstat_cube_get_tags(instance_in_focus, cube_ids[i]);
@@ -433,9 +448,23 @@ TEST(Fuzz_test, many_instance_random_flow_unregister_calibrate_reset_fork_merge_
test_result.push_back(new Fieldstat_tag_list_wrapper(&cells[j]));
}
- double accuracy = test_cal_topk_accuracy(test_result, count_map[Fieldstat_tag_list_wrapper(shared_tag_out).to_string()]);
+ Fieldstat_tag_list_wrapper cube_dimension = Fieldstat_tag_list_wrapper(shared_tag_out);
+ double accuracy = test_cal_topk_accuracy(test_result, count_map[cube_dimension.to_string()]);
// printf("spreadsketch accuracy: %lf\n", accuracy);
- EXPECT_GE(accuracy, 0.7);
+ sum_accuracy += accuracy;
+
+ // MRE
+ double mre = 0;
+ for (size_t j = 0; j < cell_num; j++) {
+ Fieldstat_tag_list_wrapper cell_dimension = Fieldstat_tag_list_wrapper(&cells[j]);
+ double value_true = count_map[cube_dimension.to_string()][cell_dimension.to_string()];
+ double value_est;
+ fieldstat_hll_get(instance_in_focus, cube_ids[i], &cells[j], 0, &value_est);
+
+ mre += fabs(value_true - value_est) / value_true;
+ }
+ mre = mre / cell_num;
+ EXPECT_LE(mre, 0.25);
for (size_t j = 0; j < cell_num; j++) {
delete test_result[j];
@@ -444,6 +473,9 @@ TEST(Fuzz_test, many_instance_random_flow_unregister_calibrate_reset_fork_merge_
fieldstat_tag_list_arr_free(cells, cell_num);
fieldstat_tag_list_arr_free(shared_tag_out, 1);
}
+ double mean_accuracy = sum_accuracy / cube_num;
+ EXPECT_GE(mean_accuracy, 0.7);
+
free(cube_ids);
fieldstat_free(master);
@@ -502,7 +534,7 @@ TEST(Fuzz_test, add_and_reset_with_randomly_generated_flows_and_randomly_chosen_
}
-TEST(Fuzz_test, simple_one_for_perf_topk)
+TEST(perf, simple_one_for_perf_topk)
{
const int CUBE_NUM = 5;
const int FLOW_NUM = 50000;
@@ -557,7 +589,7 @@ TEST(Fuzz_test, simple_one_for_perf_topk)
fieldstat_free(master);
}
-TEST(Fuzz_test, simple_one_for_perf_spreadsketch)
+TEST(perf, simple_one_for_perf_spreadsketch)
{
const int CELL_MAX = 100;
const int TEST_ROUND = 500000;
@@ -592,8 +624,8 @@ TEST(Fuzz_test, simple_one_for_perf_spreadsketch)
// Entry point of the fuzz test binary: initialises GoogleTest, sets the test
// filter and runs the tests. The "-perf.*" filter is a negative pattern, so
// tests registered under the "perf" suite are excluded from the run.
int main(int argc, char *argv[])
{
testing::InitGoogleTest(&argc, argv);
- // testing::GTEST_FLAG(filter) = "*spreadsketch";
- testing::GTEST_FLAG(filter) = "-Fuzz_test.simple_one_for_perf*";
+ // testing::GTEST_FLAG(filter) = "*many_instance_random_flow_unregister_calibrate_reset_fork_merge_spreadsketch";
+ testing::GTEST_FLAG(filter) = "-perf.*";
return RUN_ALL_TESTS();
} \ No newline at end of file
diff --git a/test/test_register_and_reset.cpp b/test/test_register_and_reset.cpp
index 94b7888..a09bda0 100644
--- a/test/test_register_and_reset.cpp
+++ b/test/test_register_and_reset.cpp
@@ -768,6 +768,6 @@ TEST(calibrate, delete_first_cube)
int main(int argc, char *argv[])
{
testing::InitGoogleTest(&argc, argv);
- // testing::GTEST_FLAG(filter) = "test_register.ensure_recovery_more_faster_spreadsketch";
+ testing::GTEST_FLAG(filter) = "test_register.reset_and_new_cell_spreadsketch";
return RUN_ALL_TESTS();
} \ No newline at end of file