summary refs log tree commit diff
path: root/src/cube.c
diff options
context:
space:
mode:
author chenzizhan <[email protected]> 2024-07-10 11:03:24 +0800
committer chenzizhan <[email protected]> 2024-07-10 11:03:24 +0800
commit 677f337e195e3b9b6e416109df8d51c14da2791b (patch)
tree 5000114106f11d715e95eb9762dddaf5af361c41 /src/cube.c
parent 34be9bf8b545162c1a32f751776906c5fd1f5ad3 (diff)
parent e1fd771fc7e33ffd659535e81412179e8ac6929a (diff)
Merge branch 'spreadsketch' into refactor-heavykeeper-newkey
Diffstat (limited to 'src/cube.c')
-rw-r--r-- src/cube.c 46
1 files changed, 43 insertions, 3 deletions
diff --git a/src/cube.c b/src/cube.c
index 6b1f386..47c9065 100644
--- a/src/cube.c
+++ b/src/cube.c
@@ -13,6 +13,7 @@
#include "metric.h"
#include "heavy_keeper.h"
#include "tag_map.h"
+#include "spread_sketch.h"
#define DEFAULT_N_METRIC 32
#define DEFAULT_N_CUBE 64
@@ -42,6 +43,7 @@ struct cube {
union {
struct heavy_keeper *topk;
struct hash_table *comprehensive;
+ struct spread_sketch *spread_sketch;
};
size_t max_n_cell;
@@ -570,6 +572,9 @@ struct cube *cube_new(const struct field *dimensions, size_t n_dimensions, enum
cube->comprehensive = hash_table_new(max_n_cell);
hash_table_set_exdata_schema(cube->comprehensive, exdata_new_i, exdata_free_i, exdata_merge_i, exdata_reset_i, exdata_copy_i);
break;
+ case SAMPLING_MODE_SPREADSKETCH:
+ cube->spread_sketch = spread_sketch_new(max_n_cell);
+ spread_sketch_set_exdata_schema(cube->spread_sketch, exdata_new_i, exdata_free_i, exdata_merge_i, exdata_reset_i, exdata_copy_i);
default:
assert(0);
break;
@@ -587,6 +592,9 @@ void cube_free(struct cube *cube) {
case SAMPLING_MODE_COMPREHENSIVE:
hash_table_free(cube->comprehensive);
break;
+ case SAMPLING_MODE_SPREADSKETCH:
+ spread_sketch_free(cube->spread_sketch);
+ break;
default:
assert(0);
break;
@@ -599,10 +607,25 @@ void cube_free(struct cube *cube) {
}
void cube_reset(struct cube *cube) {
- if (cube->sampling_mode == SAMPLING_MODE_TOPK) {
+ // if (cube->sampling_mode == SAMPLING_MODE_TOPK) {
+ // heavy_keeper_reset(cube->topk);
+ // } else {
+ // hash_table_reset(cube->comprehensive);
+ // }
+ switch (cube->sampling_mode)
+ {
+ case SAMPLING_MODE_TOPK:
heavy_keeper_reset(cube->topk);
- } else {
+ break;
+ case SAMPLING_MODE_COMPREHENSIVE:
hash_table_reset(cube->comprehensive);
+ break;
+ case SAMPLING_MODE_SPREADSKETCH:
+ spread_sketch_reset(cube->spread_sketch);
+ break;
+ default:
+ assert(0);
+ break;
}
}
@@ -656,7 +679,24 @@ struct cell *get_cell(struct cube *cube, const struct field *dimensions, size_t
}
}
break;}
- }
+ case SAMPLING_MODE_SPREADSKETCH: {
+ if (cube->primary_metric_id != metric_id) {
+ cell_data = spread_sketch_get0_exdata(cube->spread_sketch, tag_in_string, tag_len);
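+ // todo: spread sketch cannot support the dummy scenario. First, not every metric can be routed through the spread sketch flow,
+ // because normally using a level=0 hash as the value is meaningless: it can never update anything and only serves to record the key at the very beginning.
+ // And if, as with topk, entries with count=0 were given a slot, there is a problem: spread sketch itself places no limit on the number of records, so when should recording stop? Such a design is too cumbersome.
+ // In an earlier discussion with the boss, two options were proposed. Option 1: keep a buffer; if get0_exdata finds nothing, write into a cell in the buffer, and once the primary metric arrives, feed that cell in.
+ // Option 2: simply skip the first-round add. This introduces a small amount of error. However, this operation is not per-packet but happens once at session start/end, so the error will not be negligible either. It requires that He Lanfeng operates on the primary metric first.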
+ }
+ if (cell_data == NULL) {
+ int tmp_ret = spread_sketch_add(cube->spread_sketch, tag_in_string, tag_len, 0, (void *)&args);
+ if (tmp_ret == 1) {
+ cell_data = spread_sketch_get0_exdata(cube->spread_sketch, tag_in_string, tag_len);
+ }
+ }
+ break;}
+ }
+
if (free_key) {
free(key);