summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: chenzizhan <[email protected]> 2024-10-14 11:27:41 +0800
committer: chenzizhan <[email protected]> 2024-10-14 11:27:41 +0800
commit: 652bccee18d8555232dc1679cfbe7bf677181c77 (patch)
tree: 1d3c795060c5d697e281dbfd1cf2b455d420e2d8 /src
parent: 2d9b9cda59297fa80889c875b909881d25709045 (diff)
Performance: Cache cell for repeatedly adding metrics to the same cell
Diffstat (limited to 'src')
-rw-r--r-- src/cells/hash_table.c | 4
-rw-r--r-- src/cells/hash_table.h | 2
-rw-r--r-- src/cells/heavy_keeper.c | 13
-rw-r--r-- src/cells/heavy_keeper.h | 1
-rw-r--r-- src/cells/spread_sketch.c | 11
-rw-r--r-- src/cells/spread_sketch.h | 2
-rw-r--r-- src/cube.c | 87
7 files changed, 98 insertions, 22 deletions
diff --git a/src/cells/hash_table.c b/src/cells/hash_table.c
index 255a3ef..362b563 100644
--- a/src/cells/hash_table.c
+++ b/src/cells/hash_table.c
@@ -115,7 +115,7 @@ int hash_table_add(struct hash_table *pthis, const char *key, size_t key_len, vo
assert(item->dying);
item->dying = false;
pthis->current_cell_num++;
- return 1;
+ return 2;
}
item = calloc(1, sizeof(struct entry));
@@ -126,7 +126,7 @@ int hash_table_add(struct hash_table *pthis, const char *key, size_t key_len, vo
HASH_ADD_KEYPTR(hh, pthis->key_exdata_map, item->key, key_len, item);
pthis->current_cell_num++;
- return 1;
+ return 2;
}
void hash_table_set_exdata_schema(struct hash_table *pthis, exdata_new_cb new_fn, exdata_free_cb free_fn, exdata_merge_cb merge_fn, exdata_reset_cb reset_fn, exdata_copy_cb copy_fn) {
diff --git a/src/cells/hash_table.h b/src/cells/hash_table.h
index 59fc8e8..b310040 100644
--- a/src/cells/hash_table.h
+++ b/src/cells/hash_table.h
@@ -17,7 +17,7 @@ void hash_table_reset(struct hash_table *pthis);
int hash_table_merge(struct hash_table *dest, struct hash_table *src);
struct hash_table *hash_table_copy(const struct hash_table *src);
-// int hash_table_add(struct hash_table *pthis, const char *key, size_t key_len, int count, void *arg);
+// return 0 for not added, 1 for an existing item, 2 for added a new item
int hash_table_add(struct hash_table *pthis, const char *key, size_t key_len, void *arg);
// void *hash_table_get0_exdata(struct hash_table *pthis, const char *key, size_t key_len);
diff --git a/src/cells/heavy_keeper.c b/src/cells/heavy_keeper.c
index 82fe4be..eff0c78 100644
--- a/src/cells/heavy_keeper.c
+++ b/src/cells/heavy_keeper.c
@@ -565,9 +565,6 @@ static inline uint64_t cal_hash_val_with_seed(const char *key, size_t key_len, u
return XXH3_64bits_withSeed(key, key_len, seed);
}
-/*
-1 for newly add something. 0 for not add. -1 for unexpected cases.
-*/
int heavy_keeper_add(struct heavy_keeper *heavy_keeper, const char *key, size_t key_len, long long count, void *arg) {
assert(count >= 0);
if (count == 0) {
@@ -575,11 +572,11 @@ int heavy_keeper_add(struct heavy_keeper *heavy_keeper, const char *key, size_t
const struct heap_entry *entry = sorted_set_find_entry(heavy_keeper->top_K_heap, key, key_len);
if (entry != NULL && !sorted_set_entry_dying(entry)) {
- return 0;
+ return 1;
}
sorted_set_insert(heavy_keeper->top_K_heap, key, key_len, count, arg);
- return 1;
+ return 2;
}
return 0;
}
@@ -631,19 +628,19 @@ int heavy_keeper_add(struct heavy_keeper *heavy_keeper, const char *key, size_t
if (not_in_sorted_set) {
if (sorted_set_cardinality(summary) != heavy_keeper->K) {
sorted_set_insert(summary, key, key_len, maxv, arg);
- return 1;
+ return 2;
}
long long min_value = sorted_set_get_min_score(summary);
if (maxv > min_value || min_value == NOT_FIND) {
sorted_set_insert(summary, key, key_len, maxv, arg);
- return 1;
+ return 2;
}
return 0;
} else {
if (maxv > old_cnt) {
sorted_set_incrby(summary, key, key_len, maxv - old_cnt);
}
- return 1; // no popped, but the exdata definitely exists in the sorted set
+ return 1;
}
}
diff --git a/src/cells/heavy_keeper.h b/src/cells/heavy_keeper.h
index 21c5b23..07e617a 100644
--- a/src/cells/heavy_keeper.h
+++ b/src/cells/heavy_keeper.h
@@ -24,6 +24,7 @@ void heavy_keeper_free(struct heavy_keeper *hk);
*/
void heavy_keeper_reset(struct heavy_keeper *hk);
+// return -1 for error, 0 for not added, 1 for updated an existing item, 2 for added a new item
int heavy_keeper_add(struct heavy_keeper *hk, const char *key, size_t key_len, long long count, void *arg);
int heavy_keeper_set_exdata_schema(struct heavy_keeper *hk, exdata_new_cb new_fn, exdata_free_cb free_fn, exdata_merge_cb merge_fn, exdata_reset_cb reset_fn, exdata_copy_cb copy_fn);
diff --git a/src/cells/spread_sketch.c b/src/cells/spread_sketch.c
index 9618771..c2dee92 100644
--- a/src/cells/spread_sketch.c
+++ b/src/cells/spread_sketch.c
@@ -239,10 +239,10 @@ void move_registers_forward(struct spread_sketch *ss, const struct timeval *now)
}
}
-// return 0 if not added, return 1 if added
int spread_sketch_add_hash(struct spread_sketch *ss, const char *entry, size_t entry_length, uint64_t item_hash, void *arg, struct timeval now) {
uint32_t level = (uint32_t)__builtin_clzll(item_hash) + 1;
long long now_ms = now.tv_sec * 1000 + now.tv_usec / 1000;
+ int ret_code = 0; // // return 0 if not added, return 1 if the item already exists in the spread sketch, return 2 if the item is added successfully
if (item_hash == DUMMY_ITEM_HASH) {
if (ss->level0_cnt == 0) {
@@ -257,7 +257,6 @@ int spread_sketch_add_hash(struct spread_sketch *ss, const char *entry, size_t e
uint32_t hash_x1 = (uint32_t) (hash_x_tmp >> 32);
uint32_t hash_x2 = (uint32_t) hash_x_tmp;
- bool in_sketch = false;
move_registers_forward(ss, &now);
for (int i = 0; i < ss->depth; i++) {
uint32_t hash_x = hash_x1 + i * hash_x2;
@@ -274,7 +273,9 @@ int spread_sketch_add_hash(struct spread_sketch *ss, const char *entry, size_t e
}
bucket->level = level;
}
- in_sketch = true;
+ if (ret_code < 1) {
+ ret_code = 1;
+ }
} else {
uint32_t true_level = bucket->content == NULL ? 0: cal_true_level(ss, bucket_idx, now_ms);
@@ -294,7 +295,7 @@ int spread_sketch_add_hash(struct spread_sketch *ss, const char *entry, size_t e
bucket->last_update_ms = now_ms;
bucket->level = level;
- in_sketch = true;
+ ret_code = 2;
}
}
if (item_hash != DUMMY_ITEM_HASH) {
@@ -302,7 +303,7 @@ int spread_sketch_add_hash(struct spread_sketch *ss, const char *entry, size_t e
}
}
- return in_sketch ? 1 : 0;
+ return ret_code;
}
int spread_sketch_add(struct spread_sketch *ss, const char *entry, size_t entry_length, const char* item, size_t item_len, void *arg, struct timeval now) {
diff --git a/src/cells/spread_sketch.h b/src/cells/spread_sketch.h
index abd6de2..3062b37 100644
--- a/src/cells/spread_sketch.h
+++ b/src/cells/spread_sketch.h
@@ -21,7 +21,9 @@ struct spread_sketch *spread_sketch_new(int depth, int width, unsigned char prec
void spread_sketch_free(struct spread_sketch *ss);
void spread_sketch_set_exdata_schema(struct spread_sketch *ss, exdata_new_cb new_fn, exdata_free_cb free_fn, exdata_merge_cb merge_fn, exdata_reset_cb reset_fn, exdata_copy_cb copy_fn);
+// return 0 if not added, return 1 if the item already exists in the spread sketch, return 2 if the item is added successfully
int spread_sketch_add_hash(struct spread_sketch *ss, const char *entry, size_t entry_length, uint64_t item_hash, void *arg, struct timeval now);
+// the same as `spread_sketch_add_hash`, but the item is a string
int spread_sketch_add(struct spread_sketch *ss, const char *entry, size_t entry_length, const char* item, size_t item_len, void *arg, struct timeval now);
// get the number of entries stored in spread sketch
diff --git a/src/cube.c b/src/cube.c
index 1c3e6df..d4125b0 100644
--- a/src/cube.c
+++ b/src/cube.c
@@ -40,6 +40,12 @@ struct cell {
struct field_list cell_dimensions;
};
+struct cell_cache { // Single-entry cache: one serialized dimension key and the cell it was last resolved to.
+ char *serialized_cell_dimensions; // heap copy of the key made by cell_cache_replace; NULL means the cache is empty
+ size_t serialized_cell_dimensions_len; // key length in bytes; keys are compared with memcmp, not as C strings
+ struct cell *cell; // cell remembered for the key; cell_cache_clear only forgets this pointer, it does not free the cell
+};
+
struct cube {
enum sampling_mode sampling_mode;
union {
@@ -58,6 +64,8 @@ struct cube {
size_t serialized_dimensions_len;
int id;
UT_hash_handle hh;
+
+ struct cell_cache last_oper_cache;
};
static struct field *field_array_duplicate(const struct field *fields_src, size_t n_field)
@@ -89,6 +97,46 @@ static struct field *field_array_duplicate(const struct field *fields_src, size_
return ret;
}
+// Report whether the cache currently holds exactly the given serialized key.
+// A cache with no stored key never matches. Returns true on a hit, false otherwise.
+bool cell_cache_check(const struct cell_cache *cache, const char *serialized_cell_dimensions, size_t serialized_cell_dimensions_len)
+{
+    const char *cached_key = cache->serialized_cell_dimensions;
+
+    if (cached_key == NULL || cache->serialized_cell_dimensions_len != serialized_cell_dimensions_len) {
+        return false;
+    }
+
+    // Lengths agree, so the keys are equal exactly when their bytes are equal.
+    return memcmp(cached_key, serialized_cell_dimensions, serialized_cell_dimensions_len) == 0;
+}
+
+// Return the cell pointer stored in the cache. The key is not validated here;
+// use cell_cache_check to decide whether the stored cell applies.
+struct cell *cell_cache_get(const struct cell_cache *cache)
+{
+    struct cell *cached_cell = cache->cell;
+    return cached_cell;
+}
+
+// Make the cache remember `cell` under a private copy of the given serialized key,
+// releasing whatever key was stored before.
+//  - The key bytes are copied, so the caller keeps ownership of `serialized_cell_dimensions`.
+//  - `cell` is stored as-is; the cache does not take ownership of it.
+//  - If the copy cannot be allocated, the cache is left empty, so later lookups simply miss.
+void cell_cache_replace(struct cell_cache *cache, const char *serialized_cell_dimensions, size_t serialized_cell_dimensions_len, struct cell *cell)
+{
+    // Request at least one byte so that a zero-length key still yields a non-NULL
+    // pointer: cell_cache_check treats a NULL key as "cache empty".
+    size_t alloc_len = serialized_cell_dimensions_len > 0 ? serialized_cell_dimensions_len : 1;
+    char *key_copy = malloc(alloc_len);
+
+    if (key_copy != NULL && serialized_cell_dimensions_len > 0) {
+        memcpy(key_copy, serialized_cell_dimensions, serialized_cell_dimensions_len);
+    }
+
+    // Release the old key only after the new one has been copied, in case the
+    // caller handed us a pointer to the cached key itself.
+    free(cache->serialized_cell_dimensions);
+
+    if (key_copy == NULL) {
+        // Allocation failed: degrade to an empty cache instead of writing through NULL.
+        cache->serialized_cell_dimensions = NULL;
+        cache->serialized_cell_dimensions_len = 0;
+        cache->cell = NULL;
+        return;
+    }
+
+    cache->serialized_cell_dimensions = key_copy;
+    cache->serialized_cell_dimensions_len = serialized_cell_dimensions_len;
+    cache->cell = cell;
+}
+
+// Empty the cache: release the stored key copy and forget the remembered cell.
+// The cell itself is not freed.
+void cell_cache_clear(struct cell_cache *cache)
+{
+    free(cache->serialized_cell_dimensions); // free(NULL) is a no-op, so no guard is needed
+    cache->serialized_cell_dimensions = NULL;
+    cache->serialized_cell_dimensions_len = 0;
+    cache->cell = NULL;
+}
+
void add_cube_to_position(struct cube_manager *pthis, struct cube *cube, int id)
{
if (id >= pthis->slots_number) {
@@ -553,6 +601,8 @@ int cube_set_sampling(struct cube *cube, enum sampling_mode mode, int max_n_cell
assert(0);
break;
}
+
+ cell_cache_clear(&cube->last_oper_cache);
}
switch (mode)
@@ -615,6 +665,8 @@ void cube_free(struct cube *cube) {
free(cube->serialized_dimensions);
metric_manifest_manager_free(cube->manifest_manager);
+ cell_cache_clear(&cube->last_oper_cache);
+
free(cube);
}
@@ -634,6 +686,8 @@ void cube_reset(struct cube *cube) {
assert(0);
break;
}
+
+ cell_cache_clear(&cube->last_oper_cache);
}
struct cell *get_cell_in_cube_generic(struct cube *cube, const struct field *dimensions, size_t n_dimensions) {
@@ -641,6 +695,11 @@ struct cell *get_cell_in_cube_generic(struct cube *cube, const struct field *dim
size_t compound_dimension_len;
field_array_to_key(dimensions, n_dimensions, &compound_dimension, &compound_dimension_len);
+ if (cell_cache_check(&cube->last_oper_cache, compound_dimension, compound_dimension_len)) {
+ free(compound_dimension);
+ return cell_cache_get(&cube->last_oper_cache);
+ }
+
struct exdata_new_args args;
args.cell_dimensions = dimensions;
args.n_dimensions = n_dimensions;
@@ -653,7 +712,7 @@ struct cell *get_cell_in_cube_generic(struct cube *cube, const struct field *dim
cell_data = hash_table_get0_exdata(cube->table, compound_dimension, compound_dimension_len);
if (cell_data == NULL) {
tmp_ret = hash_table_add(cube->table, compound_dimension, compound_dimension_len, (void *)&args);
- if (tmp_ret == 1) {
+ if (tmp_ret >= 1) {
cell_data = hash_table_get0_exdata(cube->table, compound_dimension, compound_dimension_len);
}
}
@@ -662,7 +721,7 @@ struct cell *get_cell_in_cube_generic(struct cube *cube, const struct field *dim
cell_data = heavy_keeper_get0_exdata(cube->heavykeeper, compound_dimension, compound_dimension_len);
if (cell_data == NULL) {
tmp_ret = heavy_keeper_add(cube->heavykeeper, compound_dimension, compound_dimension_len, 0, (void *)&args);
- if (tmp_ret == 1) {
+ if (tmp_ret >= 1) {
cell_data = heavy_keeper_get0_exdata(cube->heavykeeper, compound_dimension, compound_dimension_len);
}
}
@@ -671,7 +730,7 @@ struct cell *get_cell_in_cube_generic(struct cube *cube, const struct field *dim
cell_data = spread_sketch_get0_exdata(cube->spread_sketch, compound_dimension, compound_dimension_len);
if (cell_data == NULL) {
tmp_ret = spread_sketch_add_hash(cube->spread_sketch, compound_dimension, compound_dimension_len, DUMMY_ITEM_HASH, (void *)&args, DUMMY_TIME_VAL);
- if (tmp_ret == 1) {
+ if (tmp_ret >= 1) {
cell_data = spread_sketch_get0_exdata(cube->spread_sketch, compound_dimension, compound_dimension_len);
}
}
@@ -681,6 +740,8 @@ struct cell *get_cell_in_cube_generic(struct cube *cube, const struct field *dim
break;
}
+ cell_cache_replace(&cube->last_oper_cache, compound_dimension, compound_dimension_len, cell_data);
+
free(compound_dimension);
return cell_data;
}
@@ -825,8 +886,12 @@ int cube_hll_add(struct cube *cube, int metric_id, const struct field *dimension
args.n_dimensions = n_dimensions;
int tmp_ret = spread_sketch_add(cube->spread_sketch, compound_dimension, compound_dimension_len, key, key_len, (void *)&args, DUMMY_TIME_VAL);
+ if (tmp_ret == 2) {
+ cell_cache_clear(&cube->last_oper_cache);
+ }
+
free(compound_dimension);
- return tmp_ret == 1 ? FS_OK : FS_ERR_TOO_MANY_CELLS;
+ return tmp_ret == 0 ? FS_ERR_TOO_MANY_CELLS : FS_OK;
}
struct cell *cell_data = get_cell_in_cube_generic(cube, dimensions, n_dimensions);
@@ -878,8 +943,12 @@ int cube_hll_add_field(struct cube *cube, int metric_id, const struct field *dim
uint64_t hash = field_array_to_hash(item_fields, n_item);
int tmp_ret = spread_sketch_add_hash(cube->spread_sketch, compound_dimension, compound_dimension_len, hash, (void *)&args, DUMMY_TIME_VAL);
+ if (tmp_ret == 2) {
+ cell_cache_clear(&cube->last_oper_cache);
+ }
+
free(compound_dimension);
- return tmp_ret == 1 ? FS_OK : FS_ERR_TOO_MANY_CELLS;
+ return tmp_ret == 0 ? FS_ERR_TOO_MANY_CELLS : FS_OK;
}
struct cell *cell_data = get_cell_in_cube_generic(cube, dimensions, n_dimensions);
@@ -924,8 +993,12 @@ int cube_counter_incrby(struct cube *cube, int metric_id, const struct field *di
args.n_dimensions = n_dimensions;
int tmp_ret = heavy_keeper_add(cube->heavykeeper, compound_dimension, compound_dimension_len, increment, (void *)&args);
+ if (tmp_ret == 2) {
+ cell_cache_clear(&cube->last_oper_cache);
+ }
+
free(compound_dimension);
- return tmp_ret == 1 ? FS_OK : FS_ERR_TOO_MANY_CELLS;
+ return tmp_ret == 0 ? FS_ERR_TOO_MANY_CELLS : FS_OK;
}
struct cell *cell_data = get_cell_in_cube_generic(cube, dimensions, n_dimensions);
@@ -1035,6 +1108,8 @@ int cube_merge(struct cube *dest, const struct cube *src)
break;
}
+ cell_cache_clear(&dest->last_oper_cache); // just clear the cache any way to avoid inconsistency
+
return FS_OK;
}