From 652bccee18d8555232dc1679cfbe7bf677181c77 Mon Sep 17 00:00:00 2001 From: chenzizhan Date: Mon, 14 Oct 2024 11:27:41 +0800 Subject: Performance: Cache cell for repeatedly adding metrics to the same cell --- src/cells/hash_table.c | 4 +-- src/cells/hash_table.h | 2 +- src/cells/heavy_keeper.c | 13 +++---- src/cells/heavy_keeper.h | 1 + src/cells/spread_sketch.c | 11 +++--- src/cells/spread_sketch.h | 2 ++ src/cube.c | 87 +++++++++++++++++++++++++++++++++++++++++++---- 7 files changed, 98 insertions(+), 22 deletions(-) (limited to 'src') diff --git a/src/cells/hash_table.c b/src/cells/hash_table.c index 255a3ef..362b563 100644 --- a/src/cells/hash_table.c +++ b/src/cells/hash_table.c @@ -115,7 +115,7 @@ int hash_table_add(struct hash_table *pthis, const char *key, size_t key_len, vo assert(item->dying); item->dying = false; pthis->current_cell_num++; - return 1; + return 2; } item = calloc(1, sizeof(struct entry)); @@ -126,7 +126,7 @@ int hash_table_add(struct hash_table *pthis, const char *key, size_t key_len, vo HASH_ADD_KEYPTR(hh, pthis->key_exdata_map, item->key, key_len, item); pthis->current_cell_num++; - return 1; + return 2; } void hash_table_set_exdata_schema(struct hash_table *pthis, exdata_new_cb new_fn, exdata_free_cb free_fn, exdata_merge_cb merge_fn, exdata_reset_cb reset_fn, exdata_copy_cb copy_fn) { diff --git a/src/cells/hash_table.h b/src/cells/hash_table.h index 59fc8e8..b310040 100644 --- a/src/cells/hash_table.h +++ b/src/cells/hash_table.h @@ -17,7 +17,7 @@ void hash_table_reset(struct hash_table *pthis); int hash_table_merge(struct hash_table *dest, struct hash_table *src); struct hash_table *hash_table_copy(const struct hash_table *src); -// int hash_table_add(struct hash_table *pthis, const char *key, size_t key_len, int count, void *arg); +// return 0 for not added, 1 for an existing item, 2 for added a new item int hash_table_add(struct hash_table *pthis, const char *key, size_t key_len, void *arg); // void *hash_table_get0_exdata(struct hash_table *pthis, const char *key, size_t key_len); diff --git a/src/cells/heavy_keeper.c b/src/cells/heavy_keeper.c index 82fe4be..eff0c78 100644 --- a/src/cells/heavy_keeper.c +++ b/src/cells/heavy_keeper.c @@ -565,9 +565,6 @@ static inline uint64_t cal_hash_val_with_seed(const char *key, size_t key_len, u return XXH3_64bits_withSeed(key, key_len, seed); } -/* -1 for newly add something. 0 for not add. -1 for unexpected cases. -*/ int heavy_keeper_add(struct heavy_keeper *heavy_keeper, const char *key, size_t key_len, long long count, void *arg) { assert(count >= 0); if (count == 0) { @@ -575,11 +572,11 @@ int heavy_keeper_add(struct heavy_keeper *heavy_keeper, const char *key, size_t const struct heap_entry *entry = sorted_set_find_entry(heavy_keeper->top_K_heap, key, key_len); if (entry != NULL && !sorted_set_entry_dying(entry)) { - return 0; + return 1; } sorted_set_insert(heavy_keeper->top_K_heap, key, key_len, count, arg); - return 1; + return 2; } return 0; } @@ -631,19 +628,19 @@ int heavy_keeper_add(struct heavy_keeper *heavy_keeper, const char *key, size_t if (not_in_sorted_set) { if (sorted_set_cardinality(summary) != heavy_keeper->K) { sorted_set_insert(summary, key, key_len, maxv, arg); - return 1; + return 2; } long long min_value = sorted_set_get_min_score(summary); if (maxv > min_value || min_value == NOT_FIND) { sorted_set_insert(summary, key, key_len, maxv, arg); - return 1; + return 2; } return 0; } else { if (maxv > old_cnt) { sorted_set_incrby(summary, key, key_len, maxv - old_cnt); } - return 1; // no popped, but the exdata definitely exists in the sorted set + return 1; } } diff --git a/src/cells/heavy_keeper.h b/src/cells/heavy_keeper.h index 21c5b23..07e617a 100644 --- a/src/cells/heavy_keeper.h +++ b/src/cells/heavy_keeper.h @@ -24,6 +24,7 @@ void heavy_keeper_free(struct heavy_keeper *hk); */ void heavy_keeper_reset(struct heavy_keeper *hk); +// return -1 for error, 0 for not added, 1 for updated an existing item, 2 for added a new item int heavy_keeper_add(struct heavy_keeper *hk, const char *key, size_t key_len, long long count, void *arg); int heavy_keeper_set_exdata_schema(struct heavy_keeper *hk, exdata_new_cb new_fn, exdata_free_cb free_fn, exdata_merge_cb merge_fn, exdata_reset_cb reset_fn, exdata_copy_cb copy_fn); diff --git a/src/cells/spread_sketch.c b/src/cells/spread_sketch.c index 9618771..c2dee92 100644 --- a/src/cells/spread_sketch.c +++ b/src/cells/spread_sketch.c @@ -239,10 +239,10 @@ void move_registers_forward(struct spread_sketch *ss, const struct timeval *now) } } -// return 0 if not added, return 1 if added int spread_sketch_add_hash(struct spread_sketch *ss, const char *entry, size_t entry_length, uint64_t item_hash, void *arg, struct timeval now) { uint32_t level = (uint32_t)__builtin_clzll(item_hash) + 1; long long now_ms = now.tv_sec * 1000 + now.tv_usec / 1000; + int ret_code = 0; // // return 0 if not added, return 1 if the item already exists in the spread sketch, return 2 if the item is added successfully if (item_hash == DUMMY_ITEM_HASH) { if (ss->level0_cnt == 0) { @@ -257,7 +257,6 @@ int spread_sketch_add_hash(struct spread_sketch *ss, const char *entry, size_t e uint32_t hash_x1 = (uint32_t) (hash_x_tmp >> 32); uint32_t hash_x2 = (uint32_t) hash_x_tmp; - bool in_sketch = false; move_registers_forward(ss, &now); for (int i = 0; i < ss->depth; i++) { uint32_t hash_x = hash_x1 + i * hash_x2; @@ -274,7 +273,9 @@ int spread_sketch_add_hash(struct spread_sketch *ss, const char *entry, size_t e } bucket->level = level; } - in_sketch = true; + if (ret_code < 1) { + ret_code = 1; + } } else { uint32_t true_level = bucket->content == NULL ? 0: cal_true_level(ss, bucket_idx, now_ms); @@ -294,7 +295,7 @@ int spread_sketch_add_hash(struct spread_sketch *ss, const char *entry, size_t e bucket->last_update_ms = now_ms; bucket->level = level; - in_sketch = true; + ret_code = 2; } } if (item_hash != DUMMY_ITEM_HASH) { @@ -302,7 +303,7 @@ int spread_sketch_add_hash(struct spread_sketch *ss, const char *entry, size_t e } } - return in_sketch ? 1 : 0; + return ret_code; } int spread_sketch_add(struct spread_sketch *ss, const char *entry, size_t entry_length, const char* item, size_t item_len, void *arg, struct timeval now) { diff --git a/src/cells/spread_sketch.h b/src/cells/spread_sketch.h index abd6de2..3062b37 100644 --- a/src/cells/spread_sketch.h +++ b/src/cells/spread_sketch.h @@ -21,7 +21,9 @@ struct spread_sketch *spread_sketch_new(int depth, int width, unsigned char prec void spread_sketch_free(struct spread_sketch *ss); void spread_sketch_set_exdata_schema(struct spread_sketch *ss, exdata_new_cb new_fn, exdata_free_cb free_fn, exdata_merge_cb merge_fn, exdata_reset_cb reset_fn, exdata_copy_cb copy_fn); +// return 0 if not added, return 1 if the item already exists in the spread sketch, return 2 if the item is added successfully int spread_sketch_add_hash(struct spread_sketch *ss, const char *entry, size_t entry_length, uint64_t item_hash, void *arg, struct timeval now); +// the same as `spread_sketch_add_hash`, but the item is a string int spread_sketch_add(struct spread_sketch *ss, const char *entry, size_t entry_length, const char* item, size_t item_len, void *arg, struct timeval now); // get the number of entrys stored in spread sketch diff --git a/src/cube.c b/src/cube.c index 1c3e6df..d4125b0 100644 --- a/src/cube.c +++ b/src/cube.c @@ -40,6 +40,12 @@ struct cell { struct field_list cell_dimensions; }; +struct cell_cache { + char *serialized_cell_dimensions; + size_t serialized_cell_dimensions_len; + struct cell *cell; +}; + struct cube { enum sampling_mode sampling_mode; union { @@ -58,6 +64,8 @@ struct cube { size_t serialized_dimensions_len; int id; UT_hash_handle hh; + + struct cell_cache last_oper_cache; }; static struct field *field_array_duplicate(const struct field *fields_src, size_t n_field) @@ -89,6 +97,46 @@ static struct field *field_array_duplicate(const struct field *fields_src, size_ return ret; } +bool cell_cache_check(const struct cell_cache *cache, const char *serialized_cell_dimensions, size_t serialized_cell_dimensions_len) +{ + if (cache->serialized_cell_dimensions == NULL) { + return false; + } + + if (cache->serialized_cell_dimensions_len != serialized_cell_dimensions_len) { + return false; + } + + return memcmp(cache->serialized_cell_dimensions, serialized_cell_dimensions, serialized_cell_dimensions_len) == 0; +} + +struct cell *cell_cache_get(const struct cell_cache *cache) +{ + return cache->cell; +} + +void cell_cache_replace(struct cell_cache *cache, const char *serialized_cell_dimensions, size_t serialized_cell_dimensions_len, struct cell *cell) +{ + if (cache->serialized_cell_dimensions != NULL) { + free(cache->serialized_cell_dimensions); + } + + cache->serialized_cell_dimensions = (char *)malloc(serialized_cell_dimensions_len); + memcpy(cache->serialized_cell_dimensions, serialized_cell_dimensions, serialized_cell_dimensions_len); + cache->serialized_cell_dimensions_len = serialized_cell_dimensions_len; + cache->cell = cell; +} + +void cell_cache_clear(struct cell_cache *cache) +{ + if (cache->serialized_cell_dimensions != NULL) { + free(cache->serialized_cell_dimensions); + cache->serialized_cell_dimensions = NULL; + } + cache->serialized_cell_dimensions_len = 0; + cache->cell = NULL; +} + void add_cube_to_position(struct cube_manager *pthis, struct cube *cube, int id) { if (id >= pthis->slots_number) { @@ -553,6 +601,8 @@ int cube_set_sampling(struct cube *cube, enum sampling_mode mode, int max_n_cell assert(0); break; } + + cell_cache_clear(&cube->last_oper_cache); } switch (mode) @@ -615,6 +665,8 @@ void cube_free(struct cube *cube) { free(cube->serialized_dimensions); metric_manifest_manager_free(cube->manifest_manager); + cell_cache_clear(&cube->last_oper_cache); + free(cube); } @@ -634,6 +686,8 @@ void cube_reset(struct cube *cube) { assert(0); break; } + + cell_cache_clear(&cube->last_oper_cache); } struct cell *get_cell_in_cube_generic(struct cube *cube, const struct field *dimensions, size_t n_dimensions) { @@ -641,6 +695,11 @@ struct cell *get_cell_in_cube_generic(struct cube *cube, const struct field *dim size_t compound_dimension_len; field_array_to_key(dimensions, n_dimensions, &compound_dimension, &compound_dimension_len); + if (cell_cache_check(&cube->last_oper_cache, compound_dimension, compound_dimension_len)) { + free(compound_dimension); + return cell_cache_get(&cube->last_oper_cache); + } + struct exdata_new_args args; args.cell_dimensions = dimensions; args.n_dimensions = n_dimensions; @@ -653,7 +712,7 @@ struct cell *get_cell_in_cube_generic(struct cube *cube, const struct field *dim cell_data = hash_table_get0_exdata(cube->table, compound_dimension, compound_dimension_len); if (cell_data == NULL) { tmp_ret = hash_table_add(cube->table, compound_dimension, compound_dimension_len, (void *)&args); - if (tmp_ret == 1) { + if (tmp_ret >= 1) { cell_data = hash_table_get0_exdata(cube->table, compound_dimension, compound_dimension_len); } } @@ -662,7 +721,7 @@ struct cell *get_cell_in_cube_generic(struct cube *cube, const struct field *dim cell_data = heavy_keeper_get0_exdata(cube->heavykeeper, compound_dimension, compound_dimension_len); if (cell_data == NULL) { tmp_ret = heavy_keeper_add(cube->heavykeeper, compound_dimension, compound_dimension_len, 0, (void *)&args); - if (tmp_ret == 1) { + if (tmp_ret >= 1) { cell_data = heavy_keeper_get0_exdata(cube->heavykeeper, compound_dimension, compound_dimension_len); } } @@ -671,7 +730,7 @@ struct cell *get_cell_in_cube_generic(struct cube *cube, const struct field *dim cell_data = spread_sketch_get0_exdata(cube->spread_sketch, compound_dimension, compound_dimension_len); if (cell_data == NULL) { tmp_ret = spread_sketch_add_hash(cube->spread_sketch, compound_dimension, compound_dimension_len, DUMMY_ITEM_HASH, (void *)&args, DUMMY_TIME_VAL); - if (tmp_ret == 1) { + if (tmp_ret >= 1) { cell_data = spread_sketch_get0_exdata(cube->spread_sketch, compound_dimension, compound_dimension_len); } } @@ -681,6 +740,8 @@ struct cell *get_cell_in_cube_generic(struct cube *cube, const struct field *dim break; } + cell_cache_replace(&cube->last_oper_cache, compound_dimension, compound_dimension_len, cell_data); + free(compound_dimension); return cell_data; } @@ -825,8 +886,12 @@ int cube_hll_add(struct cube *cube, int metric_id, const struct field *dimension args.n_dimensions = n_dimensions; int tmp_ret = spread_sketch_add(cube->spread_sketch, compound_dimension, compound_dimension_len, key, key_len, (void *)&args, DUMMY_TIME_VAL); + if (tmp_ret == 2) { + cell_cache_clear(&cube->last_oper_cache); + } + free(compound_dimension); - return tmp_ret == 1 ? FS_OK : FS_ERR_TOO_MANY_CELLS; + return tmp_ret == 0 ? FS_ERR_TOO_MANY_CELLS : FS_OK; } struct cell *cell_data = get_cell_in_cube_generic(cube, dimensions, n_dimensions); @@ -878,8 +943,12 @@ int cube_hll_add_field(struct cube *cube, int metric_id, const struct field *dim uint64_t hash = field_array_to_hash(item_fields, n_item); int tmp_ret = spread_sketch_add_hash(cube->spread_sketch, compound_dimension, compound_dimension_len, hash, (void *)&args, DUMMY_TIME_VAL); + if (tmp_ret == 2) { + cell_cache_clear(&cube->last_oper_cache); + } + free(compound_dimension); - return tmp_ret == 1 ? FS_OK : FS_ERR_TOO_MANY_CELLS; + return tmp_ret == 0 ? FS_ERR_TOO_MANY_CELLS : FS_OK; } struct cell *cell_data = get_cell_in_cube_generic(cube, dimensions, n_dimensions); @@ -924,8 +993,12 @@ int cube_counter_incrby(struct cube *cube, int metric_id, const struct field *di args.n_dimensions = n_dimensions; int tmp_ret = heavy_keeper_add(cube->heavykeeper, compound_dimension, compound_dimension_len, increment, (void *)&args); + if (tmp_ret == 2) { + cell_cache_clear(&cube->last_oper_cache); + } + free(compound_dimension); - return tmp_ret == 1 ? FS_OK : FS_ERR_TOO_MANY_CELLS; + return tmp_ret == 0 ? FS_ERR_TOO_MANY_CELLS : FS_OK; } struct cell *cell_data = get_cell_in_cube_generic(cube, dimensions, n_dimensions); @@ -1035,6 +1108,8 @@ int cube_merge(struct cube *dest, const struct cube *src) break; } + cell_cache_clear(&dest->last_oper_cache); // just clear the cache any way to avoid inconsistency + return FS_OK; } -- cgit v1.2.3