diff options
| author | chenzizhan <[email protected]> | 2024-07-08 10:22:22 +0800 |
|---|---|---|
| committer | chenzizhan <[email protected]> | 2024-07-08 10:22:22 +0800 |
| commit | 4083cad56dfeac683832e93b1d8a295304aa8ea1 (patch) | |
| tree | a23c9e6eafc0318b14369e70e853a341b5339c03 | |
| parent | 18a584f99412b40a81f79947f311371d7e9b2047 (diff) | |
rename
| -rw-r--r-- | src/metrics/metric.c | 24 | ||||
| -rw-r--r-- | src/metrics/python_api.c | 10 | ||||
| -rw-r--r-- | src/metrics/st_hyperloglog.c | 58 | ||||
| -rw-r--r-- | src/metrics/st_hyperloglog.h | 29 | ||||
| -rw-r--r-- | src/tags/heavy_keeper.c | 8 | ||||
| -rw-r--r-- | test/test_exporter_json.cpp | 10 | ||||
| -rw-r--r-- | test/test_metric_hll.cpp | 10 |
7 files changed, 75 insertions, 74 deletions
diff --git a/src/metrics/metric.c b/src/metrics/metric.c index fc7c68b..91ef47f 100644 --- a/src/metrics/metric.c +++ b/src/metrics/metric.c @@ -37,7 +37,7 @@ struct metric_scheme { struct metric_data { union { struct metric_counter_or_gauge *counter; - struct ST_hyperloglog *hll; + struct hyperloglog *hll; struct hdr_histogram *hdr; }; }; @@ -120,40 +120,40 @@ void metric_scheme_counter_reset(struct metric_data *data) struct metric_data *metric_scheme_hll_new(const union metric_parameter *para) { struct metric_data *data = (struct metric_data *)malloc(sizeof(struct metric_data)); - struct ST_hyperloglog *hll = ST_hyperloglog_new(para->hll.precision); + struct hyperloglog *hll = hyperloglog_new(para->hll.precision); data->hll = hll; return data; } static void metric_scheme_hll_free(struct metric_data *data) { - ST_hyperloglog_free(data->hll); + hyperloglog_free(data->hll); free(data); } static void metric_scheme_hll_serialize(const struct metric_data *data, char **blob, size_t *blob_size) { - ST_hyperloglog_serialize(data->hll, blob, blob_size); + hyperloglog_serialize(data->hll, blob, blob_size); } static int metric_scheme_hll_merge(struct metric_data *pthis, const struct metric_data *from) { - return ST_hyperloglog_merge(pthis->hll, from->hll); + return hyperloglog_merge(pthis->hll, from->hll); } struct metric_data *metric_scheme_hll_copy(const struct metric_data *from) { struct metric_data *pthis = (struct metric_data *)malloc(sizeof(struct metric_data)); - struct ST_hyperloglog *hll = ST_hyperloglog_new(from->hll->cfg.precision); + struct hyperloglog *hll = hyperloglog_new(from->hll->cfg.precision); pthis->hll = hll; - ST_hyperloglog_merge(hll, from->hll); + hyperloglog_merge(hll, from->hll); return pthis; } struct metric_data *metric_scheme_hll_deserialize(const char *blob, size_t blob_size) { struct metric_data *ret = (struct metric_data *)malloc(sizeof(struct metric_data)); - struct ST_hyperloglog *hll = ST_hyperloglog_deserialize(blob, blob_size); + struct hyperloglog *hll = hyperloglog_deserialize(blob, blob_size); ret->hll = hll; return ret; @@ -161,7 +161,7 @@ struct metric_data *metric_scheme_hll_deserialize(const char *blob, size_t blob_ void metric_scheme_hll_reset(struct metric_data *data) { - ST_hyperloglog_reset(data->hll); + hyperloglog_reset(data->hll); } /* --------------------------------- histogram -------------------------------- */ @@ -297,7 +297,7 @@ void metric_serialize(const struct metric *pthis, char **blob, size_t *blob_size struct metric_data *data = pthis->data; enum metric_type type = pthis->type; if (type == METRIC_TYPE_HLL) { - ST_hyperloglog_serialize_for_networking(data->hll, blob, blob_size); + hyperloglog_serialize_for_networking(data->hll, blob, blob_size); return; } if (type == METRIC_TYPE_HISTOGRAM) { @@ -326,11 +326,11 @@ long long metric_counter_get(const struct metric *pthis) { } void metric_hll_add(struct metric *pthis, const char *key, size_t key_len) { - ST_hyperloglog_add(pthis->data->hll, key, key_len); + hyperloglog_add(pthis->data->hll, key, key_len); } double metric_hll_get(const struct metric *pthis) { - return ST_hyperloglog_count(pthis->data->hll); + return hyperloglog_count(pthis->data->hll); } int metric_histogram_record(struct metric *pthis, long long value) { diff --git a/src/metrics/python_api.c b/src/metrics/python_api.c index c7c4c6d..8388a1c 100644 --- a/src/metrics/python_api.c +++ b/src/metrics/python_api.c @@ -75,9 +75,9 @@ void fieldstat_histogram_free(void *h) void *hll_base64_decode(char *buf); double fieldstat_hll_base64_to_count(char *buf) { - struct ST_hyperloglog *hll = hll_base64_decode(buf); - double count = ST_hyperloglog_count(hll); - ST_hyperloglog_free(hll); + struct hyperloglog *hll = hll_base64_decode(buf); + double count = hyperloglog_count(hll); + hyperloglog_free(hll); return count; } @@ -101,7 +101,7 @@ void *hll_base64_decode(char *buf) unsigned char precision; memcpy(&precision, dec + sizeof(unsigned char), sizeof(unsigned char)); - struct ST_hyperloglog *hll_from_blob = ST_hyperloglog_new(precision); + struct hyperloglog *hll_from_blob = hyperloglog_new(precision); int num_reg = NUM_REG(precision); int words = INT_CEIL(num_reg, REG_PER_WORD); @@ -120,7 +120,7 @@ void *hll_base64_decode(char *buf) void fieldstat_hll_free(void *hll) { - ST_hyperloglog_free((struct ST_hyperloglog *)hll); + hyperloglog_free((struct hyperloglog *)hll); } // cppcheck-suppress [constParameterPointer, unmatchedSuppression] diff --git a/src/metrics/st_hyperloglog.c b/src/metrics/st_hyperloglog.c index 0b12c74..be240f2 100644 --- a/src/metrics/st_hyperloglog.c +++ b/src/metrics/st_hyperloglog.c @@ -14,16 +14,16 @@ #include <stdlib.h> #include "base64/b64.h" -const size_t BLOB_HDR_SIZE= offsetof(struct ST_hyperloglog, registers); +const size_t BLOB_HDR_SIZE= offsetof(struct hyperloglog, registers); -struct ST_hyperloglog *ST_hyperloglog_new(unsigned char precision) +struct hyperloglog *hyperloglog_new(unsigned char precision) { // Ensure the precision is somewhat sane if (precision < HLL_MIN_PRECISION || precision > HLL_MAX_PRECISION) return NULL; - struct ST_hyperloglog *h=ALLOC(struct ST_hyperloglog, 1); + struct hyperloglog *h=ALLOC(struct hyperloglog, 1); h->cfg.precision = precision; @@ -37,7 +37,7 @@ struct ST_hyperloglog *ST_hyperloglog_new(unsigned char precision) h->registers = ALLOC(uint32_t, words); return h; } -void ST_hyperloglog_configure(struct ST_hyperloglog *h, unsigned char precision, int time_window_seconds, const struct timeval now) +void hyperloglog_configure(struct hyperloglog *h, unsigned char precision, int time_window_seconds, const struct timeval now) { if(h->cfg.precision != precision) { @@ -54,7 +54,7 @@ void ST_hyperloglog_configure(struct ST_hyperloglog *h, unsigned char precision, } return; } -void ST_hyperloglog_free(struct ST_hyperloglog *h) +void hyperloglog_free(struct hyperloglog *h) { free(h->registers); h->registers=NULL; @@ -62,14 +62,14 @@ void ST_hyperloglog_free(struct ST_hyperloglog *h) return; } -static int get_register(const struct ST_hyperloglog *h, int idx) { +static int get_register(const struct hyperloglog *h, int idx) { uint32_t word = *(h->registers + (idx / REG_PER_WORD)); word = word >> REG_WIDTH * (idx % REG_PER_WORD); return word & ((1 << REG_WIDTH) - 1); } -static void set_register(const struct ST_hyperloglog *h, int idx, int val) { +static void set_register(const struct hyperloglog *h, int idx, int val) { uint32_t *word = h->registers + (idx / REG_PER_WORD); // Shift the val into place @@ -82,7 +82,7 @@ static void set_register(const struct ST_hyperloglog *h, int idx, int val) { return; } -static void reset_register(const struct ST_hyperloglog *h, int idx) +static void reset_register(const struct hyperloglog *h, int idx) { uint32_t *word = h->registers + (idx / REG_PER_WORD); unsigned shift = REG_WIDTH * (idx % REG_PER_WORD); @@ -90,7 +90,7 @@ static void reset_register(const struct ST_hyperloglog *h, int idx) *word &= ~val_mask; } -void ST_hyperloglog_reset(struct ST_hyperloglog *h) +void hyperloglog_reset(struct hyperloglog *h) { int n_register=NUM_REG(h->cfg.precision); @@ -100,7 +100,7 @@ void ST_hyperloglog_reset(struct ST_hyperloglog *h) return; } -int hll_add_hash(struct ST_hyperloglog *h, uint64_t hash) +int hyperloglog_add_hash(struct hyperloglog *h, uint64_t hash) { // Determine the index using the first p bits int idx = hash >> (64 - h->cfg.precision); @@ -119,17 +119,17 @@ int hll_add_hash(struct ST_hyperloglog *h, uint64_t hash) return 0; } -int ST_hyperloglog_add(struct ST_hyperloglog *h, const char *key, size_t keylen) +int hyperloglog_add(struct hyperloglog *h, const char *key, size_t keylen) { uint64_t hash=0; hash=XXH3_64bits_withSeed(key, keylen, 171); // Add the hashed value - return hll_add_hash(h, hash); + return hyperloglog_add_hash(h, hash); } // https://djhworld.github.io/hyperloglog/merging/ -int ST_hyperloglog_merge(struct ST_hyperloglog *dest, const struct ST_hyperloglog *src) +int hyperloglog_merge(struct hyperloglog *dest, const struct hyperloglog *src) { if(dest->cfg.precision != src->cfg.precision) return -1; int n_register=NUM_REG(dest->cfg.precision); @@ -142,7 +142,7 @@ int ST_hyperloglog_merge(struct ST_hyperloglog *dest, const struct ST_hyperloglo } return 0; } -size_t ST_hyperloglog_serialized_size(const struct ST_hyperloglog *h) +size_t hyperloglog_serialized_size(const struct hyperloglog *h) { size_t sz=0; size_t num_reg = NUM_REG(h->cfg.precision); @@ -153,13 +153,13 @@ size_t ST_hyperloglog_serialized_size(const struct ST_hyperloglog *h) return sz; } -void ST_hyperloglog_serialize(const struct ST_hyperloglog *h, char **blob, size_t *blob_sz) +void hyperloglog_serialize(const struct hyperloglog *h, char **blob, size_t *blob_sz) { size_t sz=0, offset=0; size_t num_reg = NUM_REG(h->cfg.precision); size_t words = INT_CEIL(num_reg, REG_PER_WORD); - sz = ST_hyperloglog_serialized_size(h); + sz = hyperloglog_serialized_size(h); char *buffer = ALLOC(char, sz); memcpy(buffer+offset, h, BLOB_HDR_SIZE); @@ -172,9 +172,9 @@ void ST_hyperloglog_serialize(const struct ST_hyperloglog *h, char **blob, size_ return; } -struct ST_hyperloglog *ST_hyperloglog_deserialize(const char *blob, size_t blob_sz) +struct hyperloglog *hyperloglog_deserialize(const char *blob, size_t blob_sz) { - struct ST_hyperloglog *h=ALLOC(struct ST_hyperloglog, 1); + struct hyperloglog *h=ALLOC(struct hyperloglog, 1); size_t offset=0; memcpy(h, blob, BLOB_HDR_SIZE); offset += BLOB_HDR_SIZE; @@ -187,13 +187,13 @@ struct ST_hyperloglog *ST_hyperloglog_deserialize(const char *blob, size_t blob_ return h; } -void ST_hyperloglog_serialize_for_networking(const struct ST_hyperloglog *h, char **blob, size_t *blob_sz) +void hyperloglog_serialize_for_networking(const struct hyperloglog *h, char **blob, size_t *blob_sz) { size_t sz=0, offset=0; size_t num_reg = NUM_REG(h->cfg.precision); size_t words = INT_CEIL(num_reg, REG_PER_WORD); - sz = ST_hyperloglog_serialized_size(h) + 1; // [precision][version][data... + sz = hyperloglog_serialized_size(h) + 1; // [precision][version][data... unsigned char *buffer = ALLOC(unsigned char, sz); const unsigned char version = 1; @@ -219,11 +219,11 @@ void ST_hyperloglog_serialize_for_networking(const struct ST_hyperloglog *h, cha return; } -void ST_hyperloglog_merge_blob(struct ST_hyperloglog *dest, const char *blob, size_t blob_sz) +void hyperloglog_merge_blob(struct hyperloglog *dest, const char *blob, size_t blob_sz) { - struct ST_hyperloglog *src=ST_hyperloglog_deserialize(blob, blob_sz); - ST_hyperloglog_merge(dest, src); - ST_hyperloglog_free(src); + struct hyperloglog *src=hyperloglog_deserialize(blob, blob_sz); + hyperloglog_merge(dest, src); + hyperloglog_free(src); return; } double g_switchThreshold[15] = {10, 20, 40, 80, 220, 400, 900, 1800, 3100, 6500, @@ -314,7 +314,7 @@ static double alpha(unsigned char precision) { /* * Computes the raw cardinality estimate */ -static double raw_estimate(const struct ST_hyperloglog *h, int *num_zero) +static double raw_estimate(const struct hyperloglog *h, int *num_zero) { unsigned char precision = h->cfg.precision; int num_reg = NUM_REG(precision); @@ -334,7 +334,7 @@ static double raw_estimate(const struct ST_hyperloglog *h, int *num_zero) * Estimates cardinality using a linear counting. * Used when some registers still have a zero value. */ -static double linear_count(const struct ST_hyperloglog *h, int num_zero) +static double linear_count(const struct hyperloglog *h, int num_zero) { int registers = NUM_REG(h->cfg.precision); return registers * @@ -364,7 +364,7 @@ static int binary_search(double val, int num, const double *array) { * empircal data collected by Google, from the * paper mentioned above. */ -static double bias_estimate(const struct ST_hyperloglog *h, double raw_est) { +static double bias_estimate(const struct hyperloglog *h, double raw_est) { // Determine the samples available int samples; int precision = h->cfg.precision; @@ -394,7 +394,7 @@ static double bias_estimate(const struct ST_hyperloglog *h, double raw_est) { return (biases[idx] + biases[idx-1]) / 2; } -double ST_hyperloglog_count(const struct ST_hyperloglog *h){ +double hyperloglog_count(const struct hyperloglog *h){ int num_zero = 0; int num_reg = NUM_REG(h->cfg.precision); double raw_est = raw_estimate(h, &num_zero); @@ -428,7 +428,7 @@ double ST_hyperloglog_count(const struct ST_hyperloglog *h){ * @return The expected variance in the count, * or zero on error. */ -double ST_hyperloglog_error_for_precision(unsigned char precision) +double hyperloglog_error_for_precision(unsigned char precision) { // Check that the error bound is sane if (precision < HLL_MIN_PRECISION || precision > HLL_MAX_PRECISION) diff --git a/src/metrics/st_hyperloglog.h b/src/metrics/st_hyperloglog.h index 9d05674..254ecb3 100644 --- a/src/metrics/st_hyperloglog.h +++ b/src/metrics/st_hyperloglog.h @@ -34,26 +34,27 @@ struct ST_HLL_configuration { unsigned char precision; }; -struct ST_hyperloglog +struct hyperloglog { struct ST_HLL_configuration cfg; uint32_t *registers; }; -struct ST_hyperloglog *ST_hyperloglog_new(unsigned char precision); -void ST_hyperloglog_free(struct ST_hyperloglog *h); +struct hyperloglog *hyperloglog_new(unsigned char precision); +void hyperloglog_free(struct hyperloglog *h); //Return 1 if at least 1 ST HyperLogLog internal register was altered. 0 otherwise. -int ST_hyperloglog_add(struct ST_hyperloglog *h, const char *key, size_t keylen); -void ST_hyperloglog_reset(struct ST_hyperloglog *h); -double ST_hyperloglog_count(const struct ST_hyperloglog *h); -size_t ST_hyperloglog_serialized_size(const struct ST_hyperloglog *h); -void ST_hyperloglog_serialize(const struct ST_hyperloglog *h, char **blob, size_t *blob_sz); -void ST_hyperloglog_serialize_for_networking(const struct ST_hyperloglog *h, char **blob, size_t *blob_sz); -struct ST_hyperloglog *ST_hyperloglog_deserialize(const char *blob, size_t blob_sz); -int ST_hyperloglog_merge(struct ST_hyperloglog *dest, const struct ST_hyperloglog *src); -void ST_hyperloglog_merge_blob(struct ST_hyperloglog *dest, const char *blob, size_t blob_sz); -double ST_hyperloglog_error_for_precision(unsigned char precision); -void ST_hyperloglog_configure(struct ST_hyperloglog *h, unsigned char precision, int time_window_seconds, const struct timeval now); +int hyperloglog_add(struct hyperloglog *h, const char *key, size_t keylen); +int hyperloglog_add_hash(struct hyperloglog *h, uint64_t hash); +void hyperloglog_reset(struct hyperloglog *h); +double hyperloglog_count(const struct hyperloglog *h); +size_t hyperloglog_serialized_size(const struct hyperloglog *h); +void hyperloglog_serialize(const struct hyperloglog *h, char **blob, size_t *blob_sz); +void hyperloglog_serialize_for_networking(const struct hyperloglog *h, char **blob, size_t *blob_sz); +struct hyperloglog *hyperloglog_deserialize(const char *blob, size_t blob_sz); +int hyperloglog_merge(struct hyperloglog *dest, const struct hyperloglog *src); +void hyperloglog_merge_blob(struct hyperloglog *dest, const char *blob, size_t blob_sz); +double hyperloglog_error_for_precision(unsigned char precision); +void hyperloglog_configure(struct hyperloglog *h, unsigned char precision, int time_window_seconds, const struct timeval now); #ifdef __cplusplus } #endif
\ No newline at end of file diff --git a/src/tags/heavy_keeper.c b/src/tags/heavy_keeper.c index 09f40d0..e48eb8d 100644 --- a/src/tags/heavy_keeper.c +++ b/src/tags/heavy_keeper.c @@ -768,15 +768,15 @@ void heavy_keeper_merge(struct heavy_keeper *dest, const struct heavy_keeper *sr continue; } + unsigned long long cnt = find_count_in_sketch(dest, key_arr[i], key_lens[i]);// the cnt is the estimated count in the merged sketch, since the dest heavy keeper has been merged if (sorted_set_check_is_full(new_rec)) { unsigned long long mincnt_new = sorted_set_get_min_count(new_rec); - unsigned long long maxv = find_count_in_sketch(dest, key_arr[i], key_lens[i]);// the maxv is the max value in the merged sketch, since the dest heavy keeper has been merged - if (maxv > mincnt_new) { + if (cnt > mincnt_new) { sorted_set_pop(new_rec); - sorted_set_insert_to_available_heap(new_rec, key_arr[i], key_lens[i], maxv, dest->copy_fn(exdatas_src[i])); + sorted_set_insert_to_available_heap(new_rec, key_arr[i], key_lens[i], cnt, dest->copy_fn(exdatas_src[i])); } } else { - sorted_set_insert_to_available_heap(new_rec, key_arr[i], key_lens[i], maxv, dest->copy_fn(exdatas_src[i])); + sorted_set_insert_to_available_heap(new_rec, key_arr[i], key_lens[i], cnt, dest->copy_fn(exdatas_src[i])); } } diff --git a/test/test_exporter_json.cpp b/test/test_exporter_json.cpp index 873abfe..aac8ac7 100644 --- a/test/test_exporter_json.cpp +++ b/test/test_exporter_json.cpp @@ -18,7 +18,7 @@ const size_t OPER_NUM = 10000; std::string g_hll_standard_oper[OPER_NUM]; long long g_histogram_standard_oper[OPER_NUM]; -struct ST_hyperloglog *g_hll_standard; +struct hyperloglog *g_hll_standard; struct hdr_histogram *g_histogram_standard; #define TEST_TOPK_STANDARD_K 5 #define TEST_METRIC_NUM 2 @@ -84,7 +84,7 @@ void test_check_if_metric_gauge_correct(cJSON *metric_obj, const char *name) { char *blob_gauge_benchmark = NULL; size_t size_dummy = 0; - ST_hyperloglog_serialize_for_networking(g_hll_standard, &blob_gauge_benchmark, &size_dummy); + hyperloglog_serialize_for_networking(g_hll_standard, &blob_gauge_benchmark, &size_dummy); cJSON *gauge_obj = cJSON_GetObjectItem(metric_obj, name); EXPECT_NE(gauge_obj, nullptr); @@ -1057,11 +1057,11 @@ TEST(export_unit_test, json_writer_length_is_on_margin_4096_string) void init_hll_standard_oper() { - g_hll_standard = ST_hyperloglog_new(12); + g_hll_standard = hyperloglog_new(12); for (size_t i = 0; i < OPER_NUM; i++) { std::string added_tmp = std::to_string(i); g_hll_standard_oper[i] = added_tmp; - ST_hyperloglog_add(g_hll_standard, added_tmp.c_str(), added_tmp.size()); + hyperloglog_add(g_hll_standard, added_tmp.c_str(), added_tmp.size()); } } @@ -1084,7 +1084,7 @@ int main(int argc, char *argv[]) // testing::GTEST_FLAG(filter) = "export_test.cjson_export_on_one_cube_of_topk_sampling"; int ret = RUN_ALL_TESTS(); - ST_hyperloglog_free(g_hll_standard); + hyperloglog_free(g_hll_standard); hdr_close(g_histogram_standard); return ret; } diff --git a/test/test_metric_hll.cpp b/test/test_metric_hll.cpp index 96b4e6c..e4e1b09 100644 --- a/test/test_metric_hll.cpp +++ b/test/test_metric_hll.cpp @@ -130,7 +130,7 @@ TEST(metric_test_hll, serialize_with_b64_and_query) memcpy(&precision, dec + sizeof(unsigned char), sizeof(unsigned char)); EXPECT_EQ(precision, 10); // the one initialized in test_init_standard_instance_one_cube_one_metric_one_cell_hll - struct ST_hyperloglog *hll_from_blob = ST_hyperloglog_new(precision); + struct hyperloglog *hll_from_blob = hyperloglog_new(precision); int num_reg = NUM_REG(precision); int words = INT_CEIL(num_reg, REG_PER_WORD); size_t reg_size = words * sizeof(uint32_t); @@ -143,11 +143,11 @@ TEST(metric_test_hll, serialize_with_b64_and_query) memcpy(hll_from_blob->registers, registers, reg_size); free(registers); - EXPECT_NEAR(ST_hyperloglog_count(hll_from_blob), 3, 0.5); + EXPECT_NEAR(hyperloglog_count(hll_from_blob), 3, 0.5); free(blob); free(dec); fieldstat_free(instance); - ST_hyperloglog_free(hll_from_blob); + hyperloglog_free(hll_from_blob); } @@ -170,10 +170,10 @@ TEST(metric_test_hll, serialize_with_b64_and_query_with_python_api) EXPECT_EQ(flag, true); void *hll_from_blob = hll_base64_decode(blob); - EXPECT_NEAR(ST_hyperloglog_count((struct ST_hyperloglog *)hll_from_blob), 3, 0.5); + EXPECT_NEAR(hyperloglog_count((struct hyperloglog *)hll_from_blob), 3, 0.5); free(blob); fieldstat_free(instance); - ST_hyperloglog_free((struct ST_hyperloglog *)hll_from_blob); + hyperloglog_free((struct hyperloglog *)hll_from_blob); } TEST(metric_test_hll, add_with_wrong_cube_id_expecting_fail) |
