diff options
Diffstat (limited to 'src/metrics/hyperloglog.h')
| -rw-r--r-- | src/metrics/hyperloglog.h | 59 |
1 files changed, 59 insertions, 0 deletions
diff --git a/src/metrics/hyperloglog.h b/src/metrics/hyperloglog.h new file mode 100644 index 0000000..b0e6db5 --- /dev/null +++ b/src/metrics/hyperloglog.h @@ -0,0 +1,59 @@ +/* +* Staggered HyperLogLog CRDT +* ST HLL Reference: Cornacchia, Alessandro, et al. "Staggered HLL: Near-continuous-time cardinality estimation with no overhead." +* Computer Communications 193 (2022): 168-175. +* https://www.sciencedirect.com/science/article/abs/pii/S0140366422002407 +* HLL Reference: HyperLogLog in Practice: Algorithmic Engineering of a State of The Art Cardinality Estimation Algorithm +* https://storage.googleapis.com/pub-tools-public-publication-data/pdf/40671.pdf +* The HyperLogLog implementation is adapted from armon/hlld (https://github.com/armon/hlld/blob/master/src/hll.c) +* Author: [email protected] +*/ +#pragma once +#include <stddef.h> +#include <sys/time.h> +#include <uuid/uuid.h> +#include <stdint.h> +#ifdef __cplusplus +extern "C" +{ +#endif +// Ensure precision in a sane bound +#define HLL_MIN_PRECISION 4 // 16 registers +#define HLL_MAX_PRECISION 18 // 262,144 registers + +#define REG_WIDTH 6 // Bits per register +#define INT_WIDTH 32 // Bits in an int +#define REG_PER_WORD 5 // floor(INT_WIDTH / REG_WIDTH) + +#define NUM_REG(precision) ((1 << precision)) +#define INT_CEIL(num, denom) (((num) + (denom) - 1) / (denom)) + +#define REGISTER_SIZE(precision) INT_WIDTH*INT_CEIL(NUM_REG(precision), REG_PER_WORD) + +struct HLL_configuration +{ + unsigned char precision; +}; +struct hyperloglog +{ + struct HLL_configuration cfg; + uint32_t *registers; +}; + +struct hyperloglog *hyperloglog_new(unsigned char precision); +void hyperloglog_free(struct hyperloglog *h); +//Return 1 if at least 1 ST HyperLogLog internal register was altered. 0 otherwise. +int hyperloglog_add(struct hyperloglog *h, const char *key, size_t keylen); +int hyperloglog_add_hash(struct hyperloglog *h, uint64_t hash); +void hyperloglog_reset(struct hyperloglog *h); +double hyperloglog_count(const struct hyperloglog *h); +size_t hyperloglog_serialized_size(const struct hyperloglog *h); +void hyperloglog_serialize(const struct hyperloglog *h, char **blob, size_t *blob_sz); +void hyperloglog_serialize_for_networking(const struct hyperloglog *h, char **blob, size_t *blob_sz); +struct hyperloglog *hyperloglog_deserialize(const char *blob, size_t blob_sz); +int hyperloglog_merge(struct hyperloglog *dest, const struct hyperloglog *src); +void hyperloglog_merge_blob(struct hyperloglog *dest, const char *blob, size_t blob_sz); +double hyperloglog_error_for_precision(unsigned char precision); +#ifdef __cplusplus +} +#endif
\ No newline at end of file |
