summaryrefslogtreecommitdiff
path: root/src/metrics/hyperloglog.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/metrics/hyperloglog.h')
-rw-r--r--src/metrics/hyperloglog.h59
1 files changed, 59 insertions, 0 deletions
diff --git a/src/metrics/hyperloglog.h b/src/metrics/hyperloglog.h
new file mode 100644
index 0000000..b0e6db5
--- /dev/null
+++ b/src/metrics/hyperloglog.h
@@ -0,0 +1,59 @@
+/*
+* Staggered HyperLogLog CRDT
+* ST HLL Reference: Cornacchia, Alessandro, et al. "Staggered HLL: Near-continuous-time cardinality estimation with no overhead."
+* Computer Communications 193 (2022): 168-175.
+* https://www.sciencedirect.com/science/article/abs/pii/S0140366422002407
+* HLL Reference: HyperLogLog in Practice: Algorithmic Engineering of a State of The Art Cardinality Estimation Algorithm
+* https://storage.googleapis.com/pub-tools-public-publication-data/pdf/40671.pdf
+* The HyperLogLog implementation is adapted from armon/hlld (https://github.com/armon/hlld/blob/master/src/hll.c)
+*/
+#pragma once
+#include <stddef.h>
+#include <sys/time.h>
+#include <uuid/uuid.h>
+#include <stdint.h>
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+// Ensure precision in a sane bound
+#define HLL_MIN_PRECISION 4 // 16 registers
+#define HLL_MAX_PRECISION 18 // 262,144 registers
+
+#define REG_WIDTH 6 // Bits per register
+#define INT_WIDTH 32 // Bits in an int
+#define REG_PER_WORD 5 // floor(INT_WIDTH / REG_WIDTH)
+
+#define NUM_REG(precision) ((1 << precision))
+#define INT_CEIL(num, denom) (((num) + (denom) - 1) / (denom))
+
+#define REGISTER_SIZE(precision) INT_WIDTH*INT_CEIL(NUM_REG(precision), REG_PER_WORD)
+
+struct HLL_configuration
+{
+ unsigned char precision;
+};
+struct hyperloglog
+{
+ struct HLL_configuration cfg;
+ uint32_t *registers;
+};
+
+struct hyperloglog *hyperloglog_new(unsigned char precision);
+void hyperloglog_free(struct hyperloglog *h);
+//Return 1 if at least 1 ST HyperLogLog internal register was altered. 0 otherwise.
+int hyperloglog_add(struct hyperloglog *h, const char *key, size_t keylen);
+int hyperloglog_add_hash(struct hyperloglog *h, uint64_t hash);
+void hyperloglog_reset(struct hyperloglog *h);
+double hyperloglog_count(const struct hyperloglog *h);
+size_t hyperloglog_serialized_size(const struct hyperloglog *h);
+void hyperloglog_serialize(const struct hyperloglog *h, char **blob, size_t *blob_sz);
+void hyperloglog_serialize_for_networking(const struct hyperloglog *h, char **blob, size_t *blob_sz);
+struct hyperloglog *hyperloglog_deserialize(const char *blob, size_t blob_sz);
+int hyperloglog_merge(struct hyperloglog *dest, const struct hyperloglog *src);
+void hyperloglog_merge_blob(struct hyperloglog *dest, const char *blob, size_t blob_sz);
+double hyperloglog_error_for_precision(unsigned char precision);
+#ifdef __cplusplus
+}
+#endif \ No newline at end of file