1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
|
/*
* Staggered HyperLogLog CRDT
* ST HLL Reference: Cornacchia, Alessandro, et al. "Staggered HLL: Near-continuous-time cardinality estimation with no overhead."
* Computer Communications 193 (2022): 168-175.
* https://www.sciencedirect.com/science/article/abs/pii/S0140366422002407
* HLL Reference: HyperLogLog in Practice: Algorithmic Engineering of a State of The Art Cardinality Estimation Algorithm
* https://storage.googleapis.com/pub-tools-public-publication-data/pdf/40671.pdf
* The HyperLogLog implementation is adapted from armon/hlld (https://github.com/armon/hlld/blob/master/src/hll.c)
* Author: [email protected]
*/
#pragma once
#include <stddef.h>
#include <sys/time.h>
#include <uuid/uuid.h>
#include <stdint.h>
#ifdef __cplusplus
extern "C"
{
#endif
// Ensure precision in a sane bound
#define HLL_MIN_PRECISION 4 // 16 registers
#define HLL_MAX_PRECISION 18 // 262,144 registers
#define REG_WIDTH 6 // Bits per register
#define INT_WIDTH 32 // Bits in an int
#define REG_PER_WORD 5 // floor(INT_WIDTH / REG_WIDTH)
#define NUM_REG(precision) ((1 << precision))
#define INT_CEIL(num, denom) (((num) + (denom) - 1) / (denom))
#define REGISTER_SIZE(precision) INT_WIDTH*INT_CEIL(NUM_REG(precision), REG_PER_WORD)
struct ST_HLL_configuration
{
unsigned char precision;
};
struct ST_hyperloglog
{
struct ST_HLL_configuration cfg;
uint32_t *registers;
};
struct ST_hyperloglog *ST_hyperloglog_new(unsigned char precision);
void ST_hyperloglog_free(struct ST_hyperloglog *h);
//Return 1 if at least 1 ST HyperLogLog internal register was altered. 0 otherwise.
int ST_hyperloglog_add(struct ST_hyperloglog *h, const char *key, size_t keylen);
void ST_hyperloglog_reset(struct ST_hyperloglog *h);
double ST_hyperloglog_count(const struct ST_hyperloglog *h);
size_t ST_hyperloglog_serialized_size(const struct ST_hyperloglog *h);
void ST_hyperloglog_serialize(const struct ST_hyperloglog *h, char **blob, size_t *blob_sz);
void ST_hyperloglog_serialize_for_networking(const struct ST_hyperloglog *h, char **blob, size_t *blob_sz);
struct ST_hyperloglog *ST_hyperloglog_deserialize(const char *blob, size_t blob_sz);
int ST_hyperloglog_merge(struct ST_hyperloglog *dst, const struct ST_hyperloglog *src);
void ST_hyperloglog_merge_blob(struct ST_hyperloglog *dst, const char *blob, size_t blob_sz);
double ST_hyperloglog_error_for_precision(unsigned char precision);
void ST_hyperloglog_configure(struct ST_hyperloglog *h, unsigned char precision, int time_window_seconds, const struct timeval now);
#ifdef __cplusplus
}
#endif
|