summaryrefslogtreecommitdiff
path: root/src/metrics/hyperloglog.h
blob: b1b2330b278d7d50050157cf863cd471c13402c2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
/*
* Staggered HyperLogLog CRDT
* ST HLL Reference: Cornacchia, Alessandro, et al. "Staggered HLL: Near-continuous-time cardinality estimation with no overhead." 
* Computer Communications 193 (2022): 168-175.
* https://www.sciencedirect.com/science/article/abs/pii/S0140366422002407
* HLL Reference: HyperLogLog in Practice: Algorithmic Engineering of a State of The Art Cardinality Estimation Algorithm
* https://storage.googleapis.com/pub-tools-public-publication-data/pdf/40671.pdf
* The HyperLogLog implementation is adapted from armon/hlld (https://github.com/armon/hlld/blob/master/src/hll.c)
* Author: [email protected]
*/
#pragma once
#include <stddef.h>
#include <sys/time.h>
#include <uuid/uuid.h>
#include <stdint.h>
#ifdef __cplusplus
extern "C"
{
#endif
// Ensure precision in a sane bound
#define HLL_MIN_PRECISION 4      // 16 registers
#define HLL_MAX_PRECISION 18     // 262,144 registers

#define REG_WIDTH 6     // Bits per register
#define INT_WIDTH 32    // Bits in an int
#define REG_PER_WORD 5  // floor(INT_WIDTH / REG_WIDTH)

#define NUM_REG(precision) ((1 << precision))
#define INT_CEIL(num, denom) (((num) + (denom) - 1) / (denom))

#define REGISTER_SIZE(precision) INT_WIDTH*INT_CEIL(NUM_REG(precision), REG_PER_WORD)

struct HLL_configuration
{
    unsigned char precision;
};
struct hyperloglog
{
    struct HLL_configuration cfg;
    uint32_t *registers;
};

struct hyperloglog *hyperloglog_new(unsigned char precision);
void hyperloglog_free(struct hyperloglog *h);
//Return 1 if at least 1 ST HyperLogLog internal register was altered. 0 otherwise.
int hyperloglog_add(struct hyperloglog *h, const char *key, size_t keylen);
int hyperloglog_add_hash(struct hyperloglog *h, uint64_t hash);
void hyperloglog_reset(struct hyperloglog *h);
double hyperloglog_count(const struct hyperloglog *h);
size_t hyperloglog_serialized_size(const struct hyperloglog *h);
void hyperloglog_serialize(const struct hyperloglog *h, char **blob, size_t *blob_sz);
void hyperloglog_serialize_into_base64(const struct hyperloglog *h, char **blob, size_t *blob_sz);
struct hyperloglog *hyperloglog_deserialize(const char *blob, size_t blob_sz);
int hyperloglog_merge(struct hyperloglog *dest, const struct hyperloglog *src);
void hyperloglog_merge_blob(struct hyperloglog *dest, const char *blob, size_t blob_sz);
double hyperloglog_error_for_precision(unsigned char precision);
#ifdef __cplusplus
}
#endif