Feature Fair and Bulk Token Bucket

author: 郑超 <[email protected]> 2023-05-05 12:16:36 +0000
committer: 郑超 <[email protected]> 2023-05-05 12:16:36 +0000
commit: d698baf916c37bd831aa08440f9898fee328725c (patch)
tree: ca1c28c451f42209b11ff41bfdd17e15b5aec46d
parent: 7614cf86377091b8f43649e7432c038d2daa96f9 (diff)
46 files changed, 7666 insertions, 1859 deletions
diff --git a/CRDT/CMakeLists.txt b/CRDT/CMakeLists.txt
index b791af2..991b5b5 100644
--- a/CRDT/CMakeLists.txt
+++ b/CRDT/CMakeLists.txt
@@ -1,10 +1,19 @@
 add_definitions(-D_GNU_SOURCE) 
 add_definitions(-fPIC)
 
-add_library(CRDT lww_register.c pn_counter.c or_map.c or_set.c oc_token_bucket.c)
+add_library(CRDT lww_register.c pn_counter.c or_map.c or_set.c cm_sketch.c st_hyperloglog.c
+			g_array.c token_bucket_common.c oc_token_bucket.c fair_token_bucket.c bulk_token_bucket.c)
 
 include_directories(${PROJECT_SOURCE_DIR}/deps/mpack
-					${PROJECT_SOURCE_DIR}/deps/uthash)
+					${PROJECT_SOURCE_DIR}/deps/uthash
+					${PROJECT_SOURCE_DIR}/deps/xxhash)
 
-add_executable(CRDT_gtest crdt_gtest.cpp ${PROJECT_SOURCE_DIR}/deps/mpack/mpack.c)
-target_link_libraries(CRDT_gtest CRDT gtest-static uuid)
-\ No newline at end of file
+add_executable(CRDT_base_gtest crdt_base_gtest.cpp 
+				${PROJECT_SOURCE_DIR}/deps/mpack/mpack.c
+				${PROJECT_SOURCE_DIR}/deps/xxhash/xxhash.c)
+target_link_libraries(CRDT_base_gtest CRDT gtest-static uuid)
+
+add_executable(CRDT_tb_gtest crdt_tb_gtest.cpp 
+				${PROJECT_SOURCE_DIR}/deps/mpack/mpack.c
+				${PROJECT_SOURCE_DIR}/deps/xxhash/xxhash.c)
+target_link_libraries(CRDT_tb_gtest CRDT gtest-static uuid)
+\ No newline at end of file
diff --git a/CRDT/bulk_token_bucket.c b/CRDT/bulk_token_bucket.c
new file mode 100644
index 0000000..9b1a1e3
--- /dev/null
+++ b/CRDT/bulk_token_bucket.c
@@ -0,0 +1,277 @@
+#include "bulk_token_bucket.h"
+#include "st_hyperloglog.h"
+#include "g_array.h"
+#include "crdt_utils.h"
+
+#include "xxhash.h"
+#include "uthash.h"
+#include "mpack.h"
+
+#include <math.h>
+#include <assert.h>
+
+#define PERTURB_INTERVAL_MAX_MS  8000
+
+
+struct btb_configuration
+{
+	long long CIR;	//Committed Information Rate; 1000*CBS/CIR is used as milliseconds, so presumably tokens per second. CIR==0 with CBS==0 means unlimited
+	long long CBS;	//Committed Burst Size
+	long long refill_interval_ms;	//refill granularity handed to tb_available(); bulk_token_bucket_new() sets it to 10
+    long long bucket_num;	//number of hash buckets the keys are mapped onto; with 0 consume() grants nothing
+	struct timeval timestamp;	//time of the last (re)configuration; merge keeps the configuration with the newer timestamp
+};
+struct refill_mark
+{
+    long long refilled; //refill counter of one bucket; consume() treats refilled minus consumed as the tokens available
+    long long refill_ms; //when `refilled` was last written, in milliseconds since bulk_token_bucket.start
+};
+
+struct bulk_token_bucket
+{
+	uuid_t my_id; //id of the local replica
+    struct btb_configuration cfg;
+    struct timeval start; //origin of every *_ms value stored in this structure
+    long long perturb; //seed of the key->bucket hash; incremented by consume() at each perturb
+	struct timeval perturb_timestamp; //when perturb last changed; the newer one wins in merge
+	//Everything above is the fixed header copied byte-for-byte by serialize/deserialize (offsetof(..., hll)); pointers must stay below.
+    struct ST_hyperloglog *hll; //fed with every consumed key to estimate the number of distinct keys (sliding-window style, per the original note)
+	struct g_array *consumed; //per-bucket consumed counters, merged across replicas
+    struct refill_mark *refilled; //cfg.bucket_num entries, local refill state of each bucket
+};
+static double collision_probability(long long bucket_num, long long n_key)
+{
+    //Probability that one key shares its bucket with any of the other n_key-1 keys, see https://en.wikipedia.org/wiki/Birthday_problem#Number_of_people_with_a_shared_birthday
+    if(n_key<=1 || bucket_num<=1) return (n_key>1) ? 1.0 : 0.0; //guards: a negative pow() exponent gave a negative result, bucket_num==0 divided by zero
+    return 1-pow((double)(bucket_num-1)/bucket_num, n_key-1);
+}
+
+struct bulk_token_bucket *bulk_token_bucket_new(uuid_t my_id, struct timeval now, long long CIR, long long CBS, long long bucket_num)
+{
+    //Create the bucket set of replica my_id; the configuration stamp, start and perturb clocks all begin at now.
+    struct bulk_token_bucket *bucket=ALLOC(struct bulk_token_bucket, 1);
+    uuid_copy(bucket->my_id, my_id);
+    bucket->cfg.CIR=CIR;
+    bucket->cfg.CBS=CBS;
+    bucket->cfg.bucket_num=bucket_num;
+    bucket->cfg.refill_interval_ms=10;
+    bucket->cfg.timestamp=now;
+    bucket->start=now;
+    bucket->perturb=1; //first seed of the key->bucket hash
+    bucket->perturb_timestamp=now;
+    bucket->hll=ST_hyperloglog_new(9, 5, now);
+    bucket->consumed=g_array_new(my_id, bucket_num);
+    bucket->refilled=ALLOC(struct refill_mark, bucket_num);
+    return bucket;
+}
+void bulk_token_bucket_free(struct bulk_token_bucket *btb)
+{
+	g_array_free(btb->consumed);
+	ST_hyperloglog_free(btb->hll);
+    free(btb->refilled);
+    free(btb);
+}
+
+long long bulk_token_bucket_consume(struct bulk_token_bucket *btb, struct timeval now, const char *key, size_t keylen, enum tb_consume_type cmd, long long tokens)
+{	//Request `tokens` at time `now` from the bucket that `key` hashes to; returns the number of tokens granted (asserted >=0).
+	long long delta_time_ms=0, bucket_idx=0, consumed=0, refilled=0;
+	long long now_ms=timeval_delta_ms(btb->start, now); //refill marks are kept relative to btb->start
+	delta_time_ms=timeval_delta_ms(btb->perturb_timestamp, now); //time since the hash seed last changed
+	ST_hyperloglog_add(btb->hll, key, keylen, now); //the key is counted even when the early return below is taken
+	//Perturb (re-seed the key->bucket hash) every CBS/CIR seconds, capped at PERTURB_INTERVAL_MAX_MS, so that buckets unused since the last perturb have been refilled to CBS.
+	if(!btb->cfg.bucket_num) return 0; //no buckets: nothing can be granted, and the modulo below would divide by zero
+	if(btb->cfg.CIR && delta_time_ms > MIN(1000*btb->cfg.CBS/btb->cfg.CIR, PERTURB_INTERVAL_MAX_MS))
+	{
+		btb->perturb++;
+		memcpy(&btb->perturb_timestamp, &now, sizeof(btb->perturb_timestamp));
+	}
+	
+	bucket_idx=XXH3_64bits_withSeed(key, keylen, btb->perturb)%btb->cfg.bucket_num; //bucket of this key under the current seed
+	consumed=g_array_get(btb->consumed, bucket_idx);
+	refilled=btb->refilled[bucket_idx].refilled;
+	assert(refilled>=0);
+	assert(consumed>=0);
+
+	long long perturb_ms=timeval_delta_ms(btb->start, btb->perturb_timestamp); //latest perturb on the same scale as refill_ms
+
+	//The current bucket has not been refilled since the latest perturb:
+	//find the bucket this key used under the previous seed and, if that one is a different bucket that is also
+	//untouched since the perturb, make the current bucket offer the same available tokens as the previous one.
+	if(btb->refilled[bucket_idx].refill_ms < perturb_ms)
+	{
+		long long pre_bucket_idx=0;
+		pre_bucket_idx=XXH3_64bits_withSeed(key, keylen, btb->perturb-1)%btb->cfg.bucket_num;
+		if(btb->refilled[pre_bucket_idx].refill_ms < perturb_ms &&
+				pre_bucket_idx!=bucket_idx)
+		{
+			long long pre_consumed=0, pre_refilled=0;
+			pre_consumed=g_array_get(btb->consumed, pre_bucket_idx);
+			pre_refilled=btb->refilled[pre_bucket_idx].refilled;
+			long long pre_available=0, pre_new_refilled=0; //pre_new_refilled only receives the out-parameter; its value is not used
+			pre_available=tb_available(btb->cfg.CIR, btb->cfg.CBS, 
+										pre_consumed, pre_refilled, 
+										now_ms - btb->refilled[pre_bucket_idx].refill_ms, btb->cfg.refill_interval_ms, 
+										&pre_new_refilled);
+
+			btb->refilled[bucket_idx].refilled  += pre_available - (btb->refilled[bucket_idx].refilled - consumed); //i.e. refilled = consumed + pre_available
+			btb->refilled[bucket_idx].refill_ms = now_ms;
+			refilled = btb->refilled[bucket_idx].refilled;
+		}
+	}
+
+	long long new_refilled=0, global_available=0;
+	int infinite_flag=0;
+    if(btb->cfg.CBS==0 && btb->cfg.CIR==0) //CIR==CBS==0: unlimited, every request is granted in full
+    {
+		infinite_flag=1;
+		new_refilled += tokens; //NOTE(review): new_refilled starts at 0, so the mark becomes `tokens`, not refilled+tokens - confirm this is intended
+    }
+	else
+	{
+		long long delta_ms= now_ms - btb->refilled[bucket_idx].refill_ms; //time since this bucket's last refill
+		global_available=tb_available(btb->cfg.CIR, btb->cfg.CBS, consumed, refilled, delta_ms, btb->cfg.refill_interval_ms, &new_refilled);
+	}
+
+	size_t n_replica=MAX(1, g_array_replicas(btb->consumed)); //at least one replica is assumed when the array reports none
+	long long assigned=0;
+	if(infinite_flag)
+	{
+		assigned=tokens;
+	}
+	else
+	{
+		assigned=tb_consume(btb->cfg.CIR, global_available, n_replica, cmd, tokens);
+	}
+
+	if(new_refilled!=refilled) //store the refill mark only when it changed
+	{
+		btb->refilled[bucket_idx].refilled=new_refilled;
+        btb->refilled[bucket_idx].refill_ms=timeval_delta_ms(btb->start, now); //same value as now_ms
+	}
+	assert(assigned>=0);
+	if(assigned) g_array_incrby(btb->consumed, bucket_idx, assigned); //record the grant in this bucket's consumed counter
+	return assigned;
+}
+void bulk_token_bucket_configure(struct bulk_token_bucket *btb, struct timeval now, long long CIR, long long CBS, long long bucket_num)
+{
+	//Install CIR/CBS/bucket_num stamped with now and resize the per-bucket state; if memory runs out nothing is changed.
+	struct refill_mark *marks=realloc(btb->refilled, MAX(bucket_num, 1)*sizeof(struct refill_mark)); //never request 0 bytes
+	if(!marks) return; //btb->refilled is still the old, valid array
+	if(bucket_num>btb->cfg.bucket_num) memset(marks+btb->cfg.bucket_num, 0, (bucket_num-btb->cfg.bucket_num)*sizeof(struct refill_mark)); //realloc() leaves the grown tail uninitialized
+	btb->refilled=marks;
+	btb->cfg=(struct btb_configuration){.CIR=CIR, .CBS=CBS, .refill_interval_ms=btb->cfg.refill_interval_ms, .bucket_num=bucket_num, .timestamp=now};
+	g_array_resize(btb->consumed, bucket_num);
+}
+void bulk_token_bucket_info(const struct bulk_token_bucket *btb, struct timeval now, struct bulk_token_bucket_info *info)
+{
+	info->CIR=btb->cfg.CIR;
+	info->CBS=btb->cfg.CBS;
+	info->bucket_number=btb->cfg.bucket_num;
+	info->replicas=g_array_replicas(btb->consumed);
+	info->estimate_keys=ST_hyperloglog_count(btb->hll);
+	info->collision_rate=collision_probability(info->bucket_number, info->estimate_keys);
+	return;
+}
+long long bulk_token_bucket_read_available(const struct bulk_token_bucket *btb, struct timeval now, const char *key, size_t keylen)
+{
+	//Read-only peek at the tokens `key` could obtain at `now`; neither the consumed counters nor the refill marks are written.
+	if(btb->cfg.bucket_num<=0) return 0; //no buckets: the modulo below would divide by zero, and consume() grants 0 in this state
+	if(btb->cfg.CBS==0 && btb->cfg.CIR==0) return INT64_MAX; //CIR==CBS==0 means unlimited
+	long long bucket_idx=XXH3_64bits_withSeed(key, keylen, btb->perturb)%btb->cfg.bucket_num; //same bucket choice and index type as consume()
+	long long consumed=g_array_get(btb->consumed, bucket_idx);
+	long long refilled=btb->refilled[bucket_idx].refilled;
+	long long now_ms=timeval_delta_ms(btb->start, now);
+	long long delta_ms=now_ms - btb->refilled[bucket_idx].refill_ms; //time since this bucket's last refill
+	long long new_refilled=0; //out-parameter required by tb_available(); deliberately not stored
+	return tb_available(btb->cfg.CIR, btb->cfg.CBS, consumed, refilled, delta_ms, btb->cfg.refill_interval_ms, &new_refilled);
+}
+const size_t  BULK_TOKEN_BUCKET_HEADER_SIZE=offsetof(struct bulk_token_bucket, hll);
+void bulk_token_bucket_serialize(const struct bulk_token_bucket *btb, char **blob, size_t *blob_sz)
+{
+	size_t sz=0, offset=0;
+	sz += BULK_TOKEN_BUCKET_HEADER_SIZE;
+	sz += ST_hyperloglog_serialized_size(btb->hll);
+	sz += g_array_serialized_size(btb->consumed);
+	sz += btb->cfg.bucket_num*sizeof(struct refill_mark);
+	
+	char *buffer = ALLOC(char, sz);
+	memcpy(buffer+offset, btb, BULK_TOKEN_BUCKET_HEADER_SIZE);
+	offset+=BULK_TOKEN_BUCKET_HEADER_SIZE;
+	char *tmp_blob=NULL;
+	size_t tmp_sz=0;
+
+	ST_hyperloglog_serialize(btb->hll, &tmp_blob, &tmp_sz);
+	memcpy(buffer+offset, tmp_blob, tmp_sz);
+	offset+=tmp_sz;
+	free(tmp_blob);
+	tmp_blob=NULL;
+
+	g_array_serialize(btb->consumed, &tmp_blob, &tmp_sz);
+	memcpy(buffer+offset, tmp_blob, tmp_sz);
+	offset+=tmp_sz;
+	free(tmp_blob);
+	tmp_blob=NULL;
+
+	memcpy(buffer+offset, btb->refilled, btb->cfg.bucket_num*sizeof(struct refill_mark));
+	offset+=btb->cfg.bucket_num*sizeof(struct refill_mark);
+	
+	*blob=buffer;
+	*blob_sz=sz;
+
+	assert(offset==sz);
+	return;
+}
+struct bulk_token_bucket *bulk_token_bucket_deserialize(const char *blob, size_t blob_sz)
+{	//Inverse of bulk_token_bucket_serialize(): fixed header, HyperLogLog, consumed array, then bucket_num refill marks.
+	struct bulk_token_bucket *btb=ALLOC(struct bulk_token_bucket, 1);
+	size_t offset=0;
+	memcpy(btb, blob+offset, BULK_TOKEN_BUCKET_HEADER_SIZE); //NOTE(review): blob_sz is not compared with the header size before this copy
+	offset+=BULK_TOKEN_BUCKET_HEADER_SIZE;
+	btb->hll=ST_hyperloglog_deserialize(blob+offset, blob_sz-offset);
+	offset+=ST_hyperloglog_serialized_size(btb->hll); //the section length is recomputed from the rebuilt object
+	btb->consumed=g_array_deserialize(blob+offset, blob_sz-offset);
+	offset+=g_array_serialized_size(btb->consumed);
+	btb->refilled=ALLOC(struct refill_mark, btb->cfg.bucket_num); //bucket_num comes straight from the blob header
+	memcpy(btb->refilled, blob+offset,  btb->cfg.bucket_num*sizeof(struct refill_mark)); //NOTE(review): not bounded by blob_sz - confirm blobs are trusted
+	offset+=btb->cfg.bucket_num*sizeof(struct refill_mark);
+	//The only size validation: the sections read must add up to exactly blob_sz (checked only when asserts are enabled).
+	assert(offset==blob_sz);
+	return btb;
+}
+void bulk_token_bucket_merge(struct bulk_token_bucket *dst, const struct bulk_token_bucket *src)
+{	//Fold the state of src into dst; src is not modified.
+    if(timercmp(&(dst->cfg.timestamp), &(src->cfg.timestamp), <))//Last-Write-Wins: adopt the newer configuration, keeping its timestamp
+    {
+		bulk_token_bucket_configure(dst, src->cfg.timestamp, src->cfg.CIR, src->cfg.CBS, src->cfg.bucket_num);
+    }
+    if(timercmp(&(dst->perturb_timestamp), &(src->perturb_timestamp), <))//Last-Write-Wins: adopt the newer hash seed
+	{
+		dst->perturb=src->perturb;
+		memcpy(&dst->perturb_timestamp, &src->perturb_timestamp, sizeof(dst->perturb_timestamp));
+	}
+	//Bucket counts still differ only when src carries the older configuration; its per-bucket state cannot be mapped onto dst, so stop here.
+	if(dst->cfg.bucket_num != src->cfg.bucket_num) return;
+	ST_hyperloglog_merge(dst->hll, src->hll);
+	g_array_merge(dst->consumed, src->consumed);
+	for(long long i=0; i<dst->cfg.bucket_num; i++)
+	{
+		if(dst->refilled[i].refilled < src->refilled[i].refilled)//Bigger refilled wins
+		{
+			dst->refilled[i].refilled=src->refilled[i].refilled;
+			dst->refilled[i].refill_ms=src->refilled[i].refill_ms; //NOTE(review): refill_ms is relative to src->start; confirm both replicas share the same start
+		}
+	}
+	return;
+}
+void bulk_token_bucket_merge_blob(struct bulk_token_bucket *btb, const char * blob, size_t blob_sz)
+{
+	struct bulk_token_bucket *src=bulk_token_bucket_deserialize(blob, blob_sz);
+	bulk_token_bucket_merge(btb, src);
+	bulk_token_bucket_free(src);
+	return;
+}
+size_t bulk_token_bucket_mem_size(const struct bulk_token_bucket *btb)
+{
+	//to do
+    return 0;
+}
+\ No newline at end of file
diff --git a/CRDT/bulk_token_bucket.h b/CRDT/bulk_token_bucket.h
new file mode 100644
index 0000000..cce7324
--- /dev/null
+++ b/CRDT/bulk_token_bucket.h
@@ -0,0 +1,38 @@
+/*
+* Bulk Token Bucket CRDT. A bulk version of OC token bucket.
+* Author: [email protected]
+* 2023-4-14
+*/
+#pragma once
+#include "token_bucket_common.h"
+#include <stddef.h>
+#include <sys/time.h>
+#include <uuid/uuid.h>
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+struct bulk_token_bucket *bulk_token_bucket_new(uuid_t my_id, struct timeval now, long long CIR, long long CBS, long long bucket_num);
+void bulk_token_bucket_free(struct bulk_token_bucket *btb);
+void bulk_token_bucket_configure(struct bulk_token_bucket *btb, struct timeval now, long long CIR, long long CBS, long long bucket_num);
+
+long long bulk_token_bucket_consume(struct bulk_token_bucket *btb, struct timeval now, const char *key, size_t key_len, enum tb_consume_type cmd, long long tokens);
+long long bulk_token_bucket_read_available(const struct bulk_token_bucket *btb, struct timeval now, const char *key, size_t keylen);
+struct bulk_token_bucket_info
+{
+	long long CIR;
+	long long CBS;
+	long long bucket_number;
+	long long estimate_keys;
+	long long replicas;
+	double collision_rate;
+};
+void bulk_token_bucket_info(const struct bulk_token_bucket *btb, struct timeval now, struct bulk_token_bucket_info *info);
+void bulk_token_bucket_serialize(const struct bulk_token_bucket *btb, char **blob, size_t *blob_sz);
+struct bulk_token_bucket *bulk_token_bucket_deserialize(const char *blob, size_t blob_sz);
+void bulk_token_bucket_merge(struct bulk_token_bucket *dst, const struct bulk_token_bucket *src);
+void bulk_token_bucket_merge_blob(struct bulk_token_bucket *btb, const char * blob, size_t blob_sz);
+size_t bulk_token_bucket_mem_size(const struct bulk_token_bucket *btb);
+#ifdef __cplusplus
+}
+#endif
diff --git a/CRDT/cm_sketch.c b/CRDT/cm_sketch.c
new file mode 100644
index 0000000..9cdaca3
--- /dev/null
+++ b/CRDT/cm_sketch.c
@@ -0,0 +1,418 @@
+#include "cm_sketch.h"
+#include "uthash.h"
+#include "xxhash.h"
+#include "mpack.h"
+#include "crdt_utils.h"
+
+#include <stdint.h>
+#include <string.h>
+#include <assert.h>
+#include <math.h> //pow
+
+#define CMS_MAX_DEPTH   16
+
+struct cms_opt
+{
+    int width;
+    int depth;
+};
+static const struct cms_opt cms_default_opt = {
+    .width=1024*16,
+    .depth=6
+};
+static uint64_t __fnv_1a(const char *key, size_t len, int seed)
+{
+    // FNV-1a hash (http://www.isthe.com/chongo/tech/comp/fnv/)
+    size_t i;
+    uint64_t h = 14695981039346656037ULL + (31 * seed); // FNV_OFFSET 64 bit with magic number seed
+    for (i = 0; i < len; ++i){
+            h = h ^ (unsigned char) key[i];
+            h = h * 1099511628211ULL; // FNV_PRIME 64 bit
+    }
+    return h;
+}
+static int32_t __safe_add(int32_t a, int32_t b)
+{
+    //Saturating a+b. A counter already pinned at INT32_MAX or INT32_MIN stays pinned.
+    if (a == INT32_MAX || a == INT32_MIN) return a;
+    int64_t wide = (int64_t) a + b; //the 64-bit sum cannot overflow
+    if (wide > INT32_MAX) return INT32_MAX;
+    if (wide < INT32_MIN) return INT32_MIN; //a negative b used to overflow in the 32-bit "a + b"
+    return (int32_t) wide;
+}
+static int32_t __safe_sub(int32_t a, int32_t b)
+{
+    //Saturating a-b. A counter already pinned at INT32_MAX or INT32_MIN stays pinned.
+    if (a == INT32_MAX || a == INT32_MIN) return a;
+    int64_t wide = (int64_t) a - b; //the range test must be made on a-b, not on b-a
+    if (wide > INT32_MAX) return INT32_MAX; //reachable when b is negative
+    if (wide < INT32_MIN) return INT32_MIN; //underflow stops at the floor instead of jumping to INT32_MAX
+    return (int32_t) wide;
+}
+
+struct cms_bin
+{
+    uuid_t uuid;
+    unsigned long long sequence;
+    uint64_t elements_added;
+    int width;
+    int depth;
+    UT_hash_handle hh;
+    int32_t *bins;
+};
+struct cms_bin *cms_bin_new(uuid_t uuid, int width, int depth)
+{
+    struct cms_bin *bin=NULL;
+    bin=ALLOC(struct cms_bin, 1);
+    uuid_copy(bin->uuid, uuid);
+    bin->width=width;
+    bin->depth=depth;
+    bin->bins=ALLOC(int32_t, width*depth);
+    return bin;
+}
+static int32_t cms_bin_query(const struct cms_bin *bin, const uint64_t *hashes, size_t n_hash)
+{
+    if(n_hash!=bin->depth) return 0;
+
+    int32_t added=INT32_MAX;
+    for(int i=0; i<bin->depth; i++)
+    {
+        uint64_t idx=(hashes[i] % bin->width) + (i * bin->width);
+        added=MIN(added, bin->bins[idx]);
+    }
+    return added;
+}
+#define CMLS16_BASE 1.00025
+double point_value(int32_t c)
+{
+    if(c==0) return 0;
+    return pow(CMLS16_BASE, c-1); 
+}
+static int32_t cms_bin_log_query(const struct cms_bin *bin, const uint64_t *hashes, size_t n_hash)
+{
+    if(n_hash!=bin->depth) return 0;
+    int32_t c=INT32_MAX;
+    for(int i=0; i<bin->depth; i++)
+    {
+        int32_t idx=(hashes[i] % bin->width) + (i * bin->width);
+        c=MIN(c, bin->bins[idx]);
+    }
+    if(c<=1)
+    {
+        return point_value(c);
+    }
+    else
+    {
+        double v=point_value(c+1);
+        v=(1-v)/(1-CMLS16_BASE);
+        return (int32_t)v;
+    }
+}
+int increase_decision(int32_t c)
+{
+    //Returns 1 with probability 1/floor(CMLS16_BASE^c) and 0 otherwise; one random() draw per call.
+    double odds=pow(CMLS16_BASE, c);
+    odds=fmin(fmax(odds, 1), INT32_MAX); //clamp: c<0 truncated to 0 and made "%0" trap, a huge c made the cast to long undefined
+    return (random()%(long)odds)==0;
+}
+static void cms_bin_log_add(struct cms_bin *bin, const uint64_t *hashes, size_t n_hash, int32_t times)
+{
+    //Log-counter insert of `times` events for the key behind hashes[]; ignored unless n_hash equals the bin depth.
+    if(n_hash!=bin->depth) return;
+    int32_t c=INT32_MAX; //becomes the smallest of the key's cells, one cell per row
+    for(int row=0; row<bin->depth; row++)
+    {
+        int32_t idx=(hashes[row] % bin->width) + (row * bin->width);
+        c=MIN(c, bin->bins[idx]);
+    }
+
+    //Each event raises the cells that currently hold the minimum, but only when increase_decision(c) allows it;
+    //that call is the only place where a random number is drawn.
+    for(int t=0; t<times; t++)
+    {
+        if(!increase_decision(c))
+            continue;
+        for(int row=0; row<bin->depth; row++)
+        {
+            int32_t idx=(hashes[row] % bin->width) + (row * bin->width);
+            if(bin->bins[idx] == c)
+            {
+                bin->bins[idx]++;
+            }
+        }
+        c++; //the minimum itself moved up by one
+    }
+    //Bookkeeping read by CM_sketch_elements() (elements_added) and cms_bin_merge() (sequence).
+    bin->elements_added += times;
+    bin->sequence++;
+    return;
+}
+static int32_t cms_bin_add(struct cms_bin *bin, const uint64_t *hashes, size_t n_hash, int32_t times)
+{
+    if(n_hash!=bin->depth) return 0;
+    int32_t cu_idx=-1;//conservative update idx
+    for(int i=0; i<bin->depth; i++)
+    {
+        int32_t idx=(hashes[i] % bin->width) + (i * bin->width);
+        if(cu_idx<0) cu_idx=idx;
+        if(bin->bins[cu_idx] < bin->bins[idx])
+        {
+            cu_idx=idx;
+        }
+        if(times>0)
+        {
+            bin->bins[idx] = __safe_add(bin->bins[idx], times);
+        }
+        else
+        {
+            bin->bins[idx] = __safe_sub(bin->bins[idx], 0-times);
+        }
+    }
+    //bin->bins[cu_idx]+=times;
+    bin->elements_added += times;
+    bin->sequence++;
+    return  bin->bins[cu_idx];
+}
+static void cms_bin_free(struct cms_bin *bin)
+{
+    free(bin->bins);
+    free(bin);
+}
+static size_t cms_bin_blob_size(const struct cms_bin *bin)
+{
+    return offsetof(struct cms_bin, hh) + sizeof(int32_t)*bin->depth*bin->width;
+}
+static size_t cms_bin_size(const struct cms_bin *bin)
+{
+   return sizeof(struct cms_bin)+sizeof(int32_t)*bin->depth*bin->width;
+}
+static size_t cms_bin_serialize(const struct cms_bin *bin, char *buff, size_t buff_sz)
+{
+    size_t offset=0;
+    assert(buff_sz>=cms_bin_blob_size(bin));
+    memcpy(buff, bin, offsetof(struct cms_bin, hh));
+    offset += offsetof(struct cms_bin, hh);
+    memcpy(buff+offset, bin->bins, sizeof(int32_t)*bin->depth*bin->width);
+    offset += sizeof(int32_t)*bin->depth*bin->width;
+    assert(offset==cms_bin_blob_size(bin));
+    return offset;
+}
+static struct cms_bin *cms_bin_deserialize(const char *blob, size_t blob_sz)
+{
+    struct cms_bin *bin=ALLOC(struct cms_bin, 1);
+    size_t offset=0;
+    memcpy(bin, blob, offsetof(struct cms_bin, hh));
+    offset +=  offsetof(struct cms_bin, hh);
+    bin->bins=ALLOC(int32_t, bin->depth*bin->width);
+    assert(sizeof(int32_t)*bin->depth*bin->width==blob_sz-offset);
+    memcpy(bin->bins, blob+offset, sizeof(int32_t)*bin->depth*bin->width);
+    return bin;
+}
+static void cms_bin_merge(struct cms_bin *dst, const struct cms_bin *src)
+{
+    assert(dst->depth==src->depth);
+    assert(dst->width==src->width);
+    if(dst->sequence<src->sequence)
+    {
+        dst->elements_added=src->elements_added;
+        dst->sequence=src->sequence;
+        memcpy(dst->bins, src->bins, sizeof(int32_t)*dst->width*dst->depth);
+    }
+    return;
+}
+struct CM_sketch
+{
+    struct cms_opt opt;
+    uuid_t my_uuid;
+    struct cms_bin *hash_bins;
+    uint64_t hashes_max[CMS_MAX_DEPTH], hashes_min[CMS_MAX_DEPTH];
+    int is_virgin;
+};
+
+
+struct CM_sketch *CM_sketch_new(uuid_t my_id)
+{
+    struct CM_sketch *cms=ALLOC(struct CM_sketch, 1);
+    uuid_copy(cms->my_uuid, my_id);
+    memcpy(&cms->opt, &cms_default_opt, sizeof(cms_default_opt));
+    cms->is_virgin=1;
+    return cms;
+}
+static int cms_query(const struct CM_sketch *cms, const uint64_t *hashes, size_t n_hash)
+{
+    int added_global=0;
+    struct cms_bin *bin=NULL, *tmp=NULL;
+    HASH_ITER(hh, cms->hash_bins, bin, tmp)
+    {
+        added_global+=cms_bin_query(bin, hashes, n_hash);
+    }
+    return added_global;
+}
+int CM_sketch_query(const struct CM_sketch *cms, const char *key, size_t len)
+{
+    uint64_t hashes[CMS_MAX_DEPTH]={0};
+    int added_global=0;
+    assert(cms->opt.depth <= CMS_MAX_DEPTH);
+    for(int i=0; i<cms->opt.depth; i++)
+    {
+        hashes[i]=XXH3_64bits_withSeed(key, len, i);
+        //hashes[i]=__fnv_1a(key, len, i);
+    }
+
+    added_global=cms_query(cms, hashes, cms->opt.depth);
+    return added_global;
+}
+int CM_sketch_add_n(struct CM_sketch *cms, const char *key, size_t len, int times)
+{
+    struct cms_bin *bin=NULL;
+    HASH_FIND(hh, cms->hash_bins, cms->my_uuid, sizeof(cms->my_uuid), bin);
+    if(!bin)
+    {
+        bin=cms_bin_new(cms->my_uuid, cms->opt.width, cms->opt.depth);
+        HASH_ADD_KEYPTR(hh, cms->hash_bins, bin->uuid, sizeof(bin->uuid), bin);
+    }
+
+    uint64_t hashes[CMS_MAX_DEPTH]={0};
+    for(int i=0; i<cms->opt.depth && i<CMS_MAX_DEPTH; i++)
+	{
+        hashes[i]=XXH3_64bits_withSeed(key, len, i);
+        //hashes[i]=__fnv_1a(key, len, i);
+    }
+    
+    cms_bin_add(bin, hashes, cms->opt.depth, times);
+    long long added=cms_query(cms, hashes, cms->opt.depth);
+    long long max, min;
+    max=cms_query(cms, cms->hashes_max, cms->opt.depth);
+    if(added>max || cms->is_virgin)
+    {
+        memcpy(cms->hashes_max, hashes, sizeof(cms->hashes_max));
+    }
+    min=cms_query(cms, cms->hashes_min, cms->opt.depth);
+    if(added<min || cms->is_virgin)
+    {
+        memcpy(cms->hashes_min, hashes, sizeof(cms->hashes_min));
+    }
+    cms->is_virgin=0;
+    return added;
+}
+int CM_sketch_remove_n(struct CM_sketch *CM_sketch, const char *key, size_t len, int times)
+{
+    return CM_sketch_add_n(CM_sketch, key, len, 0-times);
+}
+
+void CM_sketch_free(struct CM_sketch *cms)
+{
+    struct cms_bin *bin=NULL, *tmp=NULL;
+    HASH_ITER(hh, cms->hash_bins, bin, tmp)
+    {
+        HASH_DELETE(hh, cms->hash_bins, bin);
+        cms_bin_free(bin);
+    }
+    free(cms);
+    return;
+}
+
+size_t CM_sketch_size(const struct CM_sketch *cms)
+{
+	size_t sz=0;
+    struct cms_bin *bin=NULL, *tmp=NULL;
+    HASH_ITER(hh, cms->hash_bins, bin, tmp)
+    {
+        sz+=cms_bin_size(bin);
+    }
+	sz+=sizeof(struct CM_sketch);
+	return sz;
+}
+void CM_sketch_serialize(const struct CM_sketch *cms, char **blob, size_t *blob_sz)
+{
+    size_t n_bins=HASH_COUNT(cms->hash_bins), offset=0; //blob layout: [n_bins] then, per replica bin, [bin blob size][bin blob]
+    size_t capacity=CM_sketch_size(cms)+n_bins*sizeof(size_t); //upper bound: in-memory size plus one length prefix per bin
+    char *buffer=ALLOC(char, capacity);
+    struct cms_bin *bin=NULL, *tmp=NULL;
+    memcpy(buffer+offset, &n_bins, sizeof(n_bins)); //memcpy instead of a size_t* cast: offsets inside the blob need not be aligned
+    offset+=sizeof(n_bins);
+    HASH_ITER(hh, cms->hash_bins, bin, tmp)
+    {
+        size_t bin_sz=cms_bin_blob_size(bin);
+        memcpy(buffer+offset, &bin_sz, sizeof(bin_sz)); //length prefix of this bin
+        offset += sizeof(bin_sz);
+        offset += cms_bin_serialize(bin, buffer+offset, capacity-offset);
+    }
+    *blob=buffer; //new buffer handed to the caller
+    *blob_sz=offset; //bytes actually written, at most capacity
+}
+struct CM_sketch *CM_sketch_deserialize(const char *blob, size_t blob_sz)
+{
+    struct CM_sketch *cms=ALLOC(struct CM_sketch, 1); //my_uuid is not part of the blob and is left as ALLOC() returned it
+    size_t n_replica=0, bin_blob_sz=0, offset=sizeof(n_replica);
+    memcpy(&cms->opt, &cms_default_opt, sizeof(cms->opt)); //dimensions to use when the blob holds no bins
+    memcpy(&n_replica, blob, sizeof(n_replica)); //memcpy instead of a size_t* cast: blob offsets need not be aligned
+    for(size_t i=0; i<n_replica; i++)
+    {
+        memcpy(&bin_blob_sz, blob+offset, sizeof(bin_blob_sz));
+        offset += sizeof(bin_blob_sz);
+        assert(offset<=blob_sz && bin_blob_sz<=blob_sz-offset); //the bin must end inside the blob; cms_bin_deserialize() asserts its inner size
+        struct cms_bin *bin=cms_bin_deserialize(blob+offset, bin_blob_sz);
+        offset+=bin_blob_sz;
+        cms->opt=(struct cms_opt){.width=bin->width, .depth=bin->depth}; //queries hash opt.depth times, so opt must match the stored bins
+        HASH_ADD_KEYPTR(hh, cms->hash_bins, bin->uuid, sizeof(bin->uuid), bin);
+    }
+    return cms;
+}
+int CM_sketch_elements(const struct CM_sketch *cms)
+{
+    int total_count=0;
+    struct cms_bin *bin=NULL, *tmp=NULL;
+    HASH_ITER(hh, cms->hash_bins, bin, tmp)
+    {
+        total_count+=bin->elements_added;
+    }
+    return total_count;
+}
+int CM_sketch_max(const struct CM_sketch *cms)
+{
+    int max=cms_query(cms, cms->hashes_max, cms->opt.depth);
+    return max;
+}
+int CM_sketch_min(const struct CM_sketch *cms)
+{
+    int min=cms_query(cms, cms->hashes_min, cms->opt.depth);
+    return min;
+}
+void CM_sketch_merge(struct CM_sketch *dst, const struct CM_sketch *src)
+{
+    struct cms_bin *src_bin=NULL, *dst_bin=NULL, *tmp=NULL;
+	HASH_ITER(hh, src->hash_bins, src_bin, tmp)
+	{
+		HASH_FIND(hh, dst->hash_bins, src_bin->uuid, sizeof(src_bin->uuid), dst_bin);
+		if(!dst_bin)
+		{
+			dst_bin=cms_bin_new(src_bin->uuid, dst->opt.width, dst->opt.depth);
+			HASH_ADD_KEYPTR(hh, dst->hash_bins, dst_bin->uuid, sizeof(dst_bin->uuid), dst_bin);
+		}
+        cms_bin_merge(dst_bin, src_bin);
+	}
+}
+void CM_sketch_merge_blob(struct CM_sketch *dst, const char *blob, size_t blob_sz)
+{
+    struct CM_sketch *src=CM_sketch_deserialize(blob, blob_sz);
+    CM_sketch_merge(dst, src);
+    CM_sketch_free(src);
+    return;
+}
+
+/*In the original Count-Min paper, the authors show that the probability that the estimate of an element lies between its true value x
+ and an upper bound x+e*m, where m is the number of elements entered into the Count-Min Sketch, is larger than 1-d
+ under two conditions. The first condition is that the width of the sketch is 2/e; the second, as encoded by
+ CM_sketch_info() below, is that its depth is log2(1/d), so error_rate is e=2/width and confidence is 1-d=1-1/2^depth. */
+void CM_sketch_info(const struct CM_sketch *cms, struct CM_sketch_info *info)
+{
+    //https://cs.stackexchange.com/questions/44803/what-is-the-correct-way-to-determine-the-width-and-depth-of-a-count-min-sketch
+    info->width=cms->opt.width;
+    info->depth=cms->opt.depth;
+    info->confidence = 1 - (1 / pow(2, info->depth));
+    info->error_rate=2 / (double) info->width;
+    info->n_replica=HASH_COUNT(cms->hash_bins);
+    info->n_element=CM_sketch_elements(cms);
+    return;
+}
+\ No newline at end of file
diff --git a/CRDT/cm_sketch.h b/CRDT/cm_sketch.h
new file mode 100644
index 0000000..c106c58
--- /dev/null
+++ b/CRDT/cm_sketch.h
@@ -0,0 +1,40 @@
+/*
+* A CRDT Count-Min Sketch implementation in C.
+* Cormode, Graham; S. Muthukrishnan (2005). "An Improved Data Stream Summary: The Count-Min Sketch and its Applications"
+* Author: [email protected]
+* Count-Min Sketch is a probabilistic data-structure that takes sub linear space to store the probable count, 
+* or frequency, of occurrences of elements added into the data-structure.
+*/
+
+#pragma once
+#include <uuid/uuid.h>
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+struct CM_sketch;
+struct CM_sketch *CM_sketch_new(uuid_t my_id);
+void CM_sketch_free(struct CM_sketch *cms);
+int CM_sketch_query(const struct CM_sketch *cms, const char *key, size_t len);
+int CM_sketch_add_n(struct CM_sketch *cms, const char *key, size_t len, int times);
+int CM_sketch_remove_n(struct CM_sketch *cms, const char *key, size_t len, int times);
+size_t CM_sketch_size(const struct CM_sketch *cms);
+void CM_sketch_serialize(const struct CM_sketch *cms, char **blob, size_t *blob_sz);
+struct CM_sketch *CM_sketch_deserialize(const char *blob, size_t blob_sz);
+void CM_sketch_merge_blob(struct CM_sketch *dst, const char *blob, size_t blob_sz);
+struct CM_sketch_info
+{
+    int width;
+    int depth;
+    double error_rate;
+    double confidence;
+    long long n_element;
+    long long n_replica;
+};
+void CM_sketch_info(const struct CM_sketch *cms, struct CM_sketch_info *info);
+int CM_sketch_elements(const struct CM_sketch *cms);
+int CM_sketch_max(const struct CM_sketch *cms);
+int CM_sketch_min(const struct CM_sketch *cms);
+#ifdef __cplusplus
+}
+#endif
+\ No newline at end of file
diff --git a/CRDT/crdt_gtest.cpp b/CRDT/crdt_base_gtest.cpp
index 3166d3c..a340184 100644
--- a/CRDT/crdt_gtest.cpp
+++ b/CRDT/crdt_base_gtest.cpp
@@ -2,557 +2,17 @@
 #include "pn_counter.h"
 #include "or_set.h"
 #include "or_map.h"
-#include "oc_token_bucket.h"
+
 #include "crdt_utils.h"
+#include "cm_sketch.h"
+#include "st_hyperloglog.h"
+#include "g_array.h"
 
 #include <gtest/gtest.h>
 #include <unistd.h> //usleep
 #include <uuid/uuid.h>
 #include <math.h>
 
-#define REPLICA_NUMBER	3
-void OC_token_bucket_sync(struct OC_token_bucket *list[], size_t n)
-{
-	char *blob=NULL;
-	size_t blob_sz=0;
-	
-	for(size_t i=0; i<n; i++)
-	{
-		OC_token_bucket_serialize(list[i], &blob, &blob_sz);
-		for(size_t j=0; j<n; j++)
-		{
-			if(j==i) continue;
-			OC_token_bucket_merge_blob(list[j], blob, blob_sz);
-		}
-		free(blob);
-		blob=NULL;
-	}	
-	return;
-}
-
-enum traffic_type 
-{
-	LIGHT_UNIFORM_TYPE,
-	LIGHT_TWO_EIGHT_TYPE,
-	HEAVY_TWO_EIGHT_TYPE,
-	HEAVY_UNIFORM_EXTREME_TYPE
-};
-
-long long get_request_tokens(int index, enum traffic_type type, long long step_us, long long CIR, long long CBS) 
-{
-	long long request_size;
-	long long standard = CIR * step_us / 1000000;
-	long long sd10 = floor((long double)standard * 0.1);
-	int eight_replica_num = floor(REPLICA_NUMBER * 0.8);
-	long long rand_sd10 = random() % sd10;
-	int scope_flag = FALSE;
-	if (random() % 2) scope_flag = TRUE;
-	switch (type) 
-	{
-		case LIGHT_UNIFORM_TYPE:
-			request_size = (long long)floor((long double)standard * 0.5);
-			if (scope_flag) {
-			request_size += rand_sd10;
-			} else {
-			request_size -= rand_sd10;
-			}
-			break;
-		case LIGHT_TWO_EIGHT_TYPE:
-			if (index < eight_replica_num && scope_flag) {
-			request_size = (long long)floor((long double)standard * 0.2) + rand_sd10;
-			} else if (index < eight_replica_num && !scope_flag) {
-			request_size = (long long)floor((long double)standard * 0.2) - rand_sd10;
-			} else if (index >= eight_replica_num && scope_flag) {
-			request_size = (long long)floor((long double)standard * 1.6) + rand_sd10;
-			} else {
-			request_size = (long long)floor((long double)standard * 1.6) - rand_sd10;
-			}
-			break;
-		case HEAVY_TWO_EIGHT_TYPE:
-			if (index < eight_replica_num && scope_flag) {
-			request_size = (long long)floor((long double)standard * 0.6) + rand_sd10;
-			} else if (index < eight_replica_num && !scope_flag) {
-			request_size = (long long)floor((long double)standard * 0.6) - rand_sd10;
-			} else if (index >= eight_replica_num && scope_flag) {
-			request_size = (long long)floor((long double)standard * 4.8) + rand_sd10;
-			} else {
-			request_size = (long long)floor((long double)standard * 4.8) - rand_sd10;
-			}
-			break;
-		case HEAVY_UNIFORM_EXTREME_TYPE:
-			request_size = (long long)floor((long double)standard * 2);
-			if (scope_flag) {
-			request_size += rand_sd10;
-			} else {
-			request_size -= rand_sd10;
-			}
-			break;
-		default:
-			break;
-	}
-	return request_size;
-}
-
-void traffic_distribution(traffic_type type)
-{
-	struct OC_token_bucket *buckets[REPLICA_NUMBER];
-	size_t i = 0, j = 0;
-	long long CIR = 1*1024*1024; // 1Mps
-	long long CBS = 2*1024*1024;
-	uuid_t uuid;
-	struct timeval start;
-	gettimeofday(&start, NULL);
-	for (i = 0; i < REPLICA_NUMBER; i++) 
-	{
-		uuid_generate(uuid);
-		buckets[i] = OC_token_bucket_new(uuid, start, CIR, CBS);
-	}
-	long long tokens = 0, flexible_tokens = 0;
-	long long consumed = 0, requested = 0, upper_limit = 0, refilled = 0;
-	long long mimic_duration_us = (long long)100*1000*1000;
-	long long step_us = 100;
-	struct timeval step, now;
-	memcpy(&now, &start, sizeof(now));
-	step.tv_sec = 0;
-	step.tv_usec = (suseconds_t)step_us;
-	for (i = 0; (long long)i < mimic_duration_us / step_us; i++) 
-	{
-		j = i % 3; // sequence selection
-		timeradd(&now, &step, &now);
-		tokens = get_request_tokens(j, type, step_us, CIR, CBS);
-		flexible_tokens = OC_token_bucket_control(buckets[j], now, OCTB_CMD_CONSUME_FLEXIBLE, tokens);
-		requested += tokens;
-		consumed += flexible_tokens;
-		if(i%100==0)
-		{
-			OC_token_bucket_sync(buckets, REPLICA_NUMBER);
-		}
-	}
-	upper_limit = CBS + CIR * timeval_delta_ms(start, now) / 1000;
-	refilled = OC_token_bucket_control(buckets[0], now, OCTB_CMD_READ_REFILLED, 0);
-	EXPECT_LE(consumed, requested);
-	double accuracy = (double)consumed / MIN(refilled, requested);
-	EXPECT_NEAR(accuracy, 1, 0.01);
-	printf("accuracy:%f, upper_limit:%lld, refilled:%lld, requested:%lld, consumed:%lld\n",
-			accuracy, upper_limit, refilled, requested, consumed);
-	for(i = 0; i < REPLICA_NUMBER; i++) 
-	{
-		OC_token_bucket_free(buckets[i]);
-	}
-}
-
-TEST(OCTokenBucket, TrafficTypeConsumer)
-{
-	traffic_distribution(LIGHT_UNIFORM_TYPE);
-	traffic_distribution(LIGHT_TWO_EIGHT_TYPE);
-	traffic_distribution(HEAVY_TWO_EIGHT_TYPE);
-	traffic_distribution(HEAVY_UNIFORM_EXTREME_TYPE);
-}
-
-TEST(OCTokenBucket, Basic)
-{
-	uuid_t uuid;
-	uuid_generate(uuid);
-
-	struct OC_token_bucket *bucket=NULL;
-	long long CIR=100;
-	long long CBS=200;
-	struct timeval now;
-	gettimeofday(&now, NULL);
-
-	bucket=OC_token_bucket_new(uuid, now, CIR, CBS);
-	long long tokens=0;
-
-	tokens=OC_token_bucket_control(bucket, now, OCTB_CMD_CONSUME_NORMAL, 140);
-	EXPECT_EQ(tokens, 140);
-	
-	tokens=OC_token_bucket_control(bucket, now, OCTB_CMD_CONSUME_NORMAL, 61);
-	EXPECT_EQ(tokens, 0);
-
-	tokens=OC_token_bucket_control(bucket, now, OCTB_CMD_CONSUME_NORMAL, 60);
-	EXPECT_EQ(tokens, 60);
-
-	now.tv_sec++;
-	tokens=OC_token_bucket_control(bucket, now, OCTB_CMD_CONSUME_NORMAL, 90);
-	EXPECT_EQ(tokens, 90);
-	
-	tokens=OC_token_bucket_control(bucket, now, OCTB_CMD_READ_AVAILABLE, 0);
-	EXPECT_GE(tokens, 10);
-//	printf("avail=%lld\n", tokens);
-	
-	tokens=OC_token_bucket_control(bucket, now, OCTB_CMD_CONSUME_NORMAL, 10);
-	EXPECT_EQ(tokens, 10);
-	OC_token_bucket_free(bucket);
-
-
-}
-TEST(OCTokenBucket, Boundary)
-{
-	uuid_t uuid;
-	uuid_generate(uuid);
-
-	struct timeval now;
-	gettimeofday(&now, NULL);
-	struct OC_token_bucket *bucket=NULL;
-	long long tokens=0, consumed=0;
-
-	//Zero CIR
-	bucket=OC_token_bucket_new(uuid, now, 0, 1000);
-	tokens=OC_token_bucket_control(bucket, now, OCTB_CMD_CONSUME_NORMAL, 1000);
-	EXPECT_EQ(tokens, 1000);
-
-	now.tv_sec++;
-	tokens=OC_token_bucket_control(bucket, now, OCTB_CMD_CONSUME_NORMAL, 1);
-	EXPECT_EQ(tokens, 0);
-	OC_token_bucket_free(bucket);
-
-	//Zero CBS
-	bucket=OC_token_bucket_new(uuid, now, 1000, 0);
-	tokens=OC_token_bucket_control(bucket, now, OCTB_CMD_CONSUME_NORMAL, 1);
-	EXPECT_EQ(tokens, 0);
-	OC_token_bucket_free(bucket);
-
-	//Infinite Tokens
-	bucket=OC_token_bucket_new(uuid, now, 0, 0);
-	tokens=OC_token_bucket_control(bucket, now, OCTB_CMD_CONSUME_NORMAL, 140);
-	consumed+=tokens;
-	EXPECT_EQ(tokens, 140);
-
-
-	tokens=OC_token_bucket_control(bucket, now, OCTB_CMD_CONSUME_NORMAL, 65535);
-	consumed+=tokens;
-	EXPECT_EQ(tokens, 65535);
-
-	tokens=OC_token_bucket_control(bucket, now, OCTB_CMD_READ_CONSUEMD, 0);
-	EXPECT_EQ(tokens, consumed);
-
-	tokens=OC_token_bucket_control(bucket, now, OCTB_CMD_READ_REFILLED, 0);
-	EXPECT_EQ(tokens, consumed);
-
-	now.tv_sec++;
-	OC_token_bucket_configure(bucket, now, 100, 500, 10);
-	
-	now.tv_sec+=500/100;
-	tokens=OC_token_bucket_control(bucket, now, OCTB_CMD_CONSUME_NORMAL, 500);
-	consumed+=tokens;
-	EXPECT_EQ(tokens, 500);
-
-	tokens=OC_token_bucket_control(bucket, now, OCTB_CMD_READ_CONSUEMD, 0);
-	EXPECT_EQ(tokens, consumed);
-
-	tokens=OC_token_bucket_control(bucket, now, OCTB_CMD_READ_REFILLED, 0);
-	EXPECT_EQ(tokens, consumed);
-
-	OC_token_bucket_free(bucket);
-}
-TEST(OCTokenBucket, Merge)
-{
-	struct OC_token_bucket *buckets[2];
-	uuid_t uuid;
-	long long CIR=100;
-	long long CBS=200;
-	struct timeval now;
-	gettimeofday(&now, NULL);
-
-	size_t i=0;
-	for(i=0; i<2; i++)
-	{
-		uuid_generate(uuid);
-		buckets[i]=OC_token_bucket_new(uuid, now, CIR, CBS);
-	}
-	long long tokens=0;
-	OC_token_bucket_sync(buckets, 2);
-	tokens=OC_token_bucket_control(buckets[0], now, OCTB_CMD_CONSUME_NORMAL, 130);
-	EXPECT_EQ(tokens, 130);
-	tokens=OC_token_bucket_control(buckets[1], now, OCTB_CMD_CONSUME_NORMAL, 30);
-	EXPECT_EQ(tokens, 30);
-
-	OC_token_bucket_sync(buckets, 2);
-	now.tv_sec++;
-	//each has 200-130-30+100/2=90 avaliable tokens
-	tokens=OC_token_bucket_control(buckets[0], now, OCTB_CMD_CONSUME_NORMAL, 91);
-	EXPECT_EQ(tokens, 0);
-	tokens=OC_token_bucket_control(buckets[1], now, OCTB_CMD_CONSUME_FLEXIBLE, 90);
-	EXPECT_EQ(tokens, 90);
-	OC_token_bucket_sync(buckets, 2);
-	
-	for(i=0; i<2; i++)
-	{
-		OC_token_bucket_free(buckets[i]);
-	}
-}
-double OC_token_bucket_test(size_t replica_num, long long mimic_duration_s, long long sync_interval_ms, int saturation_percent)
-{
-	struct OC_token_bucket *buckets[replica_num];
-	size_t i=0, j=0;
-	long long CIR=512*1024;
-	long long CBS=2*1024*1024;
-	uuid_t uuid;
-	struct timeval start;
-	gettimeofday(&start, NULL);
-	for(i=0; i<replica_num; i++)
-	{
-		uuid_generate(uuid);
-		buckets[i]=OC_token_bucket_new(uuid, start, CIR, CBS);
-	}
-	srandom(17);
-	long long tokens=0;
-	long long consumed=0, requested=0, upper_limit=0, refilled=0;
-	long long step_us=100, mimic_duration_us=mimic_duration_s*1000*1000;
-	long long sync_interval_us=sync_interval_ms*1000;
-	struct timeval step, now;
-	memcpy(&now, &start, sizeof(now));
-	step.tv_sec=0;
-	step.tv_usec=(suseconds_t)step_us;
-	for(i=0; (long long)i<mimic_duration_us/step_us; i++)
-	{
-		timeradd(&now, &step, &now);
-		j=random()%replica_num;
-		tokens=CIR*step_us/(1000*1000);
-		tokens=tokens*saturation_percent/100;
-		tokens=tokens/2+random()%tokens;
-		requested+=tokens;
-		consumed+=OC_token_bucket_control(buckets[j], now, OCTB_CMD_CONSUME_NORMAL, tokens);
-		if((step_us*i)%sync_interval_us==0)
-		{
-			OC_token_bucket_sync(buckets, replica_num);
-		}
-	}
-	upper_limit=CBS+CIR*timeval_delta_ms(start, now)/1000;
-	refilled=OC_token_bucket_control(buckets[0], now, OCTB_CMD_READ_REFILLED, 0);
-	EXPECT_NEAR((double)consumed/MIN(refilled, requested), 1, 0.03);
-//	EXPECT_LE(consumed, requested);
-	double accuracy=(double)consumed/MIN(upper_limit, requested);
-	double real_saturation=(double)requested/upper_limit;
-	EXPECT_NEAR(real_saturation, (double)saturation_percent/100, 0.1);
-	for(i=0; i<replica_num; i++)
-	{
-		OC_token_bucket_free(buckets[i]);
-	}
-	return accuracy;
-}
-TEST(OCTokenBucket, HeavyConsumer)
-{
-	double accuracy=0.0;
-	long long replica_num=0, test_duration_s=500, sync_interval_ms=0;
-
-	long long i=0, j=0;
-	for(i=8; i<9; i++)
-	{
-		replica_num=i+1;
-		for(j=0; j<10; j++)
-		{
-			sync_interval_ms=100*(j+1);
-			accuracy=OC_token_bucket_test(replica_num, test_duration_s, sync_interval_ms, 200);
-			printf("replica_num=%lld, sync_interval_ms=%lld, accuracy=%.4f\n", replica_num, sync_interval_ms, accuracy);
-			EXPECT_NEAR(accuracy, 1, 0.05);
-		}
-	}
-}
-TEST(OCTokenBucket, LightConsumer)
-{	
-	double accuracy=0.0;
-	long long replica_num=0, test_duration_s=100, sync_interval_ms=0;
-
-	long long i=0, j=0;
-	for(i=0; i<4; i++)
-	{
-		replica_num=i+1;
-		for(j=0; j<20; j++)
-		{
-			sync_interval_ms=100*(j+1);
-			accuracy=OC_token_bucket_test(replica_num, test_duration_s, sync_interval_ms, 90);
-			EXPECT_NEAR(accuracy, 1, 0.03);
-		}
-	}
-}
-TEST(OCTokenBucket, ConcurrentHeavyConsumer)
-{
-	struct OC_token_bucket *buckets[REPLICA_NUMBER];
-	size_t i=0, j=0;
-	long long CIR=100*1024*1024;
-	long long CBS=1000*1024*1024;
-	uuid_t uuid;
-	struct timeval start;
-	gettimeofday(&start, NULL);
-	for(i=0; i<REPLICA_NUMBER; i++)
-	{
-		uuid_generate(uuid);
-		buckets[i]=OC_token_bucket_new(uuid, start, CIR, CBS);
-	}
-	srandom(17);
-	long long tokens=0;
-	long long consumed=0, requested=0, upper_limit=0, refilled=0;
-	long long mimic_duration_us=(long long)100*1000*1000;
-	long long step_us=100;
-	struct timeval step, now;
-	memcpy(&now, &start, sizeof(now));
-	step.tv_sec=0;
-	step.tv_usec=(suseconds_t)step_us;
-	for(i=0; (long long)i<mimic_duration_us/step_us; i++)
-	{
-		timeradd(&now, &step, &now);		
-		tokens=CIR*step_us/(1000*1000);		
-		tokens=tokens/REPLICA_NUMBER+random()%tokens;
-		for(j=0; j<REPLICA_NUMBER; j++)
-		{
-			requested+=tokens;			
-			consumed+=OC_token_bucket_control(buckets[j], now, OCTB_CMD_CONSUME_FLEXIBLE, tokens);
-		}
-		if(i%100==0)
-		{
-			OC_token_bucket_sync(buckets, REPLICA_NUMBER);
-		}
-	}
-	upper_limit=CBS+CIR*timeval_delta_ms(start, now)/1000;
-	refilled=OC_token_bucket_control(buckets[0], now, OCTB_CMD_READ_REFILLED, 0);
-	EXPECT_NEAR((double)consumed/MIN(refilled, requested), 1, 0.01);
-	EXPECT_NEAR((double)refilled/upper_limit, 1, 0.01);
-	EXPECT_LE(consumed, requested);
-	double accuracy=(double)consumed/MIN(upper_limit, requested);
-	EXPECT_NEAR(accuracy, 1, 0.01);
-	for(i=0; i<REPLICA_NUMBER; i++)
-	{
-		OC_token_bucket_free(buckets[i]);
-	}
-}
-TEST(OCTokenBucket, Reconfigure)
-{
-	struct OC_token_bucket *buckets[REPLICA_NUMBER];
-	size_t i=0, j=0;
-	double accuracy=0.0;
-	long long CIR=100*1024*1024;
-	long long CBS=1000*1024*1024;
-	uuid_t uuid;
-	struct timeval start;
-	gettimeofday(&start, NULL);
-	for(i=0; i<REPLICA_NUMBER; i++)
-	{
-		uuid_generate(uuid);
-		buckets[i]=OC_token_bucket_new(uuid, start, CIR, CBS);
-	}
-	srandom(17);
-	long long tokens=0;
-	long long consumed=0, requested=0, upper_limit=0, refilled=0;
-	long long mimic_duration_us=(long long)1000*1000*1000;
-	long long step_us=100;
-	struct timeval step, now;
-	memcpy(&now, &start, sizeof(now));
-	step.tv_sec=0;
-	step.tv_usec=(suseconds_t)step_us;
-	for(i=0; (long long)i<mimic_duration_us/step_us; i++)
-	{
-		timeradd(&now, &step, &now);		
-		tokens=CIR*step_us/(1000*1000);		
-		tokens=tokens/REPLICA_NUMBER+random()%tokens;
-		for(j=0; j<REPLICA_NUMBER; j++)
-		{
-			requested+=tokens;			
-			consumed+=OC_token_bucket_control(buckets[j], now, OCTB_CMD_CONSUME_FLEXIBLE, tokens);
-		}
-		if(i%100==0)
-		{
-			OC_token_bucket_sync(buckets, REPLICA_NUMBER);
-		}
-	}
-	upper_limit=CBS+CIR*timeval_delta_ms(start, now)/1000;
-	refilled=OC_token_bucket_control(buckets[0], now, OCTB_CMD_READ_REFILLED, 0);
-	EXPECT_NEAR((double)consumed/MIN(refilled, requested), 1, 0.01);
-	EXPECT_NEAR((double)refilled/upper_limit, 1, 0.01);
-	EXPECT_LE(consumed, requested);
-	accuracy=(double)consumed/MIN(upper_limit, requested);
-	EXPECT_NEAR(accuracy, 1, 0.01);
-
-	memcpy(&start, &now, sizeof(start));
-	CIR*=2;
-	CBS*=5;
-	requested=0;
-	consumed=0;
-	OC_token_bucket_configure(buckets[0], start, CIR, CBS, 0);
-	for(i=0; (long long)i<mimic_duration_us/step_us; i++)
-	{
-		timeradd(&now, &step, &now);		
-		tokens=CIR*step_us/(1000*1000);		
-		tokens=tokens/REPLICA_NUMBER+random()%tokens;
-		for(j=0; j<REPLICA_NUMBER; j++)
-		{
-			requested+=tokens;			
-			consumed+=OC_token_bucket_control(buckets[j], now, OCTB_CMD_CONSUME_FLEXIBLE, tokens);
-		}
-		if(i%100==0)
-		{
-			OC_token_bucket_sync(buckets, REPLICA_NUMBER);
-		}
-	}
-	upper_limit=CIR*timeval_delta_ms(start, now)/1000;//not + CBS, becasue reconfiguration will not refill the bucket to full.
-	refilled=OC_token_bucket_control(buckets[0], now, OCTB_CMD_READ_REFILLED, 0)-refilled;
-	EXPECT_NEAR((double)consumed/MIN(refilled, requested), 1, 0.01);
-	EXPECT_NEAR((double)refilled/upper_limit, 1, 0.01);
-	EXPECT_LE(consumed, requested);
-	accuracy=(double)consumed/MIN(upper_limit, requested);
-	EXPECT_NEAR(accuracy, 1, 0.01);
-	
-	for(i=0; i<REPLICA_NUMBER; i++)
-	{
-		OC_token_bucket_free(buckets[i]);
-	}
-}
-TEST(OCTokenBucket, PartitionTolerance)
-{
-	struct OC_token_bucket *buckets[REPLICA_NUMBER];
-	size_t i=0, j=0;
-	long long CIR=1*1024*1024;
-	long long CBS=2*1024*1024;
-	uuid_t uuid;
-	struct timeval start;
-	gettimeofday(&start, NULL);
-	for(i=0; i<REPLICA_NUMBER; i++)
-	{
-		uuid_generate(uuid);
-		buckets[i]=OC_token_bucket_new(uuid, start, CIR, CBS);
-	}
-	srandom(17);
-	long long tokens=0;
-	long long consumed=0, requested=0, upper_limit=0, refilled=0;
-	long long mimic_duration_us=(long long)100*1000*1000;
-	long long step_us=100;
-	struct timeval step, now;
-	memcpy(&now, &start, sizeof(now));
-	step.tv_sec=0;
-	step.tv_usec=(suseconds_t)step_us;
-	for(i=0; (long long)i<mimic_duration_us/step_us; i++)
-	{
-		timeradd(&now, &step, &now);
-		j=random()%REPLICA_NUMBER;
-		tokens=CIR*step_us/(1000*1000);
-		tokens=tokens+random()%tokens;
-		for(j=0; j<REPLICA_NUMBER; j++)
-		{
-			requested+=tokens;			
-			consumed+=OC_token_bucket_control(buckets[j], now, OCTB_CMD_CONSUME_FLEXIBLE, tokens);
-		}
-		//network partition at time (t/3, t*2/3]
-		if((long long)i>mimic_duration_us/step_us/3 && (long long)i<= 2*mimic_duration_us/step_us/3)
-		{
-			continue;
-		}
-		//if(i%100==0)
-		{
-			OC_token_bucket_sync(buckets, REPLICA_NUMBER);
-		}
-	}
-	upper_limit=CBS+(CIR*timeval_delta_ms(start, now)/1000)*2/3 + (CIR*timeval_delta_ms(start, now)/1000)*REPLICA_NUMBER/3;
-	refilled=OC_token_bucket_control(buckets[0], now, OCTB_CMD_READ_REFILLED, 0);
-	EXPECT_NEAR((double)consumed/MIN(refilled, requested), 1, 0.01);
-	EXPECT_LE(consumed, requested);
-	double accuracy=(double)consumed/MIN(upper_limit, requested);
-	EXPECT_NEAR(accuracy, 1, 0.01);
-	for(i=0; i<REPLICA_NUMBER; i++)
-	{
-		OC_token_bucket_free(buckets[i]);
-	}
-
-}
 TEST(LWWRegister, Basic)
 {
 	struct LWW_register *reg=NULL;
@@ -845,7 +305,7 @@ TEST(PNCounter, Replica100)
 	}
 	EXPECT_EQ(success, test_replica_num);
 	size_t sz=0;
-	sz=PN_counter_size(pnc[0]);
+	sz=PN_counter_mem_size(pnc[0]);
 	EXPECT_EQ(sz, 8864);
 	for(i=0; i<test_replica_num; i++)
 	{
@@ -1690,7 +1150,654 @@ TEST(ORMap, Replica32)
 	}
 
 }
+// Single-replica smoke test: add, query, partially remove, then check the element total.
+TEST(CMSketch, Basic)
+{
+	const int n_key=10;
+	uuid_t replica_id;
+	uuid_generate(replica_id);
+	struct CM_sketch *sketch=CM_sketch_new(replica_id);
+	int ret=0;
+	int n_added=0;
+	int i=0;
+
+	// Key i is inserted i+1 times.
+	for(i=0; i<n_key; i++)
+	{
+		ret=CM_sketch_add_n(sketch, (char *)&i, sizeof(i), i+1);
+		n_added=n_added+i+1;
+	}
+	// Every key must report exactly the count that was added.
+	for(i=0; i<n_key; i++)
+	{
+		ret=CM_sketch_query(sketch, (char *)&i, sizeof(i));
+		EXPECT_EQ(ret, i+1);
+	}
+	// Take i occurrences of key i back out.
+	for(i=0; i<n_key; i++)
+	{
+		ret=CM_sketch_remove_n(sketch, (char *)&i, sizeof(i), i);
+		n_added=n_added-i;
+	}
+	// Queries after removal are exercised, but their results are not asserted.
+	for(i=0; i<n_key; i++)
+	{
+		ret=CM_sketch_query(sketch, (char *)&i, sizeof(i));
+		//EXPECT_EQ(ret, 1);
+	}
+
+	struct CM_sketch_info info;
+	CM_sketch_info(sketch, &info);
+	EXPECT_EQ(info.n_element, n_added);
+	printf("error_rate: %f confidence: %f\n", info.error_rate, info.confidence);
+	CM_sketch_free(sketch);
+}
+// Inserts 5000 distinct keys with large counts and checks that the share of
+// estimates within the sketch's reported error rate matches its confidence.
+TEST(CMSketch, I5K)
+{
+	uuid_t uuid;
+	uuid_generate(uuid);
+	struct CM_sketch *cms=CM_sketch_new(uuid);
+	long long n_item=5000, ret=0;
+	long long base=10000, total_add=0;
+	// Key i is added base+i times.
+	for(long long i=0; i<n_item; i++)
+	{
+		ret=CM_sketch_add_n(cms, (char *)&i, sizeof(i), base+i);
+		total_add+=(base+i);
+	}
+	struct CM_sketch_info info;
+	CM_sketch_info(cms, &info);
+	long long pass=0;
+	for(long long i=0; i<n_item; i++)
+	{
+		ret=CM_sketch_query(cms, (char *)&i, sizeof(i));
+		//The formal expectation is total_add*info.error_rate
+		if(abs(ret-(i+base))< (i+base)*info.error_rate)
+		{
+			pass++;
+		}
+	}
+	EXPECT_NEAR(pass, n_item, n_item*(1-info.confidence));
+	// Release the sketch so the test does not leak it.
+	CM_sketch_free(cms);
+}
+// Full-mesh synchronisation: each replica in turn is serialized once and its
+// blob is merged into every other replica before the blob is released.
+static void CMS_sync(struct CM_sketch *list[], size_t n)
+{
+	char *payload=NULL;
+	size_t payload_sz=0;
+	size_t src=0, dst=0;
+
+	for(src=0; src<n; src++)
+	{
+		CM_sketch_serialize(list[src], &payload, &payload_sz);
+		for(dst=0; dst<n; dst++)
+		{
+			if(dst!=src)
+			{
+				CM_sketch_merge_blob(list[dst], payload, payload_sz);
+			}
+		}
+		free(payload);
+		payload=NULL;
+	}
+}
+// Two replicas alternately count the same key; after one sync both must
+// report the total number of additions.
+TEST(CMSketch, Merge)
+{
+	size_t replica_number=2, round=10;
+	long long key=1234;
+	struct CM_sketch *replicas[replica_number];
+	uuid_t replica_id;
+	size_t idx=0;
+
+	for(idx=0; idx<replica_number; idx++)
+	{
+		uuid_generate(replica_id);
+		replicas[idx]=CM_sketch_new(replica_id);
+	}
+	// Round-robin the single-occurrence additions over the replicas.
+	for(idx=0; idx<round; idx++)
+	{
+		struct CM_sketch *target=replicas[idx%replica_number];
+		CM_sketch_add_n(target, (char*) &key, sizeof(key), 1);
+	}
+	CMS_sync(replicas, replica_number);
+
+	int ret=0;
+	for(idx=0; idx<replica_number; idx++)
+	{
+		ret=CM_sketch_query(replicas[idx], (char *)&key, sizeof(key));
+		EXPECT_EQ(ret, round);
+	}
+	for(idx=0; idx<replica_number; idx++)
+	{
+		CM_sketch_free(replicas[idx]);
+	}
+}
+// Synchronising twice must not double count: after two full syncs the share of
+// keys whose estimate stays under the error bound must reach the confidence.
+TEST(CMSketch, Idempotent)
+{
+	size_t replica_number=8, round=10000, i=0;
+	struct CM_sketch *replicas[replica_number];
+	uuid_t replica_id;
+
+	for(i=0; i<replica_number; i++)
+	{
+		uuid_generate(replica_id);
+		replicas[i]=CM_sketch_new(replica_id);
+	}
+
+	// Key i (the raw bytes of the size_t) is added i times on replica i%replica_number.
+	int n_added=0;
+	for(i=0; i<round; i++)
+	{
+		struct CM_sketch *target=replicas[i%replica_number];
+		CM_sketch_add_n(target, (char*) &i, sizeof(i), i);
+		n_added+=i;
+	}
+
+	// The second sync replays the same state on purpose.
+	CMS_sync(replicas, replica_number);
+	CMS_sync(replicas, replica_number);
+
+	struct CM_sketch_info info;
+	CM_sketch_info(replicas[0], &info);
+
+	int ret=0;
+	size_t success=0;
+	for(i=0; i<round; i++)
+	{
+		// Query a different replica than the one that received the additions.
+		ret=CM_sketch_query(replicas[(i+1)%replica_number], (char*) &i, sizeof(i));
+		double upper_bound=(double)i+info.error_rate*n_added;
+		success+=((double)ret<upper_bound)?1:0;
+	}
+	EXPECT_GE((double)success/round, info.confidence);
+
+	for(i=0; i<replica_number; i++)
+	{
+		CM_sketch_free(replicas[i]);
+	}
+}
+// Adds actual_count consecutive integer keys (starting from a random value) to
+// a HyperLogLog created with a time window of 0 and returns its estimate.
+long long st_hll_test(unsigned char precision, int actual_count)
+{
+	int count=0;
+	int key=random();
+	// The timestamp is passed by value to the HLL calls, so it must be
+	// initialised even though no time window is configured here.
+	struct timeval nouse={0, 0};
+	struct ST_hyperloglog *h=ST_hyperloglog_new(precision, 0, nouse);
+	for(int i=0; i<actual_count; i++)
+	{
+		key++;
+		ST_hyperloglog_add(h, (const char *)&key, sizeof(key), nouse);
+	}
+	count=ST_hyperloglog_count(h);
+	ST_hyperloglog_free(h);
+	return count;
+}
+
+// Estimates 10000 distinct keys at precision 6 and then 9; each estimate must
+// be within the error reported for that precision.
+TEST(STHyperLogLog, Basic)
+{
+	const unsigned char precisions[]={6, 9};
+	const size_t n_precision=sizeof(precisions)/sizeof(precisions[0]);
+	double error;
+	long long est_count=0;
+	long long count=10000;
+
+	for(size_t k=0; k<n_precision; k++)
+	{
+		unsigned char precision=precisions[k];
+		est_count=st_hll_test(precision, count);
+		error=ST_hyperloglog_error_for_precision(precision);
+		EXPECT_NEAR(est_count, count, error*count);
+	}
+}
+// One HyperLogLog test scenario together with its measured result.
+struct st_hll_case
+{
+	unsigned char precision;	// precision passed to ST_hyperloglog_new()
+	int time_window_s;			// time window in seconds; 0 selects the branch that adds all keys at one timestamp
+	int ideal_count;			// number of distinct keys expected (per window when a window is set)
+	int est_count;				// estimate returned by st_hll_test_sliding_window(), filled in by the tests
+	int n_replica;				// number of HyperLogLog instances the keys are spread over
+};
+// Prints one table row per case and returns how many cases have a relative
+// error no larger than max(0.1, theoretical error for the case's precision).
+int st_hll_case_print(const struct st_hll_case *st_case, int n_case)
+{
+	int n_pass=0;
+
+	printf("prcs\twin\tideal\test\test_err\treal_err\n");
+	for(int idx=0; idx<n_case; idx++)
+	{
+		const struct st_hll_case *c=&st_case[idx];
+		double est_error=ST_hyperloglog_error_for_precision(c->precision);
+		double real_error=(double)abs(c->est_count-c->ideal_count)/c->ideal_count;
+
+		printf("%d\t%d\t%d\t%d\t%f\t%f\n", c->precision,
+										c->time_window_s,
+										c->ideal_count,
+										c->est_count,
+										est_error,
+										real_error);
+		if(real_error <= MAX(0.1, est_error))
+		{
+			n_pass++;
+		}
+	}
+	return n_pass;
+}
+// Runs one scenario: spreads distinct keys at random over mycase->n_replica
+// HyperLogLog instances, merges replicas 1..n-1 into replica 0 through
+// serialized blobs and returns replica 0's estimate.
+// With a time window, keys are added at a steady simulated rate of
+// ideal_count per window for ten windows; without one, ideal_count keys are
+// added at a single timestamp.
+int st_hll_test_sliding_window(const struct st_hll_case *mycase)
+{
+	unsigned char precision=mycase->precision;
+	int n_replica=mycase->n_replica;
+	int actual_window_count=mycase->ideal_count;
+	int time_window_s=mycase->time_window_s;
+	int count=0, add_per_step=0;
+
+	struct timeval start, now;
+	struct timeval step={0, 0};
+	gettimeofday(&start, NULL);
+	memcpy(&now, &start, sizeof(now));
+	struct ST_hyperloglog *h[n_replica];
+	for(int i=0; i<n_replica; i++)
+	{
+		h[i]=ST_hyperloglog_new(precision, time_window_s, start);
+	}
+
+	int key=1319823, j=0;
+	size_t n_add=0;
+	if(time_window_s>0)
+	{
+		double per_second_count=(double)actual_window_count/time_window_s;
+		if(per_second_count>1000*1000)
+		{
+			// More than one key per microsecond: advance 1us and add several keys per step.
+			step.tv_sec=0;
+			step.tv_usec=1;
+			add_per_step=per_second_count/1000/1000;
+		}
+		else
+		{
+			// One key per step. Below one key per second the interval exceeds a
+			// second, so split it into seconds and microseconds: timeradd()
+			// only renormalises operands whose tv_usec is below 1000000.
+			long long step_us=(long long)(1000*1000/per_second_count);
+			step.tv_sec=step_us/(1000*1000);
+			step.tv_usec=(suseconds_t)(step_us%(1000*1000));
+			add_per_step=1;
+		}
+		while(now.tv_sec-start.tv_sec<time_window_s*10)
+		{
+			timeradd(&now, &step, &now);
+			for(int i=0; i<add_per_step; i++)
+			{
+				j=random()%n_replica;
+				key++;
+				ST_hyperloglog_add(h[j], (const char *)&key, sizeof(key), now);
+				n_add++;
+			}
+		}
+	}
+	else
+	{
+		for(int i=0; i<actual_window_count; i++)
+		{
+			j=random()%n_replica;
+			key++;
+			ST_hyperloglog_add(h[j], (const char *)&key, sizeof(key), now);
+			n_add++;
+		}
+	}
+	// Fold every other replica into replica 0 via its serialized form.
+	char *blob=NULL;
+	size_t blob_sz=0;
+	for(int i=1; i<n_replica; i++)
+	{
+		ST_hyperloglog_serialize(h[i], &blob, &blob_sz);
+		ST_hyperloglog_merge_blob(h[0], blob, blob_sz);
+		free(blob);
+		blob=NULL;
+	}
+	count=ST_hyperloglog_count(h[0]);
 
+	for(int i=0; i<n_replica; i++)
+	{
+		ST_hyperloglog_free(h[i]);
+	}
+	return count;
+}
+// No time window: 10000 keys on one replica for each precision in [6, 18].
+TEST(STHyperLogLog, NoSliding)
+{
+	struct st_hll_case st_case[128];
+	int i=0;
+	for(int precision=6; precision<19; precision++, i++)
+	{
+		struct st_hll_case *c=&st_case[i];
+		c->precision=precision;
+		c->time_window_s=0;
+		c->ideal_count=100*100;
+		c->n_replica=1;
+		c->est_count=st_hll_test_sliding_window(c);
+	}
+	int success=st_hll_case_print(st_case, i);
+	//allow 2 case failed.
+	EXPECT_GE(success+2, i);
+}
+// 5-second window, 70000 keys per window, one case for every supported
+// precision; up to three cases may miss the tolerance.
+TEST(STHyperLogLog, VariousPrecision)
+{
+	int n_case=HLL_MAX_PRECISION-HLL_MIN_PRECISION+1;
+	struct st_hll_case st_case[n_case];
+	for(int idx=0; idx<n_case; idx++)
+	{
+		struct st_hll_case *c=&st_case[idx];
+		c->precision=HLL_MIN_PRECISION+idx;
+		c->time_window_s=5;
+		c->ideal_count=70000;
+		c->n_replica=1;
+		c->est_count=st_hll_test_sliding_window(c);
+	}
+	int success=st_hll_case_print(st_case, n_case);
+	EXPECT_GE(success+3, n_case);
+}
+// Precision 15 with a 300-second window; the per-window count doubles from
+// case to case, starting at 200. Up to three cases may miss the tolerance.
+TEST(STHyperLogLog, VariousCount)
+{
+	int n_case=15;
+	struct st_hll_case st_case[n_case];
+	for(int idx=0; idx<n_case; idx++)
+	{
+		struct st_hll_case *c=&st_case[idx];
+		c->precision=15;
+		c->time_window_s=300;
+		c->ideal_count=100*(2<<idx);
+		c->n_replica=1;
+		c->est_count=st_hll_test_sliding_window(c);
+	}
+	int success=st_hll_case_print(st_case, n_case);
+	EXPECT_GE(success+3, n_case);
+}
+// Precision 15 and 10000 keys per window, with window lengths of 5*(k*k+1)
+// seconds for k in [0, 9]; every case must be within tolerance.
+TEST(STHyperLogLog, VariousWindow)
+{
+	int n_case=10;
+	struct st_hll_case st_case[n_case];
+	for(int idx=0; idx<n_case; idx++)
+	{
+		struct st_hll_case *c=&st_case[idx];
+		c->precision=15;
+		c->time_window_s=5*(idx*idx+1);
+		c->ideal_count=10000;
+		c->n_replica=1;
+		c->est_count=st_hll_test_sliding_window(c);
+	}
+	int success=st_hll_case_print(st_case, n_case);
+	EXPECT_EQ(success, n_case);
+}
+// Same window sweep as VariousWindow with 70000 keys per window, run first at
+// precision 15 (printed only) and then at precision 9 (asserted).
+TEST(STHyperLogLog, VariousWindow70k)
+{
+	int n_case=10;
+	struct st_hll_case st_case[n_case];
+	//70000 using linear estimation, which is the worst case for precision 15.
+	//Much better at precision 9.
+	const unsigned char precisions[2]={15, 9};
+	int success=0;
+	for(int pass=0; pass<2; pass++)
+	{
+		for(int idx=0; idx<n_case; idx++)
+		{
+			struct st_hll_case *c=&st_case[idx];
+			c->precision=precisions[pass];
+			c->time_window_s=5*(idx*idx+1);
+			c->ideal_count=70000;
+			c->n_replica=1;
+			c->est_count=st_hll_test_sliding_window(c);
+		}
+		// Only the result of the last pass (precision 9) reaches the assertion below.
+		success=st_hll_case_print(st_case, n_case);
+	}
+	EXPECT_EQ(success, n_case);
+}
+// Hand-picked scenarios; the assertion expects exactly one of them to miss
+// the tolerance.
+TEST(STHyperLogLog, Debug)
+{
+	struct st_hll_case st_case[64];
+	// Columns: precision, time window in seconds, ideal count.
+	const int plan[][3]=
+	{
+		{15, 0, 70000},
+		{15, 300, 70000},
+		{14, 0, 40000},
+		{14, 300, 40000},
+		{15, 300, 10000},
+	};
+	const int n_plan=sizeof(plan)/sizeof(plan[0]);
+	int i=0;
+
+	for(i=0; i<n_plan; i++)
+	{
+		st_case[i].precision=plan[i][0];
+		st_case[i].time_window_s=plan[i][1];
+		st_case[i].ideal_count=plan[i][2];
+		st_case[i].n_replica=1;
+		st_case[i].est_count=st_hll_test_sliding_window(st_case+i);
+	}
+
+	int success=st_hll_case_print(st_case, i);
+	EXPECT_EQ(success+1, i);
+}
+// Two replicas share 1000 keys per 1-second window at precision 9; the merged
+// estimate must be within the error reported for that precision.
+TEST(STHyperLogLog, Replicas)
+{
+	struct st_hll_case st_case[128];
+	const int i=0;
+	struct st_hll_case *c=&st_case[i];
+
+	c->precision=9;
+	c->time_window_s=1;
+	c->ideal_count=1000;
+	c->n_replica=2;
+	c->est_count=st_hll_test_sliding_window(c);
+
+	double est_error=ST_hyperloglog_error_for_precision(st_case[i].precision);
+	EXPECT_NEAR(st_case[i].est_count, st_case[i].ideal_count, est_error*st_case[i].ideal_count);
+}
+// The predicted serialized size must match the real blob size, both for a
+// fresh instance and for the instance rebuilt from that blob.
+TEST(STHyperLogLog, Serialize)
+{
+	struct timeval start;
+	gettimeofday(&start, NULL);
+
+	char *blob=NULL;
+	size_t blob_sz=0;
+	struct ST_hyperloglog *source=ST_hyperloglog_new(9, 5, start);
+	size_t sz=ST_hyperloglog_serialized_size(source);
+	ST_hyperloglog_serialize(source, &blob, &blob_sz);
+	EXPECT_EQ(sz, blob_sz);
+
+	struct ST_hyperloglog *restored=ST_hyperloglog_deserialize(blob, blob_sz);
+	free(blob);
+	sz=ST_hyperloglog_serialized_size(restored);
+	EXPECT_EQ(sz, blob_sz);
+
+	ST_hyperloglog_free(source);
+	ST_hyperloglog_free(restored);
+}
+// Pairwise synchronisation: every replica receives a freshly serialized blob
+// from each of the other replicas, one source at a time.
+void g_array_sync(struct g_array **replicas, size_t n_replica)
+{
+	char *payload=NULL;
+	size_t payload_sz=0;
+	size_t dst=0, src=0;
+
+	for(dst=0; dst<n_replica; dst++)
+	{
+		for(src=0; src<n_replica; src++)
+		{
+			if(src!=dst)
+			{
+				// Serialize at merge time so the blob carries the source's current state.
+				g_array_serialize(replicas[src], &payload, &payload_sz);
+				g_array_merge_blob(replicas[dst], payload, payload_sz);
+				free(payload);
+			}
+		}
+	}
+}
+// Two replicas count a steady stream of distinct keys, are merged and checked,
+// then replica 0 is reconfigured (precision 6 -> 9, window 10s -> 20s) and the
+// same stream/merge/check cycle is repeated with the new parameters.
+TEST(STHyperLogLog, Reconfigure)
+{
+	struct timeval start, step, now;
+	gettimeofday(&start, NULL);
+	memcpy(&now, &start, sizeof(now));
+	int n_replica=2;
+	struct ST_hyperloglog *h[n_replica];
+	int time_window_s=10;
+	unsigned char precision=6;
+	for(int i=0; i<n_replica; i++)
+	{
+		h[i]=ST_hyperloglog_new(precision, time_window_s, start);
+	}
+	int key=1319823, j=0;
+	int n_add=0;
+	int add_per_step=100;
+	// Simulated clock advances 1ms per step with 100 keys per step.
+	step.tv_sec=0;
+	step.tv_usec=1000;
+	int item_per_second=add_per_step*1000*1000/step.tv_usec;
+	// Phase 1: feed keys for five windows of simulated time, each key to a random replica.
+	while(now.tv_sec-start.tv_sec<time_window_s*5)
+	{
+		timeradd(&now, &step, &now);
+		for(int i=0; i<add_per_step; i++)
+		{
+			j=random()%n_replica;
+			key++;
+			ST_hyperloglog_add(h[j], (const char *)&key, sizeof(key), now);
+			n_add++;
+		}
+	}
+	// Merge every replica into every other one (this inner j shadows the outer j).
+	for(int i=0; i<n_replica; i++)
+	{
+		for(int j=0; j<n_replica; j++)
+		{
+			if(i==j) continue;
+			ST_hyperloglog_merge(h[i], h[j]);
+		}
+	}
+	double hll_count=0, error=0;
+	hll_count=ST_hyperloglog_count(h[0]);
+	error=ST_hyperloglog_error_for_precision(precision);
+	// The estimate should cover one window's worth of keys, not the whole run.
+	EXPECT_NEAR(hll_count, item_per_second*time_window_s, error*item_per_second*time_window_s);
+
+
+	// Phase 2: reconfigure replica 0 only, then merge.
+	// NOTE(review): presumably the merge propagates the new configuration to the other replica -- confirm in st_hyperloglog.c.
+	time_window_s=20;
+	precision=9;
+	ST_hyperloglog_configure(h[0], precision, time_window_s, now);
+	for(int i=0; i<n_replica; i++)
+	{
+		for(int j=0; j<n_replica; j++)
+		{
+			if(i==j) continue;
+			ST_hyperloglog_merge(h[i], h[j]);
+		}
+	}
+	// Restart the simulated run from the reconfiguration time.
+	memcpy(&start, &now, sizeof(start));
+	n_add=0;
+	while(now.tv_sec-start.tv_sec<time_window_s*5)
+	{
+		timeradd(&now, &step, &now);
+		for(int i=0; i<add_per_step; i++)
+		{
+			j=random()%n_replica;
+			key++;
+			ST_hyperloglog_add(h[j], (const char *)&key, sizeof(key), now);
+			n_add++;
+		}
+	}
+	for(int i=0; i<n_replica; i++)
+	{
+		for(int j=0; j<n_replica; j++)
+		{
+			if(i==j) continue;
+			ST_hyperloglog_merge(h[i], h[j]);
+		}
+	}
+	hll_count=ST_hyperloglog_count(h[0]);
+	error=ST_hyperloglog_error_for_precision(precision);
+	// Same expectation as phase 1, now with the new window and precision.
+	EXPECT_NEAR(hll_count, item_per_second*time_window_s, error*item_per_second*time_window_s);
+
+	for(int i=0; i<n_replica; i++)
+	{
+		ST_hyperloglog_free(h[i]);
+	}
+}
+// Four replicas of a 1024-slot array: random replicas increment every slot,
+// replicas are synchronised periodically, and after the final syncs every slot
+// must hold the sum of all increments. A reset on one replica must then be
+// visible on all replicas after a sync.
+TEST(GArray, Basic)
+{
+	uuid_t uuid;
+	size_t n_replica=4;
+	int array_sz=1024;
+	struct g_array *a[n_replica];
+	for(size_t i=0; i<n_replica; i++)
+	{
+		uuid_generate(uuid);
+		a[i]=g_array_new(uuid, array_sz);
+	}
+	int r=0, round=100;
+	long long increment=77;
+	// Each round picks one replica at random and adds `increment` to all of its slots.
+	for(int i=0; i<round; i++)
+	{
+		r=random()%n_replica;
+		for(int j=0; j<array_sz; j++)
+		{
+			g_array_incrby(a[r], j, increment);
+		}
+		// Synchronise only every 7th round so replicas diverge in between.
+		if(i%7==0)
+		{
+			g_array_sync(a, n_replica);
+		}
+	}
+	g_array_sync(a, n_replica);
+	g_array_sync(a, n_replica);
+	
+	// Every slot, read from a random replica, must equal round*increment.
+	long long value=0, success=0;
+	for(int i=0; i<array_sz; i++)
+	{
+		r=random()%n_replica;
+		value=g_array_get(a[r], i);
+		if(value == (long long)round*increment) success++;
+	}
+	EXPECT_EQ(success, array_sz);
+	// Reset on replica 0 takes effect locally at once ...
+	g_array_reset(a[0]);
+	value=g_array_get(a[0], 0);
+	EXPECT_EQ(value, 0);
+	// ... and on a randomly chosen replica after one sync.
+	g_array_sync(a, n_replica);
+	r=random()%n_replica;
+	value=g_array_get(a[r], 0);
+	EXPECT_EQ(value, 0);
+
+	for(size_t i=0; i<n_replica; i++)
+	{
+		g_array_free(a[i]);
+	}
+}
+// The predicted serialized size must match the real blob size, both for a
+// synchronised replica and for the array rebuilt from its blob.
+TEST(GArray, Serialize)
+{
+	uuid_t replica_id;
+	int n_replica=4;
+	int array_sz=1024;
+	struct g_array *a[n_replica], *a2=NULL;
+	int idx=0;
+
+	for(idx=0; idx<n_replica; idx++)
+	{
+		uuid_generate(replica_id);
+		a[idx]=g_array_new(replica_id, array_sz);
+		g_array_incrby(a[idx], 0, 30);
+	}
+	g_array_sync(a, n_replica);
+
+	char *blob=NULL;
+	size_t blob_sz=0;
+	size_t sz=g_array_serialized_size(a[0]);
+	g_array_serialize(a[0], &blob, &blob_sz);
+	EXPECT_EQ(sz, blob_sz);
+
+	a2=g_array_deserialize(blob, blob_sz);
+	sz=g_array_serialized_size(a2);
+	EXPECT_EQ(sz, blob_sz);
+	free(blob);
+
+	for(idx=0; idx<n_replica; idx++)
+	{
+		g_array_free(a[idx]);
+	}
+	g_array_free(a2);
+}
+// Replicas created with different sizes (1024 and 2048 slots): after a sync,
+// slot 0 holds both increments and replica 0 accepts an increment at the last
+// index of the larger size.
+TEST(GArray, Merge)
+{
+	uuid_t replica_id;
+	int n_replica=2;
+	int array_sz=1024;
+	struct g_array *a[n_replica];
+	int idx=0;
+
+	for(idx=0; idx<n_replica; idx++)
+	{
+		uuid_generate(replica_id);
+		a[idx]=g_array_new(replica_id, array_sz*(idx+1));
+		g_array_incrby(a[idx], 0, 30);
+	}
+	g_array_sync(a, n_replica);
+
+	long long value=g_array_get(a[0], 0);
+	EXPECT_EQ(value, 30*n_replica);
+	value=g_array_incrby(a[0], array_sz*n_replica-1, 1);
+	EXPECT_EQ(value, 1);
+
+	for(idx=0; idx<n_replica; idx++)
+	{
+		g_array_free(a[idx]);
+	}
+}
 int main(int argc, char ** argv)
 {
 	int ret=0;
diff --git a/CRDT/crdt_tb_gtest.cpp b/CRDT/crdt_tb_gtest.cpp
new file mode 100644
index 0000000..16bee08
--- /dev/null
+++ b/CRDT/crdt_tb_gtest.cpp
@@ -0,0 +1,1231 @@
+#include "oc_token_bucket.h"
+#include "fair_token_bucket.h"
+#include "bulk_token_bucket.h"
+
+#include "crdt_utils.h"
+
+#include <gtest/gtest.h>
+#include <unistd.h> //usleep
+#include <uuid/uuid.h>
+#include <math.h>
+
+
+// Full-mesh state exchange between n OC token bucket replicas: each replica is
+// serialized once and that blob is merged into every other replica, after
+// which the blob is released with free().
+void OC_token_bucket_sync(struct OC_token_bucket *list[], size_t n)
+{
+	char *payload=NULL;
+	size_t payload_sz=0;
+	size_t src=0;
+
+	while(src<n)
+	{
+		OC_token_bucket_serialize(list[src], &payload, &payload_sz);
+		for(size_t dst=0; dst<n; dst++)
+		{
+			// A replica never merges its own snapshot.
+			if(dst!=src)
+			{
+				OC_token_bucket_merge_blob(list[dst], payload, payload_sz);
+			}
+		}
+		free(payload);
+		payload=NULL;
+		src++;
+	}
+}
+
+// Traffic profiles accepted by get_request_tokens(). The multipliers below are
+// relative to the per-step token budget CIR*step_us/1000000.
+enum traffic_type 
+{
+	LIGHT_UNIFORM_TYPE,	// every replica asks for about 0.5x
+	LIGHT_TWO_EIGHT_TYPE,	// low-index replicas about 0.2x, the rest about 1.6x
+	HEAVY_TWO_EIGHT_TYPE,	// low-index replicas about 0.6x, the rest about 4.8x
+	HEAVY_UNIFORM_EXTREME_TYPE	// every replica asks for about 2x
+};
+// Replica count shared by the OC token bucket simulations in this file.
+const int REPLICA_NUMBER=3;
+// Returns the number of tokens replica `index` requests in one simulation step
+// for the given traffic profile.
+//
+//   index    replica index; indices below floor(REPLICA_NUMBER*0.8) are the
+//            "light" group of the TWO_EIGHT profiles
+//   type     traffic profile selecting the multiplier applied to the budget
+//   step_us  simulated step length in microseconds
+//   CIR      configured rate in tokens per second
+//   CBS      unused; kept so existing callers compile unchanged
+//
+// The result is floor(budget*multiplier) plus or minus a random jitter below
+// 10% of the budget, where budget = CIR*step_us/1000000. An unknown profile
+// yields 0. Exactly two random() draws are made per call (one when the jitter
+// range is empty), in the same order as before, so seeded runs are unchanged.
+long long get_request_tokens(int index, enum traffic_type type, long long step_us, long long CIR, long long CBS) 
+{
+	(void)CBS;
+	long long standard = CIR * step_us / 1000000;
+	long long sd10 = floor((long double)standard * 0.1);
+	int eight_replica_num = floor(REPLICA_NUMBER * 0.8);
+	// A budget below 10 tokens gives sd10 == 0; `random() % 0` is undefined
+	// behaviour, so fall back to no jitter in that case.
+	long long rand_sd10 = (sd10 > 0) ? random() % sd10 : 0;
+	int add_jitter = (random() % 2) != 0;
+	int light_group = index < eight_replica_num;
+	long double factor = 0;
+
+	switch (type) 
+	{
+		case LIGHT_UNIFORM_TYPE:
+			factor = 0.5;
+			break;
+		case LIGHT_TWO_EIGHT_TYPE:
+			factor = light_group ? 0.2 : 1.6;
+			break;
+		case HEAVY_TWO_EIGHT_TYPE:
+			factor = light_group ? 0.6 : 4.8;
+			break;
+		case HEAVY_UNIFORM_EXTREME_TYPE:
+			factor = 2;
+			break;
+		default:
+			return 0;
+	}
+
+	long long request_size = (long long)floor((long double)standard * factor);
+	if (add_jitter) {
+		request_size += rand_sd10;
+	} else {
+		request_size -= rand_sd10;
+	}
+	return request_size;
+}
+
+// Simulates 100 s of traffic (100 us steps) against REPLICA_NUMBER OC token
+// bucket replicas using the given traffic profile. Replicas are served
+// round-robin, one per step, and synced every 100 steps. The run expects
+// consumed <= requested and consumed/MIN(refilled, requested) within 1% of 1,
+// then prints a summary line.
+void traffic_distribution(traffic_type type)
+{
+	struct OC_token_bucket *buckets[REPLICA_NUMBER];
+	size_t i = 0, j = 0;
+	long long CIR = 1*1024*1024;
+	long long CBS = 2*1024*1024;
+	uuid_t uuid;
+	struct timeval start;
+	gettimeofday(&start, NULL);
+	for (i = 0; i < REPLICA_NUMBER; i++) 
+	{
+		uuid_generate(uuid);
+		buckets[i] = OC_token_bucket_new(uuid, start, CIR, CBS);
+	}
+	long long tokens = 0, flexible_tokens = 0;
+	long long consumed = 0, requested = 0, upper_limit = 0, refilled = 0;
+	long long mimic_duration_us = (long long)100*1000*1000;
+	long long step_us = 100;
+	struct timeval step, now;
+	memcpy(&now, &start, sizeof(now));
+	step.tv_sec = 0;
+	step.tv_usec = (suseconds_t)step_us;
+	for (i = 0; (long long)i < mimic_duration_us / step_us; i++) 
+	{
+		// Round-robin over the replicas. Tied to REPLICA_NUMBER rather than a
+		// literal 3 so the index always matches the size of buckets[].
+		j = i % REPLICA_NUMBER;
+		timeradd(&now, &step, &now);
+		tokens = get_request_tokens(j, type, step_us, CIR, CBS);
+		flexible_tokens = OC_token_bucket_consume(buckets[j], now, TB_CONSUME_FLEXIBLE, tokens);
+		requested += tokens;
+		consumed += flexible_tokens;
+		if(i%100==0)
+		{
+			OC_token_bucket_sync(buckets, REPLICA_NUMBER);
+		}
+	}
+	// Theoretical ceiling: initial burst plus refill over the simulated span.
+	// Only reported in the summary line, not asserted.
+	upper_limit = CBS + CIR * timeval_delta_ms(start, now) / 1000;
+	struct OC_token_bucket_info info;
+	OC_token_bucket_info(buckets[0], now, &info);
+	refilled=info.refilled;
+	EXPECT_LE(consumed, requested);
+	double accuracy = (double)consumed / MIN(refilled, requested);
+	EXPECT_NEAR(accuracy, 1, 0.01);
+	printf("accuracy:%f, upper_limit:%lld, refilled:%lld, requested:%lld, consumed:%lld\n",
+			accuracy, upper_limit, info.refilled, requested, consumed);
+	for(i = 0; i < REPLICA_NUMBER; i++) 
+	{
+		OC_token_bucket_free(buckets[i]);
+	}
+}
+
+// Runs the traffic_distribution() simulation once for each traffic profile,
+// in enum declaration order.
+TEST(OCTokenBucket, TrafficTypeConsumer)
+{
+	const traffic_type profiles[]={
+		LIGHT_UNIFORM_TYPE,
+		LIGHT_TWO_EIGHT_TYPE,
+		HEAVY_TWO_EIGHT_TYPE,
+		HEAVY_UNIFORM_EXTREME_TYPE
+	};
+	for(size_t k=0; k<sizeof(profiles)/sizeof(profiles[0]); k++)
+	{
+		traffic_distribution(profiles[k]);
+	}
+}
+
+// Single-replica sanity check with CIR=100 and CBS=200. The expectations are:
+// the bucket starts with CBS tokens, a TB_CONSUME_NORMAL request larger than
+// what is left returns 0, and one second later CIR more tokens are available.
+TEST(OCTokenBucket, Basic)
+{
+	uuid_t uuid;
+	uuid_generate(uuid);
+
+	struct OC_token_bucket *bucket=NULL;
+	long long CIR=100;
+	long long CBS=200;
+	struct timeval now;
+	gettimeofday(&now, NULL);
+
+	bucket=OC_token_bucket_new(uuid, now, CIR, CBS);
+	long long tokens=0;
+
+	tokens=OC_token_bucket_consume(bucket, now, TB_CONSUME_NORMAL, 140);
+	EXPECT_EQ(tokens, 140);
+	
+	// 60 tokens remain: 61 must be refused outright, 60 must succeed.
+	tokens=OC_token_bucket_consume(bucket, now, TB_CONSUME_NORMAL, 61);
+	EXPECT_EQ(tokens, 0);
+
+	tokens=OC_token_bucket_consume(bucket, now, TB_CONSUME_NORMAL, 60);
+	EXPECT_EQ(tokens, 60);
+
+	// One simulated second later the bucket should hold CIR=100 tokens again.
+	now.tv_sec++;
+	tokens=OC_token_bucket_consume(bucket, now, TB_CONSUME_NORMAL, 90);
+	EXPECT_EQ(tokens, 90);
+
+	struct OC_token_bucket_info info;
+	OC_token_bucket_info(bucket, now, &info);
+	EXPECT_GE(info.available, 10);
+//	printf("avail=%lld\n", tokens);
+	
+	tokens=OC_token_bucket_consume(bucket, now, TB_CONSUME_NORMAL, 10);
+	EXPECT_EQ(tokens, 10);
+	OC_token_bucket_free(bucket);
+}
+// Round-trips a synced OC token bucket through serialize/deserialize and
+// checks that OC_token_bucket_serialized_size() matches the blob size for both
+// the source bucket and the bucket rebuilt from the blob.
+TEST(OCTokenBucket, Serialize)
+{
+	uuid_t uuid;
+	int n_replica=4;
+	long long CIR=100;
+	long long CBS=200;
+	struct timeval now;
+	gettimeofday(&now, NULL);
+	struct OC_token_bucket *b[n_replica], *b1=NULL;
+	// Each replica gets its own uuid and consumes 10 tokens before the sync.
+	for(int i=0; i<n_replica; i++)
+	{
+		uuid_generate(uuid);
+		b[i]=OC_token_bucket_new(uuid, now, CIR, CBS);
+		OC_token_bucket_consume(b[i], now, TB_CONSUME_NORMAL, 10);
+	}
+	OC_token_bucket_sync(b, n_replica);
+	size_t sz=OC_token_bucket_serialized_size(b[0]);
+	char *blob=NULL;
+	size_t blob_sz=0;
+	OC_token_bucket_serialize(b[0], &blob, &blob_sz);
+	EXPECT_EQ(sz, blob_sz);
+	b1=OC_token_bucket_deserialize(blob, blob_sz);
+	free(blob);
+	sz=OC_token_bucket_serialized_size(b1);
+	EXPECT_EQ(sz, blob_sz);
+
+	for(int i=0; i<n_replica; i++)
+	{
+		OC_token_bucket_free(b[i]);
+	}
+	OC_token_bucket_free(b1);
+}
+// Exercises degenerate configurations of a single OC token bucket: zero CIR,
+// zero CBS, both zero (expected to grant every request), and a later
+// reconfiguration of the both-zero bucket to finite limits.
+TEST(OCTokenBucket, Boundary)
+{
+	uuid_t uuid;
+	uuid_generate(uuid);
+
+	struct timeval now;
+	gettimeofday(&now, NULL);
+	struct OC_token_bucket *bucket=NULL;
+	long long tokens=0, consumed=0;
+
+	//Zero CIR: the initial CBS can be spent once and is never refilled.
+	bucket=OC_token_bucket_new(uuid, now, 0, 1000);
+	tokens=OC_token_bucket_consume(bucket, now, TB_CONSUME_NORMAL, 1000);
+	EXPECT_EQ(tokens, 1000);
+
+	now.tv_sec++;
+	tokens=OC_token_bucket_consume(bucket, now, TB_CONSUME_NORMAL, 1);
+	EXPECT_EQ(tokens, 0);
+	OC_token_bucket_free(bucket);
+
+	//Zero CBS: even a single token is refused.
+	bucket=OC_token_bucket_new(uuid, now, 1000, 0);
+	tokens=OC_token_bucket_consume(bucket, now, TB_CONSUME_NORMAL, 1);
+	EXPECT_EQ(tokens, 0);
+	OC_token_bucket_free(bucket);
+
+	//Infinite Tokens: CIR=0 and CBS=0 together are expected to grant everything.
+	bucket=OC_token_bucket_new(uuid, now, 0, 0);
+	tokens=OC_token_bucket_consume(bucket, now, TB_CONSUME_NORMAL, 140);
+	consumed+=tokens;
+	EXPECT_EQ(tokens, 140);
+
+
+	tokens=OC_token_bucket_consume(bucket, now, TB_CONSUME_NORMAL, 65535);
+	consumed+=tokens;
+	EXPECT_EQ(tokens, 65535);
+
+	struct OC_token_bucket_info info;
+	
+	// In this mode the reported refill is expected to equal the consumption.
+	OC_token_bucket_info(bucket, now, &info);
+	EXPECT_EQ(info.consumed, consumed);
+	EXPECT_EQ(info.refilled, consumed);
+	
+	// Switch the same bucket to finite limits (CIR=100, CBS=500).
+	now.tv_sec++;
+	OC_token_bucket_configure(bucket, now, 100, 500);
+	
+	// Advance CBS/CIR = 5 seconds, after which a full CBS must be grantable.
+	now.tv_sec+=500/100;
+	tokens=OC_token_bucket_consume(bucket, now, TB_CONSUME_NORMAL, 500);
+	consumed+=tokens;
+	EXPECT_EQ(tokens, 500);
+
+	OC_token_bucket_info(bucket, now, &info);
+	EXPECT_EQ(info.consumed, consumed);
+	EXPECT_EQ(info.refilled, consumed);
+
+	OC_token_bucket_free(bucket);
+}
+// Two-replica merge check: after both replicas consume and sync, each one is
+// expected to see the combined consumption and to own half of the refill.
+TEST(OCTokenBucket, Merge)
+{
+	struct OC_token_bucket *buckets[2];
+	uuid_t uuid;
+	long long CIR=100;
+	long long CBS=200;
+	struct timeval now;
+	gettimeofday(&now, NULL);
+
+	size_t i=0;
+	for(i=0; i<2; i++)
+	{
+		uuid_generate(uuid);
+		buckets[i]=OC_token_bucket_new(uuid, now, CIR, CBS);
+	}
+	long long tokens=0;
+	OC_token_bucket_sync(buckets, 2);
+	tokens=OC_token_bucket_consume(buckets[0], now, TB_CONSUME_NORMAL, 130);
+	EXPECT_EQ(tokens, 130);
+	tokens=OC_token_bucket_consume(buckets[1], now, TB_CONSUME_NORMAL, 30);
+	EXPECT_EQ(tokens, 30);
+
+	OC_token_bucket_sync(buckets, 2);
+	now.tv_sec++;
+	//each has 200-130-30+100/2=90 available tokens
+	tokens=OC_token_bucket_consume(buckets[0], now, TB_CONSUME_NORMAL, 91);
+	EXPECT_EQ(tokens, 0);
+	tokens=OC_token_bucket_consume(buckets[1], now, TB_CONSUME_FLEXIBLE, 90);
+	EXPECT_EQ(tokens, 90);
+	OC_token_bucket_sync(buckets, 2);
+	
+	for(i=0; i<2; i++)
+	{
+		OC_token_bucket_free(buckets[i]);
+	}
+}
+// Simulates replica_num OC token bucket replicas (CIR=512Ki, CBS=2Mi) for
+// mimic_duration_s simulated seconds in 100 us steps. Each step one randomly
+// chosen replica receives a TB_CONSUME_NORMAL request, and replicas are synced
+// whenever the elapsed simulated time is a multiple of sync_interval_ms.
+//
+//   replica_num         number of replicas to create
+//   mimic_duration_s    simulated duration in seconds
+//   sync_interval_ms    simulated time between syncs; must be non-zero because
+//                       it is used as a modulo divisor
+//   saturation_percent  target demand as a percentage of CIR
+//
+// Returns consumed / MIN(upper_limit, requested), where upper_limit is
+// CBS + CIR * elapsed seconds. Also asserts that consumption tracks
+// MIN(refilled, requested) within 3% and that the realised saturation is
+// within 0.1 of the requested one. The RNG is reseeded with 17 on every call.
+double OC_token_bucket_test(size_t replica_num, long long mimic_duration_s, long long sync_interval_ms, int saturation_percent)
+{
+	struct OC_token_bucket *buckets[replica_num];
+	size_t i=0, j=0;
+	long long CIR=512*1024;
+	long long CBS=2*1024*1024;
+	uuid_t uuid;
+	struct timeval start;
+	gettimeofday(&start, NULL);
+	for(i=0; i<replica_num; i++)
+	{
+		uuid_generate(uuid);
+		buckets[i]=OC_token_bucket_new(uuid, start, CIR, CBS);
+	}
+	srandom(17);
+	long long tokens=0;
+	long long consumed=0, requested=0, upper_limit=0, refilled=0;
+	long long step_us=100, mimic_duration_us=mimic_duration_s*1000*1000;
+	long long sync_interval_us=sync_interval_ms*1000;
+	struct timeval step, now;
+	memcpy(&now, &start, sizeof(now));
+	step.tv_sec=0;
+	step.tv_usec=(suseconds_t)step_us;
+	for(i=0; (long long)i<mimic_duration_us/step_us; i++)
+	{
+		timeradd(&now, &step, &now);
+		j=random()%replica_num;
+		// Per-step budget scaled to the target saturation, then randomised to
+		// the range [budget/2, budget/2 + budget).
+		tokens=CIR*step_us/(1000*1000);
+		tokens=tokens*saturation_percent/100;
+		tokens=tokens/2+random()%tokens;
+		requested+=tokens;
+		consumed+=OC_token_bucket_consume(buckets[j], now, TB_CONSUME_NORMAL, tokens);
+		if((step_us*i)%sync_interval_us==0)
+		{
+			OC_token_bucket_sync(buckets, replica_num);
+		}
+	}
+	upper_limit=CBS+CIR*timeval_delta_ms(start, now)/1000;
+
+	struct OC_token_bucket_info info;
+	OC_token_bucket_info(buckets[0], now, &info);
+	refilled=info.refilled;
+
+	EXPECT_NEAR((double)consumed/MIN(refilled, requested), 1, 0.03);
+//	EXPECT_LE(consumed, requested);
+	double accuracy=(double)consumed/MIN(upper_limit, requested);
+	double real_saturation=(double)requested/upper_limit;
+	EXPECT_NEAR(real_saturation, (double)saturation_percent/100, 0.1);
+	for(i=0; i<replica_num; i++)
+	{
+		OC_token_bucket_free(buckets[i]);
+	}
+	return accuracy;
+}
+// Over-subscribed scenario: 9 replicas, 500 simulated seconds, demand at 200%
+// of CIR, with sync intervals from 100 ms to 1000 ms. Accuracy must stay
+// within 5% of 1 for every interval.
+TEST(OCTokenBucket, HeavyConsumer)
+{
+	double accuracy=0.0;
+	long long replica_num=0, test_duration_s=500, sync_interval_ms=0;
+
+	long long i=0, j=0;
+	// The outer loop currently runs once (i=8, i.e. 9 replicas).
+	for(i=8; i<9; i++)
+	{
+		replica_num=i+1;
+		for(j=0; j<10; j++)
+		{
+			sync_interval_ms=100*(j+1);
+			accuracy=OC_token_bucket_test(replica_num, test_duration_s, sync_interval_ms, 200);
+			printf("replica_num=%lld, sync_interval_ms=%lld, accuracy=%.4f\n", replica_num, sync_interval_ms, accuracy);
+			EXPECT_NEAR(accuracy, 1, 0.05);
+		}
+	}
+}
+// Under-subscribed scenario: 1 to 4 replicas, 100 simulated seconds, demand at
+// 90% of CIR, with sync intervals from 100 ms to 2000 ms. Accuracy must stay
+// within 3% of 1 for every combination.
+TEST(OCTokenBucket, LightConsumer)
+{	
+	double accuracy=0.0;
+	long long replica_num=0, test_duration_s=100, sync_interval_ms=0;
+
+	long long i=0, j=0;
+	for(i=0; i<4; i++)
+	{
+		replica_num=i+1;
+		for(j=0; j<20; j++)
+		{
+			sync_interval_ms=100*(j+1);
+			accuracy=OC_token_bucket_test(replica_num, test_duration_s, sync_interval_ms, 90);
+			EXPECT_NEAR(accuracy, 1, 0.03);
+		}
+	}
+}
+// All REPLICA_NUMBER replicas request the same token count in every 100 us
+// step (TB_CONSUME_FLEXIBLE) for 100 simulated seconds, syncing every 100
+// steps. Checks that consumption tracks the refill, and the refill tracks
+// CBS + CIR*elapsed, within 1%.
+TEST(OCTokenBucket, ConcurrentHeavyConsumer)
+{
+	struct OC_token_bucket *buckets[REPLICA_NUMBER];
+	size_t i=0, j=0;
+	long long CIR=100*1024*1024;
+	long long CBS=1000*1024*1024;
+	uuid_t uuid;
+	struct timeval start;
+	gettimeofday(&start, NULL);
+	for(i=0; i<REPLICA_NUMBER; i++)
+	{
+		uuid_generate(uuid);
+		buckets[i]=OC_token_bucket_new(uuid, start, CIR, CBS);
+	}
+	// Fixed seed keeps the request sequence reproducible.
+	srandom(17);
+	long long tokens=0;
+	long long consumed=0, requested=0, upper_limit=0, refilled=0;
+	long long mimic_duration_us=(long long)100*1000*1000;
+	long long step_us=100;
+	struct timeval step, now;
+	memcpy(&now, &start, sizeof(now));
+	step.tv_sec=0;
+	step.tv_usec=(suseconds_t)step_us;
+	for(i=0; (long long)i<mimic_duration_us/step_us; i++)
+	{
+		timeradd(&now, &step, &now);		
+		tokens=CIR*step_us/(1000*1000);		
+		// Per-replica request: an equal share of the step budget plus a random
+		// extra below one full budget.
+		tokens=tokens/REPLICA_NUMBER+random()%tokens;
+		for(j=0; j<REPLICA_NUMBER; j++)
+		{
+			requested+=tokens;			
+			consumed+=OC_token_bucket_consume(buckets[j], now, TB_CONSUME_FLEXIBLE, tokens);
+		}
+		if(i%100==0)
+		{
+			OC_token_bucket_sync(buckets, REPLICA_NUMBER);
+		}
+	}
+	upper_limit=CBS+CIR*timeval_delta_ms(start, now)/1000;
+	struct OC_token_bucket_info info;
+	OC_token_bucket_info(buckets[0], now, &info);
+	refilled=info.refilled;
+
+	EXPECT_NEAR((double)consumed/MIN(refilled, requested), 1, 0.01);
+	EXPECT_NEAR((double)refilled/upper_limit, 1, 0.01);
+	EXPECT_LE(consumed, requested);
+	double accuracy=(double)consumed/MIN(upper_limit, requested);
+	EXPECT_NEAR(accuracy, 1, 0.01);
+	for(i=0; i<REPLICA_NUMBER; i++)
+	{
+		OC_token_bucket_free(buckets[i]);
+	}
+}
+// Two-phase run. Phase 1 repeats the concurrent heavy-consumer scenario for
+// 1000 simulated seconds. Phase 2 doubles CIR and multiplies CBS by 5 through
+// OC_token_bucket_configure() on replica 0 only, runs another 1000 seconds,
+// and repeats the accuracy checks against the refill accumulated in phase 2.
+TEST(OCTokenBucket, Reconfigure)
+{
+	struct OC_token_bucket *buckets[REPLICA_NUMBER];
+	size_t i=0, j=0;
+	double accuracy=0.0;
+	long long CIR=100*1024*1024;
+	long long CBS=1000*1024*1024;
+	uuid_t uuid;
+	struct timeval start;
+	gettimeofday(&start, NULL);
+	for(i=0; i<REPLICA_NUMBER; i++)
+	{
+		uuid_generate(uuid);
+		buckets[i]=OC_token_bucket_new(uuid, start, CIR, CBS);
+	}
+	srandom(17);
+	long long tokens=0;
+	long long consumed=0, requested=0, upper_limit=0, refilled=0;
+	long long mimic_duration_us=(long long)1000*1000*1000;
+	long long step_us=100;
+	struct timeval step, now;
+	memcpy(&now, &start, sizeof(now));
+	step.tv_sec=0;
+	step.tv_usec=(suseconds_t)step_us;
+	// Phase 1: original configuration.
+	for(i=0; (long long)i<mimic_duration_us/step_us; i++)
+	{
+		timeradd(&now, &step, &now);		
+		tokens=CIR*step_us/(1000*1000);		
+		tokens=tokens/REPLICA_NUMBER+random()%tokens;
+		for(j=0; j<REPLICA_NUMBER; j++)
+		{
+			requested+=tokens;			
+			consumed+=OC_token_bucket_consume(buckets[j], now, TB_CONSUME_FLEXIBLE, tokens);
+		}
+		if(i%100==0)
+		{
+			OC_token_bucket_sync(buckets, REPLICA_NUMBER);
+		}
+	}
+	upper_limit=CBS+CIR*timeval_delta_ms(start, now)/1000;
+	struct OC_token_bucket_info info;
+	OC_token_bucket_info(buckets[0], now, &info);
+	refilled=info.refilled;
+	EXPECT_NEAR((double)consumed/MIN(refilled, requested), 1, 0.01);
+	EXPECT_NEAR((double)refilled/upper_limit, 1, 0.01);
+	EXPECT_LE(consumed, requested);
+	accuracy=(double)consumed/MIN(upper_limit, requested);
+	EXPECT_NEAR(accuracy, 1, 0.01);
+
+	// Phase 2: restart the measurement window and reconfigure replica 0.
+	// NOTE(review): the other replicas presumably pick up the new CIR/CBS
+	// through the periodic sync -- confirm in OC_token_bucket_merge_blob.
+	memcpy(&start, &now, sizeof(start));
+	CIR*=2;
+	CBS*=5;
+	requested=0;
+	consumed=0;
+	OC_token_bucket_configure(buckets[0], start, CIR, CBS);
+	for(i=0; (long long)i<mimic_duration_us/step_us; i++)
+	{
+		timeradd(&now, &step, &now);		
+		tokens=CIR*step_us/(1000*1000);		
+		tokens=tokens/REPLICA_NUMBER+random()%tokens;
+		for(j=0; j<REPLICA_NUMBER; j++)
+		{
+			requested+=tokens;			
+			consumed+=OC_token_bucket_consume(buckets[j], now, TB_CONSUME_FLEXIBLE, tokens);
+		}
+		if(i%100==0)
+		{
+			OC_token_bucket_sync(buckets, REPLICA_NUMBER);
+		}
+	}
+	upper_limit=CIR*timeval_delta_ms(start, now)/1000;//not + CBS, because reconfiguration will not refill the bucket to full.
+
+	// info.refilled is cumulative, so subtract the phase-1 value.
+	OC_token_bucket_info(buckets[0], now, &info);
+	refilled = info.refilled - refilled;
+
+	EXPECT_NEAR((double)consumed/MIN(refilled, requested), 1, 0.01);
+	EXPECT_NEAR((double)refilled/upper_limit, 1, 0.01);
+	EXPECT_LE(consumed, requested);
+	accuracy=(double)consumed/MIN(upper_limit, requested);
+	EXPECT_NEAR(accuracy, 1, 0.01);
+	
+	for(i=0; i<REPLICA_NUMBER; i++)
+	{
+		OC_token_bucket_free(buckets[i]);
+	}
+}
+// Simulates a network partition: replicas sync after every step except during
+// the middle third of the run, when no sync happens at all. The expected
+// upper limit therefore allows every replica to hand out a full CIR on its
+// own for that third.
+TEST(OCTokenBucket, PartitionTolerance)
+{
+	struct OC_token_bucket *buckets[REPLICA_NUMBER];
+	size_t i=0, j=0;
+	long long CIR=1*1024*1024;
+	long long CBS=2*1024*1024;
+	uuid_t uuid;
+	struct timeval start;
+	gettimeofday(&start, NULL);
+	for(i=0; i<REPLICA_NUMBER; i++)
+	{
+		uuid_generate(uuid);
+		buckets[i]=OC_token_bucket_new(uuid, start, CIR, CBS);
+	}
+	srandom(17);
+	long long tokens=0;
+	long long consumed=0, requested=0, upper_limit=0, refilled=0;
+	long long mimic_duration_us=(long long)100*1000*1000;
+	long long step_us=100;
+	struct timeval step, now;
+	memcpy(&now, &start, sizeof(now));
+	step.tv_sec=0;
+	step.tv_usec=(suseconds_t)step_us;
+	for(i=0; (long long)i<mimic_duration_us/step_us; i++)
+	{
+		timeradd(&now, &step, &now);
+		// This value of j is overwritten by the loop below; the call still
+		// advances the RNG, so removing it would change the request sequence.
+		j=random()%REPLICA_NUMBER;
+		tokens=CIR*step_us/(1000*1000);
+		tokens=tokens+random()%tokens;
+		for(j=0; j<REPLICA_NUMBER; j++)
+		{
+			requested+=tokens;			
+			consumed+=OC_token_bucket_consume(buckets[j], now, TB_CONSUME_FLEXIBLE, tokens);
+		}
+		//network partition at time (t/3, t*2/3]
+		if((long long)i>mimic_duration_us/step_us/3 && (long long)i<= 2*mimic_duration_us/step_us/3)
+		{
+			continue;
+		}
+		//if(i%100==0)
+		{
+			OC_token_bucket_sync(buckets, REPLICA_NUMBER);
+		}
+	}
+	// Connected two thirds contribute one shared CIR; the partitioned third
+	// contributes CIR per replica.
+	upper_limit=CBS+(CIR*timeval_delta_ms(start, now)/1000)*2/3 + (CIR*timeval_delta_ms(start, now)/1000)*REPLICA_NUMBER/3;
+	
+	struct OC_token_bucket_info info;
+	OC_token_bucket_info(buckets[0], now, &info);
+	refilled = info.refilled;
+
+	EXPECT_NEAR((double)consumed/MIN(refilled, requested), 1, 0.01);
+	EXPECT_LE(consumed, requested);
+	double accuracy=(double)consumed/MIN(upper_limit, requested);
+	EXPECT_NEAR(accuracy, 1, 0.01);
+	for(i=0; i<REPLICA_NUMBER; i++)
+	{
+		OC_token_bucket_free(buckets[i]);
+	}
+
+}
+// One traffic class in the fair token bucket simulations.
+struct sftb_class
+{
+	long long class_id;	// its raw bytes are the key passed to fair_token_bucket_consume()
+	long long weight;	// weight passed to fair_token_bucket_consume()
+	long long requested_CIR;	// demand in tokens per simulated second
+	long long demand_tokens;	// running total of tokens requested
+	long long allocated_tokens;	// running total of tokens actually granted
+	long long ideal_tokens;	// filled in by max_min_fairness_index()
+};
+
+// qsort() comparator ordering sftb_class records by ascending demand_tokens.
+// Returns a negative, zero or positive value when a's demand is respectively
+// smaller than, equal to or greater than b's.
+//
+// The fields are compared rather than subtracted: narrowing a long long
+// difference to int can wrap and report the wrong sign once two demands
+// differ by more than INT_MAX.
+int cmp_sftb_class(const void *a, const void *b)
+{
+	const struct sftb_class *ra=(const struct sftb_class*)a;
+	const struct sftb_class *rb=(const struct sftb_class*)b;
+	return (ra->demand_tokens > rb->demand_tokens) - (ra->demand_tokens < rb->demand_tokens);
+}
+// Computes the weighted max-min fair allocation of available_tokens over the
+// classes, stores it in each class's ideal_tokens, and returns
+// 1 - sqrt(mean(((ideal - allocated) / ideal)^2)), so 1.0 means the measured
+// allocation equals the ideal one.
+//
+//   available_tokens  total tokens that could have been handed out
+//   classes           in/out; sorted in place by demand_tokens, ideal_tokens
+//                     is accumulated into (callers in this file pass it as 0)
+//   n_class           number of entries in classes
+//
+// NOTE(review): a class whose ideal_tokens ends up 0 makes the final ratio a
+// floating-point division by zero, and an unsatisfied set whose weights sum to
+// 0 makes left_tokens/left_weight an integer division by zero. The callers in
+// this file use positive weights and demands.
+double max_min_fairness_index(long long available_tokens, struct sftb_class * classes, size_t n_class)
+{
+	qsort(classes, n_class, sizeof(struct sftb_class), cmp_sftb_class);
+	long long total_weight=0;
+	for(size_t i=0; i<n_class; i++)
+	{
+		total_weight+=classes[i].weight;
+	}
+	long long left_tokens=available_tokens;
+	long long left_weight=total_weight;
+	size_t n_satisfied=0;
+
+	// Progressive filling: each round offers every unsatisfied class
+	// weight*share tokens, then recomputes the share over the classes that
+	// are still below their demand. Stops when all demands are met or the
+	// per-weight share rounds down to 0.
+	while(n_satisfied<n_class && left_tokens/left_weight>0 )
+	{
+		long long share=left_tokens/left_weight;
+		for(size_t i=0; i<n_class; i++)
+		{
+			long long my_share=classes[i].weight*share;
+			if(classes[i].demand_tokens == classes[i].ideal_tokens)
+			{
+				continue;
+			}
+			else if(classes[i].demand_tokens - classes[i].ideal_tokens <= my_share)
+			{
+				// Remaining demand fits in this round's share: satisfy it fully.
+				left_tokens -= (classes[i].demand_tokens - classes[i].ideal_tokens);
+				classes[i].ideal_tokens=classes[i].demand_tokens;
+			}
+			else
+			{
+				left_tokens -= my_share;
+				classes[i].ideal_tokens+=my_share;
+				
+			}
+		}
+		// Recount satisfied classes and the weight still competing.
+		left_weight=0;
+		n_satisfied=0;
+		for(size_t i=0; i<n_class; i++)
+		{
+			if(classes[i].demand_tokens == classes[i].ideal_tokens)
+			{
+				n_satisfied++;
+				continue;
+			}
+			left_weight += classes[i].weight;
+			
+		}
+	}
+	double index=0;
+	for(size_t i=0; i<n_class; i++)
+	{
+		index += pow((double)(classes[i].ideal_tokens-classes[i].allocated_tokens)/classes[i].ideal_tokens, 2);
+	}
+	index=1-sqrt(index/n_class);
+
+	return index;
+}
+// Full-mesh state exchange between n_ftb fair token bucket replicas. For each
+// receiving replica, every other replica is serialized afresh, merged into the
+// receiver, and the temporary blob is released with free().
+void ftb_sync(struct fair_token_bucket **ftb, size_t n_ftb)
+{
+	char *payload=NULL;
+	size_t payload_sz=0;
+	for(size_t dst=0; dst<n_ftb; dst++)
+	{
+		size_t src=0;
+		while(src<n_ftb)
+		{
+			// A replica never merges its own state.
+			if(src!=dst)
+			{
+				fair_token_bucket_serialize(ftb[src], &payload, &payload_sz);
+				fair_token_bucket_merge_blob(ftb[dst], payload, payload_sz);
+				free(payload);
+				payload=NULL;
+			}
+			src++;
+		}
+	}
+}
+// Drives n_replica fair token bucket replicas for duration_s simulated seconds
+// in 1 ms steps. Each step picks one random replica and lets every class, in a
+// randomly rotated order, request requested_CIR*step_us/1000000 tokens with
+// TB_CONSUME_NORMAL. Replicas are synced every 400 simulated ms.
+//
+//   classes     in/out; demand_tokens and allocated_tokens are accumulated,
+//               ideal_tokens is filled and the array is re-sorted by
+//               max_min_fairness_index()
+//   n_class     number of entries of classes to use
+//   CIR, CBS    bucket configuration
+//   duration_s  simulated duration in seconds
+//   n_replica   number of replicas to create
+//
+// Returns the fairness index computed against CIR*duration_s + CBS tokens.
+//
+// NOTE(review): uuid is generated once, so all replicas are created with the
+// same uuid, unlike the OC and bulk helpers in this file which generate one per
+// replica -- confirm this is intended.
+double test_fair_token_bucket(struct sftb_class *classes, size_t n_class, long long CIR, long long CBS, int duration_s, int n_replica)
+{
+	uuid_t uuid;
+	uuid_generate(uuid);
+	struct timeval start, step, now;
+	int step_us=1000;
+	gettimeofday(&start, NULL);
+	memcpy(&now, &start, sizeof(now));
+	step.tv_sec=0;
+	step.tv_usec=(suseconds_t)step_us;
+	struct fair_token_bucket *ftb[n_replica];
+	for(int i=0; i<n_replica; i++)
+	{
+		// The meaning of the trailing 8192 is defined by fair_token_bucket_new().
+		ftb[i]=fair_token_bucket_new(uuid, now, CIR, CBS, 8192);
+	}
+	int sync_interval_ms=400;
+	for(int i=0; i<duration_s*(1000*1000/step_us); i++)
+	{
+		timeradd(&now, &step, &now);
+		// k rotates the class order so that no class is always served first.
+		int k=random()%n_class;
+		int r=random()%n_replica;
+		for(size_t j=0; j<n_class; j++)
+		{
+			int idx=(j+k)%n_class;
+			long long this_demand=classes[idx].requested_CIR*step_us/(1000*1000);
+			classes[idx].allocated_tokens+=fair_token_bucket_consume(ftb[r], now, 
+								(const char*) &(classes[idx].class_id),
+								sizeof(classes[idx].class_id),
+								classes[idx].weight,
+								TB_CONSUME_NORMAL,
+								this_demand);
+			classes[idx].demand_tokens+=this_demand;
+		}
+		if(0==i%(sync_interval_ms*1000/step_us))
+		{
+			ftb_sync(ftb, n_replica);
+		}
+	}
+	long long available_tokens=CIR*duration_s+CBS;
+	double index=max_min_fairness_index(available_tokens, classes, n_class);
+	// Debug output, disabled by default: set print to 1 to list the classes
+	// whose allocation deviates from the ideal by 10% or more.
+	int print=0;
+	if(print)
+	{
+		printf("class\tweight\tdemand\tallocated\tideal\r\n");
+		for(size_t i=0; i<n_class; i++)
+		{
+			if((double)classes[i].allocated_tokens/classes[i].ideal_tokens< 1.1 &&
+			(double) classes[i].allocated_tokens/classes[i].ideal_tokens > 0.9)
+				continue;
+			printf("%lld\t%lld\t%lld\t%lld\t%lld\r\n", classes[i].class_id,
+										classes[i].weight,
+										classes[i].demand_tokens/duration_s,
+										classes[i].allocated_tokens/duration_s,
+										classes[i].ideal_tokens/duration_s);
+		}
+		printf("CIR %lld fairness index %f\r\n", CIR, index);
+	}
+	for(int i=0; i<n_replica; i++)
+	{
+		fair_token_bucket_free(ftb[i]);
+	}
+	return index;
+}
+
+// Single-replica fairness check with equal weights over four demand mixes.
+// Each table row is {class_id, weight, requested_CIR, 0, 0, 0}. The fairness
+// index must be within 0.02 of 1 for every mix.
+TEST(FairTokenBucket, Basic)
+{
+	double index=0;
+	long long duration_s=350;
+	struct sftb_class one_heavy_classes[5]={{1, 1, 20000, 0, 0, 0}, 
+								{2, 1, 20000, 0, 0, 0},
+								{3, 1, 20000, 0, 0, 0},
+								{4, 1, 20000, 0, 0, 0},
+								{5, 1, 50000, 0, 0, 0}};
+	
+	// NOTE(review): n_class is 1 here, so only the first of the five entries
+	// (and not the heavy one) takes part -- confirm whether 5 was intended.
+	index=test_fair_token_bucket(one_heavy_classes, 1, 120000, 200000, duration_s, 1);
+	EXPECT_NEAR(index, 1, 0.02);
+
+	// Total demand equals CIR.
+	struct sftb_class all_light_classes[5]={{1, 1, 20000, 0, 0, 0}, 
+								{2, 1, 20000, 0, 0, 0},
+								{3, 1, 20000, 0, 0, 0},
+								{4, 1, 20000, 0, 0, 0},
+								{5, 1, 20000, 0, 0, 0}};
+	
+	index=test_fair_token_bucket(all_light_classes, 5, 100000, 200000, duration_s, 1);
+	EXPECT_NEAR(index, 1, 0.02);
+
+	struct sftb_class two_heavy_classes[5]={{1, 1, 20000, 0, 0, 0}, 
+								{2, 1, 20000, 0, 0, 0},
+								{3, 1, 50000, 0, 0, 0},
+								{400, 1, 50000, 0, 0, 0},
+								{5, 1, 20003, 0, 0, 0}};
+	
+	index=test_fair_token_bucket(two_heavy_classes, 5, 100000, 200000, duration_s, 1);
+	EXPECT_NEAR(index, 1, 0.02);
+
+	// Every class demands more than an equal fifth of CIR.
+	struct sftb_class all_heavy_classes[5]={{1, 1, 40000, 0, 0, 0}, 
+								{2, 1, 40000, 0, 0, 0},
+								{3, 1, 50000, 0, 0, 0},
+								{4, 1, 60000, 0, 0, 0},
+								{5, 1, 40000, 0, 0, 0}};
+	
+	index=test_fair_token_bucket(all_heavy_classes, 5, 100000, 200000, duration_s, 1);
+	EXPECT_NEAR(index, 1, 0.02);
+	
+}
+// Same fairness check as the single-replica case, but with two replicas that
+// are synced periodically by test_fair_token_bucket().
+TEST(FairTokenBucket, Replicas)
+{
+	double index=0;
+	long long duration_s=350;
+	struct sftb_class one_heavy_classes[5]={{1, 1, 20000, 0, 0, 0}, 
+								{2, 1, 20000, 0, 0, 0},
+								{3, 1, 20000, 0, 0, 0},
+								{4, 1, 20000, 0, 0, 0},
+								{5, 1, 50000, 0, 0, 0}};
+	
+	// NOTE(review): n_class is 1 here, so only the first of the five entries
+	// takes part -- confirm whether 5 was intended.
+	index=test_fair_token_bucket(one_heavy_classes, 1, 120000, 200000, duration_s, 2);
+	EXPECT_NEAR(index, 1, 0.02);
+
+	struct sftb_class two_heavy_classes[5]={{1, 1, 20000, 0, 0, 0}, 
+								{2, 1, 20000, 0, 0, 0},
+								{3, 1, 50000, 0, 0, 0},
+								{400, 1, 50000, 0, 0, 0},
+								{5, 1, 20003, 0, 0, 0}};
+	
+	index=test_fair_token_bucket(two_heavy_classes, 5, 100000, 200000, duration_s, 2);
+	EXPECT_NEAR(index, 1, 0.02);
+
+}
+// Single-replica fairness check with unequal weights. Each table row is
+// {class_id, weight, requested_CIR, 0, 0, 0}; every scenario runs for 500
+// simulated seconds and must reach a fairness index within 0.02 of 1.
+TEST(FairTokenBucket, Weight)
+{
+	double index=0;
+	long long duration_s=500;
+	struct sftb_class one_heavy_classes[5]={{1, 1, 20000, 0, 0, 0}, 
+								{2, 2, 20000, 0, 0, 0},
+								{3, 3, 20000, 0, 0, 0},
+								{4, 4, 30000, 0, 0, 0},
+								{5, 5, 50000, 0, 0, 0}};
+	
+	index=test_fair_token_bucket(one_heavy_classes, 5, 120000, 200000, duration_s, 1);
+	EXPECT_NEAR(index, 1, 0.02);
+
+	// The lowest-weight class now has a high demand.
+	struct sftb_class t1_heavy_classes[5]={{1, 1, 40000, 0, 0, 0}, 
+								{2, 2, 20000, 0, 0, 0},
+								{3, 3, 20000, 0, 0, 0},
+								{4, 4, 30000, 0, 0, 0},
+								{5, 5, 50000, 0, 0, 0}};
+	
+	index=test_fair_token_bucket(t1_heavy_classes, 5, 120000, 200000, duration_s, 1);
+	EXPECT_NEAR(index, 1, 0.02);
+
+	// Same demands, with the last class's weight raised to 10.
+	struct sftb_class t2_heavy_classes[5]={{1, 1, 40000, 0, 0, 0}, 
+								{2, 2, 20000, 0, 0, 0},
+								{3, 3, 20000, 0, 0, 0},
+								{4, 4, 30000, 0, 0, 0},
+								{5, 10, 50000, 0, 0, 0}};
+	
+	index=test_fair_token_bucket(t2_heavy_classes, 5, 120000, 200000, duration_s, 1);
+	EXPECT_NEAR(index, 1, 0.02);
+
+	// The highest-weight class has the smallest demand.
+	struct sftb_class t3_heavy_classes[5]={{1, 1, 40000, 0, 0, 0}, 
+								{2, 2, 20000, 0, 0, 0},
+								{3, 3, 22000, 0, 0, 0},
+								{4, 4, 20000, 0, 0, 0},
+								{5, 10, 10000, 0, 0, 0}};
+	
+	index=test_fair_token_bucket(t3_heavy_classes, 5, 100000, 200000, duration_s, 1);
+	EXPECT_NEAR(index, 1, 0.02);
+
+	struct sftb_class all_light_classes[5]={{1, 1, 20000, 0, 0, 0}, 
+								{2, 2, 20000, 0, 0, 0},
+								{3, 3, 22000, 0, 0, 0},
+								{4, 4, 20000, 0, 0, 0},
+								{5, 10, 23000, 0, 0, 0}};
+	
+	index=test_fair_token_bucket(all_light_classes, 5, 100000, 200000, duration_s, 1);
+	EXPECT_NEAR(index, 1, 0.02);
+
+	// Total demand equals CIR, with most of it coming from the weight-10 class.
+	struct sftb_class light_with_big_player[5]={{1, 1, 10000, 0, 0, 0}, 
+								{2, 2, 10000, 0, 0, 0},
+								{3, 3, 10000, 0, 0, 0},
+								{4, 4, 10000, 0, 0, 0},
+								{5, 10, 60000, 0, 0, 0}};
+	
+	index=test_fair_token_bucket(light_with_big_player, 5, 100000, 200000, duration_s, 1);
+	EXPECT_NEAR(index, 1, 0.02);
+
+	// Ten classes spread over three weight tiers (5, 3 and 2).
+	struct sftb_class sandvine_active_logic[10]={{1, 5, 30000, 0, 0, 0}, 
+								{2, 5, 30000, 0, 0, 0},
+								{3, 3, 12000, 0, 0, 0},
+								{4, 3, 12000, 0, 0, 0},
+								{5, 3, 11000, 0, 0, 0},
+								{6, 2, 14000, 0, 0, 0},
+								{7, 2, 5000, 0, 0, 0},
+								{8, 2, 8000, 0, 0, 0},
+								{9, 2, 9000, 0, 0, 0},
+								{10, 2, 20000, 0, 0, 0}};
+	
+	index=test_fair_token_bucket(sandvine_active_logic, 10, 100000, 200000, duration_s, 1);
+	EXPECT_NEAR(index, 1, 0.02);
+}
+// Scale test with 5000 classes whose weights cycle through 1..20. CIR and CBS
+// are both n_class*per_class_CIR, and each scenario runs for 40 simulated
+// seconds on one replica. The fairness index must be within 0.02 of 1.
+TEST(FairTokenBucket, Weight5000)
+{
+	size_t n_class=5000;
+	long long per_class_CIR=1000000;
+	long long CIR=n_class*per_class_CIR;
+	long long CBS=CIR;
+	// Scenario 1: each class demands between 1x and 20x per_class_CIR.
+	struct sftb_class very_heavy_classes[n_class];
+	memset(very_heavy_classes, 0, sizeof(very_heavy_classes));
+	for(size_t i=0; i<n_class; i++)
+	{
+		very_heavy_classes[i].class_id=i;
+		very_heavy_classes[i].requested_CIR=per_class_CIR+(random()%20)*per_class_CIR;
+		very_heavy_classes[i].weight=i%20+1;
+	}
+	double index=test_fair_token_bucket(very_heavy_classes, n_class, CIR, CBS, 40, 1);
+	EXPECT_NEAR(index, 1, 0.02);
+
+	// Scenario 2: demand rises linearly from 1x to just under 2x per_class_CIR.
+	struct sftb_class slight_heavy_classes[n_class];
+	memset(slight_heavy_classes, 0, sizeof(slight_heavy_classes));
+	for(size_t i=0; i<n_class; i++)
+	{
+		slight_heavy_classes[i].class_id=i;
+		slight_heavy_classes[i].requested_CIR=per_class_CIR+i*per_class_CIR/n_class;
+		slight_heavy_classes[i].weight=i%20+1;
+	}
+	index=test_fair_token_bucket(slight_heavy_classes, n_class, CIR, CBS, 40, 1);
+	EXPECT_NEAR(index, 1, 0.02);
+}
+
+// One simulated key (flow) in the bulk token bucket simulations.
+struct btb_key
+{
+	long long key;	// its raw bytes are the key passed to bulk_token_bucket_consume()
+	long long request_CIR;	// demand in tokens per simulated second
+	long long ideal_tokens;	// tokens the key should receive over the whole run
+	long long allocated_tokens;	// running total of tokens actually granted
+};
+// Full-mesh state exchange between n_btb bulk token bucket replicas. For each
+// receiving replica, every other replica is serialized afresh, merged into the
+// receiver, and the temporary blob is released with free().
+void btb_sync(struct bulk_token_bucket **btb, size_t n_btb)
+{
+	char *payload=NULL;
+	size_t payload_sz=0;
+	for(size_t dst=0; dst<n_btb; dst++)
+	{
+		size_t src=0;
+		while(src<n_btb)
+		{
+			// A replica never merges its own state.
+			if(src!=dst)
+			{
+				bulk_token_bucket_serialize(btb[src], &payload, &payload_sz);
+				bulk_token_bucket_merge_blob(btb[dst], payload, payload_sz);
+				free(payload);
+				payload=NULL;
+			}
+			src++;
+		}
+	}
+}
+// Parameters and results of one bulk_token_bucket_test() run.
+struct btb_case
+{
+	// Inputs, set by the caller.
+	long long CIR;
+	long long CBS;
+	int n_replica;	// number of replicas to create
+	int bucket_num;	// passed as the last argument of bulk_token_bucket_new()
+	int key_num;	// number of distinct keys to simulate
+	int duration_s;	// simulated duration in seconds
+	
+	// Outputs, filled in by bulk_token_bucket_test().
+	long long estimate_keys;	// copied from bulk_token_bucket_info
+	long long more;	// keys whose allocated/ideal ratio is above 1.02
+	long long less;	// keys whose allocated/ideal ratio is below 0.98
+	double collision_rate;	// copied from bulk_token_bucket_info
+	double index;	// 1 - RMS relative deviation from the ideal allocation
+};
+// Prints a tab-separated table with a header row followed by one row per
+// result: bucket count, replica count, key count, estimated keys, more/less
+// counters, collision rate and fairness index.
+void bulk_token_bucket_test_print_result(const struct btb_case *results, size_t n_result)
+{
+	printf("bkt\trepl\tkey\testk\tmore\tless\tcoll\tindex\n");
+	const struct btb_case *row=results;
+	const struct btb_case *end=results+n_result;
+	while(row!=end)
+	{
+		printf("%d\t%d\t%d\t%lld\t%lld\t%lld\t%.4f\t%.4f\n",
+				row->bucket_num, row->n_replica, row->key_num,
+				row->estimate_keys, row->more, row->less,
+				row->collision_rate, row->index);
+		row++;
+	}
+}
+// Runs one bulk token bucket simulation described by *mycase and writes the
+// results back into it (index, estimate_keys, more, less, collision_rate).
+//
+// key_num keys with random key values request between CIR/2 and just under
+// 1.5*CIR tokens per second, rising linearly with the key position. Every
+// 1 ms step each key sends a TB_CONSUME_FLEXIBLE request to a randomly chosen
+// replica, and replicas are synced every 2000 simulated ms. The function makes
+// no assertions itself.
+void bulk_token_bucket_test(struct btb_case *mycase)
+{
+	uuid_t uuid;
+	struct timeval start, step, now;
+	int step_us=1000;
+	gettimeofday(&start, NULL);
+	memcpy(&now, &start, sizeof(now));
+	step.tv_sec=0;
+	step.tv_usec=(suseconds_t)step_us;
+	long long max_tokens=mycase->duration_s*mycase->CIR+mycase->CBS;
+	struct bulk_token_bucket *btb[mycase->n_replica];
+	int n_replica=mycase->n_replica;
+	int key_num=mycase->key_num;
+	for(int i=0; i<n_replica; i++)
+	{
+		uuid_generate(uuid);
+		btb[i]=bulk_token_bucket_new(uuid, now, mycase->CIR, mycase->CBS, mycase->bucket_num);
+	}
+	
+	struct btb_key bk[key_num];
+	for(int i=0; i<key_num; i++)
+	{
+		bk[i].key=random();
+		bk[i].request_CIR=mycase->CIR/2+(i*mycase->CIR)/key_num;
+		bk[i].allocated_tokens=0;
+		// A key asking for less than CIR should get everything it asks for;
+		// otherwise its ideal is the most the bucket can hand out in the run.
+		if(bk[i].request_CIR<mycase->CIR)
+		{
+			bk[i].ideal_tokens=bk[i].request_CIR*mycase->duration_s;
+		}
+		else
+		{
+			bk[i].ideal_tokens=max_tokens;
+		}
+	}
+	int sync_interval_ms=2000;
+	for(int i=0; i<mycase->duration_s*(1000*1000/step_us); i++)
+	{
+		timeradd(&now, &step, &now);
+		for(int j=0; j<key_num; j++)
+		{
+			int r=random()%mycase->n_replica;
+			bk[j].allocated_tokens+=bulk_token_bucket_consume(btb[r], now, (char *)&(bk[j].key), sizeof(bk[j].key),
+					TB_CONSUME_FLEXIBLE,
+					bk[j].request_CIR*step_us/(1000*1000));
+		}
+		if(0==i%(sync_interval_ms*1000/step_us))
+		{
+			btb_sync(btb, mycase->n_replica);
+		}
+	}
+	// index accumulates squared relative deviations; more/less count keys
+	// that ended more than 2% above or below their ideal allocation.
+	double index=0, ratio=0;
+	long long more=0, less=0;
+	for(int i=0; i<key_num; i++)
+	{
+		index += pow((double)(bk[i].ideal_tokens-bk[i].allocated_tokens)/bk[i].ideal_tokens, 2);		
+		ratio = (double)bk[i].allocated_tokens/bk[i].ideal_tokens;
+		if(ratio > 1.02) more++;
+		if (ratio <0.98) less++;
+		if(ratio>1.02 || ratio < 0.98)
+		{
+			//printf("%f\n", ratio);
+		}
+	}
+	index=1-sqrt(index/key_num);
+	struct bulk_token_bucket_info info;
+	bulk_token_bucket_info(btb[0], now, &info);
+	mycase->index=index;
+	mycase->estimate_keys=info.estimate_keys;
+	mycase->more=more;
+	mycase->less=less;
+	mycase->collision_rate=info.collision_rate;
+
+	for(int i=0; i<n_replica; i++)
+	{
+		bulk_token_bucket_free(btb[i]);
+	}
+	return;
+}
+// Single-key, single-replica check: a fresh key is expected to report CBS
+// available tokens, consumption must reduce that amount, and over 100
+// simulated seconds the key must not receive more than CIR*duration + CBS.
+TEST(BulkTokenBucket, Basic)
+{
+	uuid_t uuid;
+	uuid_generate(uuid);
+	struct timeval start, step, now;
+	int step_us=1000, duration_s=100;
+	gettimeofday(&start, NULL);
+	memcpy(&now, &start, sizeof(now));
+	step.tv_sec=0;
+	step.tv_usec=(suseconds_t)step_us;
+	long long CIR=10000, CBS=10000;
+	long long request_CIR=10000;
+	struct bulk_token_bucket *btb=bulk_token_bucket_new(uuid, now, CIR, CBS, 4);
+	
+
+	long long allocated=0, max_tokens=duration_s*CIR+CBS, available=0;
+	const char *key="192.168.0.1";
+	available=bulk_token_bucket_read_available(btb, now, key, strlen(key));
+	EXPECT_EQ(available, CBS);
+	allocated+=bulk_token_bucket_consume(btb, now, key, strlen(key), TB_CONSUME_FLEXIBLE, 100);
+	EXPECT_EQ(allocated, 100);
+	available=bulk_token_bucket_read_available(btb, now, key, strlen(key));
+	EXPECT_EQ(available, CBS-allocated);
+
+	// Request exactly CIR worth of tokens per step for the whole run.
+	int i=0;
+	for(i=0; i<duration_s*(1000*1000/step_us); i++)
+	{
+		timeradd(&now, &step, &now);
+		allocated+=bulk_token_bucket_consume(btb, now, key, strlen(key), TB_CONSUME_FLEXIBLE, request_CIR*step_us/(1000*1000));
+	}
+	bulk_token_bucket_free(btb);
+	EXPECT_LE(allocated, max_tokens);
+}
+// Five replicas created at different times and with different bucket counts
+// each serve one dedicated key and sync every 400 simulated ms. After 100
+// simulated seconds the estimated key count must be within 1 of the number of
+// keys, and every key must have received CIR*duration + CBS within 5%.
+TEST(BulkTokenBucket, Merge)
+{
+	int n_replica=5;
+	struct bulk_token_bucket *btb[n_replica];
+	long long allocated[n_replica]={0};
+	uuid_t uuid;
+	struct timeval start, step, now;
+	int step_us=1000;
+	gettimeofday(&start, NULL);
+	memcpy(&now, &start, sizeof(now));
+	step.tv_sec=0;
+	step.tv_usec=(suseconds_t)step_us;
+	long long CIR=8*1024, CBS=10*1024;
+	char key[128];
+	for(int i=0; i<n_replica; i++)
+	{
+		uuid_generate(uuid);
+		timeradd(&now, &step, &now);
+		// NOTE(review): 128*i is 0 for the first replica -- confirm that a
+		// bucket count of 0 is an intended input of bulk_token_bucket_new().
+		btb[i]=bulk_token_bucket_new(uuid, now, CIR, CBS, 128*i);
+	}
+	int r=0;
+	int duration_s=100, sync_interval_ms=400;
+	for(int i=0; i<duration_s*(1000*1000/step_us); i++)
+	{
+		// Replica r only ever sees key "192.168.0.<r>".
+		r=i%n_replica;
+		snprintf(key, sizeof(key), "192.168.0.%d", r);
+		timeradd(&now, &step, &now);
+		allocated[r]+=bulk_token_bucket_consume(btb[r], now, key, strlen(key), TB_CONSUME_FLEXIBLE, 1024*4);
+		if(0==i%(sync_interval_ms*1000/step_us))
+		{
+			btb_sync(btb, n_replica);
+		}
+	}
+	struct bulk_token_bucket_info info;
+	bulk_token_bucket_info(btb[0], now, &info);
+	EXPECT_NEAR(info.estimate_keys, n_replica, n_replica/5);
+	long long upper_limit=CIR*duration_s+CBS;
+	int success=0;
+	for(int i=0; i<n_replica; i++)
+	{
+		if((double)allocated[i]/upper_limit>0.95 &&(double)allocated[i]/upper_limit <1.05) success++;
+		bulk_token_bucket_free(btb[i]);
+	}
+	EXPECT_EQ(success, n_replica);
+}
+TEST(BulkTokenBucket, RareCollision)
+{
+	const int n_case=7;	// constant expression: test[] is an ordinary array, not a GNU VLA
+	struct btb_case test[n_case];
+	for(int i=0; i<n_case; i++)
+	{
+		test[i].CIR=1000*1000;
+		test[i].CBS=1000*1000;
+		test[i].bucket_num=512;
+		test[i].n_replica=1;
+		test[i].key_num=(1<<i);	// 1, 2, 4 ... 64 keys against bucket_num=512
+		test[i].duration_s=100;
+		bulk_token_bucket_test(test+i);
+	}
+	bulk_token_bucket_test_print_result(test, n_case);
+	// This TEST body contains no EXPECT_*; the per-case results are handed to the print helper.
+}
+TEST(BulkTokenBucket, HighCollision)	// same setup as RareCollision but with 128..512 keys
+{
+	const int n_case=4;	// constant expression: test[] is an ordinary array, not a GNU VLA
+	struct btb_case test[n_case];
+	for(int i=0; i<n_case; i++)
+	{
+		test[i].CIR=1000*1000;
+		test[i].CBS=1000*1000;
+		test[i].bucket_num=512;
+		test[i].n_replica=1;
+		test[i].key_num=128*(i+1);	// 128, 256, 384, 512 keys against bucket_num=512
+		test[i].duration_s=100;
+		bulk_token_bucket_test(test+i);
+	}
+	bulk_token_bucket_test_print_result(test, n_case);
+}
+TEST(BulkTokenBucket, VariousCIR)	// fixed key count, CIR swept over five values
+{
+	const int n_case=5;	// constant expression: test[] is an ordinary array, not a GNU VLA
+	struct btb_case test[n_case];
+	for(int i=0; i<n_case; i++)
+	{
+		test[i].CIR=400*1000*(i+1);	// 400k, 800k ... 2000k
+		test[i].CBS=1000*1000;
+		test[i].bucket_num=512;
+		test[i].n_replica=1;
+		test[i].key_num=32;
+		test[i].duration_s=100;
+		bulk_token_bucket_test(test+i);
+	}
+	bulk_token_bucket_test_print_result(test, n_case);
+}
+TEST(BulkTokenBucket, Bucket1M)	// bucket_num raised to 1024*1024
+{
+	const int n_case=2;	// constant expression: test[] is an ordinary array, not a GNU VLA
+	struct btb_case test[n_case];
+	for(int i=0; i<n_case; i++)
+	{
+		test[i].CIR=1000*1000;
+		test[i].CBS=1000*1000;
+		test[i].bucket_num=1024*1024;
+		test[i].n_replica=1;
+		test[i].key_num=1024*(i+1);	// 1024 and 2048 keys
+		test[i].duration_s=100;
+		bulk_token_bucket_test(test+i);
+	}
+	bulk_token_bucket_test_print_result(test, n_case);
+}
+TEST(BulkTokenBucket, Replicas)
+{
+	const int n_case=5;	// constant expression: test[] is an ordinary array, not a GNU VLA
+	struct btb_case test[n_case];
+	for(int i=0; i<n_case; i++)
+	{
+		test[i].CIR=1000*1000;
+		test[i].CBS=2*1000*1000;
+		test[i].bucket_num=512;
+		test[i].n_replica=2;
+		// Two replicas per case; the key count doubles from 1 to 16.
+		test[i].key_num=1<<i;
+		test[i].duration_s=100;
+		bulk_token_bucket_test(test+i);
+	}
+	bulk_token_bucket_test_print_result(test, n_case);
+}
+TEST(BulkTokenBucket, Replica4)
+{
+	const int n_case=8;	// constant expression: test[] is an ordinary array, not a GNU VLA
+	struct btb_case test[n_case];
+	for(int i=0; i<n_case; i++)
+	{
+		test[i].CIR=1000*1000;
+		test[i].CBS=2*1000*1000;
+		test[i].bucket_num=512;
+		test[i].n_replica=4;
+		// Four replicas per case; the key count doubles from 1 to 128.
+		test[i].key_num=1<<i;
+		test[i].duration_s=100;
+		bulk_token_bucket_test(test+i);
+	}
+	bulk_token_bucket_test_print_result(test, n_case);
+}
+int main(int argc, char ** argv)
+{
+	// Hand the command line to GoogleTest first, then run every registered test.
+	::testing::InitGoogleTest(&argc, argv);
+	// The value of RUN_ALL_TESTS() becomes the process exit status.
+	return RUN_ALL_TESTS();
+}
+\ No newline at end of file
diff --git a/CRDT/crdt_utils.h b/CRDT/crdt_utils.h
index 6ff880e..4d39f2c 100644
--- a/CRDT/crdt_utils.h
+++ b/CRDT/crdt_utils.h
@@ -26,3 +26,6 @@
 	(type *)( (char *)__mptr - offsetof(type,member) );})
 #endif
 #define timeval_delta_ms(start, end) ((end.tv_sec-start.tv_sec)*1000 + (end.tv_usec-start.tv_usec)/1000)
+#define timeval_delta_us(start, end) (((long long)(end).tv_sec-(start).tv_sec)*1000*1000 + ((end).tv_usec-(start).tv_usec)) /* end-start in microseconds, computed in long long */
+#define likely(x)       __builtin_expect(!!(x),1) /* !! maps any non-zero value (including pointers) to 1 so it matches the expected constant */
+#define unlikely(x)     __builtin_expect(!!(x),0) /* branch hint only; the truth value of x is unchanged */
diff --git a/CRDT/fair_token_bucket.c b/CRDT/fair_token_bucket.c
new file mode 100644
index 0000000..66e3976
--- /dev/null
+++ b/CRDT/fair_token_bucket.c
@@ -0,0 +1,225 @@
+#include "fair_token_bucket.h"
+#include "oc_token_bucket.h"
+#include "st_hyperloglog.h"
+#include "g_array.h"
+#include "crdt_utils.h"
+#include "xxhash.h"
+
+#include <string.h> 
+#include <assert.h>
+#include <stdio.h>
+#include <stdint.h>
+
+
+#define REFILL_INTERVAL_MS  200
+
+
+struct fair_token_bucket
+{
+    /* Sync Variables*/
+    uuid_t uuid; // copied from the uuid passed to fair_token_bucket_new()
+    long long divisor; // number of SFQ slots; key hashes are reduced modulo this value
+    long long perturb_seed; // seed for the key hash, incremented on every refill
+    struct g_array *sfq;  //per-key deficit estimation
+    struct ST_hyperloglog *hll[FAIR_TB_WEIGHT_MAX]; //counting active keys, one sketch per weight (index = weight-1)
+    struct OC_token_bucket *bucket; // the shared bucket that actually grants tokens
+    
+    /* Local Variables*/
+    struct timeval last_refill_time; // time of the last ftb_refill() that did work
+    long long per_weight_quantum; //updated every REFILL_INTERVAL_MS
+    long long n_active_key;   // sum of the hll[] estimates, cached by ftb_refill()
+};
+struct fair_token_bucket *fair_token_bucket_new(uuid_t uuid, struct timeval now, long long CIR, long long CBS, long long divisor)
+{
+    // Builds the shared bucket, a deficit array with `divisor` slots and one key sketch per weight.
+    struct fair_token_bucket *self=ALLOC(struct fair_token_bucket, 1);
+    uuid_copy(self->uuid, uuid);
+    self->divisor=divisor;
+    self->bucket=OC_token_bucket_new(uuid, now, CIR, CBS);
+    self->sfq=g_array_new(uuid, divisor);
+    for(int w=FAIR_TB_WEIGHT_MAX-1; w>=0; w--)
+    {
+        self->hll[w]=ST_hyperloglog_new(9, 5, now);
+    }
+    return self;
+}
+void fair_token_bucket_configure(struct fair_token_bucket *ftb, struct timeval now, long long CIR, long long CBS, long long divisor)
+{
+    // The divisor can only grow; a smaller or equal value leaves the deficit array as it is.
+    if(ftb->divisor<divisor)
+    {
+        g_array_resize(ftb->sfq, divisor);
+        ftb->divisor=divisor;
+    }
+    OC_token_bucket_configure(ftb->bucket, now, CIR, CBS);
+}
+void fair_token_bucket_free(struct fair_token_bucket *ftb)
+{
+    OC_token_bucket_free(ftb->bucket); // release every owned sub-object, then the container itself
+    g_array_free(ftb->sfq);
+    for(int w=FAIR_TB_WEIGHT_MAX-1; w>=0; w--)
+    {
+        ST_hyperloglog_free(ftb->hll[w]);
+    }
+    free(ftb);
+}
+
+static void ftb_refill(struct fair_token_bucket *ftb, struct timeval now)
+{
+    // Recompute the fairness state at most once per REFILL_INTERVAL_MS; otherwise do nothing.
+    if(likely(timeval_delta_ms(ftb->last_refill_time, now)<REFILL_INTERVAL_MS))
+    {
+        return;
+    }
+    // Add up the per-weight key estimates: weight w (1-based) is counted by hll[w-1].
+    long long keys=0, weight_sum=0;
+    for(long long w=1; w<=FAIR_TB_WEIGHT_MAX; w++)
+    {
+        long long n=ST_hyperloglog_count(ftb->hll[w-1]);
+        weight_sum += w*n;
+        keys += n;
+    }
+    // One unit of weight is entitled to an equal part of what the shared bucket holds right now.
+    struct OC_token_bucket_info snapshot;
+    OC_token_bucket_info(ftb->bucket, now, &snapshot);
+    ftb->per_weight_quantum=snapshot.available/MAX(1, weight_sum);
+    ftb->n_active_key=keys;
+    // Start a new round: remember when, start a new deficit epoch and change the hash seed.
+    ftb->last_refill_time=now;
+    g_array_reset(ftb->sfq);
+    ftb->perturb_seed++;
+}
+
+long long fair_token_bucket_consume(struct fair_token_bucket *ftb, struct timeval now, const char *key, size_t keylen, long long weight, enum tb_consume_type cmd, long long tokens)
+{
+    // Returns -1 for a weight outside 1..FAIR_TB_WEIGHT_MAX, 0 when the request exceeds the key's share,
+    // otherwise whatever OC_token_bucket_consume() grants. hll[] is indexed by weight-1, so 0 must be rejected.
+    if(weight>FAIR_TB_WEIGHT_MAX || weight<1)
+        return -1;
+    ST_hyperloglog_add(ftb->hll[weight-1], key, keylen, now);
+    ftb_refill(ftb, now);
+    // The key hashes into one of `divisor` slots; long long keeps the index exact for any divisor.
+    long long sfq_idx=XXH3_64bits_withSeed(key, keylen, ftb->perturb_seed)%ftb->divisor;
+    // Keys that share a slot share its counter, so scale it by the estimated number of keys per slot.
+    long long deficit_est=g_array_get(ftb->sfq, sfq_idx);
+    deficit_est/=MAX(1, ftb->n_active_key/ftb->divisor);
+    if(tokens + deficit_est > ftb->per_weight_quantum*weight)
+    {
+        return 0;
+    }
+    long long allocated_tokens=OC_token_bucket_consume(ftb->bucket, now, cmd, tokens);
+    if(allocated_tokens) g_array_incrby(ftb->sfq, sfq_idx, allocated_tokens);
+    return allocated_tokens;
+}
+
+void fair_token_bucket_info(const struct fair_token_bucket *ftb, struct timeval now, struct fair_token_bucket_info *info)
+{
+    // Bucket figures are evaluated at `now`; the key count is the value cached by the last refill.
+    OC_token_bucket_info(ftb->bucket, now, &info->bucket_info);
+    info->divisor=ftb->divisor;
+    info->active_key_number=ftb->n_active_key;
+}
+struct ftb_header // fixed-size prefix of a serialized fair_token_bucket
+{
+    long long magic; // written as 0x5210 and checked on deserialize
+    long long payload_sz; // size of the whole blob, this header included
+    long long divisor; // fair_token_bucket.divisor
+    long long pertub_seed; // fair_token_bucket.perturb_seed
+};
+size_t fair_token_bucket_serialized_size(const struct fair_token_bucket *ftb)
+{
+    // Byte count of the blob written by fair_token_bucket_serialize(): header plus every sub-object.
+    size_t total=sizeof(struct ftb_header);
+    total += OC_token_bucket_serialized_size(ftb->bucket);
+    total += g_array_serialized_size(ftb->sfq);
+    for(int w=FAIR_TB_WEIGHT_MAX-1; w>=0; w--)
+    {
+        total += ST_hyperloglog_serialized_size(ftb->hll[w]);
+    }
+    return total;
+}
+void fair_token_bucket_serialize(const struct fair_token_bucket *ftb, char **blob, size_t *blob_sz)
+{
+    // Layout: ftb_header | hll[0..FAIR_TB_WEIGHT_MAX-1] | sfq | bucket. *blob receives a newly ALLOC'ed buffer.
+    const size_t total=fair_token_bucket_serialized_size(ftb);
+    char *out=ALLOC(char, total);
+    char *part=NULL;
+    size_t part_sz=0, pos=0;
+    struct ftb_header hdr;
+    hdr.magic=0x5210;
+    hdr.payload_sz=total;
+    hdr.divisor=ftb->divisor;
+    hdr.pertub_seed=ftb->perturb_seed;
+    memcpy(out, &hdr, sizeof(hdr));
+    pos=sizeof(hdr);
+    // Every sub-object serializes into its own temporary buffer, which is copied in and then freed.
+    for(int w=0; w<FAIR_TB_WEIGHT_MAX; w++)
+    {
+        ST_hyperloglog_serialize(ftb->hll[w], &part, &part_sz);
+        memcpy(out+pos, part, part_sz);
+        pos += ST_hyperloglog_serialized_size(ftb->hll[w]);
+        free(part);
+    }
+    g_array_serialize(ftb->sfq, &part, &part_sz);
+    memcpy(out+pos, part, part_sz);
+    assert(part_sz==g_array_serialized_size(ftb->sfq));
+    pos += g_array_serialized_size(ftb->sfq);
+    free(part);
+    OC_token_bucket_serialize(ftb->bucket, &part, &part_sz);
+    memcpy(out+pos, part, part_sz);
+    pos += OC_token_bucket_serialized_size(ftb->bucket);
+    free(part);
+
+    *blob=out;
+    *blob_sz=total;
+    assert(pos==total);
+}
+struct fair_token_bucket *fair_token_bucket_deserialize(const char *blob, size_t blob_sz)
+{
+    struct ftb_header hdr;
+    assert(blob_sz>=sizeof(hdr)); // check the size before reading the header; like the checks below, malformed input trips an assert
+    memcpy(&hdr, blob, sizeof(hdr));
+    size_t offset=sizeof(hdr);
+    assert(hdr.magic==0x5210);
+    assert(hdr.payload_sz<=blob_sz);
+    struct fair_token_bucket *ftb=ALLOC(struct fair_token_bucket, 1); // only perturb_seed, divisor and the sub-objects are restored from the blob
+    ftb->perturb_seed=hdr.pertub_seed;
+    ftb->divisor=hdr.divisor;
+    for(int i=0; i<FAIR_TB_WEIGHT_MAX; i++)
+    {
+        ftb->hll[i]=ST_hyperloglog_deserialize(blob+offset, blob_sz-offset);
+        offset += ST_hyperloglog_serialized_size(ftb->hll[i]);
+    }
+    ftb->sfq=g_array_deserialize(blob+offset, blob_sz-offset);
+    offset += g_array_serialized_size(ftb->sfq);
+    ftb->bucket=OC_token_bucket_deserialize(blob+offset, blob_sz-offset);
+    offset += OC_token_bucket_serialized_size(ftb->bucket);
+    assert(offset==hdr.payload_sz);
+    return ftb;
+}
+void fair_token_bucket_merge(struct fair_token_bucket *dst, const struct fair_token_bucket *src)
+{
+    // Join of two replicas: scalars take the max, every sub-object is merged by its own merge function.
+    // A larger peer divisor must also grow the local deficit array, or later slot indexes would exceed it.
+    if(src->divisor>dst->divisor) g_array_resize(dst->sfq, src->divisor);
+    dst->perturb_seed=MAX(dst->perturb_seed, src->perturb_seed);
+    dst->divisor=MAX(dst->divisor, src->divisor);
+    for(int i=0; i<FAIR_TB_WEIGHT_MAX; i++)
+        ST_hyperloglog_merge(dst->hll[i], src->hll[i]);
+    g_array_merge(dst->sfq, src->sfq);
+    OC_token_bucket_merge(dst->bucket, src->bucket);
+}
+void fair_token_bucket_merge_blob(struct fair_token_bucket *ftb, const char *blob, size_t blob_sz)
+{
+    // Decode the peer's state into a temporary object, merge it into ftb, then free the temporary.
+    struct fair_token_bucket *peer=fair_token_bucket_deserialize(blob, blob_sz);
+    fair_token_bucket_merge(ftb, peer);
+    fair_token_bucket_free(peer);
+}
+size_t fair_token_bucket_mem_size(const struct fair_token_bucket *ftb)
+{
+    // TODO: only the top-level struct is counted so far; the memory behind
+    // hll[], sfq and bucket is not added yet.
+    size_t footprint=sizeof(struct fair_token_bucket);
+    return footprint;
+}
+\ No newline at end of file
diff --git a/CRDT/fair_token_bucket.h b/CRDT/fair_token_bucket.h
new file mode 100644
index 0000000..29d5789
--- /dev/null
+++ b/CRDT/fair_token_bucket.h
@@ -0,0 +1,42 @@
+/*
+* A Fair Split Token Bucket CRDT
+* Reference: Unpublished paper, Chao Zheng, "A Stochastic Fair Token Bucket CRDT for Distributed Rate-limiting"
+* The fair token bucket achieves max-min fairness with Deficit-Round-Robin and SFQ.
+* - Resources are allocated in order of increasing demand
+* - No source gets a resource share larger than its demand
+* - Sources with unsatisfied demands get an equal share of the resource
+* Author: [email protected]
+*/
+
+#pragma once
+#include <stddef.h>
+#include <sys/time.h>
+#include <uuid/uuid.h>
+#include "oc_token_bucket.h"
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+#define FAIR_TB_WEIGHT_MAX  20 /* fair_token_bucket_consume() returns -1 for weights above this value */
+struct fair_token_bucket;
+struct fair_token_bucket *fair_token_bucket_new(uuid_t uuid, struct timeval now, long long CIR, long long CBS, long long divisor); // divisor: number of slots keys are hashed into
+void fair_token_bucket_free(struct fair_token_bucket *ftb);
+long long fair_token_bucket_consume(struct fair_token_bucket *ftb, struct timeval now, const char *key, size_t keylen, long long weight, enum tb_consume_type cmd, long long tokens); // -1 if weight is rejected, else the tokens granted (may be 0)
+void fair_token_bucket_configure(struct fair_token_bucket *ftb, struct timeval now, long long CIR, long long CBS, long long divisor); // divisor is applied only when larger than the current one
+struct fair_token_bucket_info
+{
+    struct OC_token_bucket_info bucket_info; // filled in by OC_token_bucket_info() for the underlying bucket
+    long long active_key_number; // key estimate cached at the last refill
+    long long divisor; // current number of slots
+};
+void fair_token_bucket_info(const struct fair_token_bucket *ftb, struct timeval now, struct fair_token_bucket_info *info);
+/* Serialization and state merge */
+void fair_token_bucket_serialize(const struct fair_token_bucket *ftb, char **blob, size_t *blob_sz); // *blob is allocated by this call
+struct fair_token_bucket *fair_token_bucket_deserialize(const char *blob, size_t blob_sz);
+void fair_token_bucket_merge(struct fair_token_bucket *dst, const struct fair_token_bucket *src);
+void fair_token_bucket_merge_blob(struct fair_token_bucket *ftb, const char *blob, size_t blob_sz); // deserialize, merge, then free the temporary
+size_t fair_token_bucket_mem_size(const struct fair_token_bucket *ftb); // currently sizeof the top-level struct only
+#ifdef __cplusplus
+}
+#endif
+\ No newline at end of file
diff --git a/CRDT/g_array.c b/CRDT/g_array.c
new file mode 100644
index 0000000..2272bfc
--- /dev/null
+++ b/CRDT/g_array.c
@@ -0,0 +1,214 @@
+#include "g_array.h"
+#include "crdt_utils.h"
+
+#include "uthash.h"
+
+#include <assert.h>
+#define	TO_LOCAL_EPOCH(x)	((x) & ((1ULL << 24) - 1))	/* low 24 bits of the array-wide epoch, as stored in each slot */
+struct counter_item
+{
+	unsigned long long epoch:24;	// unsigned: a signed 24-bit field reads back negative from 2^23 on and never equals TO_LOCAL_EPOCH()
+	long long count:40;	// value accumulated in this slot during `epoch`
+};
+struct counter_array	// the counters contributed by one replica
+{
+	uuid_t replica_id;	// hash key
+    long long array_sz;	// number of slots in array
+	long long sequence;	// bumped on every local increment and on growth; g_array_merge() compares it to pick the newer copy
+	struct counter_item  *array;	// the members before this one are copied verbatim into a blob as the block header
+	UT_hash_handle hh;
+};
+static struct counter_array *counter_array_new(uuid_t uuid, long long array_sz)
+{
+	struct counter_array *blk=ALLOC(struct counter_array, 1);	// block of array_sz slots owned by replica `uuid`
+	blk->array=ALLOC(struct counter_item, array_sz);
+	blk->array_sz=array_sz;
+	uuid_copy(blk->replica_id, uuid);
+	return blk;
+}
+static void counter_array_free(struct counter_array *a)
+{
+	// Slots first, then the block that points at them.
+	free(a->array);
+	free(a);
+}
+struct g_array	//Grow only array
+{
+	uuid_t my_id;	// the local replica; selects the block g_array_incrby() writes to
+	long long array_sz;	// slot count used when the local block is created
+	long long epoch;	// current epoch, advanced by g_array_reset()
+	struct counter_array *hash;	// uthash table of per-replica blocks keyed by replica_id
+};
+struct g_array *g_array_new(uuid_t my_id, long long array_sz)
+{
+	struct g_array *self=ALLOC(struct g_array, 1);	// no per-replica block exists until the first g_array_incrby()
+	self->array_sz=array_sz;
+	uuid_copy(self->my_id, my_id);
+	return self;
+}
+void g_array_free(struct g_array *ga)
+{
+	// Unlink and free every replica block, then the container.
+	struct counter_array *blk=NULL, *next=NULL;
+	HASH_ITER(hh, ga->hash, blk, next)
+	{
+		HASH_DELETE(hh, ga->hash, blk);
+		counter_array_free(blk);
+	}
+	free(ga);
+}
+
+long long g_array_get(const struct g_array *ga, long long idx)
+{
+	struct counter_array *blk=NULL, *next=NULL;
+	long long total=0;	// sum of slot idx over all replicas, current epoch only
+	HASH_ITER(hh, ga->hash, blk, next)
+	{
+		if(idx >= blk->array_sz || blk->array[idx].epoch != TO_LOCAL_EPOCH(ga->epoch)) continue;
+		total += blk->array[idx].count;
+	}
+	return total;
+}
+
+long long g_array_incrby(struct g_array *ga, long long idx, long long increment)
+{
+	// Only the local replica's block is written; it is created on first use. Returns g_array_get(ga, idx).
+	struct counter_array *mine=NULL;
+	HASH_FIND(hh, ga->hash, ga->my_id, sizeof(ga->my_id), mine);
+	if(mine==NULL)
+	{
+		mine=counter_array_new(ga->my_id, ga->array_sz);
+		HASH_ADD_KEYPTR(hh, ga->hash, mine->replica_id, sizeof(mine->replica_id), mine);
+	}
+	assert(idx < mine->array_sz);
+	struct counter_item *slot=&mine->array[idx];
+	// A slot last written in another epoch restarts from zero.
+	long long base=(slot->epoch == TO_LOCAL_EPOCH(ga->epoch)) ? slot->count : 0;
+	slot->epoch = TO_LOCAL_EPOCH(ga->epoch);
+	slot->count = base + increment;
+	mine->sequence ++;
+	return g_array_get(ga, idx);
+}
+void g_array_reset(struct g_array *ga)	// start a new epoch; nothing is cleared here
+{
+	ga->epoch++;	// g_array_get() skips slots whose stored epoch differs from the current one
+}
+void g_array_resize(struct g_array *ga, long long new_size)
+{
+	// Grows the local replica's block (if one exists) to new_size slots; blocks created later use new_size too.
+	struct counter_array *a=NULL;
+	HASH_FIND(hh, ga->hash, ga->my_id, sizeof(ga->my_id), a);
+	if(a && a->array_sz < new_size)
+	{
+		a->array=(struct counter_item *)realloc(a->array, sizeof(struct counter_item)*new_size);
+		memset(a->array+a->array_sz, 0, sizeof(struct counter_item)*(new_size-a->array_sz));	// realloc() leaves the added tail indeterminate
+		a->array_sz=new_size;
+		a->sequence++;
+	}
+	ga->array_sz=new_size;
+}
+size_t g_array_replicas(const struct g_array *ga)	// number of per-replica blocks currently held
+{
+	return HASH_COUNT(ga->hash);
+}
+const size_t G_ARRAY_ITEM_HEADER_SIZE=offsetof(struct counter_array, array);	// bytes of counter_array copied verbatim into a blob: the members that precede `array`
+size_t g_array_serialized_size(const struct g_array *ga)
+{
+	// Two long long fields (epoch and replica count), then for every replica
+	// one block header followed by its slot array.
+	size_t total=2*sizeof(long long);
+	struct counter_array *blk=NULL, *next=NULL;
+	HASH_ITER(hh, ga->hash, blk, next)
+	{
+		total += G_ARRAY_ITEM_HEADER_SIZE + blk->array_sz*sizeof(struct counter_item);
+	}
+	return total;
+}
+void g_array_serialize(const struct g_array *ga, char **blob, size_t *blob_sz)
+{
+	// Layout: epoch | replica count | per replica: block header + slot array.
+	// *blob receives a newly ALLOC'ed buffer of *blob_sz bytes.
+	const size_t total=g_array_serialized_size(ga);
+	char *out=ALLOC(char, total);
+	size_t pos=0;
+	const long long prefix[2]={ga->epoch, (long long)HASH_COUNT(ga->hash)};
+	memcpy(out, prefix, sizeof(prefix));
+	pos += sizeof(prefix);
+
+	struct counter_array *blk=NULL, *next=NULL;
+	HASH_ITER(hh, ga->hash, blk, next)
+	{
+		const size_t slots_sz=blk->array_sz*sizeof(struct counter_item);
+		memcpy(out+pos, blk, G_ARRAY_ITEM_HEADER_SIZE);
+		memcpy(out+pos+G_ARRAY_ITEM_HEADER_SIZE, blk->array, slots_sz);
+		pos += G_ARRAY_ITEM_HEADER_SIZE+slots_sz;
+	}
+	assert(pos==total);
+	*blob=out;
+	*blob_sz=total;
+}
+struct g_array * g_array_deserialize(const char *blob, size_t blob_sz)
+{
+	// Inverse of g_array_serialize(). blob may start at any byte offset inside a larger message,
+	// so the two leading long long fields are fetched with memcpy() rather than through a cast pointer.
+	long long prefix[2]={0, 0};	// epoch, number of replica blocks
+	assert(blob_sz>=sizeof(prefix));
+	memcpy(prefix, blob, sizeof(prefix));
+	size_t offset=sizeof(prefix), n_item=(size_t)prefix[1];
+	struct g_array *ga=ALLOC(struct g_array, 1);
+	ga->array_sz=0;
+	ga->epoch=prefix[0];
+	for(size_t i=0; i<n_item; i++)
+	{
+		assert(offset<blob_sz);
+		struct counter_array *item=ALLOC(struct counter_array, 1);
+		memcpy(item, blob+offset, G_ARRAY_ITEM_HEADER_SIZE);
+		offset += G_ARRAY_ITEM_HEADER_SIZE;
+		item->array=ALLOC(struct counter_item, item->array_sz);
+		memcpy(item->array, blob+offset, item->array_sz*sizeof(struct counter_item));
+		offset += item->array_sz*sizeof(struct counter_item);
+		HASH_ADD_KEYPTR(hh, ga->hash, item->replica_id, sizeof(item->replica_id), item);
+	}
+	assert(offset<=blob_sz);
+	return ga;
+}
+void g_array_merge(struct g_array *dst, const struct g_array *src)
+{
+	// For each replica block the copy with the higher sequence wins; the epoch becomes the max of both sides.
+    struct counter_array *src_item=NULL, *dst_item=NULL, *tmp=NULL;
+	long long max_array_sz=0;
+    HASH_ITER(hh, src->hash, src_item, tmp)
+    {
+        HASH_FIND(hh, dst->hash, src_item->replica_id, sizeof(src_item->replica_id), dst_item);
+        if(!dst_item)
+        {
+            dst_item=ALLOC(struct counter_array, 1);
+			memcpy(dst_item, src_item, G_ARRAY_ITEM_HEADER_SIZE);
+            dst_item->array=ALLOC(struct counter_item, dst_item->array_sz);
+            memcpy(dst_item->array, src_item->array, dst_item->array_sz*sizeof(struct counter_item));
+			HASH_ADD_KEYPTR(hh, dst->hash, dst_item->replica_id, sizeof(dst_item->replica_id), dst_item);
+        }
+		else if(src_item->sequence > dst_item->sequence)
+		{
+			// Record the adopted sequence as well: without it the block keeps its old number and
+			// an older copy of the same replica merged later would overwrite this newer one.
+			dst_item->sequence=src_item->sequence;
+			dst_item->array_sz=src_item->array_sz;
+			dst_item->array=realloc(dst_item->array, sizeof(struct counter_item)*dst_item->array_sz);
+			memcpy(dst_item->array, src_item->array, sizeof(struct counter_item)*dst_item->array_sz);
+		}
+		max_array_sz=MAX(src_item->array_sz, max_array_sz);
+    }
+	dst->epoch=MAX(dst->epoch, src->epoch);
+	if(dst->array_sz<max_array_sz)
+	{
+		g_array_resize(dst, max_array_sz);
+	}
+}
+void g_array_merge_blob(struct g_array *ga, const char *blob, size_t blob_sz)
+{
+	// Decode the peer's array into a temporary, merge it into ga, then free the temporary.
+	struct g_array *peer=g_array_deserialize(blob, blob_sz);
+	g_array_merge(ga, peer);
+	g_array_free(peer);
+}
+\ No newline at end of file
diff --git a/CRDT/g_array.h b/CRDT/g_array.h
new file mode 100644
index 0000000..6410948
--- /dev/null
+++ b/CRDT/g_array.h
@@ -0,0 +1,27 @@
+/*
+* Grow-only counter array.
+* Author: [email protected]
+* 2023-4-17
+*/
+#pragma once
+#include <uuid/uuid.h>
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+struct g_array;
+struct g_array *g_array_new(uuid_t my_id, long long array_sz); // array_sz: slot count used for the local replica's block
+void g_array_free(struct g_array *ga);
+long long g_array_incrby(struct g_array *ga, long long idx, long long increment); // adds to the local replica's slot idx; returns g_array_get(ga, idx)
+long long g_array_get(const struct g_array *ga, long long idx); // sum of slot idx over all replicas, current epoch only
+void g_array_resize(struct g_array *ga, long long new_size); // grows the local block when it exists and is smaller than new_size
+void g_array_reset(struct g_array *ga); // starts a new epoch; counts stored under another epoch are no longer reported
+size_t g_array_replicas(const struct g_array *ga); // number of per-replica blocks held
+size_t g_array_serialized_size(const struct g_array *ga); // size of the blob g_array_serialize() produces
+void g_array_serialize(const struct g_array *ga, char **blob, size_t *blob_sz); // *blob is allocated by this call
+struct g_array *g_array_deserialize(const char *blob, size_t blob_sz);
+void g_array_merge(struct g_array *dst, const struct g_array *src); // per replica, the block with the higher sequence is kept
+void g_array_merge_blob(struct g_array *ga, const char *blob, size_t blob_sz); // deserialize, merge, then free the temporary
+#ifdef __cplusplus
+}
+#endif
diff --git a/CRDT/lww_register.c b/CRDT/lww_register.c
index 67b9540..7781735 100644
--- a/CRDT/lww_register.c
+++ b/CRDT/lww_register.c
@@ -81,7 +81,7 @@ void LWW_register_merge_blob(struct LWW_register *reg, const char *blob, size_t
 	LWW_register_free(to_merge);
 	return;
 }
-size_t LWW_regeister_size(const struct LWW_register *reg)
+size_t LWW_regeister_mem_size(const struct LWW_register *reg)
 {
 	return (sizeof(struct LWW_register)+reg->size);
 }
diff --git a/CRDT/lww_register.h b/CRDT/lww_register.h
index 1cca6ca..2217211 100644
--- a/CRDT/lww_register.h
+++ b/CRDT/lww_register.h
@@ -27,7 +27,7 @@ struct LWW_register *LWW_register_deserialize(const char *blob, size_t blob_sz);
 void LWW_register_merge(struct LWW_register *dst, struct LWW_register *src);
 
 void LWW_register_merge_blob(struct LWW_register *reg, const char *blob, size_t blob_sz);
-size_t LWW_regeister_size(const struct LWW_register *reg);
+size_t LWW_regeister_mem_size(const struct LWW_register *reg);
 
 #ifdef __cplusplus
 }
diff --git a/CRDT/oc_token_bucket.c b/CRDT/oc_token_bucket.c
index 66af171..502596c 100644
--- a/CRDT/oc_token_bucket.c
+++ b/CRDT/oc_token_bucket.c
@@ -21,6 +21,7 @@ struct OC_token_bucket
 	struct timeval refill_timestamp;
 	struct PN_counter *consumed;
 };
+const size_t OCTB_BLOB_HDR_SIZE= offsetof(struct OC_token_bucket, consumed);
 struct OC_token_bucket *OC_token_bucket_new(uuid_t my_id, struct timeval now, long long CIR, long long CBS)
 {
 	struct OC_token_bucket *bucket=ALLOC(struct OC_token_bucket, 1);
@@ -41,154 +42,107 @@ void OC_token_bucket_free(struct OC_token_bucket *bucket)
 	PN_counter_free(bucket->consumed);
 	free(bucket);
 }
-void OC_token_bucket_configure(struct OC_token_bucket *bucket, struct timeval now, long long CIR, long long CBS, long long refill_duration_ms)
+void OC_token_bucket_configure(struct OC_token_bucket *bucket, struct timeval now, long long CIR, long long CBS)
 {
 	memcpy(&bucket->cfg.write_timestamp, &now, sizeof(bucket->cfg.write_timestamp));
 	if(CIR>=0) bucket->cfg.CIR=CIR;
 	if(CBS>=0) bucket->cfg.CBS=CBS;
-	if(refill_duration_ms>0) bucket->cfg.refill_duration_ms=refill_duration_ms;
 }
 
 
-long long OC_token_bucket_control(struct OC_token_bucket *bucket, struct timeval now, enum OC_token_bucket_command cmd, long long tokens)
+long long OC_token_bucket_consume(struct OC_token_bucket *bucket, struct timeval now, enum tb_consume_type cmd, long long tokens)
 {
 	long long delta_time_ms=timeval_delta_ms(bucket->refill_timestamp, now);
 	long long consumed=PN_counter_get(bucket->consumed);
-	long long to_add=0;
 	long long refilled=bucket->refilled;
 	assert(refilled>=0);
 	assert(consumed>=0);
-	int refill_flag=0;
+
+	long long new_refilled=0;
+	long long available=tb_available(bucket->cfg.CIR, bucket->cfg.CBS, consumed, 
+				refilled, delta_time_ms, bucket->cfg.refill_duration_ms, &new_refilled);
+
 	int infinite_flag=0;
     if(bucket->cfg.CBS==0 && bucket->cfg.CIR==0)
     {
 		infinite_flag=1;
-		refilled+=tokens;
-		refill_flag=1;
+		new_refilled += tokens;
     }
+	size_t n_replica=PN_counter_replica_num(bucket->consumed);
 
-    to_add=bucket->cfg.CIR*delta_time_ms/1000;
-	if(delta_time_ms>=bucket->cfg.refill_duration_ms && to_add>0)
+	long long allocated=0;
+	if(infinite_flag)
 	{
-		if(refilled<consumed)
-		{
-			refilled=consumed;
-		}
-		if(to_add + refilled - consumed < bucket->cfg.CBS)
-		{
-			refilled+=to_add;
-		}
-		else
-		{
-			refilled+=bucket->cfg.CBS - (refilled - consumed);
-		}
-		refill_flag=1;
+		allocated=tokens;
 	}
-	size_t n_replica=PN_counter_replica_num(bucket->consumed);
-
-	
-	long long global_available=MAX(refilled-consumed, 0);
-	long long reserved=bucket->cfg.CIR*(n_replica-1)/n_replica;
-	long long local_available=MAX(global_available-reserved, 0);
-//	long long local_available=global_available;
-	long long assigned=0;
-	switch(cmd)
+	else
 	{
-		case OCTB_CMD_CONSUME_AS_MUCH_AS_POSSIBLE:
-			assigned=infinite_flag?tokens:global_available;
-			break;
-		case OCTB_CMD_CONSUME_FORCE:
-			assigned=tokens;
-			break;
-		case OCTB_CMD_CONSUME_FLEXIBLE:
-			assigned=infinite_flag?tokens:MIN(tokens, local_available);;
-			break;
-		case OCTB_CMD_CONSUME_NORMAL:
-			if(infinite_flag)
-			{
-				assigned=tokens;
-			}
-			else
-			{
-				assigned=(tokens<=local_available) ? tokens:0;
-			}
-			break;
-		case OCTB_CMD_READ_AVAILABLE:
-			return global_available;
-		case OCTB_CMD_READ_REFILLED:
-			return refilled;
-		case OCTB_CMD_READ_CONSUEMD:
-			return consumed;
-		default:
-			assert(0);
-			break;
+		allocated=tb_consume(bucket->cfg.CIR, available, n_replica, cmd, tokens);
 	}
-	if(refill_flag)
+	if(allocated>0)
 	{
-		bucket->refilled=refilled;		
+		PN_counter_incrby(bucket->consumed, allocated);
+	}
+	if(refilled!=new_refilled)
+	{
+		bucket->refilled=new_refilled;		
 		memcpy(&bucket->refill_timestamp, &now, sizeof(bucket->refill_timestamp));
 	}
-	assert(assigned>=0);
-	PN_counter_incrby(bucket->consumed, assigned);
-	return assigned;
+	assert(allocated>=0);
+	
+	return allocated;
 }
-long long OC_token_bucket_read_refilled(struct OC_token_bucket *bucket)
-{
-	struct timeval now;
-	gettimeofday(&now, NULL);
-	return OC_token_bucket_control(bucket, now, OCTB_CMD_READ_REFILLED, 0);
 
-}
-long long OC_token_bucket_read_available(struct OC_token_bucket *bucket)
-{
-	struct timeval now;
-	gettimeofday(&now, NULL);
-	return OC_token_bucket_control(bucket, now, OCTB_CMD_READ_AVAILABLE, 0);
-}
-void OC_token_bucket_info(struct OC_token_bucket *bucket, struct OC_token_bucket_info *info)
+void OC_token_bucket_info(struct OC_token_bucket *bucket, struct timeval now, struct OC_token_bucket_info *info)
 {
-	struct timeval now;
-	gettimeofday(&now, NULL);
+	long long delta_time_ms=timeval_delta_ms(bucket->refill_timestamp, now);
 
 	info->CIR=bucket->cfg.CIR;
 	info->CBS=bucket->cfg.CBS;
 	info->refill_duration_ms=bucket->cfg.refill_duration_ms;
-	info->consumed=OC_token_bucket_control(bucket, now, OCTB_CMD_READ_CONSUEMD, 0);
-	info->refilled=OC_token_bucket_control(bucket, now, OCTB_CMD_READ_REFILLED, 0);
-	info->available=OC_token_bucket_control(bucket, now, OCTB_CMD_READ_AVAILABLE, 0);
+	info->consumed=PN_counter_get(bucket->consumed);
+	info->available=tb_available(bucket->cfg.CIR, bucket->cfg.CBS, info->consumed, 
+				 bucket->refilled, delta_time_ms, bucket->cfg.refill_duration_ms, &info->refilled);
 	return;
 }
-
-void OC_token_bucket_serialize(struct OC_token_bucket *bucket, char **blob, size_t *blob_sz)
+size_t OC_token_bucket_serialized_size(const struct OC_token_bucket *bucket)
 {
+	size_t sz=0;
+	sz += OCTB_BLOB_HDR_SIZE;
+	sz += PN_counter_serialized_size(bucket->consumed);
+	return sz;
+}
+void OC_token_bucket_serialize(const struct OC_token_bucket *bucket, char **blob, size_t *blob_sz)
+{
+	size_t offset=0;
+	size_t sz=OC_token_bucket_serialized_size(bucket);
+	char *buffer=ALLOC(char, sz);
+	memcpy(buffer, bucket, OCTB_BLOB_HDR_SIZE);
+	offset += OCTB_BLOB_HDR_SIZE;
+
 	char *pnc_blob=NULL;
 	size_t pnc_blob_sz=0;
 	PN_counter_serialize(bucket->consumed, &pnc_blob, &pnc_blob_sz);
-
-	size_t offset=0;
-	size_t mpack_sz=sizeof(struct OC_token_bucket)-sizeof(struct PN_counter*)+pnc_blob_sz;
-	char *mpack_buff=(char*)malloc(mpack_sz);
-	memcpy(mpack_buff, bucket, sizeof(struct OC_token_bucket)-sizeof(struct PN_counter*));
-	offset+=sizeof(struct OC_token_bucket)-sizeof(struct PN_counter*);
-	memcpy(mpack_buff+offset, pnc_blob, pnc_blob_sz);
-
-	*blob_sz=mpack_sz;
-	*blob=mpack_buff;
-
+	memcpy(buffer+offset, pnc_blob, pnc_blob_sz);
+	offset+=pnc_blob_sz;
 	free(pnc_blob);
-	pnc_blob=NULL;
+	assert(offset==sz);
+	*blob_sz=sz;
+	*blob=buffer;
 	return;
 }
 struct OC_token_bucket *OC_token_bucket_deserialize(const char *blob, size_t blob_sz)
 {
 	struct OC_token_bucket *bucket=ALLOC(struct OC_token_bucket, 1);
 	size_t offset=0;
-	memcpy(bucket, blob, sizeof(struct OC_token_bucket)-sizeof(struct PN_counter*));
-	offset+=sizeof(struct OC_token_bucket)-sizeof(struct PN_counter*);
+	memcpy(bucket, blob+offset, OCTB_BLOB_HDR_SIZE);
+	offset += OCTB_BLOB_HDR_SIZE;
 	bucket->consumed=PN_counter_deserialize(blob+offset, blob_sz-offset);
+	offset += PN_counter_serialized_size(bucket->consumed);
+	assert(offset <= blob_sz);
 	return bucket;
 }
-void OC_token_bucket_merge(struct OC_token_bucket *dst, struct OC_token_bucket *src)
+void OC_token_bucket_merge(struct OC_token_bucket *dst, const struct OC_token_bucket *src)
 {
 	if(timercmp(&(dst->cfg.write_timestamp), &(src->cfg.write_timestamp), <))//Last-Write-Wins
 	{
@@ -211,10 +165,10 @@ void OC_token_bucket_merge_blob(struct OC_token_bucket *bucket, const char *blob
 	OC_token_bucket_free(to_merge);
 	return;
 }
-size_t OC_token_bucket_size(const struct OC_token_bucket *bucket)
+size_t OC_token_bucket_mem_size(const struct OC_token_bucket *bucket)
 {
 	size_t sz=0;
-	sz=sizeof(struct OC_token_bucket);
-	sz+=PN_counter_size(bucket->consumed);
+	sz += sizeof(struct OC_token_bucket);
+	sz += PN_counter_mem_size(bucket->consumed);
 	return sz;
-}
+}
+\ No newline at end of file
diff --git a/CRDT/oc_token_bucket.h b/CRDT/oc_token_bucket.h
index 0dcff30..172a850 100644
--- a/CRDT/oc_token_bucket.h
+++ b/CRDT/oc_token_bucket.h
@@ -4,6 +4,7 @@
 * 2022-9-7
 */
 #pragma once
+#include "token_bucket_common.h"
 #include <stddef.h>
 #include <sys/time.h>
 #include <uuid/uuid.h>
@@ -17,7 +18,7 @@ struct OC_token_bucket;
 // CBS: Committed Burst Size
 struct OC_token_bucket *OC_token_bucket_new(uuid_t my_id, struct timeval now, long long CIR, long long CBS);
 
-void OC_token_bucket_configure(struct OC_token_bucket *bucket, struct timeval now, long long CIR, long long CBS, long long refill_duration_ms);
+void OC_token_bucket_configure(struct OC_token_bucket *bucket, struct timeval now, long long CIR, long long CBS);
 struct OC_token_bucket_info
 {
 	long long CIR;
@@ -28,27 +29,19 @@ struct OC_token_bucket_info
 	long long available;
 	long long number_of_consumers;
 };
-void OC_token_bucket_info(struct OC_token_bucket *bucket, struct OC_token_bucket_info *info);
+void OC_token_bucket_info(struct OC_token_bucket *bucket, struct timeval now, struct OC_token_bucket_info *info);
 void OC_token_bucket_free(struct OC_token_bucket *bucket);
 
-enum OC_token_bucket_command
-{
-	OCTB_CMD_CONSUME_NORMAL,
-	OCTB_CMD_CONSUME_FORCE,
-	OCTB_CMD_CONSUME_FLEXIBLE,
-	OCTB_CMD_CONSUME_AS_MUCH_AS_POSSIBLE,
-	OCTB_CMD_READ_AVAILABLE,
-	OCTB_CMD_READ_REFILLED,
-	OCTB_CMD_READ_CONSUEMD
-};
-long long OC_token_bucket_control(struct OC_token_bucket *bucket, struct timeval now, enum OC_token_bucket_command cmd, long long tokens);
 
-void OC_token_bucket_serialize(struct OC_token_bucket *bucket, char **blob, size_t *blob_sz);
+long long OC_token_bucket_consume(struct OC_token_bucket *bucket, struct timeval now, enum tb_consume_type cmd, long long tokens);
+
+void OC_token_bucket_serialize(const struct OC_token_bucket *bucket, char **blob, size_t *blob_sz);
 struct OC_token_bucket *OC_token_bucket_deserialize(const char *blob, size_t blob_sz);
 
-void OC_token_bucket_merge(struct OC_token_bucket *dst, struct OC_token_bucket *src);
+void OC_token_bucket_merge(struct OC_token_bucket *dst, const struct OC_token_bucket *src);
 void OC_token_bucket_merge_blob(struct OC_token_bucket *bucket,  const char *blob, size_t blob_sz);
-size_t OC_token_bucket_size(const struct OC_token_bucket *bucket);
+size_t OC_token_bucket_serialized_size(const struct OC_token_bucket *bucket);
+size_t OC_token_bucket_mem_size(const struct OC_token_bucket *bucket);
 #ifdef __cplusplus
 }
 #endif
diff --git a/CRDT/or_map.c b/CRDT/or_map.c
index beeae4c..f7baa97 100644
--- a/CRDT/or_map.c
+++ b/CRDT/or_map.c
@@ -477,10 +477,10 @@ size_t OR_record_size(const struct OR_record *record)
 	switch(record->type)
 	{
 		case TYPE_INTEGER:
-			sz+=PN_counter_size(record->counter);				
+			sz+=PN_counter_mem_size(record->counter);	// in-memory footprint, same measure as the TYPE_STRING branch
 			break;
 		case TYPE_STRING:
-			sz+=LWW_regeister_size(record->string);		
+			sz+=LWW_regeister_mem_size(record->string);		
 			break;
 		default:
 			break;
@@ -749,7 +749,7 @@ void OR_map_merge_blob(struct OR_map *map, const char *blob, size_t blob_sz)
 	OR_map_free(to_merge);
 	return;
 }
-size_t OR_map_size(const struct OR_map *map)
+size_t OR_map_mem_size(const struct OR_map *map)
 {
 	size_t sz=0;
 	struct OR_record *record=NULL, *tmp_record=NULL;
diff --git a/CRDT/or_map.h b/CRDT/or_map.h
index 82522a3..771448b 100644
--- a/CRDT/or_map.h
+++ b/CRDT/or_map.h
@@ -49,7 +49,7 @@ void OR_map_serialize(const struct OR_map *map, char **blob, size_t *blob_sz);
 struct OR_map *OR_map_deserialize(const char *blob, size_t blob_sz);
 void OR_map_merge(struct OR_map *dst, struct OR_map *src);
 void OR_map_merge_blob(struct OR_map *map, const char *blob, size_t blob_sz);
-size_t OR_map_size(const struct OR_map *map);
+size_t OR_map_mem_size(const struct OR_map *map);
 
 #ifdef __cplusplus
 }
diff --git a/CRDT/or_set.c b/CRDT/or_set.c
index 94508e3..d1ea936 100644
--- a/CRDT/or_set.c
+++ b/CRDT/or_set.c
@@ -98,7 +98,7 @@ void OR_set_merge_blob(struct OR_set *set, const char *blob, size_t blob_sz)
 	OR_map_merge_blob(set->map, blob, blob_sz);
 	return;
 }
-size_t OR_set_size(const struct OR_set *set)
+size_t OR_set_mem_size(const struct OR_set *set)
 {
-	return sizeof(struct OR_set)+OR_map_size(set->map);
+	return sizeof(struct OR_set)+OR_map_mem_size(set->map);
 }
diff --git a/CRDT/or_set.h b/CRDT/or_set.h
index 3587477..094683f 100644
--- a/CRDT/or_set.h
+++ b/CRDT/or_set.h
@@ -37,7 +37,7 @@ struct OR_set_member_list* OR_set_members(const struct OR_set *set);
 
 void OR_set_serialize(const struct OR_set *set, char **blob, size_t *blob_sz);
 void OR_set_merge_blob(struct OR_set *set, const char *blob, size_t blob_sz);
-size_t OR_set_size(const struct OR_set *set);
+size_t OR_set_mem_size(const struct OR_set *set);
 
 #ifdef __cplusplus
 }
diff --git a/CRDT/pn_counter.c b/CRDT/pn_counter.c
index f213b21..d1431dd 100644
--- a/CRDT/pn_counter.c
+++ b/CRDT/pn_counter.c
@@ -16,25 +16,27 @@ struct counter_item
 	long long N;
 	UT_hash_handle hh;
 };
-struct counter_item *counter_item_new(uuid_t replica_id)
+static struct counter_item *counter_item_new(uuid_t replica_id)
 {
 	struct counter_item *item=ALLOC(struct counter_item, 1);
 	uuid_copy(item->replica_id, replica_id);
 	return item;
 }
-void counter_item_free(struct counter_item *item)
+static void counter_item_free(struct counter_item *item)
 {
 	free(item);
 }
 struct PN_counter
 {
-
 	uuid_t my_id;
 	struct timeval write_timestamp;
 	long long W; //value wrote by PN_counter_set
 	long long _P, _N; //observed value at PN_counter_set;
 	struct counter_item *hash_item;
 };
+const size_t PNC_BLOB_HDR_SZ=offsetof(struct PN_counter, hash_item);
+const size_t PNC_BLOB_ITEM_SZ=offsetof(struct counter_item, hh);
+
 struct PN_counter *PN_counter_new(uuid_t my_id)
 {
 	struct PN_counter *pnc=ALLOC(struct PN_counter, 1);
@@ -97,24 +99,30 @@ long long PN_counter_incrby(struct PN_counter *pnc, long long increment)
 	}
 	return PN_counter_get(pnc);
 }
-
+// Number of bytes PN_counter_serialize() emits: the fixed header plus one fixed-size record per hashed item.
+size_t PN_counter_serialized_size(const struct PN_counter *pnc)
+{
+	return PNC_BLOB_HDR_SZ + HASH_COUNT(pnc->hash_item) * PNC_BLOB_ITEM_SZ;
+}
 void PN_counter_serialize(const struct PN_counter *pnc, char **blob, size_t *blob_sz)
 {
 	struct counter_item *c=NULL, *tmp=NULL;
-	size_t mpack_sz=sizeof(struct PN_counter)-sizeof(pnc->hash_item);
-	mpack_sz+=HASH_COUNT(pnc->hash_item)*(sizeof(struct counter_item)-sizeof(UT_hash_handle));
-	char *mpack_buff=(char*)malloc(mpack_sz);
+	size_t sz=PN_counter_serialized_size(pnc);
+
+	char *buffer=ALLOC(char, sz);
 	size_t offset=0;
-	memcpy(mpack_buff, pnc, sizeof(struct PN_counter)-sizeof(pnc->hash_item));
-	offset+=sizeof(struct PN_counter)-sizeof(pnc->hash_item);
+	memcpy(buffer, pnc, PNC_BLOB_HDR_SZ);
+	offset+=PNC_BLOB_HDR_SZ;
 	HASH_ITER(hh, pnc->hash_item, c, tmp)
 	{
-		memcpy(mpack_buff+offset, c, sizeof(struct counter_item)-sizeof(UT_hash_handle));
-		offset+=sizeof(struct counter_item)-sizeof(UT_hash_handle);
+		memcpy(buffer+offset, c, PNC_BLOB_ITEM_SZ);
+		offset+=PNC_BLOB_ITEM_SZ;
 	}
-	assert(offset==mpack_sz);
-	*blob_sz=mpack_sz;
-	*blob=mpack_buff;
+	assert(offset==sz);
+	*blob_sz=sz;
+	*blob=buffer;
 	return;
 }
 struct PN_counter *PN_counter_deserialize(const char *blob, size_t blob_sz)
@@ -122,19 +130,19 @@ struct PN_counter *PN_counter_deserialize(const char *blob, size_t blob_sz)
 	struct PN_counter *pnc=ALLOC(struct PN_counter, 1);
 	struct counter_item *c=NULL, *tmp=NULL;
 	size_t offset=0;
-	memcpy(pnc, blob, sizeof(struct PN_counter)-sizeof(pnc->hash_item));
-	offset+=sizeof(struct PN_counter)-sizeof(pnc->hash_item);
+	memcpy(pnc, blob, PNC_BLOB_HDR_SZ);
+	offset+=PNC_BLOB_HDR_SZ;
 	while(offset<blob_sz)
 	{
 		tmp=(struct counter_item *)(blob+offset);
 		c=ALLOC(struct counter_item, 1);
-		memcpy(c, tmp, sizeof(struct counter_item)-sizeof(UT_hash_handle));
+		memcpy(c, tmp, PNC_BLOB_ITEM_SZ);
 		HASH_ADD_KEYPTR(hh, pnc->hash_item, c->replica_id, sizeof(c->replica_id), c);
-		offset+=sizeof(struct counter_item)-sizeof(UT_hash_handle);
+		offset += PNC_BLOB_ITEM_SZ;
 	}
 	return pnc;
 }
-void PN_counter_merge(struct PN_counter *dst, struct PN_counter *src)
+void PN_counter_merge(struct PN_counter *dst, const struct PN_counter *src)
 {
 	struct counter_item *src_item=NULL, *dst_item=NULL, *tmp=NULL;	
 	if(timercmp(&dst->write_timestamp, &src->write_timestamp, <))//Last-Write-Wins
@@ -168,10 +176,10 @@ size_t PN_counter_replica_num(const struct PN_counter *pnc)
 {
 	return MAX(HASH_COUNT(pnc->hash_item), 1);
 }
-size_t PN_counter_size(const struct PN_counter *pnc)
+size_t PN_counter_mem_size(const struct PN_counter *pnc)
 {
 	size_t sz=0;
-	sz+=HASH_COUNT(pnc->hash_item) * sizeof(struct counter_item);
-	sz+=sizeof(struct PN_counter);
+	sz += sizeof(struct PN_counter);
+	sz += HASH_COUNT(pnc->hash_item)*sizeof(struct counter_item);
 	return sz;
 }
 \ No newline at end of file
diff --git a/CRDT/pn_counter.h b/CRDT/pn_counter.h
index 4563f70..d56cf84 100644
--- a/CRDT/pn_counter.h
+++ b/CRDT/pn_counter.h
@@ -25,10 +25,11 @@ size_t PN_counter_replica_num(const struct PN_counter *pnc);
 long long PN_counter_incrby(struct PN_counter *pnc, long long increment);
 
 void PN_counter_merge_blob(struct PN_counter *pnc, const char *blob, size_t blob_sz);
-void PN_counter_merge(struct PN_counter *dst, struct PN_counter *src);
+void PN_counter_merge(struct PN_counter *dst, const struct PN_counter *src);
 void PN_counter_serialize(const struct PN_counter *pnc, char **blob, size_t *blob_sz);
 struct PN_counter *PN_counter_deserialize(const char *blob, size_t blob_sz);
-size_t PN_counter_size(const struct PN_counter *pnc);
+size_t PN_counter_serialized_size(const struct PN_counter *pnc);
+size_t PN_counter_mem_size(const struct PN_counter *pnc);
 #ifdef __cplusplus
 }
 #endif
diff --git a/CRDT/st_hyperloglog.c b/CRDT/st_hyperloglog.c
new file mode 100644
index 0000000..4e40a43
--- /dev/null
+++ b/CRDT/st_hyperloglog.c
@@ -0,0 +1,523 @@
+/*
+ * This HyperLogLog uses 6 bits per register and a 64-bit hash function.
+ * For our needs, we always use a dense representation and avoid sparse/dense conversions.
+ *
+ */
+#include "st_hyperloglog.h"
+#include "xxhash.h"
+#include "mpack.h"
+#include "crdt_utils.h"
+
+#include <stdint.h>
+#include <math.h>
+#include <assert.h>
+
+#define REG_WIDTH 6     // Bits per register
+#define INT_WIDTH 32    // Bits in an int
+#define REG_PER_WORD 5  // floor(INT_WIDTH / REG_WIDTH)
+
+// Number of registers for a precision (2^precision). The argument is parenthesized so that
+// expressions such as NUM_REG(a | b) expand with the intended precedence.
+#define NUM_REG(precision) (1 << (precision))
+// Integer division rounded up.
+#define INT_CEIL(num, denom) (((num) + (denom) - 1) / (denom))
+
+
+
+// Configuration of a HyperLogLog instance. ST_hyperloglog_merge() resolves conflicting
+// configurations Last-Write-Wins by comparing `timestamp`.
+struct ST_HLL_configuration
+{
+    unsigned char precision;      // number of leading hash bits used as register index; NUM_REG(precision) registers
+    unsigned char pad;            // not referenced in the code shown; presumably explicit padding - TODO confirm
+    unsigned short time_window_s; // window length in seconds; 0 makes periodic_reset() a no-op
+    struct timeval timestamp;     // time of the last configuration write (set by new/configure)
+};
+// HyperLogLog state. Every field that precedes `registers` is copied byte-for-byte into the
+// header of the serialized blob, so the field order is part of the blob format.
+struct ST_hyperloglog
+{
+    struct ST_HLL_configuration cfg;
+    int reset_idx;              // register that periodic_reset() clears next
+    struct timeval reset_time;  // time up to which periodic resets have been applied
+    uint32_t *registers;        // packed registers: REG_PER_WORD fields of REG_WIDTH bits per 32-bit word
+};
+// Size of the serialized header: all bytes of struct ST_hyperloglog that precede `registers`.
+const size_t BLOB_HDR_SIZE= offsetof(struct ST_hyperloglog, registers);
+// Expansion is fully parenthesized so the macro is safe inside larger expressions (e.g. as a divisor).
+#define REGISTER_SIZE(precision) (INT_WIDTH*INT_CEIL(NUM_REG(precision), REG_PER_WORD))
+
+/*
+ * Create an empty HyperLogLog.
+ *   precision:           must lie in [HLL_MIN_PRECISION, HLL_MAX_PRECISION]
+ *   time_window_seconds: window length in seconds, stored in an unsigned short; 0 disables periodic reset
+ *   now:                 recorded as both the configuration timestamp and the initial reset time
+ * Returns NULL if precision is out of range. Otherwise returns a new object that the caller
+ * releases with ST_hyperloglog_free().
+ */
+struct ST_hyperloglog *ST_hyperloglog_new(unsigned char precision, int time_window_seconds, const struct timeval now)
+{
+    // Validate before allocating, so the error path has nothing to leak.
+    if (precision < HLL_MIN_PRECISION || precision > HLL_MAX_PRECISION)
+        return NULL;
+
+    struct ST_hyperloglog *h=ALLOC(struct ST_hyperloglog, 1);
+
+    // Store the configuration
+    h->cfg.precision = precision;
+    h->cfg.time_window_s=time_window_seconds;
+    memcpy(&h->cfg.timestamp, &now, sizeof(h->cfg.timestamp));
+
+    memcpy(&h->reset_time, &now, sizeof(h->reset_time));
+
+    // Determine how many registers are needed
+    int num_reg = NUM_REG(precision);
+
+    // Get the full words required
+    int words = INT_CEIL(num_reg, REG_PER_WORD);
+
+    // Allocate and zero out the registers
+    h->registers = ALLOC(uint32_t, words);
+    return h;
+}
+/*
+ * Apply a new configuration to an existing HyperLogLog.
+ * The time window is always updated. If the precision changes, the register array is
+ * discarded and replaced by a fresh, zeroed one of the new size (all observations are lost).
+ * `now` becomes the configuration timestamp used for Last-Write-Wins merging.
+ * NOTE(review): precision is not range-checked here; callers are assumed to pass a valid value.
+ */
+void ST_hyperloglog_configure(struct ST_hyperloglog *h, unsigned char precision, int time_window_seconds, const struct timeval now)
+{
+    h->cfg.time_window_s=time_window_seconds;
+    if(h->cfg.precision != precision)
+    {
+        free(h->registers);
+        // Determine how many registers are needed
+        int reg = NUM_REG(precision);
+
+        // Get the full words required
+        int words = INT_CEIL(reg, REG_PER_WORD);
+
+        // Allocate and zero out the registers
+        h->registers = ALLOC(uint32_t, words);
+        h->cfg.precision=precision;
+
+        // periodic_reset() writes through reset_idx before wrapping it, so a cursor left over
+        // from a larger register array would index past the end of the new one.
+        if(h->reset_idx < 0 || h->reset_idx >= reg)
+        {
+            h->reset_idx = 0;
+        }
+    }
+    memcpy(&h->cfg.timestamp, &now, sizeof(h->cfg.timestamp));
+    return;
+}
+// Release a HyperLogLog and its register array. Passing NULL is a no-op, mirroring free().
+void ST_hyperloglog_free(struct ST_hyperloglog *h)
+{
+    if(h==NULL) return;
+    free(h->registers);
+    h->registers=NULL;
+    free(h);
+    return;
+}
+
+// Read the REG_WIDTH-bit register `idx` out of the packed word array.
+static int get_register(const struct ST_hyperloglog *h, int idx) {
+    const uint32_t packed = h->registers[idx / REG_PER_WORD];
+    const uint32_t shifted = packed >> REG_WIDTH * (idx % REG_PER_WORD);
+    return shifted & ((1 << REG_WIDTH) - 1);
+}
+
+// Overwrite register `idx` with `val`; the other registers sharing the same word are preserved.
+// `val` is not masked here, so it must already fit in REG_WIDTH bits.
+static void set_register(const struct ST_hyperloglog *h, int idx, int val) {
+    const unsigned shift = REG_WIDTH * (idx % REG_PER_WORD);
+    const uint32_t field_mask = ((1 << REG_WIDTH) - 1) << shift;
+    uint32_t *slot = &h->registers[idx / REG_PER_WORD];
+
+    // Clear the old field, then OR the shifted value into place
+    *slot = (*slot & ~field_mask) | (val << shift);
+}
+// Zero register `idx`, leaving the other registers of the same word untouched.
+static void reset_register(const struct ST_hyperloglog *h, int idx)
+{
+    const unsigned shift = REG_WIDTH * (idx % REG_PER_WORD);
+    const uint32_t field_mask = ((1 << REG_WIDTH) - 1) << shift;
+    h->registers[idx / REG_PER_WORD] &= ~field_mask;
+}
+// Feed one 64-bit hash into the sketch.
+// Returns 1 if the selected register grew, 0 if the sketch is unchanged.
+int hll_add_hash(struct ST_hyperloglog *h, uint64_t hash)
+{
+    const unsigned char p = h->cfg.precision;
+
+    // The top p bits select the register
+    const int idx = hash >> (64 - p);
+
+    // Drop the index bits; the bit planted at position p-1 bounds the leading-zero count
+    const uint64_t rest = (hash << p) | (1 << (p - 1));
+
+    // Leading zeros + 1. __builtin_clzll() is provided by GCC
+    const int rank = __builtin_clzll(rest) + 1;
+
+    // Registers only ever grow
+    if (rank <= get_register(h, idx))
+        return 0;
+    set_register(h, idx, rank);
+    return 1;
+}
+/*
+ * Clear registers one by one as time advances. One register is cleared every
+ * (2 * time_window_s / num_reg) seconds, so a full sweep over all registers takes twice
+ * the configured window. A window of 0 disables the mechanism.
+ * For every elapsed slot, reset_idx advances (wrapping at num_reg) and reset_time moves
+ * forward by one slot.
+ */
+static void periodic_reset(struct ST_hyperloglog *h, const struct timeval now)
+{
+    if(h->cfg.time_window_s==0) return;
+    int num_reg=NUM_REG(h->cfg.precision);
+    // Compute in 64 bits: time_window_s*2*1000*1000 overflows int once the window exceeds ~1073 s.
+    long long reset_time_slot_us=(long long)h->cfg.time_window_s*2*1000*1000/num_reg;
+    // Guard the division below should the slot ever round down to zero.
+    if(reset_time_slot_us<=0) return;
+    long long delta_us=timeval_delta_us(h->reset_time, now);
+    if(delta_us<=reset_time_slot_us) return;
+
+    struct timeval step;
+    step.tv_sec=reset_time_slot_us/1000/1000;
+    step.tv_usec=reset_time_slot_us%(1000*1000);
+
+    // 64-bit counter: the number of elapsed slots is a long long and may exceed INT_MAX.
+    long long elapsed_slots=delta_us/reset_time_slot_us;
+    for(long long i=0; i<elapsed_slots; i++)
+    {
+        reset_register(h, h->reset_idx);
+        h->reset_idx = (h->reset_idx+1)%num_reg;
+        timeradd(&h->reset_time, &step, &h->reset_time);
+    }
+}
+// Add `key` to the sketch at time `now`, after applying any pending periodic resets.
+// Returns the result of hll_add_hash(): 1 if a register was updated, 0 otherwise.
+int ST_hyperloglog_add(struct ST_hyperloglog *h, const char *key, size_t keylen, const struct timeval now)
+{
+    periodic_reset(h, now);
+
+    // Hash the key with a fixed seed and feed the hash to the sketch
+    const uint64_t digest = XXH3_64bits_withSeed(key, keylen, 171);
+    return hll_add_hash(h, digest);
+}
+/*
+ * Merge `src` into `dst`.
+ * 1. Configuration is Last-Write-Wins: if src's configuration is newer, dst is reconfigured to it.
+ * 2. If the precisions still differ afterwards, nothing else is merged.
+ * 3. Otherwise each dst register becomes the maximum of the two, and the reset cursor
+ *    (reset_idx / reset_time) is taken from whichever side has the later reset_time.
+ */
+void ST_hyperloglog_merge(struct ST_hyperloglog *dst, const struct ST_hyperloglog *src)
+{
+    if(timercmp(&dst->cfg.timestamp, &src->cfg.timestamp, <))
+    {
+        ST_hyperloglog_configure(dst, src->cfg.precision, src->cfg.time_window_s, src->cfg.timestamp);
+    }
+
+    if(dst->cfg.precision == src->cfg.precision)
+    {
+        const int total=NUM_REG(dst->cfg.precision);
+        for(int reg=0; reg<total; reg++)
+        {
+            int theirs=get_register(src, reg);
+            int ours=get_register(dst, reg);
+            set_register(dst, reg, MAX(theirs, ours));
+        }
+
+        if(timercmp(&dst->reset_time, &src->reset_time, <))
+        {
+            dst->reset_idx=src->reset_idx;
+            dst->reset_time=src->reset_time;
+        }
+    }
+}
+// Size in bytes of the blob produced by ST_hyperloglog_serialize(): header plus packed register words.
+size_t ST_hyperloglog_serialized_size(const struct ST_hyperloglog *h)
+{
+    const size_t register_count = NUM_REG(h->cfg.precision);
+    const size_t word_count = INT_CEIL(register_count, REG_PER_WORD);
+
+    return BLOB_HDR_SIZE + word_count*sizeof(int32_t);
+}
+// Serialize `h` into a newly allocated buffer: the fixed header followed by the packed register words.
+// The buffer and its length are returned through *blob and *blob_sz.
+void ST_hyperloglog_serialize(const struct ST_hyperloglog *h, char **blob, size_t *blob_sz)
+{
+    const size_t total = ST_hyperloglog_serialized_size(h);
+    const size_t payload = total - BLOB_HDR_SIZE;   // bytes occupied by the register words
+
+    char *out = ALLOC(char, total);
+    memcpy(out, h, BLOB_HDR_SIZE);
+    memcpy(out + BLOB_HDR_SIZE, h->registers, payload);
+
+    *blob = out;
+    *blob_sz = total;
+}
+/*
+ * Rebuild a HyperLogLog from a blob produced by ST_hyperloglog_serialize().
+ * Returns NULL if the blob is NULL, shorter than the header, carries a precision outside
+ * [HLL_MIN_PRECISION, HLL_MAX_PRECISION], or is too short to hold the register words for
+ * that precision. Otherwise returns a new object to be released with ST_hyperloglog_free().
+ */
+struct ST_hyperloglog *ST_hyperloglog_deserialize(const char *blob, size_t blob_sz)
+{
+    if(blob==NULL || blob_sz<BLOB_HDR_SIZE) return NULL;
+
+    struct ST_hyperloglog *h=ALLOC(struct ST_hyperloglog, 1);
+    size_t offset=0;
+    memcpy(h, blob, BLOB_HDR_SIZE);
+    offset += BLOB_HDR_SIZE;
+
+    // The precision comes from the blob: reject values that would make NUM_REG() shift out of range.
+    if(h->cfg.precision<HLL_MIN_PRECISION || h->cfg.precision>HLL_MAX_PRECISION)
+    {
+        free(h);
+        return NULL;
+    }
+
+    size_t num_reg = NUM_REG(h->cfg.precision);
+    size_t words = INT_CEIL(num_reg, REG_PER_WORD);
+    size_t reg_bytes = words*sizeof(int32_t);
+
+    // Never read past the end of the caller's buffer.
+    if(blob_sz-offset < reg_bytes)
+    {
+        free(h);
+        return NULL;
+    }
+
+    // periodic_reset() indexes the registers with reset_idx, so keep it inside the array.
+    if(h->reset_idx<0 || (size_t)h->reset_idx>=num_reg)
+    {
+        h->reset_idx=0;
+    }
+
+    h->registers=ALLOC(uint32_t, words);
+    memcpy(h->registers, blob+offset, reg_bytes);
+    return h;
+}
+// Deserialize `blob` into a temporary HyperLogLog, merge it into `dst`, then free the temporary.
+// If deserialization yields no object, `dst` is left unchanged.
+void ST_hyperloglog_merge_blob(struct ST_hyperloglog *dst, const char *blob, size_t blob_sz)
+{
+    struct ST_hyperloglog *src=ST_hyperloglog_deserialize(blob, blob_sz);
+    if(src==NULL) return;   // defensive: nothing usable to merge
+    ST_hyperloglog_merge(dst, src);
+    ST_hyperloglog_free(src);
+    return;
+}
+// Threshold table with 15 entries. Presumably one entry per precision, indexed by (precision - 4)
+// in the same order as the "precision N" rows of g_rawEstimateData - TODO confirm against the estimator code.
+double g_switchThreshold[15] = {10, 20, 40, 80, 220, 400, 900, 1800, 3100, 6500,
+    11500, 20000, 50000, 120000, 350000};
+
+static double *g_rawEstimateData[] = {
+    // precision 4
+    (double[]) { 11, 11.717, 12.207, 12.7896, 13.2882, 13.8204, 14.3772, 14.9342, 15.5202, 16.161, 16.7722, 17.4636, 18.0396, 18.6766, 19.3566, 20.0454, 20.7936, 21.4856, 22.2666, 22.9946, 23.766, 24.4692, 25.3638, 26.0764, 26.7864, 27.7602, 28.4814, 29.433, 30.2926, 31.0664, 31.9996, 32.7956, 33.5366, 34.5894, 35.5738, 36.2698, 37.3682, 38.0544, 39.2342, 40.0108, 40.7966, 41.9298, 42.8704, 43.6358, 44.5194, 45.773, 46.6772, 47.6174, 48.4888, 49.3304, 50.2506, 51.4996, 52.3824, 53.3078, 54.3984, 55.5838, 56.6618, 57.2174, 58.3514, 59.0802, 60.1482, 61.0376, 62.3598, 62.8078, 63.9744, 64.914, 65.781, 67.1806, 68.0594, 68.8446, 69.7928, 70.8248, 71.8324, 72.8598, 73.6246, 74.7014, 75.393, 76.6708, 77.2394, },
+    // precision 5
+    (double[]) { 23, 23.1194, 23.8208, 24.2318, 24.77, 25.2436, 25.7774, 26.2848, 26.8224, 27.3742, 27.9336, 28.503, 29.0494, 29.6292, 30.2124, 30.798, 31.367, 31.9728, 32.5944, 33.217, 33.8438, 34.3696, 35.0956, 35.7044, 36.324, 37.0668, 37.6698, 38.3644, 39.049, 39.6918, 40.4146, 41.082, 41.687, 42.5398, 43.2462, 43.857, 44.6606, 45.4168, 46.1248, 46.9222, 47.6804, 48.447, 49.3454, 49.9594, 50.7636, 51.5776, 52.331, 53.19, 53.9676, 54.7564, 55.5314, 56.4442, 57.3708, 57.9774, 58.9624, 59.8796, 60.755, 61.472, 62.2076, 63.1024, 63.8908, 64.7338, 65.7728, 66.629, 67.413, 68.3266, 69.1524, 70.2642, 71.1806, 72.0566, 72.9192, 73.7598, 74.3516, 75.5802, 76.4386, 77.4916, 78.1524, 79.1892, 79.8414, 80.8798, 81.8376, 82.4698, 83.7656, 84.331, 85.5914, 86.6012, 87.7016, 88.5582, 89.3394, 90.3544, 91.4912, 92.308, 93.3552, 93.9746, 95.2052, 95.727, 97.1322, 98.3944, 98.7588, 100.242, 101.1914, 102.2538, 102.8776, 103.6292, 105.1932, 105.9152, 107.0868, 107.6728, 108.7144, 110.3114, 110.8716, 111.245, 112.7908, 113.7064, 114.636, 115.7464, 116.1788, 117.7464, 118.4896, 119.6166, 120.5082, 121.7798, 122.9028, 123.4426, 124.8854, 125.705, 126.4652, 128.3464, 128.3462, 130.0398, 131.0342, 131.0042, 132.4766, 133.511, 134.7252, 135.425, 136.5172, 138.0572, 138.6694, 139.3712, 140.8598, 141.4594, 142.554, 143.4006, 144.7374, 146.1634, 146.8994, 147.605, 147.9304, 149.1636, 150.2468, 151.5876, 152.2096, 153.7032, 154.7146, 155.807, 156.9228, 157.0372, 158.5852, },
+    // precision 6
+    (double[]) { 46, 46.1902, 47.271, 47.8358, 48.8142, 49.2854, 50.317, 51.354, 51.8924, 52.9436, 53.4596, 54.5262, 55.6248, 56.1574, 57.2822, 57.837, 58.9636, 60.074, 60.7042, 61.7976, 62.4772, 63.6564, 64.7942, 65.5004, 66.686, 67.291, 68.5672, 69.8556, 70.4982, 71.8204, 72.4252, 73.7744, 75.0786, 75.8344, 77.0294, 77.8098, 79.0794, 80.5732, 81.1878, 82.5648, 83.2902, 84.6784, 85.3352, 86.8946, 88.3712, 89.0852, 90.499, 91.2686, 92.6844, 94.2234, 94.9732, 96.3356, 97.2286, 98.7262, 100.3284, 101.1048, 102.5962, 103.3562, 105.1272, 106.4184, 107.4974, 109.0822, 109.856, 111.48, 113.2834, 114.0208, 115.637, 116.5174, 118.0576, 119.7476, 120.427, 122.1326, 123.2372, 125.2788, 126.6776, 127.7926, 129.1952, 129.9564, 131.6454, 133.87, 134.5428, 136.2, 137.0294, 138.6278, 139.6782, 141.792, 143.3516, 144.2832, 146.0394, 147.0748, 148.4912, 150.849, 151.696, 153.5404, 154.073, 156.3714, 157.7216, 158.7328, 160.4208, 161.4184, 163.9424, 165.2772, 166.411, 168.1308, 168.769, 170.9258, 172.6828, 173.7502, 175.706, 176.3886, 179.0186, 180.4518, 181.927, 183.4172, 184.4114, 186.033, 188.5124, 189.5564, 191.6008, 192.4172, 193.8044, 194.997, 197.4548, 198.8948, 200.2346, 202.3086, 203.1548, 204.8842, 206.6508, 206.6772, 209.7254, 210.4752, 212.7228, 214.6614, 215.1676, 217.793, 218.0006, 219.9052, 221.66, 223.5588, 225.1636, 225.6882, 227.7126, 229.4502, 231.1978, 232.9756, 233.1654, 236.727, 238.1974, 237.7474, 241.1346, 242.3048, 244.1948, 245.3134, 246.879, 249.1204, 249.853, 252.6792, 253.857, 254.4486, 257.2362, 257.9534, 260.0286, 260.5632, 262.663, 264.723, 265.7566, 267.2566, 267.1624, 270.62, 272.8216, 273.2166, 275.2056, 276.2202, 278.3726, 280.3344, 281.9284, 283.9728, 284.1924, 286.4872, 287.587, 289.807, 291.1206, 292.769, 294.8708, 296.665, 297.1182, 299.4012, 300.6352, 302.1354, 304.1756, 306.1606, 307.3462, 308.5214, 309.4134, 310.8352, 313.9684, 315.837, 316.7796, 318.9858, },
+    // precision 7
+    (double[]) { 92, 93.4934, 94.9758, 96.4574, 97.9718, 99.4954, 101.5302, 103.0756, 104.6374, 106.1782, 107.7888, 109.9522, 111.592, 113.2532, 114.9086, 116.5938, 118.9474, 120.6796, 122.4394, 124.2176, 125.9768, 128.4214, 130.2528, 132.0102, 133.8658, 135.7278, 138.3044, 140.1316, 142.093, 144.0032, 145.9092, 148.6306, 150.5294, 152.5756, 154.6508, 156.662, 159.552, 161.3724, 163.617, 165.5754, 167.7872, 169.8444, 172.7988, 174.8606, 177.2118, 179.3566, 181.4476, 184.5882, 186.6816, 189.0824, 191.0258, 193.6048, 196.4436, 198.7274, 200.957, 203.147, 205.4364, 208.7592, 211.3386, 213.781, 215.8028, 218.656, 221.6544, 223.996, 226.4718, 229.1544, 231.6098, 234.5956, 237.0616, 239.5758, 242.4878, 244.5244, 248.2146, 250.724, 252.8722, 255.5198, 258.0414, 261.941, 264.9048, 266.87, 269.4304, 272.028, 274.4708, 278.37, 281.0624, 283.4668, 286.5532, 289.4352, 293.2564, 295.2744, 298.2118, 300.7472, 304.1456, 307.2928, 309.7504, 312.5528, 315.979, 318.2102, 322.1834, 324.3494, 327.325, 330.6614, 332.903, 337.2544, 339.9042, 343.215, 345.2864, 348.0814, 352.6764, 355.301, 357.139, 360.658, 363.1732, 366.5902, 369.9538, 373.0828, 375.922, 378.9902, 382.7328, 386.4538, 388.1136, 391.2234, 394.0878, 396.708, 401.1556, 404.1852, 406.6372, 409.6822, 412.7796, 416.6078, 418.4916, 422.131, 424.5376, 428.1988, 432.211, 434.4502, 438.5282, 440.912, 444.0448, 447.7432, 450.8524, 453.7988, 456.7858, 458.8868, 463.9886, 466.5064, 468.9124, 472.6616, 475.4682, 478.582, 481.304, 485.2738, 488.6894, 490.329, 496.106, 497.6908, 501.1374, 504.5322, 506.8848, 510.3324, 513.4512, 516.179, 520.4412, 522.6066, 526.167, 528.7794, 533.379, 536.067, 538.46, 542.9116, 545.692, 547.9546, 552.493, 555.2722, 557.335, 562.449, 564.2014, 569.0738, 571.0974, 574.8564, 578.2996, 581.409, 583.9704, 585.8098, 589.6528, 594.5998, 595.958, 600.068, 603.3278, 608.2016, 609.9632, 612.864, 615.43, 620.7794, 621.272, 625.8644, 629.206, 633.219, 634.5154, 638.6102, },
+    // precision 8
+    (double[]) { 184.2152, 187.2454, 190.2096, 193.6652, 196.6312, 199.6822, 203.249, 206.3296, 210.0038, 213.2074, 216.4612, 220.27, 223.5178, 227.4412, 230.8032, 234.1634, 238.1688, 241.6074, 245.6946, 249.2664, 252.8228, 257.0432, 260.6824, 264.9464, 268.6268, 272.2626, 276.8376, 280.4034, 284.8956, 288.8522, 292.7638, 297.3552, 301.3556, 305.7526, 309.9292, 313.8954, 318.8198, 322.7668, 327.298, 331.6688, 335.9466, 340.9746, 345.1672, 349.3474, 354.3028, 358.8912, 364.114, 368.4646, 372.9744, 378.4092, 382.6022, 387.843, 392.5684, 397.1652, 402.5426, 407.4152, 412.5388, 417.3592, 422.1366, 427.486, 432.3918, 437.5076, 442.509, 447.3834, 453.3498, 458.0668, 463.7346, 469.1228, 473.4528, 479.7, 484.644, 491.0518, 495.5774, 500.9068, 506.432, 512.1666, 517.434, 522.6644, 527.4894, 533.6312, 538.3804, 544.292, 550.5496, 556.0234, 562.8206, 566.6146, 572.4188, 579.117, 583.6762, 590.6576, 595.7864, 601.509, 607.5334, 612.9204, 619.772, 624.2924, 630.8654, 636.1836, 642.745, 649.1316, 655.0386, 660.0136, 666.6342, 671.6196, 678.1866, 684.4282, 689.3324, 695.4794, 702.5038, 708.129, 713.528, 720.3204, 726.463, 732.7928, 739.123, 744.7418, 751.2192, 756.5102, 762.6066, 769.0184, 775.2224, 781.4014, 787.7618, 794.1436, 798.6506, 805.6378, 811.766, 819.7514, 824.5776, 828.7322, 837.8048, 843.6302, 849.9336, 854.4798, 861.3388, 867.9894, 873.8196, 880.3136, 886.2308, 892.4588, 899.0816, 905.4076, 912.0064, 917.3878, 923.619, 929.998, 937.3482, 943.9506, 947.991, 955.1144, 962.203, 968.8222, 975.7324, 981.7826, 988.7666, 994.2648, 1000.3128, 1007.4082, 1013.7536, 1020.3376, 1026.7156, 1031.7478, 1037.4292, 1045.393, 1051.2278, 1058.3434, 1062.8726, 1071.884, 1076.806, 1082.9176, 1089.1678, 1095.5032, 1102.525, 1107.2264, 1115.315, 1120.93, 1127.252, 1134.1496, 1139.0408, 1147.5448, 1153.3296, 1158.1974, 1166.5262, 1174.3328, 1175.657, 1184.4222, 1190.9172, 1197.1292, 1204.4606, 1210.4578, 1218.8728, 1225.3336, 1226.6592, 1236.5768, 1241.363, 1249.4074, 1254.6566, 
1260.8014, 1266.5454, 1274.5192, },
+    // precision 9
+    (double[]) { 369, 374.8294, 381.2452, 387.6698, 394.1464, 400.2024, 406.8782, 413.6598, 420.462, 427.2826, 433.7102, 440.7416, 447.9366, 455.1046, 462.285, 469.0668, 476.306, 483.8448, 491.301, 498.9886, 506.2422, 513.8138, 521.7074, 529.7428, 537.8402, 545.1664, 553.3534, 561.594, 569.6886, 577.7876, 585.65, 594.228, 602.8036, 611.1666, 620.0818, 628.0824, 637.2574, 646.302, 655.1644, 664.0056, 672.3802, 681.7192, 690.5234, 700.2084, 708.831, 718.485, 728.1112, 737.4764, 746.76, 756.3368, 766.5538, 775.5058, 785.2646, 795.5902, 804.3818, 814.8998, 824.9532, 835.2062, 845.2798, 854.4728, 864.9582, 875.3292, 886.171, 896.781, 906.5716, 916.7048, 927.5322, 937.875, 949.3972, 958.3464, 969.7274, 980.2834, 992.1444, 1003.4264, 1013.0166, 1024.018, 1035.0438, 1046.34, 1057.6856, 1068.9836, 1079.0312, 1091.677, 1102.3188, 1113.4846, 1124.4424, 1135.739, 1147.1488, 1158.9202, 1169.406, 1181.5342, 1193.2834, 1203.8954, 1216.3286, 1226.2146, 1239.6684, 1251.9946, 1262.123, 1275.4338, 1285.7378, 1296.076, 1308.9692, 1320.4964, 1333.0998, 1343.9864, 1357.7754, 1368.3208, 1380.4838, 1392.7388, 1406.0758, 1416.9098, 1428.9728, 1440.9228, 1453.9292, 1462.617, 1476.05, 1490.2996, 1500.6128, 1513.7392, 1524.5174, 1536.6322, 1548.2584, 1562.3766, 1572.423, 1587.1232, 1596.5164, 1610.5938, 1622.5972, 1633.1222, 1647.7674, 1658.5044, 1671.57, 1683.7044, 1695.4142, 1708.7102, 1720.6094, 1732.6522, 1747.841, 1756.4072, 1769.9786, 1782.3276, 1797.5216, 1808.3186, 1819.0694, 1834.354, 1844.575, 1856.2808, 1871.1288, 1880.7852, 1893.9622, 1906.3418, 1920.6548, 1932.9302, 1945.8584, 1955.473, 1968.8248, 1980.6446, 1995.9598, 2008.349, 2019.8556, 2033.0334, 2044.0206, 2059.3956, 2069.9174, 2082.6084, 2093.7036, 2106.6108, 2118.9124, 2132.301, 2144.7628, 2159.8422, 2171.0212, 2183.101, 2193.5112, 2208.052, 2221.3194, 2233.3282, 2247.295, 2257.7222, 2273.342, 2286.5638, 2299.6786, 2310.8114, 2322.3312, 2335.516, 2349.874, 2363.5968, 2373.865, 2387.1918, 2401.8328, 2414.8496, 2424.544, 
2436.7592, 2447.1682, 2464.1958, 2474.3438, 2489.0006, 2497.4526, 2513.6586, 2527.19, 2540.7028, 2553.768 },
+    // precision 10
+    (double[]) { 738.1256, 750.4234, 763.1064, 775.4732, 788.4636, 801.0644, 814.488, 827.9654, 841.0832, 854.7864, 868.1992, 882.2176, 896.5228, 910.1716, 924.7752, 938.899, 953.6126, 968.6492, 982.9474, 998.5214, 1013.1064, 1028.6364, 1044.2468, 1059.4588, 1075.3832, 1091.0584, 1106.8606, 1123.3868, 1139.5062, 1156.1862, 1172.463, 1189.339, 1206.1936, 1223.1292, 1240.1854, 1257.2908, 1275.3324, 1292.8518, 1310.5204, 1328.4854, 1345.9318, 1364.552, 1381.4658, 1400.4256, 1419.849, 1438.152, 1456.8956, 1474.8792, 1494.118, 1513.62, 1532.5132, 1551.9322, 1570.7726, 1590.6086, 1610.5332, 1630.5918, 1650.4294, 1669.7662, 1690.4106, 1710.7338, 1730.9012, 1750.4486, 1770.1556, 1791.6338, 1812.7312, 1833.6264, 1853.9526, 1874.8742, 1896.8326, 1918.1966, 1939.5594, 1961.07, 1983.037, 2003.1804, 2026.071, 2047.4884, 2070.0848, 2091.2944, 2114.333, 2135.9626, 2158.2902, 2181.0814, 2202.0334, 2224.4832, 2246.39, 2269.7202, 2292.1714, 2314.2358, 2338.9346, 2360.891, 2384.0264, 2408.3834, 2430.1544, 2454.8684, 2476.9896, 2501.4368, 2522.8702, 2548.0408, 2570.6738, 2593.5208, 2617.0158, 2640.2302, 2664.0962, 2687.4986, 2714.2588, 2735.3914, 2759.6244, 2781.8378, 2808.0072, 2830.6516, 2856.2454, 2877.2136, 2903.4546, 2926.785, 2951.2294, 2976.468, 3000.867, 3023.6508, 3049.91, 3073.5984, 3098.162, 3121.5564, 3146.2328, 3170.9484, 3195.5902, 3221.3346, 3242.7032, 3271.6112, 3296.5546, 3317.7376, 3345.072, 3369.9518, 3394.326, 3418.1818, 3444.6926, 3469.086, 3494.2754, 3517.8698, 3544.248, 3565.3768, 3588.7234, 3616.979, 3643.7504, 3668.6812, 3695.72, 3719.7392, 3742.6224, 3770.4456, 3795.6602, 3819.9058, 3844.002, 3869.517, 3895.6824, 3920.8622, 3947.1364, 3973.985, 3995.4772, 4021.62, 4046.628, 4074.65, 4096.2256, 4121.831, 4146.6406, 4173.276, 4195.0744, 4223.9696, 4251.3708, 4272.9966, 4300.8046, 4326.302, 4353.1248, 4374.312, 4403.0322, 4426.819, 4450.0598, 4478.5206, 4504.8116, 4528.8928, 4553.9584, 4578.8712, 4603.8384, 4632.3872, 4655.5128, 4675.821, 4704.6222, 4731.9862, 
4755.4174, 4781.2628, 4804.332, 4832.3048, 4862.8752, 4883.4148, 4906.9544, 4935.3516, 4954.3532, 4984.0248, 5011.217, 5035.3258, 5057.3672, 5084.1828, },
+    // precision 11
+    (double[]) { 1477, 1501.6014, 1526.5802, 1551.7942, 1577.3042, 1603.2062, 1629.8402, 1656.2292, 1682.9462, 1709.9926, 1737.3026, 1765.4252, 1793.0578, 1821.6092, 1849.626, 1878.5568, 1908.527, 1937.5154, 1967.1874, 1997.3878, 2027.37, 2058.1972, 2089.5728, 2120.1012, 2151.9668, 2183.292, 2216.0772, 2247.8578, 2280.6562, 2313.041, 2345.714, 2380.3112, 2414.1806, 2447.9854, 2481.656, 2516.346, 2551.5154, 2586.8378, 2621.7448, 2656.6722, 2693.5722, 2729.1462, 2765.4124, 2802.8728, 2838.898, 2876.408, 2913.4926, 2951.4938, 2989.6776, 3026.282, 3065.7704, 3104.1012, 3143.7388, 3181.6876, 3221.1872, 3261.5048, 3300.0214, 3339.806, 3381.409, 3421.4144, 3461.4294, 3502.2286, 3544.651, 3586.6156, 3627.337, 3670.083, 3711.1538, 3753.5094, 3797.01, 3838.6686, 3882.1678, 3922.8116, 3967.9978, 4009.9204, 4054.3286, 4097.5706, 4140.6014, 4185.544, 4229.5976, 4274.583, 4316.9438, 4361.672, 4406.2786, 4451.8628, 4496.1834, 4543.505, 4589.1816, 4632.5188, 4678.2294, 4724.8908, 4769.0194, 4817.052, 4861.4588, 4910.1596, 4956.4344, 5002.5238, 5048.13, 5093.6374, 5142.8162, 5187.7894, 5237.3984, 5285.6078, 5331.0858, 5379.1036, 5428.6258, 5474.6018, 5522.7618, 5571.5822, 5618.59, 5667.9992, 5714.88, 5763.454, 5808.6982, 5860.3644, 5910.2914, 5953.571, 6005.9232, 6055.1914, 6104.5882, 6154.5702, 6199.7036, 6251.1764, 6298.7596, 6350.0302, 6398.061, 6448.4694, 6495.933, 6548.0474, 6597.7166, 6646.9416, 6695.9208, 6742.6328, 6793.5276, 6842.1934, 6894.2372, 6945.3864, 6996.9228, 7044.2372, 7094.1374, 7142.2272, 7192.2942, 7238.8338, 7288.9006, 7344.0908, 7394.8544, 7443.5176, 7490.4148, 7542.9314, 7595.6738, 7641.9878, 7694.3688, 7743.0448, 7797.522, 7845.53, 7899.594, 7950.3132, 7996.455, 8050.9442, 8092.9114, 8153.1374, 8197.4472, 8252.8278, 8301.8728, 8348.6776, 8401.4698, 8453.551, 8504.6598, 8553.8944, 8604.1276, 8657.6514, 8710.3062, 8758.908, 8807.8706, 8862.1702, 8910.4668, 8960.77, 9007.2766, 9063.164, 9121.0534, 9164.1354, 9218.1594, 9267.767, 9319.0594, 9372.155, 
9419.7126, 9474.3722, 9520.1338, 9572.368, 9622.7702, 9675.8448, 9726.5396, 9778.7378, 9827.6554, 9878.1922, 9928.7782, 9978.3984, 10026.578, 10076.5626, 10137.1618, 10177.5244, 10229.9176, },
+    // precision 12
+    (double[]) { 2954, 3003.4782, 3053.3568, 3104.3666, 3155.324, 3206.9598, 3259.648, 3312.539, 3366.1474, 3420.2576, 3474.8376, 3530.6076, 3586.451, 3643.38, 3700.4104, 3757.5638, 3815.9676, 3875.193, 3934.838, 3994.8548, 4055.018, 4117.1742, 4178.4482, 4241.1294, 4304.4776, 4367.4044, 4431.8724, 4496.3732, 4561.4304, 4627.5326, 4693.949, 4761.5532, 4828.7256, 4897.6182, 4965.5186, 5034.4528, 5104.865, 5174.7164, 5244.6828, 5316.6708, 5387.8312, 5459.9036, 5532.476, 5604.8652, 5679.6718, 5753.757, 5830.2072, 5905.2828, 5980.0434, 6056.6264, 6134.3192, 6211.5746, 6290.0816, 6367.1176, 6447.9796, 6526.5576, 6606.1858, 6686.9144, 6766.1142, 6847.0818, 6927.9664, 7010.9096, 7091.0816, 7175.3962, 7260.3454, 7344.018, 7426.4214, 7511.3106, 7596.0686, 7679.8094, 7765.818, 7852.4248, 7936.834, 8022.363, 8109.5066, 8200.4554, 8288.5832, 8373.366, 8463.4808, 8549.7682, 8642.0522, 8728.3288, 8820.9528, 8907.727, 9001.0794, 9091.2522, 9179.988, 9269.852, 9362.6394, 9453.642, 9546.9024, 9640.6616, 9732.6622, 9824.3254, 9917.7484, 10007.9392, 10106.7508, 10196.2152, 10289.8114, 10383.5494, 10482.3064, 10576.8734, 10668.7872, 10764.7156, 10862.0196, 10952.793, 11049.9748, 11146.0702, 11241.4492, 11339.2772, 11434.2336, 11530.741, 11627.6136, 11726.311, 11821.5964, 11918.837, 12015.3724, 12113.0162, 12213.0424, 12306.9804, 12408.4518, 12504.8968, 12604.586, 12700.9332, 12798.705, 12898.5142, 12997.0488, 13094.788, 13198.475, 13292.7764, 13392.9698, 13486.8574, 13590.1616, 13686.5838, 13783.6264, 13887.2638, 13992.0978, 14081.0844, 14189.9956, 14280.0912, 14382.4956, 14486.4384, 14588.1082, 14686.2392, 14782.276, 14888.0284, 14985.1864, 15088.8596, 15187.0998, 15285.027, 15383.6694, 15495.8266, 15591.3736, 15694.2008, 15790.3246, 15898.4116, 15997.4522, 16095.5014, 16198.8514, 16291.7492, 16402.6424, 16499.1266, 16606.2436, 16697.7186, 16796.3946, 16902.3376, 17005.7672, 17100.814, 17206.8282, 17305.8262, 17416.0744, 17508.4092, 17617.0178, 17715.4554, 17816.758, 17920.1748, 
18012.9236, 18119.7984, 18223.2248, 18324.2482, 18426.6276, 18525.0932, 18629.8976, 18733.2588, 18831.0466, 18940.1366, 19032.2696, 19131.729, 19243.4864, 19349.6932, 19442.866, 19547.9448, 19653.2798, 19754.4034, 19854.0692, 19965.1224, 20065.1774, 20158.2212, 20253.353, 20366.3264, 20463.22, },
+    // precision 13
+    (double[]) { 5908.5052, 6007.2672, 6107.347, 6208.5794, 6311.2622, 6414.5514, 6519.3376, 6625.6952, 6732.5988, 6841.3552, 6950.5972, 7061.3082, 7173.5646, 7287.109, 7401.8216, 7516.4344, 7633.3802, 7751.2962, 7870.3784, 7990.292, 8110.79, 8233.4574, 8356.6036, 8482.2712, 8607.7708, 8735.099, 8863.1858, 8993.4746, 9123.8496, 9255.6794, 9388.5448, 9522.7516, 9657.3106, 9792.6094, 9930.5642, 10068.794, 10206.7256, 10347.81, 10490.3196, 10632.0778, 10775.9916, 10920.4662, 11066.124, 11213.073, 11358.0362, 11508.1006, 11659.1716, 11808.7514, 11959.4884, 12112.1314, 12265.037, 12420.3756, 12578.933, 12734.311, 12890.0006, 13047.2144, 13207.3096, 13368.5144, 13528.024, 13689.847, 13852.7528, 14018.3168, 14180.5372, 14346.9668, 14513.5074, 14677.867, 14846.2186, 15017.4186, 15184.9716, 15356.339, 15529.2972, 15697.3578, 15871.8686, 16042.187, 16216.4094, 16389.4188, 16565.9126, 16742.3272, 16919.0042, 17094.7592, 17273.965, 17451.8342, 17634.4254, 17810.5984, 17988.9242, 18171.051, 18354.7938, 18539.466, 18721.0408, 18904.9972, 19081.867, 19271.9118, 19451.8694, 19637.9816, 19821.2922, 20013.1292, 20199.3858, 20387.8726, 20572.9514, 20770.7764, 20955.1714, 21144.751, 21329.9952, 21520.709, 21712.7016, 21906.3868, 22096.2626, 22286.0524, 22475.051, 22665.5098, 22862.8492, 23055.5294, 23249.6138, 23437.848, 23636.273, 23826.093, 24020.3296, 24213.3896, 24411.7392, 24602.9614, 24805.7952, 24998.1552, 25193.9588, 25389.0166, 25585.8392, 25780.6976, 25981.2728, 26175.977, 26376.5252, 26570.1964, 26773.387, 26962.9812, 27163.0586, 27368.164, 27565.0534, 27758.7428, 27961.1276, 28163.2324, 28362.3816, 28565.7668, 28758.644, 28956.9768, 29163.4722, 29354.7026, 29561.1186, 29767.9948, 29959.9986, 30164.0492, 30366.9818, 30562.5338, 30762.9928, 30976.1592, 31166.274, 31376.722, 31570.3734, 31770.809, 31974.8934, 32179.5286, 32387.5442, 32582.3504, 32794.076, 32989.9528, 33191.842, 33392.4684, 33595.659, 33801.8672, 34000.3414, 34200.0922, 34402.6792, 34610.0638, 34804.0084, 
35011.13, 35218.669, 35418.6634, 35619.0792, 35830.6534, 36028.4966, 36229.7902, 36438.6422, 36630.7764, 36833.3102, 37048.6728, 37247.3916, 37453.5904, 37669.3614, 37854.5526, 38059.305, 38268.0936, 38470.2516, 38674.7064, 38876.167, 39068.3794, 39281.9144, 39492.8566, 39684.8628, 39898.4108, 40093.1836, 40297.6858, 40489.7086, 40717.2424, },
+    // precision 14
+    (double[]) { 11817.475, 12015.0046, 12215.3792, 12417.7504, 12623.1814, 12830.0086, 13040.0072, 13252.503, 13466.178, 13683.2738, 13902.0344, 14123.9798, 14347.394, 14573.7784, 14802.6894, 15033.6824, 15266.9134, 15502.8624, 15741.4944, 15980.7956, 16223.8916, 16468.6316, 16715.733, 16965.5726, 17217.204, 17470.666, 17727.8516, 17986.7886, 18247.6902, 18510.9632, 18775.304, 19044.7486, 19314.4408, 19587.202, 19862.2576, 20135.924, 20417.0324, 20697.9788, 20979.6112, 21265.0274, 21550.723, 21841.6906, 22132.162, 22428.1406, 22722.127, 23020.5606, 23319.7394, 23620.4014, 23925.2728, 24226.9224, 24535.581, 24845.505, 25155.9618, 25470.3828, 25785.9702, 26103.7764, 26420.4132, 26742.0186, 27062.8852, 27388.415, 27714.6024, 28042.296, 28365.4494, 28701.1526, 29031.8008, 29364.2156, 29704.497, 30037.1458, 30380.111, 30723.8168, 31059.5114, 31404.9498, 31751.6752, 32095.2686, 32444.7792, 32794.767, 33145.204, 33498.4226, 33847.6502, 34209.006, 34560.849, 34919.4838, 35274.9778, 35635.1322, 35996.3266, 36359.1394, 36722.8266, 37082.8516, 37447.7354, 37815.9606, 38191.0692, 38559.4106, 38924.8112, 39294.6726, 39663.973, 40042.261, 40416.2036, 40779.2036, 41161.6436, 41540.9014, 41921.1998, 42294.7698, 42678.5264, 43061.3464, 43432.375, 43818.432, 44198.6598, 44583.0138, 44970.4794, 45353.924, 45729.858, 46118.2224, 46511.5724, 46900.7386, 47280.6964, 47668.1472, 48055.6796, 48446.9436, 48838.7146, 49217.7296, 49613.7796, 50010.7508, 50410.0208, 50793.7886, 51190.2456, 51583.1882, 51971.0796, 52376.5338, 52763.319, 53165.5534, 53556.5594, 53948.2702, 54346.352, 54748.7914, 55138.577, 55543.4824, 55941.1748, 56333.7746, 56745.1552, 57142.7944, 57545.2236, 57935.9956, 58348.5268, 58737.5474, 59158.5962, 59542.6896, 59958.8004, 60349.3788, 60755.0212, 61147.6144, 61548.194, 61946.0696, 62348.6042, 62763.603, 63162.781, 63560.635, 63974.3482, 64366.4908, 64771.5876, 65176.7346, 65597.3916, 65995.915, 66394.0384, 66822.9396, 67203.6336, 67612.2032, 68019.0078, 68420.0388, 
68821.22, 69235.8388, 69640.0724, 70055.155, 70466.357, 70863.4266, 71276.2482, 71677.0306, 72080.2006, 72493.0214, 72893.5952, 73314.5856, 73714.9852, 74125.3022, 74521.2122, 74933.6814, 75341.5904, 75743.0244, 76166.0278, 76572.1322, 76973.1028, 77381.6284, 77800.6092, 78189.328, 78607.0962, 79012.2508, 79407.8358, 79825.725, 80238.701, 80646.891, 81035.6436, 81460.0448, 81876.3884, },
+    // precision 15
+    (double[]) { 23635.0036, 24030.8034, 24431.4744, 24837.1524, 25246.7928, 25661.326, 26081.3532, 26505.2806, 26933.9892, 27367.7098, 27805.318, 28248.799, 28696.4382, 29148.8244, 29605.5138, 30066.8668, 30534.2344, 31006.32, 31480.778, 31962.2418, 32447.3324, 32938.0232, 33432.731, 33930.728, 34433.9896, 34944.1402, 35457.5588, 35974.5958, 36497.3296, 37021.9096, 37554.326, 38088.0826, 38628.8816, 39171.3192, 39723.2326, 40274.5554, 40832.3142, 41390.613, 41959.5908, 42532.5466, 43102.0344, 43683.5072, 44266.694, 44851.2822, 45440.7862, 46038.0586, 46640.3164, 47241.064, 47846.155, 48454.7396, 49076.9168, 49692.542, 50317.4778, 50939.65, 51572.5596, 52210.2906, 52843.7396, 53481.3996, 54127.236, 54770.406, 55422.6598, 56078.7958, 56736.7174, 57397.6784, 58064.5784, 58730.308, 59404.9784, 60077.0864, 60751.9158, 61444.1386, 62115.817, 62808.7742, 63501.4774, 64187.5454, 64883.6622, 65582.7468, 66274.5318, 66976.9276, 67688.7764, 68402.138, 69109.6274, 69822.9706, 70543.6108, 71265.5202, 71983.3848, 72708.4656, 73433.384, 74158.4664, 74896.4868, 75620.9564, 76362.1434, 77098.3204, 77835.7662, 78582.6114, 79323.9902, 80067.8658, 80814.9246, 81567.0136, 82310.8536, 83061.9952, 83821.4096, 84580.8608, 85335.547, 86092.5802, 86851.6506, 87612.311, 88381.2016, 89146.3296, 89907.8974, 90676.846, 91451.4152, 92224.5518, 92995.8686, 93763.5066, 94551.2796, 95315.1944, 96096.1806, 96881.0918, 97665.679, 98442.68, 99229.3002, 100011.0994, 100790.6386, 101580.1564, 102377.7484, 103152.1392, 103944.2712, 104730.216, 105528.6336, 106324.9398, 107117.6706, 107890.3988, 108695.2266, 109485.238, 110294.7876, 111075.0958, 111878.0496, 112695.2864, 113464.5486, 114270.0474, 115068.608, 115884.3626, 116673.2588, 117483.3716, 118275.097, 119085.4092, 119879.2808, 120687.5868, 121499.9944, 122284.916, 123095.9254, 123912.5038, 124709.0454, 125503.7182, 126323.259, 127138.9412, 127943.8294, 128755.646, 129556.5354, 130375.3298, 131161.4734, 131971.1962, 132787.5458, 133588.1056, 
134431.351, 135220.2906, 136023.398, 136846.6558, 137667.0004, 138463.663, 139283.7154, 140074.6146, 140901.3072, 141721.8548, 142543.2322, 143356.1096, 144173.7412, 144973.0948, 145794.3162, 146609.5714, 147420.003, 148237.9784, 149050.5696, 149854.761, 150663.1966, 151494.0754, 152313.1416, 153112.6902, 153935.7206, 154746.9262, 155559.547, 156401.9746, 157228.7036, 158008.7254, 158820.75, 159646.9184, 160470.4458, 161279.5348, 162093.3114, 162918.542, 163729.2842, },
+    // precision 16
+    (double[]) { 47271, 48062.3584, 48862.7074, 49673.152, 50492.8416, 51322.9514, 52161.03, 53009.407, 53867.6348, 54734.206, 55610.5144, 56496.2096, 57390.795, 58297.268, 59210.6448, 60134.665, 61068.0248, 62010.4472, 62962.5204, 63923.5742, 64895.0194, 65876.4182, 66862.6136, 67862.6968, 68868.8908, 69882.8544, 70911.271, 71944.0924, 72990.0326, 74040.692, 75100.6336, 76174.7826, 77252.5998, 78340.2974, 79438.2572, 80545.4976, 81657.2796, 82784.6336, 83915.515, 85059.7362, 86205.9368, 87364.4424, 88530.3358, 89707.3744, 90885.9638, 92080.197, 93275.5738, 94479.391, 95695.918, 96919.2236, 98148.4602, 99382.3474, 100625.6974, 101878.0284, 103141.6278, 104409.4588, 105686.2882, 106967.5402, 108261.6032, 109548.1578, 110852.0728, 112162.231, 113479.0072, 114806.2626, 116137.9072, 117469.5048, 118813.5186, 120165.4876, 121516.2556, 122875.766, 124250.5444, 125621.2222, 127003.2352, 128387.848, 129775.2644, 131181.7776, 132577.3086, 133979.9458, 135394.1132, 136800.9078, 138233.217, 139668.5308, 141085.212, 142535.2122, 143969.0684, 145420.2872, 146878.1542, 148332.7572, 149800.3202, 151269.66, 152743.6104, 154213.0948, 155690.288, 157169.4246, 158672.1756, 160160.059, 161650.6854, 163145.7772, 164645.6726, 166159.1952, 167682.1578, 169177.3328, 170700.0118, 172228.8964, 173732.6664, 175265.5556, 176787.799, 178317.111, 179856.6914, 181400.865, 182943.4612, 184486.742, 186033.4698, 187583.7886, 189148.1868, 190688.4526, 192250.1926, 193810.9042, 195354.2972, 196938.7682, 198493.5898, 200079.2824, 201618.912, 203205.5492, 204765.5798, 206356.1124, 207929.3064, 209498.7196, 211086.229, 212675.1324, 214256.7892, 215826.2392, 217412.8474, 218995.6724, 220618.6038, 222207.1166, 223781.0364, 225387.4332, 227005.7928, 228590.4336, 230217.8738, 231805.1054, 233408.9, 234995.3432, 236601.4956, 238190.7904, 239817.2548, 241411.2832, 243002.4066, 244640.1884, 246255.3128, 247849.3508, 249479.9734, 251106.8822, 252705.027, 254332.9242, 255935.129, 257526.9014, 259154.772, 
260777.625, 262390.253, 264004.4906, 265643.59, 267255.4076, 268873.426, 270470.7252, 272106.4804, 273722.4456, 275337.794, 276945.7038, 278592.9154, 280204.3726, 281841.1606, 283489.171, 285130.1716, 286735.3362, 288364.7164, 289961.1814, 291595.5524, 293285.683, 294899.6668, 296499.3434, 298128.0462, 299761.8946, 301394.2424, 302997.6748, 304615.1478, 306269.7724, 307886.114, 309543.1028, 311153.2862, 312782.8546, 314421.2008, 316033.2438, 317692.9636, 319305.2648, 320948.7406, 322566.3364, 324228.4224, 325847.1542, },
+    // precision 17
+    (double[]) { 94542, 96125.811, 97728.019, 99348.558, 100987.9705, 102646.7565, 104324.5125, 106021.7435, 107736.7865, 109469.272, 111223.9465, 112995.219, 114787.432, 116593.152, 118422.71, 120267.2345, 122134.6765, 124020.937, 125927.2705, 127851.255, 129788.9485, 131751.016, 133726.8225, 135722.592, 137736.789, 139770.568, 141821.518, 143891.343, 145982.1415, 148095.387, 150207.526, 152355.649, 154515.6415, 156696.05, 158887.7575, 161098.159, 163329.852, 165569.053, 167837.4005, 170121.6165, 172420.4595, 174732.6265, 177062.77, 179412.502, 181774.035, 184151.939, 186551.6895, 188965.691, 191402.8095, 193857.949, 196305.0775, 198774.6715, 201271.2585, 203764.78, 206299.3695, 208818.1365, 211373.115, 213946.7465, 216532.076, 219105.541, 221714.5375, 224337.5135, 226977.5125, 229613.0655, 232270.2685, 234952.2065, 237645.3555, 240331.1925, 243034.517, 245756.0725, 248517.6865, 251232.737, 254011.3955, 256785.995, 259556.44, 262368.335, 265156.911, 267965.266, 270785.583, 273616.0495, 276487.4835, 279346.639, 282202.509, 285074.3885, 287942.2855, 290856.018, 293774.0345, 296678.5145, 299603.6355, 302552.6575, 305492.9785, 308466.8605, 311392.581, 314347.538, 317319.4295, 320285.9785, 323301.7325, 326298.3235, 329301.3105, 332301.987, 335309.791, 338370.762, 341382.923, 344431.1265, 347464.1545, 350507.28, 353619.2345, 356631.2005, 359685.203, 362776.7845, 365886.488, 368958.2255, 372060.6825, 375165.4335, 378237.935, 381328.311, 384430.5225, 387576.425, 390683.242, 393839.648, 396977.8425, 400101.9805, 403271.296, 406409.8425, 409529.5485, 412678.7, 415847.423, 419020.8035, 422157.081, 425337.749, 428479.6165, 431700.902, 434893.1915, 438049.582, 441210.5415, 444379.2545, 447577.356, 450741.931, 453959.548, 457137.0935, 460329.846, 463537.4815, 466732.3345, 469960.5615, 473164.681, 476347.6345, 479496.173, 482813.1645, 486025.6995, 489249.4885, 492460.1945, 495675.8805, 498908.0075, 502131.802, 505374.3855, 508550.9915, 511806.7305, 515026.776, 518217.0005, 
521523.9855, 524705.9855, 527950.997, 531210.0265, 534472.497, 537750.7315, 540926.922, 544207.094, 547429.4345, 550666.3745, 553975.3475, 557150.7185, 560399.6165, 563662.697, 566916.7395, 570146.1215, 573447.425, 576689.6245, 579874.5745, 583202.337, 586503.0255, 589715.635, 592910.161, 596214.3885, 599488.035, 602740.92, 605983.0685, 609248.67, 612491.3605, 615787.912, 619107.5245, 622307.9555, 625577.333, 628840.4385, 632085.2155, 635317.6135, 638691.7195, 641887.467, 645139.9405, 648441.546, 651666.252, 654941.845, },
+    // precision 18
+    (double[]) { 189084, 192250.913, 195456.774, 198696.946, 201977.762, 205294.444, 208651.754, 212042.099, 215472.269, 218941.91, 222443.912, 225996.845, 229568.199, 233193.568, 236844.457, 240543.233, 244279.475, 248044.27, 251854.588, 255693.2, 259583.619, 263494.621, 267445.385, 271454.061, 275468.769, 279549.456, 283646.446, 287788.198, 291966.099, 296181.164, 300431.469, 304718.618, 309024.004, 313393.508, 317760.803, 322209.731, 326675.061, 331160.627, 335654.47, 340241.442, 344841.833, 349467.132, 354130.629, 358819.432, 363574.626, 368296.587, 373118.482, 377914.93, 382782.301, 387680.669, 392601.981, 397544.323, 402529.115, 407546.018, 412593.658, 417638.657, 422762.865, 427886.169, 433017.167, 438213.273, 443441.254, 448692.421, 453937.533, 459239.049, 464529.569, 469910.083, 475274.03, 480684.473, 486070.26, 491515.237, 496995.651, 502476.617, 507973.609, 513497.19, 519083.233, 524726.509, 530305.505, 535945.728, 541584.404, 547274.055, 552967.236, 558667.862, 564360.216, 570128.148, 575965.08, 581701.952, 587532.523, 593361.144, 599246.128, 605033.418, 610958.779, 616837.117, 622772.818, 628672.04, 634675.369, 640574.831, 646585.739, 652574.547, 658611.217, 664642.684, 670713.914, 676737.681, 682797.313, 688837.897, 694917.874, 701009.882, 707173.648, 713257.254, 719415.392, 725636.761, 731710.697, 737906.209, 744103.074, 750313.39, 756504.185, 762712.579, 768876.985, 775167.859, 781359, 787615.959, 793863.597, 800245.477, 806464.582, 812785.294, 819005.925, 825403.057, 831676.197, 837936.284, 844266.968, 850642.711, 856959.756, 863322.774, 869699.931, 876102.478, 882355.787, 888694.463, 895159.952, 901536.143, 907872.631, 914293.672, 920615.14, 927130.974, 933409.404, 939922.178, 946331.47, 952745.93, 959209.264, 965590.224, 972077.284, 978501.961, 984953.19, 991413.271, 997817.479, 1004222.658, 1010725.676, 1017177.138, 1023612.529, 1030098.236, 1036493.719, 1043112.207, 1049537.036, 1056008.096, 1062476.184, 1068942.337, 1075524.95, 1081932.864, 
1088426.025, 1094776.005, 1101327.448, 1107901.673, 1114423.639, 1120884.602, 1127324.923, 1133794.24, 1140328.886, 1146849.376, 1153346.682, 1159836.502, 1166478.703, 1172953.304, 1179391.502, 1185950.982, 1192544.052, 1198913.41, 1205430.994, 1212015.525, 1218674.042, 1225121.683, 1231551.101, 1238126.379, 1244673.795, 1251260.649, 1257697.86, 1264320.983, 1270736.319, 1277274.694, 1283804.95, 1290211.514, 1296858.568, 1303455.691, }
+};
+
+static double *g_biasData[] = {
+    // precision 4
+    (double[]) { 10, 9.717, 9.207, 8.7896, 8.2882, 7.8204, 7.3772, 6.9342, 6.5202, 6.161, 5.7722, 5.4636, 5.0396, 4.6766, 4.3566, 4.0454, 3.7936, 3.4856, 3.2666, 2.9946, 2.766, 2.4692, 2.3638, 2.0764, 1.7864, 1.7602, 1.4814, 1.433, 1.2926, 1.0664, 0.999600000000001, 0.7956, 0.5366, 0.589399999999998, 0.573799999999999, 0.269799999999996, 0.368200000000002, 0.0544000000000011, 0.234200000000001, 0.0108000000000033, -0.203400000000002, -0.0701999999999998, -0.129600000000003, -0.364199999999997, -0.480600000000003, -0.226999999999997, -0.322800000000001, -0.382599999999996, -0.511200000000002, -0.669600000000003, -0.749400000000001, -0.500399999999999, -0.617600000000003, -0.6922, -0.601599999999998, -0.416200000000003, -0.338200000000001, -0.782600000000002, -0.648600000000002, -0.919800000000002, -0.851799999999997, -0.962400000000002, -0.6402, -1.1922, -1.0256, -1.086, -1.21899999999999, -0.819400000000002, -0.940600000000003, -1.1554, -1.2072, -1.1752, -1.16759999999999, -1.14019999999999, -1.3754, -1.29859999999999, -1.607, -1.3292, -1.7606, },
+    // precision 5
+    (double[]) { 22, 21.1194, 20.8208, 20.2318, 19.77, 19.2436, 18.7774, 18.2848, 17.8224, 17.3742, 16.9336, 16.503, 16.0494, 15.6292, 15.2124, 14.798, 14.367, 13.9728, 13.5944, 13.217, 12.8438, 12.3696, 12.0956, 11.7044, 11.324, 11.0668, 10.6698, 10.3644, 10.049, 9.6918, 9.4146, 9.082, 8.687, 8.5398, 8.2462, 7.857, 7.6606, 7.4168, 7.1248, 6.9222, 6.6804, 6.447, 6.3454, 5.9594, 5.7636, 5.5776, 5.331, 5.19, 4.9676, 4.7564, 4.5314, 4.4442, 4.3708, 3.9774, 3.9624, 3.8796, 3.755, 3.472, 3.2076, 3.1024, 2.8908, 2.7338, 2.7728, 2.629, 2.413, 2.3266, 2.1524, 2.2642, 2.1806, 2.0566, 1.9192, 1.7598, 1.3516, 1.5802, 1.43859999999999, 1.49160000000001, 1.1524, 1.1892, 0.841399999999993, 0.879800000000003, 0.837599999999995, 0.469800000000006, 0.765600000000006, 0.331000000000003, 0.591399999999993, 0.601200000000006, 0.701599999999999, 0.558199999999999, 0.339399999999998, 0.354399999999998, 0.491200000000006, 0.308000000000007, 0.355199999999996, -0.0254000000000048, 0.205200000000005, -0.272999999999996, 0.132199999999997, 0.394400000000005, -0.241200000000006, 0.242000000000004, 0.191400000000002, 0.253799999999998, -0.122399999999999, -0.370800000000003, 0.193200000000004, -0.0848000000000013, 0.0867999999999967, -0.327200000000005, -0.285600000000002, 0.311400000000006, -0.128399999999999, -0.754999999999995, -0.209199999999996, -0.293599999999998, -0.364000000000004, -0.253600000000006, -0.821200000000005, -0.253600000000006, -0.510400000000004, -0.383399999999995, -0.491799999999998, -0.220200000000006, -0.0972000000000008, -0.557400000000001, -0.114599999999996, -0.295000000000002, -0.534800000000004, 0.346399999999988, -0.65379999999999, 0.0398000000000138, 0.0341999999999985, -0.995800000000003, -0.523400000000009, -0.489000000000004, -0.274799999999999, -0.574999999999989, -0.482799999999997, 0.0571999999999946, -0.330600000000004, -0.628800000000012, -0.140199999999993, -0.540600000000012, -0.445999999999998, -0.599400000000003, -0.262599999999992, 
0.163399999999996, -0.100599999999986, -0.39500000000001, -1.06960000000001, -0.836399999999998, -0.753199999999993, -0.412399999999991, -0.790400000000005, -0.29679999999999, -0.28540000000001, -0.193000000000012, -0.0772000000000048, -0.962799999999987, -0.414800000000014, },
+    // precision 6
+    (double[]) { 45, 44.1902, 43.271, 42.8358, 41.8142, 41.2854, 40.317, 39.354, 38.8924, 37.9436, 37.4596, 36.5262, 35.6248, 35.1574, 34.2822, 33.837, 32.9636, 32.074, 31.7042, 30.7976, 30.4772, 29.6564, 28.7942, 28.5004, 27.686, 27.291, 26.5672, 25.8556, 25.4982, 24.8204, 24.4252, 23.7744, 23.0786, 22.8344, 22.0294, 21.8098, 21.0794, 20.5732, 20.1878, 19.5648, 19.2902, 18.6784, 18.3352, 17.8946, 17.3712, 17.0852, 16.499, 16.2686, 15.6844, 15.2234, 14.9732, 14.3356, 14.2286, 13.7262, 13.3284, 13.1048, 12.5962, 12.3562, 12.1272, 11.4184, 11.4974, 11.0822, 10.856, 10.48, 10.2834, 10.0208, 9.637, 9.51739999999999, 9.05759999999999, 8.74760000000001, 8.42700000000001, 8.1326, 8.2372, 8.2788, 7.6776, 7.79259999999999, 7.1952, 6.9564, 6.6454, 6.87, 6.5428, 6.19999999999999, 6.02940000000001, 5.62780000000001, 5.6782, 5.792, 5.35159999999999, 5.28319999999999, 5.0394, 5.07480000000001, 4.49119999999999, 4.84899999999999, 4.696, 4.54040000000001, 4.07300000000001, 4.37139999999999, 3.7216, 3.7328, 3.42080000000001, 3.41839999999999, 3.94239999999999, 3.27719999999999, 3.411, 3.13079999999999, 2.76900000000001, 2.92580000000001, 2.68279999999999, 2.75020000000001, 2.70599999999999, 2.3886, 3.01859999999999, 2.45179999999999, 2.92699999999999, 2.41720000000001, 2.41139999999999, 2.03299999999999, 2.51240000000001, 2.5564, 2.60079999999999, 2.41720000000001, 1.80439999999999, 1.99700000000001, 2.45480000000001, 1.8948, 2.2346, 2.30860000000001, 2.15479999999999, 1.88419999999999, 1.6508, 0.677199999999999, 1.72540000000001, 1.4752, 1.72280000000001, 1.66139999999999, 1.16759999999999, 1.79300000000001, 1.00059999999999, 0.905200000000008, 0.659999999999997, 1.55879999999999, 1.1636, 0.688199999999995, 0.712600000000009, 0.450199999999995, 1.1978, 0.975599999999986, 0.165400000000005, 1.727, 1.19739999999999, -0.252600000000001, 1.13460000000001, 1.3048, 1.19479999999999, 0.313400000000001, 0.878999999999991, 1.12039999999999, 0.853000000000009, 1.67920000000001, 
0.856999999999999, 0.448599999999999, 1.2362, 0.953399999999988, 1.02859999999998, 0.563199999999995, 0.663000000000011, 0.723000000000013, 0.756599999999992, 0.256599999999992, -0.837600000000009, 0.620000000000005, 0.821599999999989, 0.216600000000028, 0.205600000000004, 0.220199999999977, 0.372599999999977, 0.334400000000016, 0.928400000000011, 0.972800000000007, 0.192400000000021, 0.487199999999973, -0.413000000000011, 0.807000000000016, 0.120600000000024, 0.769000000000005, 0.870799999999974, 0.66500000000002, 0.118200000000002, 0.401200000000017, 0.635199999999998, 0.135400000000004, 0.175599999999974, 1.16059999999999, 0.34620000000001, 0.521400000000028, -0.586599999999976, -1.16480000000001, 0.968399999999974, 0.836999999999989, 0.779600000000016, 0.985799999999983, },
+    // precision 7
+    (double[]) { 91, 89.4934, 87.9758, 86.4574, 84.9718, 83.4954, 81.5302, 80.0756, 78.6374, 77.1782, 75.7888, 73.9522, 72.592, 71.2532, 69.9086, 68.5938, 66.9474, 65.6796, 64.4394, 63.2176, 61.9768, 60.4214, 59.2528, 58.0102, 56.8658, 55.7278, 54.3044, 53.1316, 52.093, 51.0032, 49.9092, 48.6306, 47.5294, 46.5756, 45.6508, 44.662, 43.552, 42.3724, 41.617, 40.5754, 39.7872, 38.8444, 37.7988, 36.8606, 36.2118, 35.3566, 34.4476, 33.5882, 32.6816, 32.0824, 31.0258, 30.6048, 29.4436, 28.7274, 27.957, 27.147, 26.4364, 25.7592, 25.3386, 24.781, 23.8028, 23.656, 22.6544, 21.996, 21.4718, 21.1544, 20.6098, 19.5956, 19.0616, 18.5758, 18.4878, 17.5244, 17.2146, 16.724, 15.8722, 15.5198, 15.0414, 14.941, 14.9048, 13.87, 13.4304, 13.028, 12.4708, 12.37, 12.0624, 11.4668, 11.5532, 11.4352, 11.2564, 10.2744, 10.2118, 9.74720000000002, 10.1456, 9.2928, 8.75040000000001, 8.55279999999999, 8.97899999999998, 8.21019999999999, 8.18340000000001, 7.3494, 7.32499999999999, 7.66140000000001, 6.90300000000002, 7.25439999999998, 6.9042, 7.21499999999997, 6.28640000000001, 6.08139999999997, 6.6764, 6.30099999999999, 5.13900000000001, 5.65800000000002, 5.17320000000001, 4.59019999999998, 4.9538, 5.08280000000002, 4.92200000000003, 4.99020000000002, 4.7328, 5.4538, 4.11360000000002, 4.22340000000003, 4.08780000000002, 3.70800000000003, 4.15559999999999, 4.18520000000001, 3.63720000000001, 3.68220000000002, 3.77960000000002, 3.6078, 2.49160000000001, 3.13099999999997, 2.5376, 3.19880000000001, 3.21100000000001, 2.4502, 3.52820000000003, 2.91199999999998, 3.04480000000001, 2.7432, 2.85239999999999, 2.79880000000003, 2.78579999999999, 1.88679999999999, 2.98860000000002, 2.50639999999999, 1.91239999999999, 2.66160000000002, 2.46820000000002, 1.58199999999999, 1.30399999999997, 2.27379999999999, 2.68939999999998, 1.32900000000001, 3.10599999999999, 1.69080000000002, 2.13740000000001, 2.53219999999999, 1.88479999999998, 1.33240000000001, 1.45119999999997, 1.17899999999997, 2.44119999999998, 
1.60659999999996, 2.16700000000003, 0.77940000000001, 2.37900000000002, 2.06700000000001, 1.46000000000004, 2.91160000000002, 1.69200000000001, 0.954600000000028, 2.49300000000005, 2.2722, 1.33500000000004, 2.44899999999996, 1.20140000000004, 3.07380000000001, 2.09739999999999, 2.85640000000001, 2.29960000000005, 2.40899999999999, 1.97040000000004, 0.809799999999996, 1.65279999999996, 2.59979999999996, 0.95799999999997, 2.06799999999998, 2.32780000000002, 4.20159999999998, 1.96320000000003, 1.86400000000003, 1.42999999999995, 3.77940000000001, 1.27200000000005, 1.86440000000005, 2.20600000000002, 3.21900000000005, 1.5154, 2.61019999999996, },
+    // precision 8
+    (double[]) { 183.2152, 180.2454, 177.2096, 173.6652, 170.6312, 167.6822, 164.249, 161.3296, 158.0038, 155.2074, 152.4612, 149.27, 146.5178, 143.4412, 140.8032, 138.1634, 135.1688, 132.6074, 129.6946, 127.2664, 124.8228, 122.0432, 119.6824, 116.9464, 114.6268, 112.2626, 109.8376, 107.4034, 104.8956, 102.8522, 100.7638, 98.3552, 96.3556, 93.7526, 91.9292, 89.8954, 87.8198, 85.7668, 83.298, 81.6688, 79.9466, 77.9746, 76.1672, 74.3474, 72.3028, 70.8912, 69.114, 67.4646, 65.9744, 64.4092, 62.6022, 60.843, 59.5684, 58.1652, 56.5426, 55.4152, 53.5388, 52.3592, 51.1366, 49.486, 48.3918, 46.5076, 45.509, 44.3834, 43.3498, 42.0668, 40.7346, 40.1228, 38.4528, 37.7, 36.644, 36.0518, 34.5774, 33.9068, 32.432, 32.1666, 30.434, 29.6644, 28.4894, 27.6312, 26.3804, 26.292, 25.5496000000001, 25.0234, 24.8206, 22.6146, 22.4188, 22.117, 20.6762, 20.6576, 19.7864, 19.509, 18.5334, 17.9204, 17.772, 16.2924, 16.8654, 15.1836, 15.745, 15.1316, 15.0386, 14.0136, 13.6342, 12.6196, 12.1866, 12.4281999999999, 11.3324, 10.4794000000001, 11.5038, 10.129, 9.52800000000002, 10.3203999999999, 9.46299999999997, 9.79280000000006, 9.12300000000005, 8.74180000000001, 9.2192, 7.51020000000005, 7.60659999999996, 7.01840000000004, 7.22239999999999, 7.40139999999997, 6.76179999999999, 7.14359999999999, 5.65060000000005, 5.63779999999997, 5.76599999999996, 6.75139999999999, 5.57759999999996, 3.73220000000003, 5.8048, 5.63019999999995, 4.93359999999996, 3.47979999999995, 4.33879999999999, 3.98940000000005, 3.81960000000004, 3.31359999999995, 3.23080000000004, 3.4588, 3.08159999999998, 3.4076, 3.00639999999999, 2.38779999999997, 2.61900000000003, 1.99800000000005, 3.34820000000002, 2.95060000000001, 0.990999999999985, 2.11440000000005, 2.20299999999997, 2.82219999999995, 2.73239999999998, 2.7826, 3.76660000000004, 2.26480000000004, 2.31280000000004, 2.40819999999997, 2.75360000000001, 3.33759999999995, 2.71559999999999, 1.7478000000001, 1.42920000000004, 2.39300000000003, 2.22779999999989, 
2.34339999999997, 0.87259999999992, 3.88400000000001, 1.80600000000004, 1.91759999999999, 1.16779999999994, 1.50320000000011, 2.52500000000009, 0.226400000000012, 2.31500000000005, 0.930000000000064, 1.25199999999995, 2.14959999999996, 0.0407999999999902, 2.5447999999999, 1.32960000000003, 0.197400000000016, 2.52620000000002, 3.33279999999991, -1.34300000000007, 0.422199999999975, 0.917200000000093, 1.12920000000008, 1.46060000000011, 1.45779999999991, 2.8728000000001, 3.33359999999993, -1.34079999999994, 1.57680000000005, 0.363000000000056, 1.40740000000005, 0.656600000000026, 0.801400000000058, -0.454600000000028, 1.51919999999996, },
+    // precision 9
+    (double[]) { 368, 361.8294, 355.2452, 348.6698, 342.1464, 336.2024, 329.8782, 323.6598, 317.462, 311.2826, 305.7102, 299.7416, 293.9366, 288.1046, 282.285, 277.0668, 271.306, 265.8448, 260.301, 254.9886, 250.2422, 244.8138, 239.7074, 234.7428, 229.8402, 225.1664, 220.3534, 215.594, 210.6886, 205.7876, 201.65, 197.228, 192.8036, 188.1666, 184.0818, 180.0824, 176.2574, 172.302, 168.1644, 164.0056, 160.3802, 156.7192, 152.5234, 149.2084, 145.831, 142.485, 139.1112, 135.4764, 131.76, 129.3368, 126.5538, 122.5058, 119.2646, 116.5902, 113.3818, 110.8998, 107.9532, 105.2062, 102.2798, 99.4728, 96.9582, 94.3292, 92.171, 89.7809999999999, 87.5716, 84.7048, 82.5322, 79.875, 78.3972, 75.3464, 73.7274, 71.2834, 70.1444, 68.4263999999999, 66.0166, 64.018, 62.0437999999999, 60.3399999999999, 58.6856, 57.9836, 55.0311999999999, 54.6769999999999, 52.3188, 51.4846, 49.4423999999999, 47.739, 46.1487999999999, 44.9202, 43.4059999999999, 42.5342000000001, 41.2834, 38.8954000000001, 38.3286000000001, 36.2146, 36.6684, 35.9946, 33.123, 33.4338, 31.7378000000001, 29.076, 28.9692, 27.4964, 27.0998, 25.9864, 26.7754, 24.3208, 23.4838, 22.7388000000001, 24.0758000000001, 21.9097999999999, 20.9728, 19.9228000000001, 19.9292, 16.617, 17.05, 18.2996000000001, 15.6128000000001, 15.7392, 14.5174, 13.6322, 12.2583999999999, 13.3766000000001, 11.423, 13.1232, 9.51639999999998, 10.5938000000001, 9.59719999999993, 8.12220000000002, 9.76739999999995, 7.50440000000003, 7.56999999999994, 6.70440000000008, 6.41419999999994, 6.71019999999999, 5.60940000000005, 4.65219999999999, 6.84099999999989, 3.4072000000001, 3.97859999999991, 3.32760000000007, 5.52160000000003, 3.31860000000006, 2.06940000000009, 4.35400000000004, 1.57500000000005, 0.280799999999999, 2.12879999999996, -0.214799999999968, -0.0378000000000611, -0.658200000000079, 0.654800000000023, -0.0697999999999865, 0.858400000000074, -2.52700000000004, -2.1751999999999, -3.35539999999992, -1.04019999999991, -0.651000000000067, 
-2.14439999999991, -1.96659999999997, -3.97939999999994, -0.604400000000169, -3.08260000000018, -3.39159999999993, -5.29640000000018, -5.38920000000007, -5.08759999999984, -4.69900000000007, -5.23720000000003, -3.15779999999995, -4.97879999999986, -4.89899999999989, -7.48880000000008, -5.94799999999987, -5.68060000000014, -6.67180000000008, -4.70499999999993, -7.27779999999984, -4.6579999999999, -4.4362000000001, -4.32139999999981, -5.18859999999995, -6.66879999999992, -6.48399999999992, -5.1260000000002, -4.4032000000002, -6.13500000000022, -5.80819999999994, -4.16719999999987, -4.15039999999999, -7.45600000000013, -7.24080000000004, -9.83179999999993, -5.80420000000004, -8.6561999999999, -6.99940000000015, -10.5473999999999, -7.34139999999979, -6.80999999999995, -6.29719999999998, -6.23199999999997, },
+    // precision 10
+    (double[]) { 737.1256, 724.4234, 711.1064, 698.4732, 685.4636, 673.0644, 660.488, 647.9654, 636.0832, 623.7864, 612.1992, 600.2176, 588.5228, 577.1716, 565.7752, 554.899, 543.6126, 532.6492, 521.9474, 511.5214, 501.1064, 490.6364, 480.2468, 470.4588, 460.3832, 451.0584, 440.8606, 431.3868, 422.5062, 413.1862, 404.463, 395.339, 386.1936, 378.1292, 369.1854, 361.2908, 353.3324, 344.8518, 337.5204, 329.4854, 321.9318, 314.552, 306.4658, 299.4256, 292.849, 286.152, 278.8956, 271.8792, 265.118, 258.62, 252.5132, 245.9322, 239.7726, 233.6086, 227.5332, 222.5918, 216.4294, 210.7662, 205.4106, 199.7338, 194.9012, 188.4486, 183.1556, 178.6338, 173.7312, 169.6264, 163.9526, 159.8742, 155.8326, 151.1966, 147.5594, 143.07, 140.037, 134.1804, 131.071, 127.4884, 124.0848, 120.2944, 117.333, 112.9626, 110.2902, 107.0814, 103.0334, 99.4832000000001, 96.3899999999999, 93.7202000000002, 90.1714000000002, 87.2357999999999, 85.9346, 82.8910000000001, 80.0264000000002, 78.3834000000002, 75.1543999999999, 73.8683999999998, 70.9895999999999, 69.4367999999999, 64.8701999999998, 65.0408000000002, 61.6738, 59.5207999999998, 57.0158000000001, 54.2302, 53.0962, 50.4985999999999, 52.2588000000001, 47.3914, 45.6244000000002, 42.8377999999998, 43.0072, 40.6516000000001, 40.2453999999998, 35.2136, 36.4546, 33.7849999999999, 33.2294000000002, 32.4679999999998, 30.8670000000002, 28.6507999999999, 28.9099999999999, 27.5983999999999, 26.1619999999998, 24.5563999999999, 23.2328000000002, 21.9484000000002, 21.5902000000001, 21.3346000000001, 17.7031999999999, 20.6111999999998, 19.5545999999999, 15.7375999999999, 17.0720000000001, 16.9517999999998, 15.326, 13.1817999999998, 14.6925999999999, 13.0859999999998, 13.2754, 10.8697999999999, 11.248, 7.3768, 4.72339999999986, 7.97899999999981, 8.7503999999999, 7.68119999999999, 9.7199999999998, 7.73919999999998, 5.6224000000002, 7.44560000000001, 6.6601999999998, 5.9058, 4.00199999999995, 4.51699999999983, 4.68240000000014, 3.86220000000003, 
5.13639999999987, 5.98500000000013, 2.47719999999981, 2.61999999999989, 1.62800000000016, 4.65000000000009, 0.225599999999758, 0.831000000000131, -0.359400000000278, 1.27599999999984, -2.92559999999958, -0.0303999999996449, 2.37079999999969, -2.0033999999996, 0.804600000000391, 0.30199999999968, 1.1247999999996, -2.6880000000001, 0.0321999999996478, -1.18099999999959, -3.9402, -1.47940000000017, -0.188400000000001, -2.10720000000038, -2.04159999999956, -3.12880000000041, -4.16160000000036, -0.612799999999879, -3.48719999999958, -8.17900000000009, -5.37780000000021, -4.01379999999972, -5.58259999999973, -5.73719999999958, -7.66799999999967, -5.69520000000011, -1.1247999999996, -5.58520000000044, -8.04560000000038, -4.64840000000004, -11.6468000000004, -7.97519999999986, -5.78300000000036, -7.67420000000038, -10.6328000000003, -9.81720000000041, },
+    // precision 11
+    (double[]) { 1476, 1449.6014, 1423.5802, 1397.7942, 1372.3042, 1347.2062, 1321.8402, 1297.2292, 1272.9462, 1248.9926, 1225.3026, 1201.4252, 1178.0578, 1155.6092, 1132.626, 1110.5568, 1088.527, 1066.5154, 1045.1874, 1024.3878, 1003.37, 982.1972, 962.5728, 942.1012, 922.9668, 903.292, 884.0772, 864.8578, 846.6562, 828.041, 809.714, 792.3112, 775.1806, 757.9854, 740.656, 724.346, 707.5154, 691.8378, 675.7448, 659.6722, 645.5722, 630.1462, 614.4124, 600.8728, 585.898, 572.408, 558.4926, 544.4938, 531.6776, 517.282, 505.7704, 493.1012, 480.7388, 467.6876, 456.1872, 445.5048, 433.0214, 420.806, 411.409, 400.4144, 389.4294, 379.2286, 369.651, 360.6156, 350.337, 342.083, 332.1538, 322.5094, 315.01, 305.6686, 298.1678, 287.8116, 280.9978, 271.9204, 265.3286, 257.5706, 249.6014, 242.544, 235.5976, 229.583, 220.9438, 214.672, 208.2786, 201.8628, 195.1834, 191.505, 186.1816, 178.5188, 172.2294, 167.8908, 161.0194, 158.052, 151.4588, 148.1596, 143.4344, 138.5238, 133.13, 127.6374, 124.8162, 118.7894, 117.3984, 114.6078, 109.0858, 105.1036, 103.6258, 98.6018000000004, 95.7618000000002, 93.5821999999998, 88.5900000000001, 86.9992000000002, 82.8800000000001, 80.4539999999997, 74.6981999999998, 74.3644000000004, 73.2914000000001, 65.5709999999999, 66.9232000000002, 65.1913999999997, 62.5882000000001, 61.5702000000001, 55.7035999999998, 56.1764000000003, 52.7596000000003, 53.0302000000001, 49.0609999999997, 48.4694, 44.933, 46.0474000000004, 44.7165999999997, 41.9416000000001, 39.9207999999999, 35.6328000000003, 35.5276000000003, 33.1934000000001, 33.2371999999996, 33.3864000000003, 33.9228000000003, 30.2371999999996, 29.1373999999996, 25.2272000000003, 24.2942000000003, 19.8338000000003, 18.9005999999999, 23.0907999999999, 21.8544000000002, 19.5176000000001, 15.4147999999996, 16.9314000000004, 18.6737999999996, 12.9877999999999, 14.3688000000002, 12.0447999999997, 15.5219999999999, 12.5299999999997, 14.5940000000001, 14.3131999999996, 9.45499999999993, 12.9441999999999, 
3.91139999999996, 13.1373999999996, 5.44720000000052, 9.82779999999912, 7.87279999999919, 3.67760000000089, 5.46980000000076, 5.55099999999948, 5.65979999999945, 3.89439999999922, 3.1275999999998, 5.65140000000065, 6.3062000000009, 3.90799999999945, 1.87060000000019, 5.17020000000048, 2.46680000000015, 0.770000000000437, -3.72340000000077, 1.16400000000067, 8.05340000000069, 0.135399999999208, 2.15940000000046, 0.766999999999825, 1.0594000000001, 3.15500000000065, -0.287399999999252, 2.37219999999979, -2.86620000000039, -1.63199999999961, -2.22979999999916, -0.15519999999924, -1.46039999999994, -0.262199999999211, -2.34460000000036, -2.8078000000005, -3.22179999999935, -5.60159999999996, -8.42200000000048, -9.43740000000071, 0.161799999999857, -10.4755999999998, -10.0823999999993, },
+    // precision 12
+    (double[]) { 2953, 2900.4782, 2848.3568, 2796.3666, 2745.324, 2694.9598, 2644.648, 2595.539, 2546.1474, 2498.2576, 2450.8376, 2403.6076, 2357.451, 2311.38, 2266.4104, 2221.5638, 2176.9676, 2134.193, 2090.838, 2048.8548, 2007.018, 1966.1742, 1925.4482, 1885.1294, 1846.4776, 1807.4044, 1768.8724, 1731.3732, 1693.4304, 1657.5326, 1621.949, 1586.5532, 1551.7256, 1517.6182, 1483.5186, 1450.4528, 1417.865, 1385.7164, 1352.6828, 1322.6708, 1291.8312, 1260.9036, 1231.476, 1201.8652, 1173.6718, 1145.757, 1119.2072, 1092.2828, 1065.0434, 1038.6264, 1014.3192, 988.5746, 965.0816, 940.1176, 917.9796, 894.5576, 871.1858, 849.9144, 827.1142, 805.0818, 783.9664, 763.9096, 742.0816, 724.3962, 706.3454, 688.018, 667.4214, 650.3106, 633.0686, 613.8094, 597.818, 581.4248, 563.834, 547.363, 531.5066, 520.455400000001, 505.583199999999, 488.366, 476.480799999999, 459.7682, 450.0522, 434.328799999999, 423.952799999999, 408.727000000001, 399.079400000001, 387.252200000001, 373.987999999999, 360.852000000001, 351.6394, 339.642, 330.902400000001, 322.661599999999, 311.662200000001, 301.3254, 291.7484, 279.939200000001, 276.7508, 263.215200000001, 254.811400000001, 245.5494, 242.306399999999, 234.8734, 223.787200000001, 217.7156, 212.0196, 200.793, 195.9748, 189.0702, 182.449199999999, 177.2772, 170.2336, 164.741, 158.613600000001, 155.311, 147.5964, 142.837, 137.3724, 132.0162, 130.0424, 121.9804, 120.451800000001, 114.8968, 111.585999999999, 105.933199999999, 101.705, 98.5141999999996, 95.0488000000005, 89.7880000000005, 91.4750000000004, 83.7764000000006, 80.9698000000008, 72.8574000000008, 73.1615999999995, 67.5838000000003, 62.6263999999992, 63.2638000000006, 66.0977999999996, 52.0843999999997, 58.9956000000002, 47.0912000000008, 46.4956000000002, 48.4383999999991, 47.1082000000006, 43.2392, 37.2759999999998, 40.0283999999992, 35.1864000000005, 35.8595999999998, 32.0998, 28.027, 23.6694000000007, 33.8266000000003, 26.3736000000008, 27.2008000000005, 21.3245999999999, 
26.4115999999995, 23.4521999999997, 19.5013999999992, 19.8513999999996, 10.7492000000002, 18.6424000000006, 13.1265999999996, 18.2436000000016, 6.71860000000015, 3.39459999999963, 6.33759999999893, 7.76719999999841, 0.813999999998487, 3.82819999999992, 0.826199999999517, 8.07440000000133, -1.59080000000176, 5.01780000000144, 0.455399999998917, -0.24199999999837, 0.174800000000687, -9.07640000000174, -4.20160000000033, -3.77520000000004, -4.75179999999818, -5.3724000000002, -8.90680000000066, -6.10239999999976, -5.74120000000039, -9.95339999999851, -3.86339999999836, -13.7304000000004, -16.2710000000006, -7.51359999999841, -3.30679999999847, -13.1339999999982, -10.0551999999989, -6.72019999999975, -8.59660000000076, -10.9307999999983, -1.8775999999998, -4.82259999999951, -13.7788, -21.6470000000008, -10.6735999999983, -15.7799999999988, },
+    // precision 13
+    (double[]) { 5907.5052, 5802.2672, 5697.347, 5593.5794, 5491.2622, 5390.5514, 5290.3376, 5191.6952, 5093.5988, 4997.3552, 4902.5972, 4808.3082, 4715.5646, 4624.109, 4533.8216, 4444.4344, 4356.3802, 4269.2962, 4183.3784, 4098.292, 4014.79, 3932.4574, 3850.6036, 3771.2712, 3691.7708, 3615.099, 3538.1858, 3463.4746, 3388.8496, 3315.6794, 3244.5448, 3173.7516, 3103.3106, 3033.6094, 2966.5642, 2900.794, 2833.7256, 2769.81, 2707.3196, 2644.0778, 2583.9916, 2523.4662, 2464.124, 2406.073, 2347.0362, 2292.1006, 2238.1716, 2182.7514, 2128.4884, 2077.1314, 2025.037, 1975.3756, 1928.933, 1879.311, 1831.0006, 1783.2144, 1738.3096, 1694.5144, 1649.024, 1606.847, 1564.7528, 1525.3168, 1482.5372, 1443.9668, 1406.5074, 1365.867, 1329.2186, 1295.4186, 1257.9716, 1225.339, 1193.2972, 1156.3578, 1125.8686, 1091.187, 1061.4094, 1029.4188, 1000.9126, 972.3272, 944.004199999999, 915.7592, 889.965, 862.834200000001, 840.4254, 812.598399999999, 785.924200000001, 763.050999999999, 741.793799999999, 721.466, 699.040799999999, 677.997200000002, 649.866999999998, 634.911800000002, 609.8694, 591.981599999999, 570.2922, 557.129199999999, 538.3858, 521.872599999999, 502.951400000002, 495.776399999999, 475.171399999999, 459.751, 439.995200000001, 426.708999999999, 413.7016, 402.3868, 387.262599999998, 372.0524, 357.050999999999, 342.5098, 334.849200000001, 322.529399999999, 311.613799999999, 295.848000000002, 289.273000000001, 274.093000000001, 263.329600000001, 251.389599999999, 245.7392, 231.9614, 229.7952, 217.155200000001, 208.9588, 199.016599999999, 190.839199999999, 180.6976, 176.272799999999, 166.976999999999, 162.5252, 151.196400000001, 149.386999999999, 133.981199999998, 130.0586, 130.164000000001, 122.053400000001, 110.7428, 108.1276, 106.232400000001, 100.381600000001, 98.7668000000012, 86.6440000000002, 79.9768000000004, 82.4722000000002, 68.7026000000005, 70.1186000000016, 71.9948000000004, 58.998599999999, 59.0492000000013, 56.9818000000014, 47.5338000000011, 42.9928, 
51.1591999999982, 37.2740000000013, 42.7220000000016, 31.3734000000004, 26.8090000000011, 25.8934000000008, 26.5286000000015, 29.5442000000003, 19.3503999999994, 26.0760000000009, 17.9527999999991, 14.8419999999969, 10.4683999999979, 8.65899999999965, 9.86720000000059, 4.34139999999752, -0.907800000000861, -3.32080000000133, -0.936199999996461, -11.9916000000012, -8.87000000000262, -6.33099999999831, -11.3366000000024, -15.9207999999999, -9.34659999999712, -15.5034000000014, -19.2097999999969, -15.357799999998, -28.2235999999975, -30.6898000000001, -19.3271999999997, -25.6083999999973, -24.409599999999, -13.6385999999984, -33.4473999999973, -32.6949999999997, -28.9063999999998, -31.7483999999968, -32.2935999999972, -35.8329999999987, -47.620600000002, -39.0855999999985, -33.1434000000008, -46.1371999999974, -37.5892000000022, -46.8164000000033, -47.3142000000007, -60.2914000000019, -37.7575999999972, },
+    // precision 14
+    (double[]) { 11816.475, 11605.0046, 11395.3792, 11188.7504, 10984.1814, 10782.0086, 10582.0072, 10384.503, 10189.178, 9996.2738, 9806.0344, 9617.9798, 9431.394, 9248.7784, 9067.6894, 8889.6824, 8712.9134, 8538.8624, 8368.4944, 8197.7956, 8031.8916, 7866.6316, 7703.733, 7544.5726, 7386.204, 7230.666, 7077.8516, 6926.7886, 6778.6902, 6631.9632, 6487.304, 6346.7486, 6206.4408, 6070.202, 5935.2576, 5799.924, 5671.0324, 5541.9788, 5414.6112, 5290.0274, 5166.723, 5047.6906, 4929.162, 4815.1406, 4699.127, 4588.5606, 4477.7394, 4369.4014, 4264.2728, 4155.9224, 4055.581, 3955.505, 3856.9618, 3761.3828, 3666.9702, 3575.7764, 3482.4132, 3395.0186, 3305.8852, 3221.415, 3138.6024, 3056.296, 2970.4494, 2896.1526, 2816.8008, 2740.2156, 2670.497, 2594.1458, 2527.111, 2460.8168, 2387.5114, 2322.9498, 2260.6752, 2194.2686, 2133.7792, 2074.767, 2015.204, 1959.4226, 1898.6502, 1850.006, 1792.849, 1741.4838, 1687.9778, 1638.1322, 1589.3266, 1543.1394, 1496.8266, 1447.8516, 1402.7354, 1361.9606, 1327.0692, 1285.4106, 1241.8112, 1201.6726, 1161.973, 1130.261, 1094.2036, 1048.2036, 1020.6436, 990.901400000002, 961.199800000002, 924.769800000002, 899.526400000002, 872.346400000002, 834.375, 810.432000000001, 780.659800000001, 756.013800000001, 733.479399999997, 707.923999999999, 673.858, 652.222399999999, 636.572399999997, 615.738599999997, 586.696400000001, 564.147199999999, 541.679600000003, 523.943599999999, 505.714599999999, 475.729599999999, 461.779600000002, 449.750800000002, 439.020799999998, 412.7886, 400.245600000002, 383.188199999997, 362.079599999997, 357.533799999997, 334.319000000003, 327.553399999997, 308.559399999998, 291.270199999999, 279.351999999999, 271.791400000002, 252.576999999997, 247.482400000001, 236.174800000001, 218.774599999997, 220.155200000001, 208.794399999999, 201.223599999998, 182.995600000002, 185.5268, 164.547400000003, 176.5962, 150.689599999998, 157.8004, 138.378799999999, 134.021200000003, 117.614399999999, 108.194000000003, 97.0696000000025, 
89.6042000000016, 95.6030000000028, 84.7810000000027, 72.635000000002, 77.3482000000004, 59.4907999999996, 55.5875999999989, 50.7346000000034, 61.3916000000027, 50.9149999999936, 39.0384000000049, 58.9395999999979, 29.633600000001, 28.2032000000036, 26.0078000000067, 17.0387999999948, 9.22000000000116, 13.8387999999977, 8.07240000000456, 14.1549999999988, 15.3570000000036, 3.42660000000615, 6.24820000000182, -2.96940000000177, -8.79940000000352, -5.97860000000219, -14.4048000000039, -3.4143999999942, -13.0148000000045, -11.6977999999945, -25.7878000000055, -22.3185999999987, -24.409599999999, -31.9756000000052, -18.9722000000038, -22.8678000000073, -30.8972000000067, -32.3715999999986, -22.3907999999938, -43.6720000000059, -35.9038, -39.7492000000057, -54.1641999999993, -45.2749999999942, -42.2989999999991, -44.1089999999967, -64.3564000000042, -49.9551999999967, -42.6116000000038, },
+    // precision 15
+    (double[]) { 23634.0036, 23210.8034, 22792.4744, 22379.1524, 21969.7928, 21565.326, 21165.3532, 20770.2806, 20379.9892, 19994.7098, 19613.318, 19236.799, 18865.4382, 18498.8244, 18136.5138, 17778.8668, 17426.2344, 17079.32, 16734.778, 16397.2418, 16063.3324, 15734.0232, 15409.731, 15088.728, 14772.9896, 14464.1402, 14157.5588, 13855.5958, 13559.3296, 13264.9096, 12978.326, 12692.0826, 12413.8816, 12137.3192, 11870.2326, 11602.5554, 11340.3142, 11079.613, 10829.5908, 10583.5466, 10334.0344, 10095.5072, 9859.694, 9625.2822, 9395.7862, 9174.0586, 8957.3164, 8738.064, 8524.155, 8313.7396, 8116.9168, 7913.542, 7718.4778, 7521.65, 7335.5596, 7154.2906, 6968.7396, 6786.3996, 6613.236, 6437.406, 6270.6598, 6107.7958, 5945.7174, 5787.6784, 5635.5784, 5482.308, 5337.9784, 5190.0864, 5045.9158, 4919.1386, 4771.817, 4645.7742, 4518.4774, 4385.5454, 4262.6622, 4142.74679999999, 4015.5318, 3897.9276, 3790.7764, 3685.13800000001, 3573.6274, 3467.9706, 3368.61079999999, 3271.5202, 3170.3848, 3076.4656, 2982.38400000001, 2888.4664, 2806.4868, 2711.9564, 2634.1434, 2551.3204, 2469.7662, 2396.61139999999, 2318.9902, 2243.8658, 2171.9246, 2105.01360000001, 2028.8536, 1960.9952, 1901.4096, 1841.86079999999, 1777.54700000001, 1714.5802, 1654.65059999999, 1596.311, 1546.2016, 1492.3296, 1433.8974, 1383.84600000001, 1339.4152, 1293.5518, 1245.8686, 1193.50659999999, 1162.27959999999, 1107.19439999999, 1069.18060000001, 1035.09179999999, 999.679000000004, 957.679999999993, 925.300199999998, 888.099400000006, 848.638600000006, 818.156400000007, 796.748399999997, 752.139200000005, 725.271200000003, 692.216, 671.633600000001, 647.939799999993, 621.670599999998, 575.398799999995, 561.226599999995, 532.237999999998, 521.787599999996, 483.095799999996, 467.049599999998, 465.286399999997, 415.548599999995, 401.047399999996, 380.607999999993, 377.362599999993, 347.258799999996, 338.371599999999, 310.096999999994, 301.409199999995, 276.280799999993, 265.586800000005, 258.994399999996, 
223.915999999997, 215.925399999993, 213.503800000006, 191.045400000003, 166.718200000003, 166.259000000005, 162.941200000001, 148.829400000002, 141.645999999993, 123.535399999993, 122.329800000007, 89.473399999988, 80.1962000000058, 77.5457999999926, 59.1056000000099, 83.3509999999951, 52.2906000000075, 36.3979999999865, 40.6558000000077, 42.0003999999899, 19.6630000000005, 19.7153999999864, -8.38539999999921, -0.692799999989802, 0.854800000000978, 3.23219999999856, -3.89040000000386, -5.25880000001052, -24.9052000000083, -22.6837999999989, -26.4286000000138, -34.997000000003, -37.0216000000073, -43.430400000012, -58.2390000000014, -68.8034000000043, -56.9245999999985, -57.8583999999973, -77.3097999999882, -73.2793999999994, -81.0738000000129, -87.4530000000086, -65.0254000000132, -57.296399999992, -96.2746000000043, -103.25, -96.081600000005, -91.5542000000132, -102.465200000006, -107.688599999994, -101.458000000013, -109.715800000005, },
+    // precision 16
+    (double[]) { 47270, 46423.3584, 45585.7074, 44757.152, 43938.8416, 43130.9514, 42330.03, 41540.407, 40759.6348, 39988.206, 39226.5144, 38473.2096, 37729.795, 36997.268, 36272.6448, 35558.665, 34853.0248, 34157.4472, 33470.5204, 32793.5742, 32127.0194, 31469.4182, 30817.6136, 30178.6968, 29546.8908, 28922.8544, 28312.271, 27707.0924, 27114.0326, 26526.692, 25948.6336, 25383.7826, 24823.5998, 24272.2974, 23732.2572, 23201.4976, 22674.2796, 22163.6336, 21656.515, 21161.7362, 20669.9368, 20189.4424, 19717.3358, 19256.3744, 18795.9638, 18352.197, 17908.5738, 17474.391, 17052.918, 16637.2236, 16228.4602, 15823.3474, 15428.6974, 15043.0284, 14667.6278, 14297.4588, 13935.2882, 13578.5402, 13234.6032, 12882.1578, 12548.0728, 12219.231, 11898.0072, 11587.2626, 11279.9072, 10973.5048, 10678.5186, 10392.4876, 10105.2556, 9825.766, 9562.5444, 9294.2222, 9038.2352, 8784.848, 8533.2644, 8301.7776, 8058.30859999999, 7822.94579999999, 7599.11319999999, 7366.90779999999, 7161.217, 6957.53080000001, 6736.212, 6548.21220000001, 6343.06839999999, 6156.28719999999, 5975.15419999999, 5791.75719999999, 5621.32019999999, 5451.66, 5287.61040000001, 5118.09479999999, 4957.288, 4798.4246, 4662.17559999999, 4512.05900000001, 4364.68539999999, 4220.77720000001, 4082.67259999999, 3957.19519999999, 3842.15779999999, 3699.3328, 3583.01180000001, 3473.8964, 3338.66639999999, 3233.55559999999, 3117.799, 3008.111, 2909.69140000001, 2814.86499999999, 2719.46119999999, 2624.742, 2532.46979999999, 2444.7886, 2370.1868, 2272.45259999999, 2196.19260000001, 2117.90419999999, 2023.2972, 1969.76819999999, 1885.58979999999, 1833.2824, 1733.91200000001, 1682.54920000001, 1604.57980000001, 1556.11240000001, 1491.3064, 1421.71960000001, 1371.22899999999, 1322.1324, 1264.7892, 1196.23920000001, 1143.8474, 1088.67240000001, 1073.60380000001, 1023.11660000001, 959.036400000012, 927.433199999999, 906.792799999996, 853.433599999989, 841.873800000001, 791.1054, 756.899999999994, 704.343200000003, 
672.495599999995, 622.790399999998, 611.254799999995, 567.283200000005, 519.406599999988, 519.188400000014, 495.312800000014, 451.350799999986, 443.973399999988, 431.882199999993, 392.027000000002, 380.924200000009, 345.128999999986, 298.901400000002, 287.771999999997, 272.625, 247.253000000026, 222.490600000019, 223.590000000026, 196.407599999977, 176.425999999978, 134.725199999986, 132.4804, 110.445599999977, 86.7939999999944, 56.7038000000175, 64.915399999998, 38.3726000000024, 37.1606000000029, 46.170999999973, 49.1716000000015, 15.3362000000197, 6.71639999997569, -34.8185999999987, -39.4476000000141, 12.6830000000191, -12.3331999999937, -50.6565999999875, -59.9538000000175, -65.1054000000004, -70.7576000000117, -106.325200000021, -126.852200000023, -110.227599999984, -132.885999999999, -113.897200000007, -142.713800000027, -151.145399999979, -150.799200000009, -177.756200000003, -156.036399999983, -182.735199999996, -177.259399999981, -198.663600000029, -174.577600000019, -193.84580000001, },
+    // precision 17
+    (double[]) { 94541, 92848.811, 91174.019, 89517.558, 87879.9705, 86262.7565, 84663.5125, 83083.7435, 81521.7865, 79977.272, 78455.9465, 76950.219, 75465.432, 73994.152, 72546.71, 71115.2345, 69705.6765, 68314.937, 66944.2705, 65591.255, 64252.9485, 62938.016, 61636.8225, 60355.592, 59092.789, 57850.568, 56624.518, 55417.343, 54231.1415, 53067.387, 51903.526, 50774.649, 49657.6415, 48561.05, 47475.7575, 46410.159, 45364.852, 44327.053, 43318.4005, 42325.6165, 41348.4595, 40383.6265, 39436.77, 38509.502, 37594.035, 36695.939, 35818.6895, 34955.691, 34115.8095, 33293.949, 32465.0775, 31657.6715, 30877.2585, 30093.78, 29351.3695, 28594.1365, 27872.115, 27168.7465, 26477.076, 25774.541, 25106.5375, 24452.5135, 23815.5125, 23174.0655, 22555.2685, 21960.2065, 21376.3555, 20785.1925, 20211.517, 19657.0725, 19141.6865, 18579.737, 18081.3955, 17578.995, 17073.44, 16608.335, 16119.911, 15651.266, 15194.583, 14749.0495, 14343.4835, 13925.639, 13504.509, 13099.3885, 12691.2855, 12328.018, 11969.0345, 11596.5145, 11245.6355, 10917.6575, 10580.9785, 10277.8605, 9926.58100000001, 9605.538, 9300.42950000003, 8989.97850000003, 8728.73249999998, 8448.3235, 8175.31050000002, 7898.98700000002, 7629.79100000003, 7413.76199999999, 7149.92300000001, 6921.12650000001, 6677.1545, 6443.28000000003, 6278.23450000002, 6014.20049999998, 5791.20299999998, 5605.78450000001, 5438.48800000001, 5234.2255, 5059.6825, 4887.43349999998, 4682.935, 4496.31099999999, 4322.52250000002, 4191.42499999999, 4021.24200000003, 3900.64799999999, 3762.84250000003, 3609.98050000001, 3502.29599999997, 3363.84250000003, 3206.54849999998, 3079.70000000001, 2971.42300000001, 2867.80349999998, 2727.08100000001, 2630.74900000001, 2496.6165, 2440.902, 2356.19150000002, 2235.58199999999, 2120.54149999999, 2012.25449999998, 1933.35600000003, 1820.93099999998, 1761.54800000001, 1663.09350000002, 1578.84600000002, 1509.48149999999, 1427.3345, 1379.56150000001, 1306.68099999998, 1212.63449999999, 1084.17300000001, 
1124.16450000001, 1060.69949999999, 1007.48849999998, 941.194499999983, 879.880500000028, 836.007500000007, 782.802000000025, 748.385499999975, 647.991500000004, 626.730500000005, 570.776000000013, 484.000500000024, 513.98550000001, 418.985499999952, 386.996999999974, 370.026500000036, 355.496999999974, 356.731499999994, 255.92200000002, 259.094000000041, 205.434499999974, 165.374500000034, 197.347500000033, 95.718499999959, 67.6165000000037, 54.6970000000438, 31.7395000000251, -15.8784999999916, 8.42500000004657, -26.3754999999655, -118.425500000012, -66.6629999999423, -42.9745000000112, -107.364999999991, -189.839000000036, -162.611499999999, -164.964999999967, -189.079999999958, -223.931499999948, -235.329999999958, -269.639500000048, -249.087999999989, -206.475499999942, -283.04449999996, -290.667000000016, -304.561499999953, -336.784499999951, -380.386500000022, -283.280499999993, -364.533000000054, -389.059499999974, -364.454000000027, -415.748000000021, -417.155000000028, },
+    // precision 18
+    (double[]) { 189083, 185696.913, 182348.774, 179035.946, 175762.762, 172526.444, 169329.754, 166166.099, 163043.269, 159958.91, 156907.912, 153906.845, 150924.199, 147996.568, 145093.457, 142239.233, 139421.475, 136632.27, 133889.588, 131174.2, 128511.619, 125868.621, 123265.385, 120721.061, 118181.769, 115709.456, 113252.446, 110840.198, 108465.099, 106126.164, 103823.469, 101556.618, 99308.004, 97124.508, 94937.803, 92833.731, 90745.061, 88677.627, 86617.47, 84650.442, 82697.833, 80769.132, 78879.629, 77014.432, 75215.626, 73384.587, 71652.482, 69895.93, 68209.301, 66553.669, 64921.981, 63310.323, 61742.115, 60205.018, 58698.658, 57190.657, 55760.865, 54331.169, 52908.167, 51550.273, 50225.254, 48922.421, 47614.533, 46362.049, 45098.569, 43926.083, 42736.03, 41593.473, 40425.26, 39316.237, 38243.651, 37170.617, 36114.609, 35084.19, 34117.233, 33206.509, 32231.505, 31318.728, 30403.404, 29540.0550000001, 28679.236, 27825.862, 26965.216, 26179.148, 25462.08, 24645.952, 23922.523, 23198.144, 22529.128, 21762.4179999999, 21134.779, 20459.117, 19840.818, 19187.04, 18636.3689999999, 17982.831, 17439.7389999999, 16874.547, 16358.2169999999, 15835.684, 15352.914, 14823.681, 14329.313, 13816.897, 13342.874, 12880.882, 12491.648, 12021.254, 11625.392, 11293.7610000001, 10813.697, 10456.209, 10099.074, 9755.39000000001, 9393.18500000006, 9047.57900000003, 8657.98499999999, 8395.85900000005, 8033, 7736.95900000003, 7430.59699999995, 7258.47699999996, 6924.58200000005, 6691.29399999999, 6357.92500000005, 6202.05700000003, 5921.19700000004, 5628.28399999999, 5404.96799999999, 5226.71100000001, 4990.75600000005, 4799.77399999998, 4622.93099999998, 4472.478, 4171.78700000001, 3957.46299999999, 3868.95200000005, 3691.14300000004, 3474.63100000005, 3341.67200000002, 3109.14000000001, 3071.97400000005, 2796.40399999998, 2756.17799999996, 2611.46999999997, 2471.93000000005, 2382.26399999997, 2209.22400000005, 2142.28399999999, 2013.96100000001, 1911.18999999994, 
1818.27099999995, 1668.47900000005, 1519.65800000005, 1469.67599999998, 1367.13800000004, 1248.52899999998, 1181.23600000003, 1022.71900000004, 1088.20700000005, 959.03600000008, 876.095999999903, 791.183999999892, 703.337000000058, 731.949999999953, 586.86400000006, 526.024999999907, 323.004999999888, 320.448000000091, 340.672999999952, 309.638999999966, 216.601999999955, 102.922999999952, 19.2399999999907, -0.114000000059605, -32.6240000000689, -89.3179999999702, -153.497999999905, -64.2970000000205, -143.695999999996, -259.497999999905, -253.017999999924, -213.948000000091, -397.590000000084, -434.006000000052, -403.475000000093, -297.958000000101, -404.317000000039, -528.898999999976, -506.621000000043, -513.205000000075, -479.351000000024, -596.139999999898, -527.016999999993, -664.681000000099, -680.306000000099, -704.050000000047, -850.486000000034, -757.43200000003, -713.308999999892, }
+};
+/*
+ * Looks up the alpha constant that scales the raw HyperLogLog estimate.
+ * Precisions 4, 5 and 6 use fixed values; every other precision uses the
+ * closed-form expression in the register count.
+ */
+static double alpha(unsigned char precision) {
+    if (precision == 4)
+        return 0.673;
+    if (precision == 5)
+        return 0.697;
+    if (precision == 6)
+        return 0.709;
+    return 0.7213 / (1 + 1.079 / NUM_REG(precision));
+}
+
+/*
+ * Raw (uncorrected) cardinality estimate: alpha * m * m divided by the sum
+ * of 2^-register over all m registers. Every register that reads zero also
+ * increments *num_zero, so the caller must initialise that counter.
+ */
+static double raw_estimate(const struct ST_hyperloglog *h, int *num_zero)
+{
+    const unsigned char p = h->cfg.precision;
+    const int m = NUM_REG(p);
+
+    double sum_inverse = 0;
+    int idx = 0;
+    while (idx < m) {
+        const int rank = get_register(h, idx);
+        sum_inverse += pow(2.0, -rank);
+        if (rank == 0)
+            *num_zero += 1;
+        idx++;
+    }
+
+    const double scale = alpha(p) * m * m;
+    return scale * (1.0 / sum_inverse);
+}
+
+/*
+ * Linear-counting estimate m * ln(m / V), where m is the register count
+ * and V = num_zero is the number of registers still at zero.
+ * Callers are expected to pass a non-zero num_zero.
+ */
+static double linear_count(const struct ST_hyperloglog *h, int num_zero)
+{
+    const int m = NUM_REG(h->cfg.precision);
+    const double empty_ratio = (double)m / (double)num_zero;
+    return m * log(empty_ratio);
+}
+/*
+ * Linear-counting variant that only inspects part of the register file.
+ * Each register i gets a weight derived from its distance to h->reset_idx
+ * (presumably how long ago the register was last reset -- TODO confirm);
+ * only registers whose weight exceeds m/8 are counted, and the estimate is
+ * m * ln(counted / counted_and_zero).
+ *
+ * The num_zero argument is not used: the zero count is recomputed over the
+ * counted subset.
+ * NOTE(review): if none of the counted registers is zero the ratio divides
+ * by zero and the result is not a finite number -- callers should check.
+ */
+static double linear_count2(const struct ST_hyperloglog *h, int num_zero)
+{
+    const int m = NUM_REG(h->cfg.precision);
+    int counted = 0;
+    int empty = 0;
+
+    for (int i = 0; i < m; i++)
+    {
+        double weight;
+        if (i >= h->reset_idx)
+            weight = (m + h->reset_idx - i);
+        else
+            weight = (h->reset_idx - i) + 1;
+
+        const double reg = get_register(h, i);
+        if (!(weight > m / 8))
+            continue;
+
+        counted++;
+        if (reg == 0)
+            empty++;
+    }
+    return m * log((double)counted / (double)empty);
+}
+/**
+ * Binary search over a table that is expected to be sorted ascending.
+ * @arg val The value to look for
+ * @arg num Number of entries in array
+ * @arg array The table to search
+ * @return The index of an exact match when the probe lands on one,
+ * otherwise the index at which the search interval collapsed
+ * (always within 0 .. num-1 for num >= 1).
+ */
+static int binary_search(double val, int num, const double *array) {
+    int lo = 0;
+    int hi = num - 1;
+    while (lo < hi) {
+        const int probe = (lo + hi) / 2;
+        const double cur = array[probe];
+        if (val > cur) {
+            lo = probe + 1;
+            continue;
+        }
+        if (val == cur)
+            return probe;
+        hi = probe - 1;
+    }
+    return lo;
+}
+/**
+ * Looks up the bias to subtract from a raw estimate, using the
+ * empirical per-precision tables g_rawEstimateData / g_biasData
+ * (data collected by Google, from the paper mentioned above).
+ * The result is the mean of the two bias entries around the located
+ * index, or the first / last entry at the table edges.
+ */
+static double bias_estimate(const struct ST_hyperloglog *h, double raw_est) {
+    const int precision = h->cfg.precision;
+
+    // Number of usable table entries for this precision
+    int samples = 200;
+    if (precision == 4)
+        samples = 80;
+    else if (precision == 5)
+        samples = 160;
+
+    // Tables are indexed from precision 4 upward
+    double *estimates = g_rawEstimateData[precision - 4];
+    double *biases = g_biasData[precision - 4];
+
+    // Locate the raw estimate in the table and pick the matching bias
+    const int idx = binary_search(raw_est, samples, estimates);
+    if (idx == 0)
+        return biases[0];
+    if (idx == samples)
+        return biases[samples - 1];
+    return (biases[idx] + biases[idx - 1]) / 2;
+}
+/*
+ * Weighted raw estimate used by ST_hyperloglog_count().
+ * Each register i gets a weight derived from its distance to h->reset_idx
+ * (presumably the age of the register within the window -- TODO confirm)
+ * and contributes 1/lambda with lambda = 2^(register-1) * m * m / weight.
+ * The result is the number of contributing registers divided by that sum;
+ * the caller multiplies it by alpha().
+ *
+ * When a time window is configured, registers with weight below m/8 are
+ * left out of the sum. *num_zero is incremented for every zero register,
+ * including the ones left out.
+ */
+static double raw_estimate2(const struct ST_hyperloglog *h, int *num_zero)
+{
+    const unsigned char precision = h->cfg.precision;
+    const int m = NUM_REG(precision);
+    int used = 0;
+    double inv_sum = 0;
+
+    for (int i = 0; i < m; i++)
+    {
+        double weight;
+        if (i >= h->reset_idx)
+            weight = (m + h->reset_idx - i);
+        else
+            weight = (h->reset_idx - i) + 1;
+
+        const double reg = get_register(h, i);
+        if (!reg)
+            *num_zero += 1;
+
+        if (weight < m / 8 && h->cfg.time_window_s)
+            continue;
+
+        const double lambda = pow(2.0, reg - 1) * m * m / weight;
+        inv_sum += 1 / lambda;
+        used++;
+    }
+    return used / inv_sum;
+}
+/*
+ * Cardinality estimate that ignores the sliding window (currently unused).
+ * Starts from raw_estimate(), subtracts the empirical bias for small
+ * estimates, and prefers linear counting whenever that alternative falls
+ * at or below the per-precision switch threshold.
+ */
+ __attribute__ ((unused)) static double ST_HLL_count_no_sliding(const struct ST_hyperloglog *h)
+{
+    const int m = NUM_REG(h->cfg.precision);
+    int zeros = 0;
+    double estimate = raw_estimate(h, &zeros);
+
+    // Bias correction only applies to small estimates
+    if (estimate <= 5 * m)
+        estimate -= bias_estimate(h, estimate);
+
+    // Linear counting is only defined while some register is still zero
+    const double alternative = zeros ? linear_count(h, zeros) : estimate;
+
+    return (alternative <= g_switchThreshold[h->cfg.precision - 4]) ? alternative : estimate;
+}
+/*
+ * Estimates the number of distinct keys represented by h.
+ *
+ * Steps:
+ *  1. weighted raw estimate (raw_estimate2) scaled by alpha();
+ *  2. empirical bias subtraction when the estimate is at most 5*m;
+ *  3. linear counting (linear_count2) when the estimate is at most 2.5*m
+ *     and at least one register is zero;
+ *  4. a logarithmic correction for very large estimates.
+ *
+ * @return The estimated cardinality.
+ * NOTE(review): the large-range branch takes log(1 - raw_est/INT32_MAX),
+ * which is not finite once raw_est reaches INT32_MAX -- confirm whether
+ * estimates that large are reachable.
+ */
+double ST_hyperloglog_count(const struct ST_hyperloglog *h)
+{
+    int num_zero=0;
+    double raw_est=raw_estimate2(h, &num_zero);
+    raw_est*=alpha(h->cfg.precision);
+
+    double num_reg=NUM_REG(h->cfg.precision);
+    if (raw_est <= 5 * num_reg) {
+        raw_est -= bias_estimate(h, raw_est);
+    }
+    if(raw_est<=5*num_reg/2 && num_zero)
+    {
+        /*
+         * num_zero counts zero registers over ALL registers, whereas
+         * linear_count2() only counts them over the registers it keeps.
+         * If that subset has no zero register, linear_count2() divides by
+         * zero and yields a non-finite value; fall back to the
+         * bias-corrected raw estimate instead of returning it.
+         */
+        double linear_est = linear_count2(h, num_zero);
+        if (isfinite(linear_est))
+            return linear_est;
+    }
+    if(raw_est>INT32_MAX/30)
+    {
+        return INT32_MIN*log(1-raw_est/INT32_MAX);
+    }
+    return raw_est;
+}
+
+/**
+ * Expected relative error of the estimator for a given precision.
+ * The error of HLL is 1.04 / sqrt(m), with m = 2^precision registers.
+ * @arg precision The precision to use
+ * @return The expected error, or zero when precision lies outside
+ * [HLL_MIN_PRECISION, HLL_MAX_PRECISION].
+ */
+double ST_hyperloglog_error_for_precision(unsigned char precision)
+{
+    const int in_range = precision >= HLL_MIN_PRECISION && precision <= HLL_MAX_PRECISION;
+    if (!in_range)
+        return 0;
+
+    const double registers = NUM_REG(precision);
+    return 1.04 / sqrt(registers);
+}
+\ No newline at end of file
diff --git a/CRDT/st_hyperloglog.h b/CRDT/st_hyperloglog.h
new file mode 100644
index 0000000..cb872b9
--- /dev/null
+++ b/CRDT/st_hyperloglog.h
@@ -0,0 +1,38 @@
+/*
+* Staggered HyperLogLog CRDT
+* ST HLL Reference: Cornacchia, Alessandro, et al. "Staggered HLL: Near-continuous-time cardinality estimation with no overhead." 
+* Computer Communications 193 (2022): 168-175.
+* https://www.sciencedirect.com/science/article/abs/pii/S0140366422002407
+* HLL Reference: HyperLogLog in Practice: Algorithmic Engineering of a State of The Art Cardinality Estimation Algorithm
+* https://storage.googleapis.com/pub-tools-public-publication-data/pdf/40671.pdf
+* The HyperLogLog implementation is adapted from armon/hlld (https://github.com/armon/hlld/blob/master/src/hll.c)
+* Author: [email protected]
+*/
+#pragma once
+#include <stddef.h>
+#include <sys/time.h>
+#include <uuid/uuid.h>
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+// Ensure precision in a sane bound
+#define HLL_MIN_PRECISION 4      // 16 registers
+#define HLL_MAX_PRECISION 18     // 262,144 registers
+
+// Opaque handle: the struct is only forward-declared in this header.
+struct ST_hyperloglog;
+// NOTE(review): presumably a time_window_seconds of 0 disables the sliding window -- confirm in st_hyperloglog.c.
+struct ST_hyperloglog *ST_hyperloglog_new(unsigned char precision, int time_window_seconds, const struct timeval now);
+void ST_hyperloglog_free(struct ST_hyperloglog *h);
+//Return 1 if at least 1 ST HyperLogLog internal register was altered. 0 otherwise.
+int ST_hyperloglog_add(struct ST_hyperloglog *h, const char *key, size_t keylen, const struct timeval now);
+// Returns the estimated number of distinct keys; the estimate is a double, not an integer count.
+double ST_hyperloglog_count(const struct ST_hyperloglog *h);
+size_t ST_hyperloglog_serialized_size(const struct ST_hyperloglog *h);
+// NOTE(review): *blob / *blob_sz look like output parameters; who frees *blob is not visible from this header -- confirm.
+void ST_hyperloglog_serialize(const struct ST_hyperloglog *h, char **blob, size_t *blob_sz);
+struct ST_hyperloglog *ST_hyperloglog_deserialize(const char *blob, size_t blob_sz);
+void ST_hyperloglog_merge(struct ST_hyperloglog *dst, const struct ST_hyperloglog *src);
+void ST_hyperloglog_merge_blob(struct ST_hyperloglog *dst, const char *blob, size_t blob_sz);
+// Returns 1.04 / sqrt(register count) for the precision, or 0 when precision is outside [HLL_MIN_PRECISION, HLL_MAX_PRECISION].
+double ST_hyperloglog_error_for_precision(unsigned char precision);
+void ST_hyperloglog_configure(struct ST_hyperloglog *h, unsigned char precision, int time_window_seconds, const struct timeval now);
+#ifdef __cplusplus
+}
+#endif
+\ No newline at end of file
diff --git a/CRDT/token_bucket_common.c b/CRDT/token_bucket_common.c
new file mode 100644
index 0000000..895b392
--- /dev/null
+++ b/CRDT/token_bucket_common.c
@@ -0,0 +1,57 @@
+#include "crdt_utils.h"
+#include "token_bucket_common.h"
+
+#include <assert.h>
+
+/* Refills the bucket for the elapsed time and returns the spendable tokens (never negative). */
+/* CIR is tokens per second, CBS the bucket capacity; *new_refilled receives the refill counter to store. */
+/* The refill is skipped unless more than refill_interval_ms has elapsed and at least one token was earned. */
+long long tb_available(long long CIR, long long CBS, long long consumed, long long refilled, long long delta_ms, long long refill_interval_ms, long long *new_refilled)
+{
+	long long to_add = CIR*delta_ms/1000;	/* tokens earned during delta_ms, truncated */
+	*new_refilled = refilled;
+	if(refilled==0 && consumed==0)
+	{
+		/* Never refilled and never consumed: start with a full bucket. */
+		*new_refilled=CBS;
+		return *new_refilled;
+	}
+	if(delta_ms > refill_interval_ms && to_add>0)
+	{
+		if(refilled < consumed)
+		{
+			/* Over-consumption is written off: the refill restarts from an empty bucket. */
+			*new_refilled = consumed;
+		}
+		/* Take the balance AFTER the write-off. Using the stale (negative) refilled-consumed here */
+		/* let the bucket end up above CBS: CBS=100, consumed=150, refilled=100, to_add=1000 gave 150. */
+		long long balance = *new_refilled - consumed;
+		*new_refilled += (to_add < CBS - balance) ? to_add : CBS - balance;
+	}
+	return MAX(*new_refilled-consumed, 0);
+}
+/* Decides how many tokens a request for `tokens` is granted out of `available`, according to cmd. */
+/* Only the grant is returned; no state is updated here. CIR*(n_replica-1)/n_replica tokens are */
+/* held back from TB_CONSUME_NORMAL and TB_CONSUME_FLEXIBLE requests. */
+long long tb_consume(long long CIR, long long available, size_t n_replica, enum tb_consume_type cmd, long long tokens)
+{
+	/* n_replica==0 used to divide by zero; count it as one replica. Signed math also keeps size_t out of the division. */
+	long long replicas=(n_replica>0) ? (long long)n_replica : 1;
+	long long reserved=CIR*(replicas-1)/replicas;
+	long long local_available=MAX(available-reserved, 0);
+	switch(cmd)
+	{
+		case TB_CONSUME_AS_MUCH_AS_POSSIBLE:
+			return available;	/* everything, the reserve included */
+		case TB_CONSUME_FORCE:
+			return tokens;	/* always granted, whatever is available */
+		case TB_CONSUME_FLEXIBLE:
+			return MIN(tokens, local_available);	/* partial grant allowed */
+		case TB_CONSUME_NORMAL:
+			return (tokens<=local_available) ? tokens:0;	/* all or nothing */
+		default:
+			assert(0);
+			break;
+	}
+	return 0;
+}
+\ No newline at end of file
diff --git a/CRDT/token_bucket_common.h b/CRDT/token_bucket_common.h
new file mode 100644
index 0000000..a3f737f
--- /dev/null
+++ b/CRDT/token_bucket_common.h
@@ -0,0 +1,21 @@
+#pragma once
+#include <stddef.h>
+#include <sys/time.h>
+#include <uuid/uuid.h>
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+enum tb_consume_type
+{
+	TB_CONSUME_NORMAL,	// all or nothing: the full request if it fits the locally available tokens, otherwise 0
+	TB_CONSUME_FORCE,	// the requested amount is always granted
+	TB_CONSUME_FLEXIBLE,	// partial grant: the request capped at the locally available tokens
+	TB_CONSUME_AS_MUCH_AS_POSSIBLE,	// every available token is granted; the requested amount is not used
+};
+long long tb_available(long long CIR, long long CBS, long long consumed, long long refilled, long long delta_ms, long long refill_interval_ms, long long *new_refilled);	// returns the spendable tokens (never negative); always writes the refill counter to *new_refilled
+long long tb_consume(long long CIR, long long available, size_t n_replica, enum tb_consume_type cmd, long long tokens);	// returns the number of tokens granted for cmd; does not modify any state
+#ifdef __cplusplus
+}
+#endif
+\ No newline at end of file
diff --git a/deps/xxhash/xxh_x86dispatch.c b/deps/xxhash/xxh_x86dispatch.c
new file mode 100644
index 0000000..59daf19
--- /dev/null
+++ b/deps/xxhash/xxh_x86dispatch.c
@@ -0,0 +1,802 @@
+/*
+ * xxHash - Extremely Fast Hash algorithm
+ * Copyright (C) 2020-2021 Yann Collet
+ *
+ * BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php)
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *    * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *    * Redistributions in binary form must reproduce the above
+ *      copyright notice, this list of conditions and the following disclaimer
+ *      in the documentation and/or other materials provided with the
+ *      distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * You can contact the author at:
+ *   - xxHash homepage: https://www.xxhash.com
+ *   - xxHash source repository: https://github.com/Cyan4973/xxHash
+ */
+
+
+/*!
+ * @file xxh_x86dispatch.c
+ *
+ * Automatic dispatcher code for the @ref XXH3_family on x86-based targets.
+ *
+ * Optional add-on.
+ *
+ * **Compile this file with the default flags for your target.** Do not compile
+ * with flags like `-mavx*`, `-march=native`, or `/arch:AVX*`, there will be
+ * an error. See @ref XXH_X86DISPATCH_ALLOW_AVX for details.
+ *
+ * @defgroup dispatch x86 Dispatcher
+ * @{
+ */
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+#if !(defined(__x86_64__) || defined(__i386__) || defined(_M_IX86) || defined(_M_X64))
+#  error "Dispatching is currently only supported on x86 and x86_64."
+#endif
+
+/*!
+ * @def XXH_X86DISPATCH_ALLOW_AVX
+ * @brief Disables the AVX sanity check.
+ *
+ * Don't compile xxh_x86dispatch.c with options like `-mavx*`, `-march=native`,
+ * or `/arch:AVX*`. It is intended to be compiled for the minimum target, and
+ * it selectively enables SSE2, AVX2, and AVX512 when it is needed.
+ *
+ * Using this option _globally_ allows this feature, and therefore makes it
+ * undefined behavior to execute on any CPU without said feature.
+ *
+ * Even if the source code isn't directly using AVX intrinsics in a function,
+ * the compiler can still generate AVX code from autovectorization and by
+ * "upgrading" SSE2 intrinsics to use the VEX prefixes (a.k.a. AVX128).
+ *
+ * Use the same flags that you use to compile the rest of the program; this
+ * file will safely generate SSE2, AVX2, and AVX512 without these flags.
+ *
+ * Define XXH_X86DISPATCH_ALLOW_AVX to ignore this check, and feel free to open
+ * an issue if there is a target in the future where AVX is a default feature.
+ */
+#ifdef XXH_DOXYGEN
+#  define XXH_X86DISPATCH_ALLOW_AVX
+#endif
+
+#if defined(__AVX__) && !defined(XXH_X86DISPATCH_ALLOW_AVX)
+#  error "Do not compile xxh_x86dispatch.c with AVX enabled! See the comment above."
+#endif
+
+#ifdef __has_include
+#  define XXH_HAS_INCLUDE(header) __has_include(header)
+#else
+#  define XXH_HAS_INCLUDE(header) 0
+#endif
+
+/*!
+ * @def XXH_DISPATCH_SCALAR
+ * @brief Enables/disables dispatching the scalar code path.
+ *
+ * If this is defined to 0, SSE2 support is assumed. This reduces code size
+ * when the scalar path is not needed.
+ *
+ * This is automatically defined to 0 when...
+ *   - SSE2 support is enabled in the compiler
+ *   - Targeting x86_64
+ *   - Targeting Android x86
+ *   - Targeting macOS
+ */
+#ifndef XXH_DISPATCH_SCALAR
+#  if defined(__SSE2__) || (defined(_M_IX86_FP) && _M_IX86_FP >= 2) /* SSE2 on by default */ \
+     || defined(__x86_64__) || defined(_M_X64) /* x86_64 */ \
+     || defined(__ANDROID__) || defined(__APPLE__) /* Android or macOS */
+#     define XXH_DISPATCH_SCALAR 0 /* disable: SSE2 is assumed, the scalar dispatch functions are not compiled */
+#  else
+#     define XXH_DISPATCH_SCALAR 1 /* keep the scalar dispatch functions for CPUs without SSE2 */
+#  endif
+#endif
+/*!
+ * @def XXH_DISPATCH_AVX2
+ * @brief Enables/disables dispatching for AVX2.
+ *
+ * This is automatically detected if it is not defined.
+ *  - GCC 4.7 and later are known to support AVX2, but >4.9 is required
+ *    to get the AVX2 intrinsics and typedefs without -mavx -mavx2.
+ *  - Visual Studio 2013 Update 2 and later are known to support AVX2.
+ *  - The GCC/Clang internal header `<avx2intrin.h>` is detected. While this is
+ *    not allowed to be included directly, it still appears in the builtin
+ *    include path and is detectable with `__has_include`.
+ *
+ * @see XXH_AVX2
+ */
+#ifndef XXH_DISPATCH_AVX2
+#  if (defined(__GNUC__) && (__GNUC__ > 4)) /* GCC 5.0+ */ \
+   || (defined(_MSC_VER) && _MSC_VER >= 1900) /* VS 2015+ */ \
+   || (defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 180030501) /* VS 2013 Update 2 */ \
+   || XXH_HAS_INCLUDE(<avx2intrin.h>) /* GCC/Clang internal header */
+#    define XXH_DISPATCH_AVX2 1   /* enable dispatch towards AVX2 */
+#  else
+#    define XXH_DISPATCH_AVX2 0
+#  endif
+#endif /* XXH_DISPATCH_AVX2 */
+
+/*!
+ * @def XXH_DISPATCH_AVX512
+ * @brief Enables/disables dispatching for AVX512.
+ *
+ * Automatically detected if one of the following conditions is met:
+ *  - GCC 4.9 and later are known to support AVX512.
+ *  - Visual Studio 2017 and later are known to support AVX512.
+ *  - The GCC/Clang internal header `<avx512fintrin.h>` is detected. While this
+ *    is not allowed to be included directly, it still appears in the builtin
+ *    include path and is detectable with `__has_include`.
+ *
+ * @see XXH_AVX512
+ */
+#ifndef XXH_DISPATCH_AVX512
+#  if (defined(__GNUC__) \
+       && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 9))) /* GCC 4.9+ */ \
+   || (defined(_MSC_VER) && _MSC_VER >= 1910) /* VS 2017+ */ \
+   || XXH_HAS_INCLUDE(<avx512fintrin.h>) /* GCC/Clang internal header */
+#    define XXH_DISPATCH_AVX512 1   /* enable dispatch towards AVX512 */
+#  else
+#    define XXH_DISPATCH_AVX512 0
+#  endif
+#endif /* XXH_DISPATCH_AVX512 */
+
+/*!
+ * @def XXH_TARGET_SSE2
+ * @brief Allows a function to be compiled with SSE2 intrinsics.
+ *
+ * Uses `__attribute__((__target__("sse2")))` on GCC to allow SSE2 to be used
+ * even with `-mno-sse2`.
+ *
+ * @def XXH_TARGET_AVX2
+ * @brief Like @ref XXH_TARGET_SSE2, but for AVX2.
+ *
+ * @def XXH_TARGET_AVX512
+ * @brief Like @ref XXH_TARGET_SSE2, but for AVX512.
+ */
+#if defined(__GNUC__)
+#  include <emmintrin.h> /* SSE2 */
+#  if XXH_DISPATCH_AVX2 || XXH_DISPATCH_AVX512
+#    include <immintrin.h> /* AVX2, AVX512F */
+#  endif
+#  define XXH_TARGET_SSE2 __attribute__((__target__("sse2")))
+#  define XXH_TARGET_AVX2 __attribute__((__target__("avx2")))
+#  define XXH_TARGET_AVX512 __attribute__((__target__("avx512f")))
+#elif defined(_MSC_VER)
+#  include <intrin.h>
+#  define XXH_TARGET_SSE2
+#  define XXH_TARGET_AVX2
+#  define XXH_TARGET_AVX512
+#else
+#  error "Dispatching is currently not supported for your compiler."
+#endif
+
+#ifdef XXH_DISPATCH_DEBUG
+/* debug logging */
+#  include <stdio.h>
+#  define XXH_debugPrint(str) { fprintf(stderr, "DEBUG: xxHash dispatch: %s \n", str); fflush(NULL); }
+#else
+#  define XXH_debugPrint(str) ((void)0)
+#  undef NDEBUG /* avoid redefinition */
+#  define NDEBUG
+#endif
+#include <assert.h>
+
+#define XXH_INLINE_ALL
+#define XXH_X86DISPATCH
+#include "xxhash.h"
+
+#ifndef XXH_HAS_ATTRIBUTE
+#  ifdef __has_attribute
+#    define XXH_HAS_ATTRIBUTE(...) __has_attribute(__VA_ARGS__)
+#  else
+#    define XXH_HAS_ATTRIBUTE(...) 0
+#  endif
+#endif
+
+#if XXH_HAS_ATTRIBUTE(constructor)
+#  define XXH_CONSTRUCTOR __attribute__((constructor))
+#  define XXH_DISPATCH_MAYBE_NULL 0
+#else
+#  define XXH_CONSTRUCTOR
+#  define XXH_DISPATCH_MAYBE_NULL 1
+#endif
+
+
+/*
+ * Support both AT&T and Intel dialects
+ *
+ * GCC doesn't convert AT&T syntax to Intel syntax, and will error out if
+ * compiled with -masm=intel. Instead, it supports dialect switching with
+ * curly braces: { AT&T syntax | Intel syntax }
+ *
+ * Clang's integrated assembler automatically converts AT&T syntax to Intel if
+ * needed, making the dialect switching useless (it isn't even supported).
+ *
+ * Note: Comments are written in the inline assembly itself.
+ */
+#ifdef __clang__
+#  define XXH_I_ATT(intel, att) att "\n\t"
+#else
+#  define XXH_I_ATT(intel, att) "{" att "|" intel "}\n\t"
+#endif
+
+/*!
+ * @internal
+ * @brief Runs CPUID.
+ *
+ * @param eax , ecx The parameters to pass to CPUID, %eax and %ecx respectively.
+ * @param abcd The array to store the result in, `{ eax, ebx, ecx, edx }`
+ */
+static void XXH_cpuid(xxh_u32 eax, xxh_u32 ecx, xxh_u32* abcd)
+{
+#if defined(_MSC_VER)
+    __cpuidex(abcd, eax, ecx); /* MSVC intrinsic: writes the four result registers into abcd[] itself */
+#else
+    xxh_u32 ebx, edx;
+# if defined(__i386__) && defined(__PIC__)
+    __asm__(
+        "# Call CPUID\n\t"
+        "#\n\t"
+        "# On 32-bit x86 with PIC enabled, we are not allowed to overwrite\n\t"
+        "# EBX, so we use EDI instead.\n\t"
+        XXH_I_ATT("mov     edi, ebx",   "movl    %%ebx, %%edi")
+        XXH_I_ATT("cpuid",              "cpuid"               )
+        XXH_I_ATT("xchg    edi, ebx",   "xchgl   %%ebx, %%edi")
+        : "=D" (ebx), /* after the xchg, EDI holds CPUID's EBX result and EBX is restored */
+# else
+    __asm__(
+        "# Call CPUID\n\t"
+        XXH_I_ATT("cpuid",              "cpuid")
+        : "=b" (ebx),
+# endif
+              "+a" (eax), "+c" (ecx), "=d" (edx)); /* operand tail shared by both asm variants above */
+    abcd[0] = eax; /* results are stored in { eax, ebx, ecx, edx } order */
+    abcd[1] = ebx;
+    abcd[2] = ecx;
+    abcd[3] = edx;
+#endif
+}
+
+/*
+ * Modified version of Intel's guide
+ * https://software.intel.com/en-us/articles/how-to-detect-new-instruction-support-in-the-4th-generation-intel-core-processor-family
+ */
+
+#if XXH_DISPATCH_AVX2 || XXH_DISPATCH_AVX512
+/*!
+ * @internal
+ * @brief Runs `XGETBV`.
+ *
+ * While the CPU may support AVX2, the operating system might not properly save
+ * the full YMM/ZMM registers.
+ *
+ * xgetbv is used for detecting this: Any compliant operating system will define
+ * a set of flags in the xcr0 register indicating how it saves the AVX registers.
+ *
+ * You can manually disable this flag on Windows by running, as admin:
+ *
+ *   bcdedit.exe /set xsavedisable 1
+ *
+ * and rebooting. Run the same command with 0 to re-enable it.
+ */
+static xxh_u64 XXH_xgetbv(void)
+{
+#if defined(_MSC_VER)
+    return _xgetbv(0);  /* min VS2010 SP1 compiler is required */
+#else
+    xxh_u32 xcr0_lo, xcr0_hi;
+    __asm__(
+        "# Call XGETBV\n\t"
+        "#\n\t"
+        "# Older assemblers (e.g. macOS's ancient GAS version) don't support\n\t"
+        "# the XGETBV opcode, so we encode it by hand instead.\n\t"
+        "# See <https://github.com/asmjit/asmjit/issues/78> for details.\n\t"
+        ".byte   0x0f, 0x01, 0xd0\n\t"
+       : "=a" (xcr0_lo), "=d" (xcr0_hi) : "c" (0)); /* ECX = 0 on entry; the halves come back in EAX (low) and EDX (high) */
+    return xcr0_lo | ((xxh_u64)xcr0_hi << 32); /* widen before shifting so the high half is not lost */
+#endif
+}
+#endif
+
+#define XXH_SSE2_CPUID_MASK (1 << 26) /* tested against abcd[3] (EDX) of CPUID leaf 1 */
+#define XXH_OSXSAVE_CPUID_MASK ((1 << 26) | (1 << 27)) /* tested against abcd[2] (ECX) of CPUID leaf 1; both bits must be set */
+#define XXH_AVX2_CPUID_MASK (1 << 5) /* tested against abcd[1] (EBX) of CPUID leaf 7 */
+#define XXH_AVX2_XGETBV_MASK ((1 << 2) | (1 << 1)) /* XGETBV bits required before AVX2 is selected */
+#define XXH_AVX512F_CPUID_MASK (1 << 16) /* tested against abcd[1] (EBX) of CPUID leaf 7 */
+#define XXH_AVX512F_XGETBV_MASK ((7 << 5) | (1 << 2) | (1 << 1)) /* the AVX2 bits plus bits 5-7, required before AVX512 is selected */
+
+/*!
+ * @internal
+ * @brief Returns the best XXH3 implementation.
+ *
+ * Runs various CPUID/XGETBV tests to try and determine the best implementation.
+ *
+ * @return The best @ref XXH_VECTOR implementation.
+ * @see XXH_VECTOR_TYPES
+ */
+static int XXH_featureTest(void)
+{
+    xxh_u32 abcd[4];
+    xxh_u32 max_leaves;
+    int best = XXH_SCALAR; /* what every early return yields until SSE2 is confirmed */
+#if XXH_DISPATCH_AVX2 || XXH_DISPATCH_AVX512
+    xxh_u64 xgetbv_val;
+#endif
+#if defined(__GNUC__) && defined(__i386__)
+    xxh_u32 cpuid_supported;
+    __asm__(
+        "# For the sake of ruthless backwards compatibility, check if CPUID\n\t"
+        "# is supported in the EFLAGS on i386.\n\t"
+        "# This is not necessary on x86_64 - CPUID is mandatory.\n\t"
+        "#   The ID flag (bit 21) in the EFLAGS register indicates support\n\t"
+        "#   for the CPUID instruction. If a software procedure can set and\n\t"
+        "#   clear this flag, the processor executing the procedure supports\n\t"
+        "#   the CPUID instruction.\n\t"
+        "#   <https://c9x.me/x86/html/file_module_x86_id_45.html>\n\t"
+        "#\n\t"
+        "# Routine is from <https://wiki.osdev.org/CPUID>.\n\t"
+
+        "# Save EFLAGS\n\t"
+        XXH_I_ATT("pushfd",                           "pushfl"                    )
+        "# Store EFLAGS\n\t"
+        XXH_I_ATT("pushfd",                           "pushfl"                    )
+        "# Invert the ID bit in stored EFLAGS\n\t"
+        XXH_I_ATT("xor     dword ptr[esp], 0x200000", "xorl    $0x200000, (%%esp)")
+        "# Load stored EFLAGS (with ID bit inverted)\n\t"
+        XXH_I_ATT("popfd",                            "popfl"                     )
+        "# Store EFLAGS again (ID bit may or not be inverted)\n\t"
+        XXH_I_ATT("pushfd",                           "pushfl"                    )
+        "# eax = modified EFLAGS (ID bit may or may not be inverted)\n\t"
+        XXH_I_ATT("pop     eax",                      "popl    %%eax"             )
+        "# eax = whichever bits were changed\n\t"
+        XXH_I_ATT("xor     eax, dword ptr[esp]",      "xorl    (%%esp), %%eax"    )
+        "# Restore original EFLAGS\n\t"
+        XXH_I_ATT("popfd",                            "popfl"                     )
+        "# eax = zero if ID bit can't be changed, else non-zero\n\t"
+        XXH_I_ATT("and     eax, 0x200000",            "andl    $0x200000, %%eax"  )
+        : "=a" (cpuid_supported) :: "cc");
+
+    if (XXH_unlikely(!cpuid_supported)) {
+        XXH_debugPrint("CPUID support is not detected!");
+        return best;
+    }
+
+#endif
+    /* Check how many CPUID pages we have */
+    XXH_cpuid(0, 0, abcd);
+    max_leaves = abcd[0]; /* EAX of leaf 0 */
+
+    /* Shouldn't happen on hardware, but happens on some QEMU configs. */
+    if (XXH_unlikely(max_leaves == 0)) {
+        XXH_debugPrint("Max CPUID leaves == 0!");
+        return best;
+    }
+
+    /* Check for SSE2, OSXSAVE and xgetbv */
+    XXH_cpuid(1, 0, abcd); /* leaf 1: abcd[3] is tested for SSE2 below, abcd[2] for OSXSAVE */
+
+    /*
+     * Test for SSE2. The check is redundant on x86_64, but it doesn't hurt.
+     */
+    if (XXH_unlikely((abcd[3] & XXH_SSE2_CPUID_MASK) != XXH_SSE2_CPUID_MASK))
+        return best;
+
+    XXH_debugPrint("SSE2 support detected.");
+
+    best = XXH_SSE2;
+#if XXH_DISPATCH_AVX2 || XXH_DISPATCH_AVX512
+    /* Make sure we have enough leaves */
+    if (XXH_unlikely(max_leaves < 7))
+        return best;
+
+    /* Test for OSXSAVE and XGETBV */
+    if ((abcd[2] & XXH_OSXSAVE_CPUID_MASK) != XXH_OSXSAVE_CPUID_MASK)
+        return best;
+
+    /* CPUID check for AVX features */
+    XXH_cpuid(7, 0, abcd); /* leaf 7, sub-leaf 0: abcd[1] is tested for AVX2 and AVX512F below */
+
+    xgetbv_val = XXH_xgetbv(); /* read once, compared against both the AVX2 and the AVX512F masks */
+#if XXH_DISPATCH_AVX2
+    /* Validate that AVX2 is supported by the CPU */
+    if ((abcd[1] & XXH_AVX2_CPUID_MASK) != XXH_AVX2_CPUID_MASK)
+        return best;
+
+    /* Validate that the OS supports YMM registers */
+    if ((xgetbv_val & XXH_AVX2_XGETBV_MASK) != XXH_AVX2_XGETBV_MASK) {
+        XXH_debugPrint("AVX2 supported by the CPU, but not the OS.");
+        return best;
+    }
+
+    /* AVX2 supported */
+    XXH_debugPrint("AVX2 support detected.");
+    best = XXH_AVX2;
+#endif
+#if XXH_DISPATCH_AVX512
+    /* Check if AVX512F is supported by the CPU */
+    if ((abcd[1] & XXH_AVX512F_CPUID_MASK) != XXH_AVX512F_CPUID_MASK) {
+        XXH_debugPrint("AVX512F not supported by CPU");
+        return best;
+    }
+
+    /* Validate that the OS supports ZMM registers */
+    if ((xgetbv_val & XXH_AVX512F_XGETBV_MASK) != XXH_AVX512F_XGETBV_MASK) {
+        XXH_debugPrint("AVX512F supported by the CPU, but not the OS.");
+        return best;
+    }
+
+    /* AVX512F supported */
+    XXH_debugPrint("AVX512F support detected.");
+    best = XXH_AVX512;
+#endif
+#endif
+    return best; /* highest level that passed every check above */
+}
+
+
+/* ===   Vector implementations   === */
+
+/*!
+ * @internal
+ * @brief Defines the various dispatch functions.
+ *
+ * TODO: Consolidate?
+ *
+ * @param suffix The suffix for the functions, e.g. sse2 or scalar
+ * @param target XXH_TARGET_* or empty.
+ */
+#define XXH_DEFINE_DISPATCH_FUNCS(suffix, target)                             \
+                                                                              \
+/* ===   XXH3, default variants   === */                                      \
+                                                                              \
+XXH_NO_INLINE target XXH64_hash_t                                             \
+XXHL64_default_##suffix(XXH_NOESCAPE const void* XXH_RESTRICT input,          \
+                        size_t len)                                           \
+{                                                                             \
+    return XXH3_hashLong_64b_internal(                                        \
+               input, len, XXH3_kSecret, sizeof(XXH3_kSecret),                \
+               XXH3_accumulate_##suffix, XXH3_scrambleAcc_##suffix            \
+    );                                                                        \
+}                                                                             \
+                                                                              \
+/* ===   XXH3, Seeded variants   === */                                       \
+                                                                              \
+XXH_NO_INLINE target XXH64_hash_t                                             \
+XXHL64_seed_##suffix(XXH_NOESCAPE const void* XXH_RESTRICT input, size_t len, \
+                     XXH64_hash_t seed)                                       \
+{                                                                             \
+    return XXH3_hashLong_64b_withSeed_internal(                               \
+                    input, len, seed, XXH3_accumulate_##suffix,               \
+                    XXH3_scrambleAcc_##suffix, XXH3_initCustomSecret_##suffix \
+    );                                                                        \
+}                                                                             \
+                                                                              \
+/* ===   XXH3, Secret variants   === */                                       \
+                                                                              \
+XXH_NO_INLINE target XXH64_hash_t                                             \
+XXHL64_secret_##suffix(XXH_NOESCAPE const void* XXH_RESTRICT input,           \
+                       size_t len, XXH_NOESCAPE const void* secret,           \
+                       size_t secretLen)                                      \
+{                                                                             \
+    return XXH3_hashLong_64b_internal(                                        \
+                    input, len, secret, secretLen,                            \
+                    XXH3_accumulate_##suffix, XXH3_scrambleAcc_##suffix       \
+    );                                                                        \
+}                                                                             \
+                                                                              \
+/* ===   XXH3 update variants   === */                                        \
+                                                                              \
+XXH_NO_INLINE target XXH_errorcode                                            \
+XXH3_update_##suffix(XXH_NOESCAPE XXH3_state_t* state,                        \
+                     XXH_NOESCAPE const void* input, size_t len)              \
+{                                                                             \
+    return XXH3_update(state, (const xxh_u8*)input, len,                      \
+                    XXH3_accumulate_##suffix, XXH3_scrambleAcc_##suffix);     \
+}                                                                             \
+                                                                              \
+/* ===   XXH128 default variants   === */                                     \
+                                                                              \
+XXH_NO_INLINE target XXH128_hash_t                                            \
+XXHL128_default_##suffix(XXH_NOESCAPE  const void* XXH_RESTRICT input,        \
+                         size_t len)                                          \
+{                                                                             \
+    return XXH3_hashLong_128b_internal(                                       \
+                    input, len, XXH3_kSecret, sizeof(XXH3_kSecret),           \
+                    XXH3_accumulate_##suffix, XXH3_scrambleAcc_##suffix       \
+    );                                                                        \
+}                                                                             \
+                                                                              \
+/* ===   XXH128 Secret variants   === */                                      \
+                                                                              \
+XXH_NO_INLINE target XXH128_hash_t                                            \
+XXHL128_secret_##suffix(XXH_NOESCAPE const void* XXH_RESTRICT input,          \
+                        size_t len,                                           \
+                        XXH_NOESCAPE const void* XXH_RESTRICT secret,         \
+                        size_t secretLen)                                     \
+{                                                                             \
+    return XXH3_hashLong_128b_internal(                                       \
+                    input, len, (const xxh_u8*)secret, secretLen,             \
+                    XXH3_accumulate_##suffix, XXH3_scrambleAcc_##suffix);     \
+}                                                                             \
+                                                                              \
+/* ===   XXH128 Seeded variants   === */                                      \
+                                                                              \
+XXH_NO_INLINE target XXH128_hash_t                                            \
+XXHL128_seed_##suffix(XXH_NOESCAPE const void* XXH_RESTRICT input, size_t len,\
+                      XXH64_hash_t seed)                                      \
+{                                                                             \
+    return XXH3_hashLong_128b_withSeed_internal(input, len, seed,             \
+                    XXH3_accumulate_##suffix, XXH3_scrambleAcc_##suffix,      \
+                    XXH3_initCustomSecret_##suffix);                          \
+}
+
+/* End XXH_DEFINE_DISPATCH_FUNCS */
+
+#if XXH_DISPATCH_SCALAR
+XXH_DEFINE_DISPATCH_FUNCS(scalar, /* nothing */)
+#endif
+XXH_DEFINE_DISPATCH_FUNCS(sse2, XXH_TARGET_SSE2)
+#if XXH_DISPATCH_AVX2
+XXH_DEFINE_DISPATCH_FUNCS(avx2, XXH_TARGET_AVX2)
+#endif
+#if XXH_DISPATCH_AVX512
+XXH_DEFINE_DISPATCH_FUNCS(avx512, XXH_TARGET_AVX512)
+#endif
+#undef XXH_DEFINE_DISPATCH_FUNCS
+
+/* ====    Dispatchers    ==== */
+
+typedef XXH64_hash_t (*XXH3_dispatchx86_hashLong64_default)(XXH_NOESCAPE const void* XXH_RESTRICT, size_t);
+
+typedef XXH64_hash_t (*XXH3_dispatchx86_hashLong64_withSeed)(XXH_NOESCAPE const void* XXH_RESTRICT, size_t, XXH64_hash_t);
+
+typedef XXH64_hash_t (*XXH3_dispatchx86_hashLong64_withSecret)(XXH_NOESCAPE const void* XXH_RESTRICT, size_t, XXH_NOESCAPE const void* XXH_RESTRICT, size_t);
+
+typedef XXH_errorcode (*XXH3_dispatchx86_update)(XXH_NOESCAPE XXH3_state_t*, XXH_NOESCAPE const void*, size_t);
+
+typedef struct {
+    XXH3_dispatchx86_hashLong64_default    hashLong64_default;
+    XXH3_dispatchx86_hashLong64_withSeed   hashLong64_seed;
+    XXH3_dispatchx86_hashLong64_withSecret hashLong64_secret;
+    XXH3_dispatchx86_update                update;
+} XXH_dispatchFunctions_s;
+
+#define XXH_NB_DISPATCHES 4
+
+/*!
+ * @internal
+ * @brief Table of dispatchers for @ref XXH3_64bits().
+ *
+ * @pre The indices must match @ref XXH_VECTOR_TYPE.
+ */
+static const XXH_dispatchFunctions_s XXH_kDispatch[XXH_NB_DISPATCHES] = {
+#if XXH_DISPATCH_SCALAR
+    /* Scalar */ { XXHL64_default_scalar, XXHL64_seed_scalar, XXHL64_secret_scalar, XXH3_update_scalar },
+#else
+    /* Scalar */ { NULL, NULL, NULL, NULL },
+#endif
+    /* SSE2   */ { XXHL64_default_sse2,   XXHL64_seed_sse2,   XXHL64_secret_sse2,   XXH3_update_sse2 },
+#if XXH_DISPATCH_AVX2
+    /* AVX2   */ { XXHL64_default_avx2,   XXHL64_seed_avx2,   XXHL64_secret_avx2,   XXH3_update_avx2 },
+#else
+    /* AVX2   */ { NULL, NULL, NULL, NULL },
+#endif
+#if XXH_DISPATCH_AVX512
+    /* AVX512 */ { XXHL64_default_avx512, XXHL64_seed_avx512, XXHL64_secret_avx512, XXH3_update_avx512 }
+#else
+    /* AVX512 */ { NULL, NULL, NULL, NULL }
+#endif
+};
+/*!
+ * @internal
+ * @brief The selected dispatch table for @ref XXH3_64bits().
+ */
+static XXH_dispatchFunctions_s XXH_g_dispatch = { NULL, NULL, NULL, NULL };
+
+
+typedef XXH128_hash_t (*XXH3_dispatchx86_hashLong128_default)(XXH_NOESCAPE const void* XXH_RESTRICT, size_t);
+
+typedef XXH128_hash_t (*XXH3_dispatchx86_hashLong128_withSeed)(XXH_NOESCAPE const void* XXH_RESTRICT, size_t, XXH64_hash_t);
+
+typedef XXH128_hash_t (*XXH3_dispatchx86_hashLong128_withSecret)(XXH_NOESCAPE const void* XXH_RESTRICT, size_t, const void* XXH_RESTRICT, size_t);
+
+typedef struct {
+    XXH3_dispatchx86_hashLong128_default    hashLong128_default;
+    XXH3_dispatchx86_hashLong128_withSeed   hashLong128_seed;
+    XXH3_dispatchx86_hashLong128_withSecret hashLong128_secret;
+    XXH3_dispatchx86_update                 update;
+} XXH_dispatch128Functions_s;
+
+
+/*!
+ * @internal
+ * @brief Table of dispatchers for @ref XXH3_128bits().
+ *
+ * @pre The indices must match @ref XXH_VECTOR_TYPE.
+ */
+static const XXH_dispatch128Functions_s XXH_kDispatch128[XXH_NB_DISPATCHES] = {
+#if XXH_DISPATCH_SCALAR
+    /* Scalar */ { XXHL128_default_scalar, XXHL128_seed_scalar, XXHL128_secret_scalar, XXH3_update_scalar },
+#else
+    /* Scalar */ { NULL, NULL, NULL, NULL },
+#endif
+    /* SSE2   */ { XXHL128_default_sse2,   XXHL128_seed_sse2,   XXHL128_secret_sse2,   XXH3_update_sse2 },
+#if XXH_DISPATCH_AVX2
+    /* AVX2   */ { XXHL128_default_avx2,   XXHL128_seed_avx2,   XXHL128_secret_avx2,   XXH3_update_avx2 },
+#else
+    /* AVX2   */ { NULL, NULL, NULL, NULL },
+#endif
+#if XXH_DISPATCH_AVX512
+    /* AVX512 */ { XXHL128_default_avx512, XXHL128_seed_avx512, XXHL128_secret_avx512, XXH3_update_avx512 }
+#else
+    /* AVX512 */ { NULL, NULL, NULL, NULL }
+#endif
+};
+
+/*!
+ * @internal
+ * @brief The selected dispatch table for @ref XXH3_128bits().
+ */
+static XXH_dispatch128Functions_s XXH_g_dispatch128 = { NULL, NULL, NULL, NULL };
+
+/*!
+ * @internal
+ * @brief Runs a CPUID check and sets the correct dispatch tables.
+ */
+/* Runs the CPU feature test and copies the matching rows into the live 64-bit and 128-bit tables. */
+static XXH_CONSTRUCTOR void XXH_setDispatch(void)
+{
+    const int level = XXH_featureTest();
+
+    /* The tables are indexed by vector level, so the highest level must be the last row. */
+    XXH_STATIC_ASSERT(XXH_NB_DISPATCHES-1 == XXH_AVX512);
+    assert(level >= XXH_SCALAR && level <= XXH_AVX512);
+
+    /* A level whose dispatch is compiled out only has NULL rows and must never be reported. */
+    assert(XXH_DISPATCH_SCALAR || level != XXH_SCALAR);
+    assert(XXH_DISPATCH_AVX512 || level != XXH_AVX512);
+    assert(XXH_DISPATCH_AVX2 || level != XXH_AVX2);
+
+    XXH_g_dispatch128 = XXH_kDispatch128[level];
+    XXH_g_dispatch = XXH_kDispatch[level];
+}
+
+
+/* ====    XXH3 public functions    ==== */
+
+/* Callback XXH3_64bits_dispatch() passes to XXH3_64bits_internal(); forwards to the selected default routine. */
+static XXH64_hash_t XXH3_hashLong_64b_defaultSecret_selection(
+        const void* input, size_t len, XXH64_hash_t seed64, const xxh_u8* secret, size_t secretLen)
+{
+    /* The default routine takes neither a seed nor a secret, so all three are ignored. */
+    (void)secretLen; (void)secret; (void)seed64;
+    if (XXH_DISPATCH_MAYBE_NULL && !XXH_g_dispatch.hashLong64_default) { XXH_setDispatch(); }
+    return XXH_g_dispatch.hashLong64_default(input, len);
+}
+
+XXH64_hash_t XXH3_64bits_dispatch(XXH_NOESCAPE const void* input, size_t len)
+{   /* seed 0 and the built-in XXH3_kSecret; the callback forwards to the routine stored in XXH_g_dispatch */
+    return XXH3_64bits_internal(input, len, 0, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_defaultSecret_selection);
+}
+
+/* Callback XXH3_64bits_withSeed_dispatch() passes to XXH3_64bits_internal(); forwards to the selected seeded routine. */
+static XXH64_hash_t XXH3_hashLong_64b_withSeed_selection(
+        const void* input, size_t len, XXH64_hash_t seed64, const xxh_u8* secret, size_t secretLen)
+{
+    /* Only the seed is forwarded; the secret arguments are ignored. */
+    (void)secretLen; (void)secret;
+    if (XXH_DISPATCH_MAYBE_NULL && !XXH_g_dispatch.hashLong64_seed) { XXH_setDispatch(); }
+    return XXH_g_dispatch.hashLong64_seed(input, len, seed64);
+}
+
+XXH64_hash_t XXH3_64bits_withSeed_dispatch(XXH_NOESCAPE const void* input, size_t len, XXH64_hash_t seed)
+{   /* caller's seed with the built-in XXH3_kSecret; the callback forwards to the routine stored in XXH_g_dispatch */
+    return XXH3_64bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_withSeed_selection);
+}
+
+/* Callback XXH3_64bits_withSecret_dispatch() passes to XXH3_64bits_internal(); forwards to the selected secret routine. */
+static XXH64_hash_t XXH3_hashLong_64b_withSecret_selection(
+        const void* input, size_t len, XXH64_hash_t seed64, const xxh_u8* secret, size_t secretLen)
+{
+    /* The secret and its length are forwarded; the seed is ignored. */
+    (void)seed64;
+    if (XXH_DISPATCH_MAYBE_NULL && !XXH_g_dispatch.hashLong64_secret) { XXH_setDispatch(); }
+    return XXH_g_dispatch.hashLong64_secret(input, len, secret, secretLen);
+}
+
+XXH64_hash_t XXH3_64bits_withSecret_dispatch(XXH_NOESCAPE const void* input, size_t len, XXH_NOESCAPE const void* secret, size_t secretLen)
+{   /* seed 0 with the caller's secret; the callback forwards to the routine stored in XXH_g_dispatch */
+    return XXH3_64bits_internal(input, len, 0, secret, secretLen, XXH3_hashLong_64b_withSecret_selection);
+}
+
+/* Streaming update: hands `len` bytes of `input` and `state` to the update routine of the 64-bit table. */
+XXH_errorcode XXH3_64bits_update_dispatch(
+        XXH_NOESCAPE XXH3_state_t* state, XXH_NOESCAPE const void* input, size_t len)
+{
+    const xxh_u8* const bytes = (const xxh_u8*)input;
+    if (XXH_DISPATCH_MAYBE_NULL && !XXH_g_dispatch.update) { XXH_setDispatch(); }
+    return XXH_g_dispatch.update(state, bytes, len);
+}
+
+
+/* ====    XXH128 public functions    ==== */
+
+/* Callback XXH3_128bits_dispatch() passes to XXH3_128bits_internal(); forwards to the selected default routine. */
+static XXH128_hash_t XXH3_hashLong_128b_defaultSecret_selection(
+        const void* input, size_t len, XXH64_hash_t seed64, const void* secret, size_t secretLen)
+{
+    /* The default routine takes neither a seed nor a secret, so all three are ignored. */
+    (void)secretLen; (void)secret; (void)seed64;
+    if (XXH_DISPATCH_MAYBE_NULL && !XXH_g_dispatch128.hashLong128_default) { XXH_setDispatch(); }
+    return XXH_g_dispatch128.hashLong128_default(input, len);
+}
+
+XXH128_hash_t XXH3_128bits_dispatch(XXH_NOESCAPE const void* input, size_t len)
+{   /* seed 0 and the built-in XXH3_kSecret; the callback forwards to the routine stored in XXH_g_dispatch128 */
+    return XXH3_128bits_internal(input, len, 0, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_128b_defaultSecret_selection);
+}
+
+/* Callback XXH3_128bits_withSeed_dispatch() passes to XXH3_128bits_internal(); forwards to the selected seeded routine. */
+static XXH128_hash_t XXH3_hashLong_128b_withSeed_selection(
+        const void* input, size_t len, XXH64_hash_t seed64, const void* secret, size_t secretLen)
+{
+    /* Only the seed is forwarded; the secret arguments are ignored. */
+    (void)secretLen; (void)secret;
+    if (XXH_DISPATCH_MAYBE_NULL && !XXH_g_dispatch128.hashLong128_seed) { XXH_setDispatch(); }
+    return XXH_g_dispatch128.hashLong128_seed(input, len, seed64);
+}
+
+XXH128_hash_t XXH3_128bits_withSeed_dispatch(XXH_NOESCAPE const void* input, size_t len, XXH64_hash_t seed)
+{   /* caller's seed with the built-in XXH3_kSecret; the callback forwards to the routine stored in XXH_g_dispatch128 */
+    return XXH3_128bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_128b_withSeed_selection);
+}
+
+/* Callback XXH3_128bits_withSecret_dispatch() passes to XXH3_128bits_internal(); forwards to the selected secret routine. */
+static XXH128_hash_t XXH3_hashLong_128b_withSecret_selection(
+        const void* input, size_t len, XXH64_hash_t seed64, const void* secret, size_t secretLen)
+{
+    /* The secret and its length are forwarded; the seed is ignored. */
+    (void)seed64;
+    if (XXH_DISPATCH_MAYBE_NULL && !XXH_g_dispatch128.hashLong128_secret) { XXH_setDispatch(); }
+    return XXH_g_dispatch128.hashLong128_secret(input, len, secret, secretLen);
+}
+
+XXH128_hash_t XXH3_128bits_withSecret_dispatch(XXH_NOESCAPE const void* input, size_t len, XXH_NOESCAPE const void* secret, size_t secretLen)
+{   /* seed 0 with the caller's secret; the callback forwards to the routine stored in XXH_g_dispatch128 */
+    return XXH3_128bits_internal(input, len, 0, secret, secretLen, XXH3_hashLong_128b_withSecret_selection);
+}
+
+/* Streaming update: hands `len` bytes of `input` and `state` to the update routine of the 128-bit table. */
+XXH_errorcode XXH3_128bits_update_dispatch(
+        XXH_NOESCAPE XXH3_state_t* state, XXH_NOESCAPE const void* input, size_t len)
+{
+    if (XXH_DISPATCH_MAYBE_NULL && !XXH_g_dispatch128.update) { XXH_setDispatch(); }
+    return XXH_g_dispatch128.update(state, (const xxh_u8*)input, len);
+}
+
+#if defined (__cplusplus)
+}
+#endif
+/*! @} */
diff --git a/deps/xxhash/xxh_x86dispatch.h b/deps/xxhash/xxh_x86dispatch.h
new file mode 100644
index 0000000..b87cea9
--- /dev/null
+++ b/deps/xxhash/xxh_x86dispatch.h
@@ -0,0 +1,85 @@
+/*
+ * xxHash - XXH3 Dispatcher for x86-based targets
+ * Copyright (C) 2020-2021 Yann Collet
+ *
+ * BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php)
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *    * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *    * Redistributions in binary form must reproduce the above
+ *      copyright notice, this list of conditions and the following disclaimer
+ *      in the documentation and/or other materials provided with the
+ *      distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * You can contact the author at:
+ *   - xxHash homepage: https://www.xxhash.com
+ *   - xxHash source repository: https://github.com/Cyan4973/xxHash
+ */
+
+#ifndef XXH_X86DISPATCH_H_13563687684
+#define XXH_X86DISPATCH_H_13563687684
+
+#include "xxhash.h"  /* XXH64_hash_t, XXH3_state_t */
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* 64-bit dispatcher entry points.
+ * Each carries the name of a regular XXH3 function plus a `_dispatch` suffix:
+ * unseeded, seeded, custom-secret, and streaming update, in that order. */
+XXH_PUBLIC_API XXH64_hash_t  XXH3_64bits_dispatch(XXH_NOESCAPE const void* input, size_t len);
+XXH_PUBLIC_API XXH64_hash_t  XXH3_64bits_withSeed_dispatch(XXH_NOESCAPE const void* input, size_t len, XXH64_hash_t seed);
+XXH_PUBLIC_API XXH64_hash_t  XXH3_64bits_withSecret_dispatch(XXH_NOESCAPE const void* input, size_t len, XXH_NOESCAPE const void* secret, size_t secretLen);
+XXH_PUBLIC_API XXH_errorcode XXH3_64bits_update_dispatch(XXH_NOESCAPE XXH3_state_t* state, XXH_NOESCAPE const void* input, size_t len);
+
+/* 128-bit dispatcher entry points, in the same order as the 64-bit set above. */
+XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_dispatch(XXH_NOESCAPE const void* input, size_t len);
+XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_withSeed_dispatch(XXH_NOESCAPE const void* input, size_t len, XXH64_hash_t seed);
+XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_withSecret_dispatch(XXH_NOESCAPE const void* input, size_t len, XXH_NOESCAPE const void* secret, size_t secretLen);
+XXH_PUBLIC_API XXH_errorcode XXH3_128bits_update_dispatch(XXH_NOESCAPE XXH3_state_t* state, XXH_NOESCAPE const void* input, size_t len);
+
+#if defined (__cplusplus)
+}
+#endif
+
+
+/* Automatic replacement of the regular XXH3 entry points.
+ * Unless XXH_DISPATCH_DISABLE_REPLACE is defined, each name below is
+ * (re)defined as a macro expanding to its `_dispatch` counterpart, so code
+ * that includes this header calls the dispatcher without source changes.
+ * Each name is #undef'ed first so that an earlier macro definition of the
+ * same name does not trigger a redefinition conflict. */
+#ifndef XXH_DISPATCH_DISABLE_REPLACE
+
+/* 64-bit variants */
+# undef  XXH3_64bits
+# define XXH3_64bits XXH3_64bits_dispatch
+# undef  XXH3_64bits_withSeed
+# define XXH3_64bits_withSeed XXH3_64bits_withSeed_dispatch
+# undef  XXH3_64bits_withSecret
+# define XXH3_64bits_withSecret XXH3_64bits_withSecret_dispatch
+# undef  XXH3_64bits_update
+# define XXH3_64bits_update XXH3_64bits_update_dispatch
+
+/* 128-bit variants; note that XXH128 maps to the seeded 128-bit dispatcher */
+# undef  XXH128
+# define XXH128 XXH3_128bits_withSeed_dispatch
+# undef  XXH3_128bits
+# define XXH3_128bits XXH3_128bits_dispatch
+# undef  XXH3_128bits_withSeed
+# define XXH3_128bits_withSeed XXH3_128bits_withSeed_dispatch
+# undef  XXH3_128bits_withSecret
+# define XXH3_128bits_withSecret XXH3_128bits_withSecret_dispatch
+# undef  XXH3_128bits_update
+# define XXH3_128bits_update XXH3_128bits_update_dispatch
+
+#endif /* XXH_DISPATCH_DISABLE_REPLACE */
+
+#endif /* XXH_X86DISPATCH_H_13563687684 */
diff --git a/deps/xxhash/xxhash.c b/deps/xxhash/xxhash.c
index 0fae88c..083b039 100644
--- a/deps/xxhash/xxhash.c
+++ b/deps/xxhash/xxhash.c
@@ -1,6 +1,6 @@
 /*
  * xxHash - Extremely Fast Hash algorithm
- * Copyright (C) 2012-2020 Yann Collet
+ * Copyright (C) 2012-2021 Yann Collet
  *
  * BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php)
  *
diff --git a/deps/xxhash/xxhash.h b/deps/xxhash/xxhash.h
index 08ab794..aacb918 100644
--- a/deps/xxhash/xxhash.h
+++ b/deps/xxhash/xxhash.h
@@ -1,7 +1,7 @@
 /*
  * xxHash - Extremely Fast Hash algorithm
  * Header File
- * Copyright (C) 2012-2020 Yann Collet
+ * Copyright (C) 2012-2021 Yann Collet
  *
  * BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php)
  *
@@ -32,49 +32,142 @@
  *   - xxHash homepage: https://www.xxhash.com
  *   - xxHash source repository: https://github.com/Cyan4973/xxHash
  */
+
 /*!
  * @mainpage xxHash
  *
+ * xxHash is an extremely fast non-cryptographic hash algorithm, working at RAM speed
+ * limits.
+ *
+ * It is proposed in four flavors, in three families:
+ * 1. @ref XXH32_family
+ *   - Classic 32-bit hash function. Simple, compact, and runs on almost all
+ *     32-bit and 64-bit systems.
+ * 2. @ref XXH64_family
+ *   - Classic 64-bit adaptation of XXH32. Just as simple, and runs well on most
+ *     64-bit systems (but _not_ 32-bit systems).
+ * 3. @ref XXH3_family
+ *   - Modern 64-bit and 128-bit hash function family which features improved
+ *     strength and performance across the board, especially on smaller data.
+ *     It benefits greatly from SIMD and 64-bit without requiring it.
+ *
+ * Benchmarks
+ * ---
+ * The reference system uses an Intel i7-9700K CPU, and runs Ubuntu x64 20.04.
+ * The open source benchmark program is compiled with clang v10.0 using -O3 flag.
+ *
+ * | Hash Name            | ISA ext | Width | Large Data Speed | Small Data Velocity |
+ * | -------------------- | ------- | ----: | ---------------: | ------------------: |
+ * | XXH3_64bits()        | @b AVX2 |    64 |        59.4 GB/s |               133.1 |
+ * | MeowHash             | AES-NI  |   128 |        58.2 GB/s |                52.5 |
+ * | XXH3_128bits()       | @b AVX2 |   128 |        57.9 GB/s |               118.1 |
+ * | CLHash               | PCLMUL  |    64 |        37.1 GB/s |                58.1 |
+ * | XXH3_64bits()        | @b SSE2 |    64 |        31.5 GB/s |               133.1 |
+ * | XXH3_128bits()       | @b SSE2 |   128 |        29.6 GB/s |               118.1 |
+ * | RAM sequential read  |         |   N/A |        28.0 GB/s |                 N/A |
+ * | ahash                | AES-NI  |    64 |        22.5 GB/s |               107.2 |
+ * | City64               |         |    64 |        22.0 GB/s |                76.6 |
+ * | T1ha2                |         |    64 |        22.0 GB/s |                99.0 |
+ * | City128              |         |   128 |        21.7 GB/s |                57.7 |
+ * | FarmHash             | AES-NI  |    64 |        21.3 GB/s |                71.9 |
+ * | XXH64()              |         |    64 |        19.4 GB/s |                71.0 |
+ * | SpookyHash           |         |    64 |        19.3 GB/s |                53.2 |
+ * | Mum                  |         |    64 |        18.0 GB/s |                67.0 |
+ * | CRC32C               | SSE4.2  |    32 |        13.0 GB/s |                57.9 |
+ * | XXH32()              |         |    32 |         9.7 GB/s |                71.9 |
+ * | City32               |         |    32 |         9.1 GB/s |                66.0 |
+ * | Blake3*              | @b AVX2 |   256 |         4.4 GB/s |                 8.1 |
+ * | Murmur3              |         |    32 |         3.9 GB/s |                56.1 |
+ * | SipHash*             |         |    64 |         3.0 GB/s |                43.2 |
+ * | Blake3*              | @b SSE2 |   256 |         2.4 GB/s |                 8.1 |
+ * | HighwayHash          |         |    64 |         1.4 GB/s |                 6.0 |
+ * | FNV64                |         |    64 |         1.2 GB/s |                62.7 |
+ * | Blake2*              |         |   256 |         1.1 GB/s |                 5.1 |
+ * | SHA1*                |         |   160 |         0.8 GB/s |                 5.6 |
+ * | MD5*                 |         |   128 |         0.6 GB/s |                 7.8 |
+ * @note
+ *   - Hashes which require a specific ISA extension are noted. SSE2 is also noted,
+ *     even though it is mandatory on x64.
+ *   - Hashes with an asterisk are cryptographic. Note that MD5 is non-cryptographic
+ *     by modern standards.
+ *   - Small data velocity is a rough average of an algorithm's efficiency on
+ *     small data. For more accurate information, see the wiki.
+ *   - More benchmarks and strength tests are found on the wiki:
+ *         https://github.com/Cyan4973/xxHash/wiki
+ *
+ * Usage
+ * ------
+ * All xxHash variants use a similar API. Changing the algorithm is a trivial
+ * substitution.
+ *
+ * @pre
+ *    For functions which take an input and length parameter, the following
+ *    requirements are assumed:
+ *    - The range from [`input`, `input + length`) is valid, readable memory.
+ *      - The only exception is if the `length` is `0`, `input` may be `NULL`.
+ *    - For C++, the objects must have the *TriviallyCopyable* property, as the
+ *      functions access bytes directly as if it was an array of `unsigned char`.
+ *
+ * @anchor single_shot_example
+ * **Single Shot**
+ *
+ * These functions are stateless functions which hash a contiguous block of memory,
+ * immediately returning the result. They are the easiest and usually the fastest
+ * option.
+ *
+ * XXH32(), XXH64(), XXH3_64bits(), XXH3_128bits()
+ *
+ * @code{.c}
+ *   #include <string.h>
+ *   #include "xxhash.h"
+ *
+ *   // Example for a function which hashes a null terminated string with XXH32().
+ *   XXH32_hash_t hash_string(const char* string, XXH32_hash_t seed)
+ *   {
+ *       // NULL pointers are only valid if the length is zero
+ *       size_t length = (string == NULL) ? 0 : strlen(string);
+ *       return XXH32(string, length, seed);
+ *   }
+ * @endcode
+ *
+ * @anchor streaming_example
+ * **Streaming**
+ *
+ * These groups of functions allow incremental hashing of unknown size, even
+ * more than what would fit in a size_t.
+ *
+ * XXH32_reset(), XXH64_reset(), XXH3_64bits_reset(), XXH3_128bits_reset()
+ *
+ * @code{.c}
+ *   #include <stdio.h>
+ *   #include <assert.h>
+ *   #include "xxhash.h"
+ *   // Example for a function which hashes a FILE incrementally with XXH3_64bits().
+ *   XXH64_hash_t hashFile(FILE* f)
+ *   {
+ *       // Allocate a state struct. Do not just use malloc() or new.
+ *       XXH3_state_t* state = XXH3_createState();
+ *       assert(state != NULL && "Out of memory!");
+ *       // Reset the state to start a new hashing session.
+ *       XXH3_64bits_reset(state);
+ *       char buffer[4096];
+ *       size_t count;
+ *       // Read the file in chunks
+ *       while ((count = fread(buffer, 1, sizeof(buffer), f)) != 0) {
+ *           // Run update() as many times as necessary to process the data
+ *           XXH3_64bits_update(state, buffer, count);
+ *       }
+ *       // Retrieve the finalized hash. This will not change the state.
+ *       XXH64_hash_t result = XXH3_64bits_digest(state);
+ *       // Free the state. Do not use free().
+ *       XXH3_freeState(state);
+ *       return result;
+ *   }
+ * @endcode
+ *
  * @file xxhash.h
  * xxHash prototypes and implementation
  */
-/* TODO: update */
-/* Notice extracted from xxHash homepage:
-
-xxHash is an extremely fast hash algorithm, running at RAM speed limits.
-It also successfully passes all tests from the SMHasher suite.
-
-Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz)
-
-Name            Speed       Q.Score   Author
-xxHash          5.4 GB/s     10
-CrapWow         3.2 GB/s      2       Andrew
-MurmurHash 3a   2.7 GB/s     10       Austin Appleby
-SpookyHash      2.0 GB/s     10       Bob Jenkins
-SBox            1.4 GB/s      9       Bret Mulvey
-Lookup3         1.2 GB/s      9       Bob Jenkins
-SuperFastHash   1.2 GB/s      1       Paul Hsieh
-CityHash64      1.05 GB/s    10       Pike & Alakuijala
-FNV             0.55 GB/s     5       Fowler, Noll, Vo
-CRC32           0.43 GB/s     9
-MD5-32          0.33 GB/s    10       Ronald L. Rivest
-SHA1-32         0.28 GB/s    10
-
-Q.Score is a measure of quality of the hash function.
-It depends on successfully passing SMHasher test set.
-10 is a perfect score.
-
-Note: SMHasher's CRC32 implementation is not the fastest one.
-Other speed-oriented implementations can be faster,
-especially in combination with PCLMUL instruction:
-https://fastcompression.blogspot.com/2019/03/presenting-xxh3.html?showComment=1552696407071#c3490092340461170735
-
-A 64-bit version, named XXH64, is available since r35.
-It offers much better speed, but for 64-bit applications only.
-Name     Speed on 64 bits    Speed on 32 bits
-XXH64       13.8 GB/s            1.9 GB/s
-XXH32        6.8 GB/s            6.0 GB/s
-*/
 
 #if defined (__cplusplus)
 extern "C" {
@@ -84,21 +177,53 @@ extern "C" {
  *  INLINE mode
  ******************************/
 /*!
- * XXH_INLINE_ALL (and XXH_PRIVATE_API)
+ * @defgroup public Public API
+ * Contains details on the public xxHash functions.
+ * @{
+ */
+#ifdef XXH_DOXYGEN
+/*!
+ * @brief Exposes the implementation and marks all functions as `inline`.
+ *
  * Use these build macros to inline xxhash into the target unit.
  * Inlining improves performance on small inputs, especially when the length is
  * expressed as a compile-time constant:
  *
- *      https://fastcompression.blogspot.com/2018/03/xxhash-for-small-keys-impressive-power.html
+ *  https://fastcompression.blogspot.com/2018/03/xxhash-for-small-keys-impressive-power.html
  *
  * It also keeps xxHash symbols private to the unit, so they are not exported.
  *
  * Usage:
+ * @code{.c}
  *     #define XXH_INLINE_ALL
  *     #include "xxhash.h"
- *
+ * @endcode
  * Do not compile and link xxhash.o as a separate object, as it is not useful.
  */
+#  define XXH_INLINE_ALL
+#  undef XXH_INLINE_ALL
+/*!
+ * @brief Exposes the implementation without marking functions as inline.
+ */
+#  define XXH_PRIVATE_API
+#  undef XXH_PRIVATE_API
+/*!
+ * @brief Emulate a namespace by transparently prefixing all symbols.
+ *
+ * If you want to include _and expose_ xxHash functions from within your own
+ * library, but also want to avoid symbol collisions with other libraries which
+ * may also include xxHash, you can use @ref XXH_NAMESPACE to automatically prefix
+ * any public symbol from xxhash library with the value of @ref XXH_NAMESPACE
+ * (therefore, avoid empty or numeric values).
+ *
+ * Note that no change is required within the calling program as long as it
+ * includes `xxhash.h`: Regular symbol names will be automatically translated
+ * by this header.
+ */
+#  define XXH_NAMESPACE /* YOUR NAME HERE */
+#  undef XXH_NAMESPACE
+#endif
+
 #if (defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)) \
     && !defined(XXH_INLINE_ALL_31684351384)
    /* this section should be traversed only once */
@@ -213,21 +338,13 @@ extern "C" {
 #  undef XXHASH_H_STATIC_13879238742
 #endif /* XXH_INLINE_ALL || XXH_PRIVATE_API */
 
-
-
 /* ****************************************************************
  *  Stable API
  *****************************************************************/
 #ifndef XXHASH_H_5627135585666179
 #define XXHASH_H_5627135585666179 1
 
-
-/*!
- * @defgroup public Public API
- * Contains details on the public xxHash functions.
- * @{
- */
-/* specific declaration modes for Windows */
+/*! @brief Marks a global symbol. */
 #if !defined(XXH_INLINE_ALL) && !defined(XXH_PRIVATE_API)
 #  if defined(WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT))
 #    ifdef XXH_EXPORT
@@ -240,24 +357,6 @@ extern "C" {
 #  endif
 #endif
 
-#ifdef XXH_DOXYGEN
-/*!
- * @brief Emulate a namespace by transparently prefixing all symbols.
- *
- * If you want to include _and expose_ xxHash functions from within your own
- * library, but also want to avoid symbol collisions with other libraries which
- * may also include xxHash, you can use XXH_NAMESPACE to automatically prefix
- * any public symbol from xxhash library with the value of XXH_NAMESPACE
- * (therefore, avoid empty or numeric values).
- *
- * Note that no change is required within the calling program as long as it
- * includes `xxhash.h`: Regular symbol names will be automatically translated
- * by this header.
- */
-#  define XXH_NAMESPACE /* YOUR NAME HERE */
-#  undef XXH_NAMESPACE
-#endif
-
 #ifdef XXH_NAMESPACE
 #  define XXH_CAT(A,B) A##B
 #  define XXH_NAME2(A,B) XXH_CAT(A,B)
@@ -318,11 +417,39 @@ extern "C" {
 
 
 /* *************************************
+*  Compiler specifics
+***************************************/
+
+/* specific declaration modes for Windows */
+#if !defined(XXH_INLINE_ALL) && !defined(XXH_PRIVATE_API)
+#  if defined(WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT))
+#    ifdef XXH_EXPORT
+#      define XXH_PUBLIC_API __declspec(dllexport)
+#    elif XXH_IMPORT
+#      define XXH_PUBLIC_API __declspec(dllimport)
+#    endif
+#  else
+#    define XXH_PUBLIC_API   /* do nothing */
+#  endif
+#endif
+
+/*
+ * Function-attribute shorthands used on the public prototypes.
+ * When __GNUC__ is defined they expand to the GCC `const`, `pure` and
+ * `malloc` function attributes respectively; otherwise they expand to
+ * nothing, so the annotated prototypes stay valid on other compilers.
+ */
+#if defined (__GNUC__)
+# define XXH_CONSTF  __attribute__((const))
+# define XXH_PUREF   __attribute__((pure))
+# define XXH_MALLOCF __attribute__((malloc))
+#else
+# define XXH_CONSTF  /* disable */
+# define XXH_PUREF
+# define XXH_MALLOCF
+#endif
+
+/* *************************************
 *  Version
 ***************************************/
 #define XXH_VERSION_MAJOR    0
 #define XXH_VERSION_MINOR    8
 #define XXH_VERSION_RELEASE  1
+/*! @brief Version number, encoded as two digits each */
 #define XXH_VERSION_NUMBER  (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE)
 
 /*!
@@ -331,16 +458,22 @@ extern "C" {
  * This is mostly useful when xxHash is compiled as a shared library,
  * since the returned value comes from the library, as opposed to header file.
  *
- * @return `XXH_VERSION_NUMBER` of the invoked library.
+ * @return @ref XXH_VERSION_NUMBER of the invoked library.
  */
-XXH_PUBLIC_API unsigned XXH_versionNumber (void);
+XXH_PUBLIC_API XXH_CONSTF unsigned XXH_versionNumber (void);
 
 
 /* ****************************
 *  Common basic types
 ******************************/
 #include <stddef.h>   /* size_t */
-typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;
+/*!
+ * @brief Exit code for the streaming API.
+ */
+typedef enum {
+    XXH_OK = 0, /*!< OK */
+    XXH_ERROR   /*!< Error */
+} XXH_errorcode;
 
 
 /*-**********************************************************************
@@ -364,29 +497,27 @@ typedef uint32_t XXH32_hash_t;
 #   include <limits.h>
 #   if UINT_MAX == 0xFFFFFFFFUL
       typedef unsigned int XXH32_hash_t;
+#   elif ULONG_MAX == 0xFFFFFFFFUL
+      typedef unsigned long XXH32_hash_t;
 #   else
-#     if ULONG_MAX == 0xFFFFFFFFUL
-        typedef unsigned long XXH32_hash_t;
-#     else
-#       error "unsupported platform: need a 32-bit type"
-#     endif
+#     error "unsupported platform: need a 32-bit type"
 #   endif
 #endif
 
 /*!
  * @}
  *
- * @defgroup xxh32_family XXH32 family
+ * @defgroup XXH32_family XXH32 family
  * @ingroup public
  * Contains functions used in the classic 32-bit xxHash algorithm.
  *
  * @note
  *   XXH32 is useful for older platforms, with no or poor 64-bit performance.
- *   Note that @ref xxh3_family provides competitive speed
- *   for both 32-bit and 64-bit systems, and offers true 64/128 bit hash results.
+ *   Note that the @ref XXH3_family provides competitive speed for both 32-bit
+ *   and 64-bit systems, and offers true 64/128 bit hash results.
  *
- * @see @ref xxh64_family, @ref xxh3_family : Other xxHash families
- * @see @ref xxh32_impl for implementation details
+ * @see @ref XXH64_family, @ref XXH3_family : Other xxHash families
+ * @see @ref XXH32_impl for implementation details
  * @{
  */
 
@@ -395,6 +526,8 @@ typedef uint32_t XXH32_hash_t;
  *
  * Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark): 5.4 GB/s
  *
+ * See @ref single_shot_example "Single Shot Example" for an example.
+ *
  * @param input The block of data to be hashed, at least @p length bytes in size.
  * @param length The length of @p input, in bytes.
  * @param seed The 32-bit seed to alter the hash's output predictably.
@@ -412,8 +545,9 @@ typedef uint32_t XXH32_hash_t;
  * @see
  *    XXH32_createState(), XXH32_update(), XXH32_digest(): Streaming version.
  */
-XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, XXH32_hash_t seed);
+XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32 (const void* input, size_t length, XXH32_hash_t seed);
 
+#ifndef XXH_NO_STREAM
 /*!
  * Streaming functions generate the xxHash value from an incremental input.
  * This method is slower than single-call functions, due to state management.
@@ -436,32 +570,7 @@ XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, XXH32_hash_
  *
  * When done, release the state using `XXH*_freeState()`.
  *
- * Example code for incrementally hashing a file:
- * @code{.c}
- *    #include <stdio.h>
- *    #include <xxhash.h>
- *    #define BUFFER_SIZE 256
- *
- *    // Note: XXH64 and XXH3 use the same interface.
- *    XXH32_hash_t
- *    hashFile(FILE* stream)
- *    {
- *        XXH32_state_t* state;
- *        unsigned char buf[BUFFER_SIZE];
- *        size_t amt;
- *        XXH32_hash_t hash;
- *
- *        state = XXH32_createState();       // Create a state
- *        assert(state != NULL);             // Error check here
- *        XXH32_reset(state, 0xbaad5eed);    // Reset state with our seed
- *        while ((amt = fread(buf, 1, sizeof(buf), stream)) != 0) {
- *            XXH32_update(state, buf, amt); // Hash the file in chunks
- *        }
- *        hash = XXH32_digest(state);        // Finalize the hash
- *        XXH32_freeState(state);            // Clean up
- *        return hash;
- *    }
- * @endcode
+ * @see streaming_example at the top of @ref xxhash.h for an example.
  */
 
 /*!
@@ -478,7 +587,7 @@ typedef struct XXH32_state_s XXH32_state_t;
  * Must be freed with XXH32_freeState().
  * @return An allocated XXH32_state_t on success, `NULL` on failure.
  */
-XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void);
+XXH_PUBLIC_API XXH_MALLOCF XXH32_state_t* XXH32_createState(void);
 /*!
  * @brief Frees an @ref XXH32_state_t.
  *
@@ -546,7 +655,8 @@ XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void*
  *
  * @return The calculated xxHash32 value from that state.
  */
-XXH_PUBLIC_API XXH32_hash_t  XXH32_digest (const XXH32_state_t* statePtr);
+XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr);
+#endif /* !XXH_NO_STREAM */
 
 /*******   Canonical representation   *******/
 
@@ -597,7 +707,7 @@ XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t
  *
  * @return The converted hash.
  */
-XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src);
+XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src);
 
 
 #ifdef __has_attribute
@@ -606,8 +716,15 @@ XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src
 # define XXH_HAS_ATTRIBUTE(x) 0
 #endif
 
+/*
+ * C23 __STDC_VERSION__ number hasn't been specified yet. For now
+ * leave as `201711L` (C17 + 1).
+ * TODO: Update to the correct value once it has been specified.
+ */
+#define XXH_C23_VN 201711L
+
 /* C-language Attributes are added in C23. */
-#if defined(__STDC_VERSION__) && (__STDC_VERSION__ > 201710L) && defined(__has_c_attribute)
+#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= XXH_C23_VN) && defined(__has_c_attribute)
 # define XXH_HAS_C_ATTRIBUTE(x) __has_c_attribute(x)
 #else
 # define XXH_HAS_C_ATTRIBUTE(x) 0
@@ -620,21 +737,31 @@ XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src
 #endif
 
 /*
-Define XXH_FALLTHROUGH macro for annotating switch case with the 'fallthrough' attribute
-introduced in CPP17 and C23.
-CPP17 : https://en.cppreference.com/w/cpp/language/attributes/fallthrough
-C23   : https://en.cppreference.com/w/c/language/attributes/fallthrough
-*/
-#if XXH_HAS_C_ATTRIBUTE(x)
-# define XXH_FALLTHROUGH [[fallthrough]]
-#elif XXH_HAS_CPP_ATTRIBUTE(x)
+ * Define XXH_FALLTHROUGH macro for annotating switch case with the 'fallthrough' attribute
+ * introduced in CPP17 and C23.
+ * CPP17 : https://en.cppreference.com/w/cpp/language/attributes/fallthrough
+ * C23   : https://en.cppreference.com/w/c/language/attributes/fallthrough
+ */
+#if XXH_HAS_C_ATTRIBUTE(fallthrough) || XXH_HAS_CPP_ATTRIBUTE(fallthrough)
 # define XXH_FALLTHROUGH [[fallthrough]]
 #elif XXH_HAS_ATTRIBUTE(__fallthrough__)
-# define XXH_FALLTHROUGH __attribute__ ((fallthrough))
+# define XXH_FALLTHROUGH __attribute__ ((__fallthrough__))
+#else
+# define XXH_FALLTHROUGH /* fallthrough */
+#endif
+
+/*
+ * Define XXH_NOESCAPE for annotated pointers in public API.
+ * https://clang.llvm.org/docs/AttributeReference.html#noescape
+ * As of writing this, only supported by clang.
+ */
+#if XXH_HAS_ATTRIBUTE(noescape)
+# define XXH_NOESCAPE __attribute__((noescape))
 #else
-# define XXH_FALLTHROUGH
+# define XXH_NOESCAPE
 #endif
 
+
 /*!
  * @}
  * @ingroup public
@@ -671,7 +798,7 @@ typedef uint64_t XXH64_hash_t;
 /*!
  * @}
  *
- * @defgroup xxh64_family XXH64 family
+ * @defgroup XXH64_family XXH64 family
  * @ingroup public
  * @{
  * Contains functions used in the classic 64-bit xxHash algorithm.
@@ -682,7 +809,6 @@ typedef uint64_t XXH64_hash_t;
  *   It provides better speed for systems with vector processing capabilities.
  */
 
-
 /*!
  * @brief Calculates the 64-bit hash of @p input using xxHash64.
  *
@@ -706,32 +832,35 @@ typedef uint64_t XXH64_hash_t;
  * @see
  *    XXH64_createState(), XXH64_update(), XXH64_digest(): Streaming version.
  */
-XXH_PUBLIC_API XXH64_hash_t XXH64(const void* input, size_t length, XXH64_hash_t seed);
+XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64(XXH_NOESCAPE const void* input, size_t length, XXH64_hash_t seed);
 
 /*******   Streaming   *******/
+#ifndef XXH_NO_STREAM
 /*!
  * @brief The opaque state struct for the XXH64 streaming API.
  *
  * @see XXH64_state_s for details.
  */
 typedef struct XXH64_state_s XXH64_state_t;   /* incomplete type */
-XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void);
+XXH_PUBLIC_API XXH_MALLOCF XXH64_state_t* XXH64_createState(void);
 XXH_PUBLIC_API XXH_errorcode  XXH64_freeState(XXH64_state_t* statePtr);
-XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dst_state, const XXH64_state_t* src_state);
-
-XXH_PUBLIC_API XXH_errorcode XXH64_reset  (XXH64_state_t* statePtr, XXH64_hash_t seed);
-XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length);
-XXH_PUBLIC_API XXH64_hash_t  XXH64_digest (const XXH64_state_t* statePtr);
+XXH_PUBLIC_API void XXH64_copyState(XXH_NOESCAPE XXH64_state_t* dst_state, const XXH64_state_t* src_state);
 
+XXH_PUBLIC_API XXH_errorcode XXH64_reset  (XXH_NOESCAPE XXH64_state_t* statePtr, XXH64_hash_t seed);
+XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH_NOESCAPE XXH64_state_t* statePtr, XXH_NOESCAPE const void* input, size_t length);
+XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64_digest (XXH_NOESCAPE const XXH64_state_t* statePtr);
+#endif /* !XXH_NO_STREAM */
 /*******   Canonical representation   *******/
 typedef struct { unsigned char digest[sizeof(XXH64_hash_t)]; } XXH64_canonical_t;
-XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash);
-XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src);
+XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH_NOESCAPE XXH64_canonical_t* dst, XXH64_hash_t hash);
+XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64_hashFromCanonical(XXH_NOESCAPE const XXH64_canonical_t* src);
+
+#ifndef XXH_NO_XXH3
 
 /*!
  * @}
  * ************************************************************************
- * @defgroup xxh3_family XXH3 family
+ * @defgroup XXH3_family XXH3 family
  * @ingroup public
  * @{
  *
@@ -751,16 +880,18 @@ XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src
  *
  * XXH3's speed benefits greatly from SIMD and 64-bit arithmetic,
  * but does not require it.
- * Any 32-bit and 64-bit targets that can run XXH32 smoothly
- * can run XXH3 at competitive speeds, even without vector support.
- * Further details are explained in the implementation.
+ * Most 32-bit and 64-bit targets that can run XXH32 smoothly can run XXH3
+ * at competitive speeds, even without vector support. Further details are
+ * explained in the implementation.
  *
  * Optimized implementations are provided for AVX512, AVX2, SSE2, NEON, POWER8,
- * ZVector and scalar targets. This can be controlled via the XXH_VECTOR macro.
+ * ZVector and scalar targets. This can be controlled via the @ref XXH_VECTOR
+ * macro. For the x86 family, an automatic dispatcher is included separately
+ * in @ref xxh_x86dispatch.c.
  *
  * XXH3 implementation is portable:
  * it has a generic C90 formulation that can be compiled on any platform,
- * all implementations generage exactly the same hash value on all platforms.
+ * all implementations generate exactly the same hash value on all platforms.
  * Starting from v0.8.0, it's also labelled "stable", meaning that
  * any future version will also generate the same hash value.
  *
@@ -772,24 +903,42 @@ XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src
  *
  * The API supports one-shot hashing, streaming mode, and custom secrets.
  */
-
 /*-**********************************************************************
 *  XXH3 64-bit variant
 ************************************************************************/
 
-/* XXH3_64bits():
- * default 64-bit variant, using default secret and default seed of 0.
- * It's the fastest variant. */
-XXH_PUBLIC_API XXH64_hash_t XXH3_64bits(const void* data, size_t len);
+/*!
+ * @brief 64-bit unseeded variant of XXH3.
+ *
+ * This is equivalent to @ref XXH3_64bits_withSeed() with a seed of 0, however
+ * it may have slightly better performance due to constant propagation of the
+ * defaults.
+ *
+ * @see
+ *    XXH32(), XXH64(), XXH3_128bits(): equivalent for the other xxHash algorithms
+ * @see
+ *    XXH3_64bits_withSeed(), XXH3_64bits_withSecret(): other seeding variants
+ * @see
+ *    XXH3_64bits_reset(), XXH3_64bits_update(), XXH3_64bits_digest(): Streaming version.
+ */
+XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits(XXH_NOESCAPE const void* input, size_t length);
 
-/*
- * XXH3_64bits_withSeed():
- * This variant generates a custom secret on the fly
- * based on default secret altered using the `seed` value.
+/*!
+ * @brief 64-bit seeded variant of XXH3
+ *
+ * This variant generates a custom secret on the fly based on default secret
+ * altered using the `seed` value.
+ *
  * While this operation is decently fast, note that it's not completely free.
- * Note: seed==0 produces the same results as XXH3_64bits().
+ *
+ * @note
+ *    seed == 0 produces the same results as @ref XXH3_64bits().
+ *
+ * @param input The data to hash
+ * @param length The length of @p input, in bytes.
+ * @param seed The 64-bit seed to alter the state.
  */
-XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSeed(const void* data, size_t len, XXH64_hash_t seed);
+XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_withSeed(XXH_NOESCAPE const void* input, size_t length, XXH64_hash_t seed);
 
 /*!
  * The bare minimum size for a custom secret.
@@ -800,8 +949,9 @@ XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSeed(const void* data, size_t len, X
  */
 #define XXH3_SECRET_SIZE_MIN 136
 
-/*
- * XXH3_64bits_withSecret():
+/*!
+ * @brief 64-bit variant of XXH3 with a custom "secret".
+ *
  * It's possible to provide any blob of bytes as a "secret" to generate the hash.
  * This makes it more difficult for an external actor to prepare an intentional collision.
  * The main condition is that secretSize *must* be large enough (>= XXH3_SECRET_SIZE_MIN).
@@ -817,10 +967,11 @@ XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSeed(const void* data, size_t len, X
  * This is not necessarily the case when using the blob of bytes directly
  * because, when hashing _small_ inputs, only a portion of the secret is employed.
  */
-XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSecret(const void* data, size_t len, const void* secret, size_t secretSize);
+XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_withSecret(XXH_NOESCAPE const void* data, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize);
 
 
 /*******   Streaming   *******/
+#ifndef XXH_NO_STREAM
 /*
  * Streaming requires state maintenance.
  * This operation costs memory and CPU.
@@ -834,23 +985,23 @@ XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSecret(const void* data, size_t len,
  * @see XXH3_state_s for details.
  */
 typedef struct XXH3_state_s XXH3_state_t;
-XXH_PUBLIC_API XXH3_state_t* XXH3_createState(void);
+XXH_PUBLIC_API XXH_MALLOCF XXH3_state_t* XXH3_createState(void);
 XXH_PUBLIC_API XXH_errorcode XXH3_freeState(XXH3_state_t* statePtr);
-XXH_PUBLIC_API void XXH3_copyState(XXH3_state_t* dst_state, const XXH3_state_t* src_state);
+XXH_PUBLIC_API void XXH3_copyState(XXH_NOESCAPE XXH3_state_t* dst_state, XXH_NOESCAPE const XXH3_state_t* src_state);
 
 /*
  * XXH3_64bits_reset():
  * Initialize with default parameters.
  * digest will be equivalent to `XXH3_64bits()`.
  */
-XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset(XXH3_state_t* statePtr);
+XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr);
 /*
  * XXH3_64bits_reset_withSeed():
  * Generate a custom secret from `seed`, and store it into `statePtr`.
  * digest will be equivalent to `XXH3_64bits_withSeed()`.
  */
-XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed);
-/*
+XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH64_hash_t seed);
+/*!
  * XXH3_64bits_reset_withSecret():
  * `secret` is referenced, it _must outlive_ the hash streaming session.
  * Similar to one-shot API, `secretSize` must be >= `XXH3_SECRET_SIZE_MIN`,
@@ -859,10 +1010,11 @@ XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSeed(XXH3_state_t* statePtr,
  * When in doubt about the randomness of a candidate `secret`,
  * consider employing `XXH3_generateSecret()` instead (see below).
  */
-XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t secretSize);
+XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize);
 
-XXH_PUBLIC_API XXH_errorcode XXH3_64bits_update (XXH3_state_t* statePtr, const void* input, size_t length);
-XXH_PUBLIC_API XXH64_hash_t  XXH3_64bits_digest (const XXH3_state_t* statePtr);
+XXH_PUBLIC_API XXH_errorcode XXH3_64bits_update (XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* input, size_t length);
+XXH_PUBLIC_API XXH_PUREF XXH64_hash_t  XXH3_64bits_digest (XXH_NOESCAPE const XXH3_state_t* statePtr);
+#endif /* !XXH_NO_STREAM */
 
 /* note : canonical representation of XXH3 is the same as XXH64
  * since they both produce XXH64_hash_t values */
@@ -883,11 +1035,31 @@ typedef struct {
     XXH64_hash_t high64;  /*!< `value >> 64` */
 } XXH128_hash_t;
 
-XXH_PUBLIC_API XXH128_hash_t XXH3_128bits(const void* data, size_t len);
-XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_withSeed(const void* data, size_t len, XXH64_hash_t seed);
-XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_withSecret(const void* data, size_t len, const void* secret, size_t secretSize);
+/*!
+ * @brief Unseeded 128-bit variant of XXH3
+ *
+ * The 128-bit variant of XXH3 has more strength, but it has a bit of overhead
+ * for shorter inputs.
+ *
+ * This is equivalent to @ref XXH3_128bits_withSeed() with a seed of 0, however
+ * it may have slightly better performance due to constant propagation of the
+ * defaults.
+ *
+ * @see
+ *    XXH32(), XXH64(), XXH3_64bits(): equivalent for the other xxHash algorithms
+ * @see
+ *    XXH3_128bits_withSeed(), XXH3_128bits_withSecret(): other seeding variants
+ * @see
+ *    XXH3_128bits_reset(), XXH3_128bits_update(), XXH3_128bits_digest(): Streaming version.
+ */
+XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits(XXH_NOESCAPE const void* data, size_t len);
+/*! @brief Seeded 128-bit variant of XXH3. @see XXH3_64bits_withSeed(). */
+XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_withSeed(XXH_NOESCAPE const void* data, size_t len, XXH64_hash_t seed);
+/*! @brief Custom secret 128-bit variant of XXH3. @see XXH3_64bits_withSecret(). */
+XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_withSecret(XXH_NOESCAPE const void* data, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize);
 
 /*******   Streaming   *******/
+#ifndef XXH_NO_STREAM
 /*
  * Streaming requires state maintenance.
  * This operation costs memory and CPU.
@@ -900,12 +1072,13 @@ XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_withSecret(const void* data, size_t le
  * All reset and streaming functions have same meaning as their 64-bit counterpart.
  */
 
-XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset(XXH3_state_t* statePtr);
-XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed);
-XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t secretSize);
+XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr);
+XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH64_hash_t seed);
+XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize);
 
-XXH_PUBLIC_API XXH_errorcode XXH3_128bits_update (XXH3_state_t* statePtr, const void* input, size_t length);
-XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest (const XXH3_state_t* statePtr);
+XXH_PUBLIC_API XXH_errorcode XXH3_128bits_update (XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* input, size_t length);
+XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_digest (XXH_NOESCAPE const XXH3_state_t* statePtr);
+#endif /* !XXH_NO_STREAM */
 
 /* Following helper functions make it possible to compare XXH128_hast_t values.
  * Since XXH128_hash_t is a structure, this capability is not offered by the language.
@@ -915,26 +1088,26 @@ XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest (const XXH3_state_t* statePtr);
  * XXH128_isEqual():
  * Return: 1 if `h1` and `h2` are equal, 0 if they are not.
  */
-XXH_PUBLIC_API int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2);
+XXH_PUBLIC_API XXH_PUREF int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2);
 
 /*!
- * XXH128_cmp():
- *
+ * @brief Compares two @ref XXH128_hash_t
  * This comparator is compatible with stdlib's `qsort()`/`bsearch()`.
  *
- * return: >0 if *h128_1  > *h128_2
- *         =0 if *h128_1 == *h128_2
- *         <0 if *h128_1  < *h128_2
+ * @return: >0 if *h128_1  > *h128_2
+ *          =0 if *h128_1 == *h128_2
+ *          <0 if *h128_1  < *h128_2
  */
-XXH_PUBLIC_API int XXH128_cmp(const void* h128_1, const void* h128_2);
+XXH_PUBLIC_API XXH_PUREF int XXH128_cmp(XXH_NOESCAPE const void* h128_1, XXH_NOESCAPE const void* h128_2);
 
 
 /*******   Canonical representation   *******/
 typedef struct { unsigned char digest[sizeof(XXH128_hash_t)]; } XXH128_canonical_t;
-XXH_PUBLIC_API void XXH128_canonicalFromHash(XXH128_canonical_t* dst, XXH128_hash_t hash);
-XXH_PUBLIC_API XXH128_hash_t XXH128_hashFromCanonical(const XXH128_canonical_t* src);
+XXH_PUBLIC_API void XXH128_canonicalFromHash(XXH_NOESCAPE XXH128_canonical_t* dst, XXH128_hash_t hash);
+XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH128_hashFromCanonical(XXH_NOESCAPE const XXH128_canonical_t* src);
 
 
+#endif  /* !XXH_NO_XXH3 */
 #endif  /* XXH_NO_LONG_LONG */
 
 /*!
@@ -978,7 +1151,7 @@ struct XXH32_state_s {
    XXH32_hash_t v[4];         /*!< Accumulator lanes */
    XXH32_hash_t mem32[4];     /*!< Internal buffer for partial reads. Treated as unsigned char[16]. */
    XXH32_hash_t memsize;      /*!< Amount of data in @ref mem32 */
-   XXH32_hash_t reserved;     /*!< Reserved field. Do not read or write to it, it may be removed. */
+   XXH32_hash_t reserved;     /*!< Reserved field. Do not read or write to it. */
 };   /* typedef'd to XXH32_state_t */
 
 
@@ -1002,9 +1175,11 @@ struct XXH64_state_s {
    XXH64_hash_t mem64[4];     /*!< Internal buffer for partial reads. Treated as unsigned char[32]. */
    XXH32_hash_t memsize;      /*!< Amount of data in @ref mem64 */
    XXH32_hash_t reserved32;   /*!< Reserved field, needed for padding anyways*/
-   XXH64_hash_t reserved64;   /*!< Reserved field. Do not read or write to it, it may be removed. */
+   XXH64_hash_t reserved64;   /*!< Reserved field. Do not read or write to it. */
 };   /* typedef'd to XXH64_state_t */
 
+#ifndef XXH_NO_XXH3
+
 #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* >= C11 */
 #  include <stdalign.h>
 #  define XXH_ALIGN(n)      alignas(n)
@@ -1070,7 +1245,7 @@ struct XXH64_state_s {
  */
 struct XXH3_state_s {
    XXH_ALIGN_MEMBER(64, XXH64_hash_t acc[8]);
-       /*!< The 8 accumulators. Similar to `vN` in @ref XXH32_state_s::v1 and @ref XXH64_state_s */
+       /*!< The 8 accumulators. See @ref XXH32_state_s::v and @ref XXH64_state_s::v */
    XXH_ALIGN_MEMBER(64, unsigned char customSecret[XXH3_SECRET_DEFAULT_SIZE]);
        /*!< Used to store a custom secret generated from a seed. */
    XXH_ALIGN_MEMBER(64, unsigned char buffer[XXH3_INTERNALBUFFER_SIZE]);
@@ -1110,69 +1285,119 @@ struct XXH3_state_s {
  * Note that this doesn't prepare the state for a streaming operation,
  * it's still necessary to use XXH3_NNbits_reset*() afterwards.
  */
-#define XXH3_INITSTATE(XXH3_state_ptr)   { (XXH3_state_ptr)->seed = 0; }
+#define XXH3_INITSTATE(XXH3_state_ptr)                       \
+    do {                                                     \
+        XXH3_state_t* tmp_xxh3_state_ptr = (XXH3_state_ptr); \
+        tmp_xxh3_state_ptr->seed = 0;                        \
+        tmp_xxh3_state_ptr->extSecret = NULL;                \
+    } while(0)
 
 
-/* XXH128() :
+/*!
  * simple alias to pre-selected XXH3_128bits variant
  */
-XXH_PUBLIC_API XXH128_hash_t XXH128(const void* data, size_t len, XXH64_hash_t seed);
+XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH128(XXH_NOESCAPE const void* data, size_t len, XXH64_hash_t seed);
 
 
 /* ===   Experimental API   === */
 /* Symbols defined below must be considered tied to a specific library version. */
 
-/*
+/*!
  * XXH3_generateSecret():
  *
  * Derive a high-entropy secret from any user-defined content, named customSeed.
  * The generated secret can be used in combination with `*_withSecret()` functions.
- * The `_withSecret()` variants are useful to provide a higher level of protection than 64-bit seed,
- * as it becomes much more difficult for an external actor to guess how to impact the calculation logic.
+ * The `_withSecret()` variants are useful to provide a higher level of protection
+ * than 64-bit seed, as it becomes much more difficult for an external actor to
+ * guess how to impact the calculation logic.
  *
  * The function accepts as input a custom seed of any length and any content,
- * and derives from it a high-entropy secret of length @secretSize
- * into an already allocated buffer @secretBuffer.
- * @secretSize must be >= XXH3_SECRET_SIZE_MIN
+ * and derives from it a high-entropy secret of length @p secretSize into an
+ * already allocated buffer @p secretBuffer.
  *
  * The generated secret can then be used with any `*_withSecret()` variant.
- * Functions `XXH3_128bits_withSecret()`, `XXH3_64bits_withSecret()`,
- * `XXH3_128bits_reset_withSecret()` and `XXH3_64bits_reset_withSecret()`
+ * The functions @ref XXH3_128bits_withSecret(), @ref XXH3_64bits_withSecret(),
+ * @ref XXH3_128bits_reset_withSecret() and @ref XXH3_64bits_reset_withSecret()
  * are part of this list. They all accept a `secret` parameter
- * which must be large enough for implementation reasons (>= XXH3_SECRET_SIZE_MIN)
+ * which must be large enough for implementation reasons (>= @ref XXH3_SECRET_SIZE_MIN)
  * _and_ feature very high entropy (consist of random-looking bytes).
- * These conditions can be a high bar to meet, so
- * XXH3_generateSecret() can be employed to ensure proper quality.
+ * These conditions can be a high bar to meet, so @ref XXH3_generateSecret() can
+ * be employed to ensure proper quality.
+ *
+ * @p customSeed can be anything. It can have any size, even small ones,
+ * and its content can be anything, even "poor entropy" sources such as a bunch
+ * of zeroes. The resulting `secret` will nonetheless provide all required qualities.
  *
- * customSeed can be anything. It can have any size, even small ones,
- * and its content can be anything, even "poor entropy" sources such as a bunch of zeroes.
- * The resulting `secret` will nonetheless provide all required qualities.
+ * @pre
+ *   - @p secretSize must be >= @ref XXH3_SECRET_SIZE_MIN
+ *   - When @p customSeedSize > 0, supplying NULL as customSeed is undefined behavior.
  *
- * When customSeedSize > 0, supplying NULL as customSeed is undefined behavior.
+ * Example code:
+ * @code{.c}
+ *    #include <stdio.h>
+ *    #include <stdlib.h>
+ *    #include <string.h>
+ *    #define XXH_STATIC_LINKING_ONLY // expose unstable API
+ *    #include "xxhash.h"
+ *    // Hashes argv[2] using the entropy from argv[1].
+ *    int main(int argc, char* argv[])
+ *    {
+ *        char secret[XXH3_SECRET_SIZE_MIN];
+ *        if (argc != 3) { return 1; }
+ *        XXH3_generateSecret(secret, sizeof(secret), argv[1], strlen(argv[1]));
+ *        XXH64_hash_t h = XXH3_64bits_withSecret(
+ *             argv[2], strlen(argv[2]),
+ *             secret, sizeof(secret)
+ *        );
+ *        printf("%016llx\n", (unsigned long long) h);
+ *    }
+ * @endcode
  */
-XXH_PUBLIC_API XXH_errorcode XXH3_generateSecret(void* secretBuffer, size_t secretSize, const void* customSeed, size_t customSeedSize);
-
+XXH_PUBLIC_API XXH_errorcode XXH3_generateSecret(XXH_NOESCAPE void* secretBuffer, size_t secretSize, XXH_NOESCAPE const void* customSeed, size_t customSeedSize);
 
-/*
- * XXH3_generateSecret_fromSeed():
- *
- * Generate the same secret as the _withSeed() variants.
- *
- * The resulting secret has a length of XXH3_SECRET_DEFAULT_SIZE (necessarily).
- * @secretBuffer must be already allocated, of size at least XXH3_SECRET_DEFAULT_SIZE bytes.
+/*!
+ * @brief Generate the same secret as the _withSeed() variants.
  *
  * The generated secret can be used in combination with
  *`*_withSecret()` and `_withSecretandSeed()` variants.
- * This generator is notably useful in combination with `_withSecretandSeed()`,
- * as a way to emulate a faster `_withSeed()` variant.
+ *
+ * Example C++ `std::string` hash class:
+ * @code{.cpp}
+ *    #include <string>
+ *    #define XXH_STATIC_LINKING_ONLY // expose unstable API
+ *    #include "xxhash.h"
+ *    // Slow, seeds each time
+ *    class HashSlow {
+ *        XXH64_hash_t seed;
+ *    public:
+ *        HashSlow(XXH64_hash_t s) : seed{s} {}
+ *        size_t operator()(const std::string& x) const {
+ *            return size_t{XXH3_64bits_withSeed(x.c_str(), x.length(), seed)};
+ *        }
+ *    };
+ *    // Fast, caches the seeded secret for future uses.
+ *    class HashFast {
+ *        unsigned char secret[XXH3_SECRET_DEFAULT_SIZE];
+ *    public:
+ *        HashFast(XXH64_hash_t s) {
+ *            XXH3_generateSecret_fromSeed(secret, s);
+ *        }
+ *        size_t operator()(const std::string& x) const {
+ *            return size_t{
+ *                XXH3_64bits_withSecret(x.c_str(), x.length(), secret, sizeof(secret))
+ *            };
+ *        }
+ *    };
+ * @endcode
+ * @param secretBuffer A writable buffer of at least @ref XXH3_SECRET_DEFAULT_SIZE bytes
+ * @param seed The seed to seed the state.
  */
-XXH_PUBLIC_API void XXH3_generateSecret_fromSeed(void* secretBuffer, XXH64_hash_t seed);
+XXH_PUBLIC_API void XXH3_generateSecret_fromSeed(XXH_NOESCAPE void* secretBuffer, XXH64_hash_t seed);
 
-/*
- * *_withSecretandSeed() :
+/*!
  * These variants generate hash values using either
- * @seed for "short" keys (< XXH3_MIDSIZE_MAX = 240 bytes)
- * or @secret for "large" keys (>= XXH3_MIDSIZE_MAX).
+ * @p seed for "short" keys (< XXH3_MIDSIZE_MAX = 240 bytes)
+ * or @p secret for "large" keys (>= XXH3_MIDSIZE_MAX).
  *
  * This generally benefits speed, compared to `_withSeed()` or `_withSecret()`.
  * `_withSeed()` has to generate the secret on the fly for "large" keys.
@@ -1181,7 +1406,7 @@ XXH_PUBLIC_API void XXH3_generateSecret_fromSeed(void* secretBuffer, XXH64_hash_
  * which requires more instructions than _withSeed() variants.
  * Therefore, _withSecretandSeed variant combines the best of both worlds.
  *
- * When @secret has been generated by XXH3_generateSecret_fromSeed(),
+ * When @p secret has been generated by XXH3_generateSecret_fromSeed(),
  * this variant produces *exactly* the same results as `_withSeed()` variant,
  * hence offering only a pure speed benefit on "large" input,
  * by skipping the need to regenerate the secret for every large input.
@@ -1190,32 +1415,34 @@ XXH_PUBLIC_API void XXH3_generateSecret_fromSeed(void* secretBuffer, XXH64_hash_
  * for example with XXH3_64bits(), which then becomes the seed,
  * and then employ both the seed and the secret in _withSecretandSeed().
  * On top of speed, an added benefit is that each bit in the secret
- * has a 50% chance to swap each bit in the output,
- * via its impact to the seed.
+ * has a 50% chance to swap each bit in the output, via its impact to the seed.
+ *
  * This is not guaranteed when using the secret directly in "small data" scenarios,
  * because only portions of the secret are employed for small data.
  */
-XXH_PUBLIC_API XXH64_hash_t
-XXH3_64bits_withSecretandSeed(const void* data, size_t len,
-                              const void* secret, size_t secretSize,
+XXH_PUBLIC_API XXH_PUREF XXH64_hash_t
+XXH3_64bits_withSecretandSeed(XXH_NOESCAPE const void* data, size_t len,
+                              XXH_NOESCAPE const void* secret, size_t secretSize,
                               XXH64_hash_t seed);
-
-XXH_PUBLIC_API XXH128_hash_t
-XXH3_128bits_withSecretandSeed(const void* data, size_t len,
-                               const void* secret, size_t secretSize,
+/*! @copydoc XXH3_64bits_withSecretandSeed() */
+XXH_PUBLIC_API XXH_PUREF XXH128_hash_t
+XXH3_128bits_withSecretandSeed(XXH_NOESCAPE const void* input, size_t length,
+                               XXH_NOESCAPE const void* secret, size_t secretSize,
                                XXH64_hash_t seed64);
-
+#ifndef XXH_NO_STREAM
+/*! @copydoc XXH3_64bits_withSecretandSeed() */
 XXH_PUBLIC_API XXH_errorcode
-XXH3_64bits_reset_withSecretandSeed(XXH3_state_t* statePtr,
-                                    const void* secret, size_t secretSize,
+XXH3_64bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr,
+                                    XXH_NOESCAPE const void* secret, size_t secretSize,
                                     XXH64_hash_t seed64);
-
+/*! @copydoc XXH3_64bits_withSecretandSeed() */
 XXH_PUBLIC_API XXH_errorcode
-XXH3_128bits_reset_withSecretandSeed(XXH3_state_t* statePtr,
-                                     const void* secret, size_t secretSize,
+XXH3_128bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr,
+                                     XXH_NOESCAPE const void* secret, size_t secretSize,
                                      XXH64_hash_t seed64);
+#endif /* !XXH_NO_STREAM */
 
-
+#endif  /* !XXH_NO_XXH3 */
 #endif  /* XXH_NO_LONG_LONG */
 #if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)
 #  define XXH_IMPLEMENTATION
@@ -1269,7 +1496,7 @@ XXH3_128bits_reset_withSecretandSeed(XXH3_state_t* statePtr,
 /*!
  * @brief Define this to disable 64-bit code.
  *
- * Useful if only using the @ref xxh32_family and you have a strict C90 compiler.
+ * Useful if only using the @ref XXH32_family and you have a strict C90 compiler.
  */
 #  define XXH_NO_LONG_LONG
 #  undef XXH_NO_LONG_LONG /* don't actually */
@@ -1292,7 +1519,7 @@ XXH3_128bits_reset_withSecretandSeed(XXH3_state_t* statePtr,
  *     Use `memcpy()`. Safe and portable. Note that most modern compilers will
  *     eliminate the function call and treat it as an unaligned access.
  *
- *  - `XXH_FORCE_MEMORY_ACCESS=1`: `__attribute__((packed))`
+ *  - `XXH_FORCE_MEMORY_ACCESS=1`: `__attribute__((aligned(1)))`
  *   @par
  *     Depends on compiler extensions and is therefore not portable.
  *     This method is safe _if_ your compiler supports it,
@@ -1319,13 +1546,41 @@ XXH3_128bits_reset_withSecretandSeed(XXH3_state_t* statePtr,
  *   care, as what works on one compiler/platform/optimization level may cause
  *   another to read garbage data or even crash.
  *
- * See http://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html for details.
+ * See https://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html for details.
  *
  * Prefer these methods in priority order (0 > 3 > 1 > 2)
  */
 #  define XXH_FORCE_MEMORY_ACCESS 0
 
 /*!
+ * @def XXH_SIZE_OPT
+ * @brief Controls how much xxHash optimizes for size.
+ *
+ * xxHash, when compiled, tends to result in a rather large binary size. This
+ * is mostly due to heavy use of forced inlining and constant folding of the
+ * @ref XXH3_family to increase performance.
+ *
+ * However, some developers prefer size over speed. This option can
+ * significantly reduce the size of the generated code. When using the `-Os`
+ * or `-Oz` options on GCC or Clang, this is defined to 1 by default,
+ * otherwise it is defined to 0.
+ *
+ * Most of these size optimizations can be controlled manually.
+ *
+ * This is a number from 0-2.
+ *  - `XXH_SIZE_OPT` == 0: Default. xxHash makes no size optimizations. Speed
+ *    comes first.
+ *  - `XXH_SIZE_OPT` == 1: Default for `-Os` and `-Oz`. xxHash is more
+ *    conservative and disables hacks that increase code size. It implies the
+ *    options @ref XXH_NO_INLINE_HINTS == 1, @ref XXH_FORCE_ALIGN_CHECK == 0,
+ *    and @ref XXH3_NEON_LANES == 8 if they are not already defined.
+ *  - `XXH_SIZE_OPT` == 2: xxHash tries to make itself as small as possible.
+ *    Performance may cry. For example, the single shot functions just use the
+ *    streaming API.
+ */
+#  define XXH_SIZE_OPT 0
+
+/*!
  * @def XXH_FORCE_ALIGN_CHECK
  * @brief If defined to non-zero, adds a special path for aligned inputs (XXH32()
  * and XXH64() only).
@@ -1346,9 +1601,11 @@ XXH3_128bits_reset_withSecretandSeed(XXH3_state_t* statePtr,
  *
  * In these cases, the alignment check can be removed by setting this macro to 0.
  * Then the code will always use unaligned memory access.
- * Align check is automatically disabled on x86, x64 & arm64,
+ * Align check is automatically disabled on x86, x64, ARM64, and some ARM chips
  * which are platforms known to offer good unaligned memory accesses performance.
  *
+ * It is also disabled by default when @ref XXH_SIZE_OPT >= 1.
+ *
  * This option does not affect XXH3 (only XXH32 and XXH64).
  */
 #  define XXH_FORCE_ALIGN_CHECK 0
@@ -1370,12 +1627,29 @@ XXH3_128bits_reset_withSecretandSeed(XXH3_state_t* statePtr,
  * XXH_NO_INLINE_HINTS marks all internal functions as static, giving the
  * compiler full control on whether to inline or not.
  *
- * When not optimizing (-O0), optimizing for size (-Os, -Oz), or using
- * -fno-inline with GCC or Clang, this will automatically be defined.
+ * When not optimizing (-O0), using `-fno-inline` with GCC or Clang, or if
+ * @ref XXH_SIZE_OPT >= 1, this will automatically be defined.
  */
 #  define XXH_NO_INLINE_HINTS 0
 
 /*!
+ * @def XXH3_INLINE_SECRET
+ * @brief Determines whether to inline the XXH3 withSecret code.
+ *
+ * When the secret size is known, the compiler can improve the performance
+ * of XXH3_64bits_withSecret() and XXH3_128bits_withSecret().
+ *
+ * However, if the secret size is not known, it doesn't have any benefit. This
+ * happens when xxHash is compiled into a global symbol. Therefore, if
+ * @ref XXH_INLINE_ALL is *not* defined, this will be defined to 0.
+ *
+ * Additionally, this defaults to 0 on GCC 12+, which has an issue with function pointers
+ * that are *sometimes* force inline on -Og, and it is impossible to automatically
+ * detect this optimization level.
+ */
+#  define XXH3_INLINE_SECRET 0
+
+/*!
  * @def XXH32_ENDJMP
  * @brief Whether to use a jump for `XXH32_finalize`.
  *
@@ -1396,34 +1670,45 @@ XXH3_128bits_reset_withSecretandSeed(XXH3_state_t* statePtr,
  */
 #  define XXH_OLD_NAMES
 #  undef XXH_OLD_NAMES /* don't actually use, it is ugly. */
+
+/*!
+ * @def XXH_NO_STREAM
+ * @brief Disables the streaming API.
+ *
+ * When xxHash is not inlined and the streaming functions are not used, disabling
+ * the streaming functions can improve code size significantly, especially with
+ * the @ref XXH3_family which tends to make constant folded copies of itself.
+ */
+#  define XXH_NO_STREAM
+#  undef XXH_NO_STREAM /* don't actually */
 #endif /* XXH_DOXYGEN */
 /*!
  * @}
  */
 
 #ifndef XXH_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
-   /* prefer __packed__ structures (method 1) for gcc on armv7+ and mips */
-#  if !defined(__clang__) && \
-( \
-    (defined(__INTEL_COMPILER) && !defined(_WIN32)) || \
-    ( \
-        defined(__GNUC__) && ( \
-            (defined(__ARM_ARCH) && __ARM_ARCH >= 7) || \
-            ( \
-                defined(__mips__) && \
-                (__mips <= 5 || __mips_isa_rev < 6) && \
-                (!defined(__mips16) || defined(__mips_mips16e2)) \
-            ) \
-        ) \
-    ) \
-)
+   /* prefer method 1 (__attribute__((aligned(1)))) for GCC.
+    * Exception: GCC on < ARMv7 with unaligned access (e.g. Raspbian armhf) still uses byte
+    * shifting for method 1, so there we use memcpy, which for some reason does unaligned loads. */
+#  if defined(__GNUC__) && !(defined(__ARM_ARCH) && __ARM_ARCH < 7 && defined(__ARM_FEATURE_UNALIGNED))
 #    define XXH_FORCE_MEMORY_ACCESS 1
 #  endif
 #endif
 
+#ifndef XXH_SIZE_OPT
+   /* default to 1 for -Os or -Oz */
+#  if (defined(__GNUC__) || defined(__clang__)) && defined(__OPTIMIZE_SIZE__)
+#    define XXH_SIZE_OPT 1
+#  else
+#    define XXH_SIZE_OPT 0
+#  endif
+#endif
+
 #ifndef XXH_FORCE_ALIGN_CHECK  /* can be defined externally */
-#  if defined(__i386)  || defined(__x86_64__) || defined(__aarch64__) \
-   || defined(_M_IX86) || defined(_M_X64)     || defined(_M_ARM64) /* visual */
+   /* don't check on sizeopt, x86, aarch64, or arm when unaligned access is available */
+#  if XXH_SIZE_OPT >= 1 || \
+      defined(__i386)  || defined(__x86_64__) || defined(__aarch64__) || defined(__ARM_FEATURE_UNALIGNED) \
+   || defined(_M_IX86) || defined(_M_X64)     || defined(_M_ARM64)    || defined(_M_ARM) /* visual */
 #    define XXH_FORCE_ALIGN_CHECK 0
 #  else
 #    define XXH_FORCE_ALIGN_CHECK 1
@@ -1431,14 +1716,22 @@ XXH3_128bits_reset_withSecretandSeed(XXH3_state_t* statePtr,
 #endif
 
 #ifndef XXH_NO_INLINE_HINTS
-#  if defined(__OPTIMIZE_SIZE__) /* -Os, -Oz */ \
-   || defined(__NO_INLINE__)     /* -O0, -fno-inline */
+#  if XXH_SIZE_OPT >= 1 || defined(__NO_INLINE__)  /* -O0, -fno-inline */
 #    define XXH_NO_INLINE_HINTS 1
 #  else
 #    define XXH_NO_INLINE_HINTS 0
 #  endif
 #endif
 
+#ifndef XXH3_INLINE_SECRET
+#  if (defined(__GNUC__) && !defined(__clang__) && __GNUC__ >= 12) \
+     || !defined(XXH_INLINE_ALL)
+#    define XXH3_INLINE_SECRET 0
+#  else
+#    define XXH3_INLINE_SECRET 1
+#  endif
+#endif
+
 #ifndef XXH32_ENDJMP
 /* generally preferable for performance */
 #  define XXH32_ENDJMP 0
@@ -1453,6 +1746,24 @@ XXH3_128bits_reset_withSecretandSeed(XXH3_state_t* statePtr,
 /* *************************************
 *  Includes & Memory related functions
 ***************************************/
+#if defined(XXH_NO_STREAM)
+/* nothing */
+#elif defined(XXH_NO_STDLIB)
+
+/* When requesting to disable any mention of stdlib,
+ * the library loses the ability to invoke malloc / free.
+ * In practice, it means that functions like `XXH*_createState()`
+ * will always fail, and return NULL.
+ * This flag is useful in situations where
+ * xxhash.h is integrated into some kernel, embedded or limited environment
+ * without access to dynamic allocation.
+ */
+
+static XXH_CONSTF void* XXH_malloc(size_t s) { (void)s; return NULL; }
+static void XXH_free(void* p) { (void)p; }
+
+#else
+
 /*
  * Modify the local functions below should you wish to use
  * different memory routines for malloc() and free()
@@ -1463,7 +1774,7 @@ XXH3_128bits_reset_withSecretandSeed(XXH3_state_t* statePtr,
  * @internal
  * @brief Modify this function to use a different routine than malloc().
  */
-static void* XXH_malloc(size_t s) { return malloc(s); }
+static XXH_MALLOCF void* XXH_malloc(size_t s) { return malloc(s); }
 
 /*!
  * @internal
@@ -1471,6 +1782,8 @@ static void* XXH_malloc(size_t s) { return malloc(s); }
  */
 static void XXH_free(void* p) { free(p); }
 
+#endif  /* XXH_NO_STDLIB */
+
 #include <string.h>
 
 /*!
@@ -1515,6 +1828,11 @@ static void* XXH_memcpy(void* dest, const void* src, size_t size)
 #  define XXH_NO_INLINE static
 #endif
 
+#if XXH3_INLINE_SECRET
+#  define XXH3_WITH_SECRET_INLINE XXH_FORCE_INLINE
+#else
+#  define XXH3_WITH_SECRET_INLINE XXH_NO_INLINE
+#endif
 
 
 /* *************************************
@@ -1540,14 +1858,13 @@ static void* XXH_memcpy(void* dest, const void* src, size_t size)
 #  include <assert.h>   /* note: can still be disabled with NDEBUG */
 #  define XXH_ASSERT(c)   assert(c)
 #else
-#  define XXH_ASSERT(c)   ((void)0)
+#  define XXH_ASSERT(c)   XXH_ASSUME(c)
 #endif
 
 /* note: use after variable declarations */
 #ifndef XXH_STATIC_ASSERT
 #  if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)    /* C11 */
-#    include <assert.h>
-#    define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { static_assert((c),m); } while(0)
+#    define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { _Static_assert((c),m); } while(0)
 #  elif defined(__cplusplus) && (__cplusplus >= 201103L)            /* C++11 */
 #    define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { static_assert((c),m); } while(0)
 #  else
@@ -1573,11 +1890,17 @@ static void* XXH_memcpy(void* dest, const void* src, size_t size)
  * XXH3_initCustomSecret_scalar().
  */
 #if defined(__GNUC__) || defined(__clang__)
-#  define XXH_COMPILER_GUARD(var) __asm__ __volatile__("" : "+r" (var))
+#  define XXH_COMPILER_GUARD(var) __asm__("" : "+r" (var))
 #else
 #  define XXH_COMPILER_GUARD(var) ((void)0)
 #endif
 
+#if defined(__clang__)
+#  define XXH_COMPILER_GUARD_W(var) __asm__("" : "+w" (var))
+#else
+#  define XXH_COMPILER_GUARD_W(var) ((void)0)
+#endif
+
 /* *************************************
 *  Basic Types
 ***************************************/
@@ -1665,25 +1988,26 @@ static xxh_u32 XXH_read32(const void* memPtr) { return *(const xxh_u32*) memPtr;
 #elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
 
 /*
- * __pack instructions are safer but compiler specific, hence potentially
- * problematic for some compilers.
- *
- * Currently only defined for GCC and ICC.
+ * __attribute__((aligned(1))) is supported by gcc and clang. Originally the
+ * documentation claimed that it only increased the alignment, but actually it
+ * can decrease it on gcc, clang, and icc:
+ * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=69502,
+ * https://gcc.godbolt.org/z/xYez1j67Y.
  */
 #ifdef XXH_OLD_NAMES
 typedef union { xxh_u32 u32; } __attribute__((packed)) unalign;
 #endif
 static xxh_u32 XXH_read32(const void* ptr)
 {
-    typedef union { xxh_u32 u32; } __attribute__((packed)) xxh_unalign;
-    return ((const xxh_unalign*)ptr)->u32;
+    typedef __attribute__((aligned(1))) xxh_u32 xxh_unalign32;
+    return *((const xxh_unalign32*)ptr);
 }
 
 #else
 
 /*
  * Portable and safe solution. Generally efficient.
- * see: http://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html
+ * see: https://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html
  */
 static xxh_u32 XXH_read32(const void* memPtr)
 {
@@ -1759,6 +2083,51 @@ static int XXH_isLittleEndian(void)
 #  define XXH_HAS_BUILTIN(x) 0
 #endif
 
+
+
+/*
+ * C23 and future versions have standard "unreachable()".
+ * Once it has been implemented reliably we can add it as an
+ * additional case:
+ *
+ * ```
+ * #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= XXH_C23_VN)
+ * #  include <stddef.h>
+ * #  ifdef unreachable
+ * #    define XXH_UNREACHABLE() unreachable()
+ * #  endif
+ * #endif
+ * ```
+ *
+ * Note C++23 also has std::unreachable() which can be detected
+ * as follows:
+ * ```
+ * #if defined(__cpp_lib_unreachable) && (__cpp_lib_unreachable >= 202202L)
+ * #  include <utility>
+ * #  define XXH_UNREACHABLE() std::unreachable()
+ * #endif
+ * ```
+ * NB: `__cpp_lib_unreachable` is defined in the `<version>` header.
+ * We don't use that, because including `<utility>` inside `extern "C"` blocks
+ * doesn't work on GCC 12.
+ */
+
+#if XXH_HAS_BUILTIN(__builtin_unreachable)
+#  define XXH_UNREACHABLE() __builtin_unreachable()
+
+#elif defined(_MSC_VER)
+#  define XXH_UNREACHABLE() __assume(0)
+
+#else
+#  define XXH_UNREACHABLE()
+#endif
+
+#if XXH_HAS_BUILTIN(__builtin_assume)
+#  define XXH_ASSUME(c) __builtin_assume(c)
+#else
+#  define XXH_ASSUME(c) if (!(c)) { XXH_UNREACHABLE(); }
+#endif
+
 /*!
  * @internal
  * @def XXH_rotl32(x,r)
@@ -1881,8 +2250,10 @@ XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; }
 *********************************************************************/
 /*!
  * @}
- * @defgroup xxh32_impl XXH32 implementation
+ * @defgroup XXH32_impl XXH32 implementation
  * @ingroup impl
+ *
+ * Details on the XXH32 implementation.
  * @{
  */
  /* #define instead of static const, to be used as initializers */
@@ -1946,9 +2317,9 @@ static xxh_u32 XXH32_round(xxh_u32 acc, xxh_u32 input)
      *   can load data, while v3 can multiply. SSE forces them to operate
      *   together.
      *
-     * This is also enabled on AArch64, as Clang autovectorizes it incorrectly
-     * and it is pointless writing a NEON implementation that is basically the
-     * same speed as scalar for XXH32.
+     * This is also enabled on AArch64, as Clang is *very aggressive* in vectorizing
+     * the loop. NEON is only faster on the A53; on newer cores, the vectorized
+     * loop runs at less than half the speed of the scalar one.
      */
     XXH_COMPILER_GUARD(acc);
 #endif
@@ -1962,17 +2333,17 @@ static xxh_u32 XXH32_round(xxh_u32 acc, xxh_u32 input)
  * The final mix ensures that all input bits have a chance to impact any bit in
  * the output digest, resulting in an unbiased distribution.
  *
- * @param h32 The hash to avalanche.
+ * @param hash The hash to avalanche.
  * @return The avalanched hash.
  */
-static xxh_u32 XXH32_avalanche(xxh_u32 h32)
+static xxh_u32 XXH32_avalanche(xxh_u32 hash)
 {
-    h32 ^= h32 >> 15;
-    h32 *= XXH_PRIME32_2;
-    h32 ^= h32 >> 13;
-    h32 *= XXH_PRIME32_3;
-    h32 ^= h32 >> 16;
-    return(h32);
+    hash ^= hash >> 15;
+    hash *= XXH_PRIME32_2;
+    hash ^= hash >> 13;
+    hash *= XXH_PRIME32_3;
+    hash ^= hash >> 16;
+    return hash;
 }
 
 #define XXH_get32bits(p) XXH_readLE32_align(p, align)
@@ -1985,24 +2356,25 @@ static xxh_u32 XXH32_avalanche(xxh_u32 h32)
  * This final stage will digest them to ensure that all input bytes are present
  * in the final mix.
  *
- * @param h32 The hash to finalize.
+ * @param hash The hash to finalize.
  * @param ptr The pointer to the remaining input.
  * @param len The remaining length, modulo 16.
  * @param align Whether @p ptr is aligned.
  * @return The finalized hash.
+ * @see XXH64_finalize().
  */
-static xxh_u32
-XXH32_finalize(xxh_u32 h32, const xxh_u8* ptr, size_t len, XXH_alignment align)
+static XXH_PUREF xxh_u32
+XXH32_finalize(xxh_u32 hash, const xxh_u8* ptr, size_t len, XXH_alignment align)
 {
-#define XXH_PROCESS1 do {                           \
-    h32 += (*ptr++) * XXH_PRIME32_5;                \
-    h32 = XXH_rotl32(h32, 11) * XXH_PRIME32_1;      \
+#define XXH_PROCESS1 do {                             \
+    hash += (*ptr++) * XXH_PRIME32_5;                 \
+    hash = XXH_rotl32(hash, 11) * XXH_PRIME32_1;      \
 } while (0)
 
-#define XXH_PROCESS4 do {                           \
-    h32 += XXH_get32bits(ptr) * XXH_PRIME32_3;      \
-    ptr += 4;                                   \
-    h32  = XXH_rotl32(h32, 17) * XXH_PRIME32_4;     \
+#define XXH_PROCESS4 do {                             \
+    hash += XXH_get32bits(ptr) * XXH_PRIME32_3;       \
+    ptr += 4;                                         \
+    hash  = XXH_rotl32(hash, 17) * XXH_PRIME32_4;     \
 } while (0)
 
     if (ptr==NULL) XXH_ASSERT(len == 0);
@@ -2018,49 +2390,49 @@ XXH32_finalize(xxh_u32 h32, const xxh_u8* ptr, size_t len, XXH_alignment align)
             XXH_PROCESS1;
             --len;
         }
-        return XXH32_avalanche(h32);
+        return XXH32_avalanche(hash);
     } else {
          switch(len&15) /* or switch(bEnd - p) */ {
            case 12:      XXH_PROCESS4;
-                         XXH_FALLTHROUGH;
+                         XXH_FALLTHROUGH;  /* fallthrough */
            case 8:       XXH_PROCESS4;
-                         XXH_FALLTHROUGH;
+                         XXH_FALLTHROUGH;  /* fallthrough */
            case 4:       XXH_PROCESS4;
-                         return XXH32_avalanche(h32);
+                         return XXH32_avalanche(hash);
 
            case 13:      XXH_PROCESS4;
-                         XXH_FALLTHROUGH;
+                         XXH_FALLTHROUGH;  /* fallthrough */
            case 9:       XXH_PROCESS4;
-                         XXH_FALLTHROUGH;
+                         XXH_FALLTHROUGH;  /* fallthrough */
            case 5:       XXH_PROCESS4;
                          XXH_PROCESS1;
-                         return XXH32_avalanche(h32);
+                         return XXH32_avalanche(hash);
 
            case 14:      XXH_PROCESS4;
-                         XXH_FALLTHROUGH;
+                         XXH_FALLTHROUGH;  /* fallthrough */
            case 10:      XXH_PROCESS4;
-                         XXH_FALLTHROUGH;
+                         XXH_FALLTHROUGH;  /* fallthrough */
            case 6:       XXH_PROCESS4;
                          XXH_PROCESS1;
                          XXH_PROCESS1;
-                         return XXH32_avalanche(h32);
+                         return XXH32_avalanche(hash);
 
            case 15:      XXH_PROCESS4;
-                         XXH_FALLTHROUGH;
+                         XXH_FALLTHROUGH;  /* fallthrough */
            case 11:      XXH_PROCESS4;
-                         XXH_FALLTHROUGH;
+                         XXH_FALLTHROUGH;  /* fallthrough */
            case 7:       XXH_PROCESS4;
-                         XXH_FALLTHROUGH;
+                         XXH_FALLTHROUGH;  /* fallthrough */
            case 3:       XXH_PROCESS1;
-                         XXH_FALLTHROUGH;
+                         XXH_FALLTHROUGH;  /* fallthrough */
            case 2:       XXH_PROCESS1;
-                         XXH_FALLTHROUGH;
+                         XXH_FALLTHROUGH;  /* fallthrough */
            case 1:       XXH_PROCESS1;
-                         XXH_FALLTHROUGH;
-           case 0:       return XXH32_avalanche(h32);
+                         XXH_FALLTHROUGH;  /* fallthrough */
+           case 0:       return XXH32_avalanche(hash);
         }
         XXH_ASSERT(0);
-        return h32;   /* reaching this point is deemed impossible */
+        return hash;   /* reaching this point is deemed impossible */
     }
 }
 
@@ -2080,7 +2452,7 @@ XXH32_finalize(xxh_u32 h32, const xxh_u8* ptr, size_t len, XXH_alignment align)
  * @param align Whether @p input is aligned.
  * @return The calculated hash.
  */
-XXH_FORCE_INLINE xxh_u32
+XXH_FORCE_INLINE XXH_PUREF xxh_u32
 XXH32_endian_align(const xxh_u8* input, size_t len, xxh_u32 seed, XXH_alignment align)
 {
     xxh_u32 h32;
@@ -2113,10 +2485,10 @@ XXH32_endian_align(const xxh_u8* input, size_t len, xxh_u32 seed, XXH_alignment
     return XXH32_finalize(h32, input, len&15, align);
 }
 
-/*! @ingroup xxh32_family */
+/*! @ingroup XXH32_family */
 XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t len, XXH32_hash_t seed)
 {
-#if 0
+#if !defined(XXH_NO_STREAM) && XXH_SIZE_OPT >= 2
     /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
     XXH32_state_t state;
     XXH32_reset(&state, seed);
@@ -2135,42 +2507,39 @@ XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t len, XXH32_hash_t s
 
 
 /*******   Hash streaming   *******/
-/*!
- * @ingroup xxh32_family
- */
+#ifndef XXH_NO_STREAM
+/*! @ingroup XXH32_family */
 XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void)
 {
     return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t));
 }
-/*! @ingroup xxh32_family */
+/*! @ingroup XXH32_family */
 XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr)
 {
     XXH_free(statePtr);
     return XXH_OK;
 }
 
-/*! @ingroup xxh32_family */
+/*! @ingroup XXH32_family */
 XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dstState, const XXH32_state_t* srcState)
 {
     XXH_memcpy(dstState, srcState, sizeof(*dstState));
 }
 
-/*! @ingroup xxh32_family */
+/*! @ingroup XXH32_family */
 XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, XXH32_hash_t seed)
 {
-    XXH32_state_t state;   /* using a local state to memcpy() in order to avoid strict-aliasing warnings */
-    memset(&state, 0, sizeof(state));
-    state.v[0] = seed + XXH_PRIME32_1 + XXH_PRIME32_2;
-    state.v[1] = seed + XXH_PRIME32_2;
-    state.v[2] = seed + 0;
-    state.v[3] = seed - XXH_PRIME32_1;
-    /* do not write into reserved, planned to be removed in a future version */
-    XXH_memcpy(statePtr, &state, sizeof(state) - sizeof(state.reserved));
+    XXH_ASSERT(statePtr != NULL);
+    memset(statePtr, 0, sizeof(*statePtr));
+    statePtr->v[0] = seed + XXH_PRIME32_1 + XXH_PRIME32_2;
+    statePtr->v[1] = seed + XXH_PRIME32_2;
+    statePtr->v[2] = seed + 0;
+    statePtr->v[3] = seed - XXH_PRIME32_1;
     return XXH_OK;
 }
 
 
-/*! @ingroup xxh32_family */
+/*! @ingroup XXH32_family */
 XXH_PUBLIC_API XXH_errorcode
 XXH32_update(XXH32_state_t* state, const void* input, size_t len)
 {
@@ -2225,7 +2594,7 @@ XXH32_update(XXH32_state_t* state, const void* input, size_t len)
 }
 
 
-/*! @ingroup xxh32_family */
+/*! @ingroup XXH32_family */
 XXH_PUBLIC_API XXH32_hash_t XXH32_digest(const XXH32_state_t* state)
 {
     xxh_u32 h32;
@@ -2243,12 +2612,12 @@ XXH_PUBLIC_API XXH32_hash_t XXH32_digest(const XXH32_state_t* state)
 
     return XXH32_finalize(h32, (const xxh_u8*)state->mem32, state->memsize, XXH_aligned);
 }
-
+#endif /* !XXH_NO_STREAM */
 
 /*******   Canonical representation   *******/
 
 /*!
- * @ingroup xxh32_family
+ * @ingroup XXH32_family
  * The default return values from XXH functions are unsigned 32 and 64 bit
  * integers.
  *
@@ -2267,7 +2636,7 @@ XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t
     if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash);
     XXH_memcpy(dst, &hash, sizeof(*dst));
 }
-/*! @ingroup xxh32_family */
+/*! @ingroup XXH32_family */
 XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src)
 {
     return XXH_readBE32(src);
@@ -2308,25 +2677,26 @@ static xxh_u64 XXH_read64(const void* memPtr)
 #elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
 
 /*
- * __pack instructions are safer, but compiler specific, hence potentially
- * problematic for some compilers.
- *
- * Currently only defined for GCC and ICC.
+ * __attribute__((aligned(1))) is supported by gcc and clang. Originally the
+ * documentation claimed that it only increased the alignment, but actually it
+ * can decrease it on gcc, clang, and icc:
+ * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=69502,
+ * https://gcc.godbolt.org/z/xYez1j67Y.
  */
 #ifdef XXH_OLD_NAMES
 typedef union { xxh_u32 u32; xxh_u64 u64; } __attribute__((packed)) unalign64;
 #endif
 static xxh_u64 XXH_read64(const void* ptr)
 {
-    typedef union { xxh_u32 u32; xxh_u64 u64; } __attribute__((packed)) xxh_unalign64;
-    return ((const xxh_unalign64*)ptr)->u64;
+    typedef __attribute__((aligned(1))) xxh_u64 xxh_unalign64;
+    return *((const xxh_unalign64*)ptr);
 }
 
 #else
 
 /*
  * Portable and safe solution. Generally efficient.
- * see: http://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html
+ * see: https://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html
  */
 static xxh_u64 XXH_read64(const void* memPtr)
 {
@@ -2410,8 +2780,10 @@ XXH_readLE64_align(const void* ptr, XXH_alignment align)
 /*******   xxh64   *******/
 /*!
  * @}
- * @defgroup xxh64_impl XXH64 implementation
+ * @defgroup XXH64_impl XXH64 implementation
  * @ingroup impl
+ *
+ * Details on the XXH64 implementation.
  * @{
  */
 /* #define rather that static const, to be used as initializers */
@@ -2429,6 +2801,7 @@ XXH_readLE64_align(const void* ptr, XXH_alignment align)
 #  define PRIME64_5 XXH_PRIME64_5
 #endif
 
+/*! @copydoc XXH32_round */
 static xxh_u64 XXH64_round(xxh_u64 acc, xxh_u64 input)
 {
     acc += input * XXH_PRIME64_2;
@@ -2445,43 +2818,59 @@ static xxh_u64 XXH64_mergeRound(xxh_u64 acc, xxh_u64 val)
     return acc;
 }
 
-static xxh_u64 XXH64_avalanche(xxh_u64 h64)
+/*! @copydoc XXH32_avalanche */
+static xxh_u64 XXH64_avalanche(xxh_u64 hash)
 {
-    h64 ^= h64 >> 33;
-    h64 *= XXH_PRIME64_2;
-    h64 ^= h64 >> 29;
-    h64 *= XXH_PRIME64_3;
-    h64 ^= h64 >> 32;
-    return h64;
+    hash ^= hash >> 33;
+    hash *= XXH_PRIME64_2;
+    hash ^= hash >> 29;
+    hash *= XXH_PRIME64_3;
+    hash ^= hash >> 32;
+    return hash;
 }
 
 
 #define XXH_get64bits(p) XXH_readLE64_align(p, align)
 
-static xxh_u64
-XXH64_finalize(xxh_u64 h64, const xxh_u8* ptr, size_t len, XXH_alignment align)
+/*!
+ * @internal
+ * @brief Processes the last 0-31 bytes of @p ptr.
+ *
+ * There may be up to 31 bytes remaining to consume from the input.
+ * This final stage will digest them to ensure that all input bytes are present
+ * in the final mix.
+ *
+ * @param hash The hash to finalize.
+ * @param ptr The pointer to the remaining input.
+ * @param len The remaining length, modulo 32.
+ * @param align Whether @p ptr is aligned.
+ * @return The finalized hash.
+ * @see XXH32_finalize().
+ */
+static XXH_PUREF xxh_u64
+XXH64_finalize(xxh_u64 hash, const xxh_u8* ptr, size_t len, XXH_alignment align)
 {
     if (ptr==NULL) XXH_ASSERT(len == 0);
     len &= 31;
     while (len >= 8) {
         xxh_u64 const k1 = XXH64_round(0, XXH_get64bits(ptr));
         ptr += 8;
-        h64 ^= k1;
-        h64  = XXH_rotl64(h64,27) * XXH_PRIME64_1 + XXH_PRIME64_4;
+        hash ^= k1;
+        hash  = XXH_rotl64(hash,27) * XXH_PRIME64_1 + XXH_PRIME64_4;
         len -= 8;
     }
     if (len >= 4) {
-        h64 ^= (xxh_u64)(XXH_get32bits(ptr)) * XXH_PRIME64_1;
+        hash ^= (xxh_u64)(XXH_get32bits(ptr)) * XXH_PRIME64_1;
         ptr += 4;
-        h64 = XXH_rotl64(h64, 23) * XXH_PRIME64_2 + XXH_PRIME64_3;
+        hash = XXH_rotl64(hash, 23) * XXH_PRIME64_2 + XXH_PRIME64_3;
         len -= 4;
     }
     while (len > 0) {
-        h64 ^= (*ptr++) * XXH_PRIME64_5;
-        h64 = XXH_rotl64(h64, 11) * XXH_PRIME64_1;
+        hash ^= (*ptr++) * XXH_PRIME64_5;
+        hash = XXH_rotl64(hash, 11) * XXH_PRIME64_1;
         --len;
     }
-    return  XXH64_avalanche(h64);
+    return  XXH64_avalanche(hash);
 }
 
 #ifdef XXH_OLD_NAMES
@@ -2494,7 +2883,15 @@ XXH64_finalize(xxh_u64 h64, const xxh_u8* ptr, size_t len, XXH_alignment align)
 #  undef XXH_PROCESS8_64
 #endif
 
-XXH_FORCE_INLINE xxh_u64
+/*!
+ * @internal
+ * @brief The implementation for @ref XXH64().
+ *
+ * @param input , len , seed Directly passed from @ref XXH64().
+ * @param align Whether @p input is aligned.
+ * @return The calculated hash.
+ */
+XXH_FORCE_INLINE XXH_PUREF xxh_u64
 XXH64_endian_align(const xxh_u8* input, size_t len, xxh_u64 seed, XXH_alignment align)
 {
     xxh_u64 h64;
@@ -2531,10 +2928,10 @@ XXH64_endian_align(const xxh_u8* input, size_t len, xxh_u64 seed, XXH_alignment
 }
 
 
-/*! @ingroup xxh64_family */
-XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t len, XXH64_hash_t seed)
+/*! @ingroup XXH64_family */
+XXH_PUBLIC_API XXH64_hash_t XXH64 (XXH_NOESCAPE const void* input, size_t len, XXH64_hash_t seed)
 {
-#if 0
+#if !defined(XXH_NO_STREAM) && XXH_SIZE_OPT >= 2
     /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
     XXH64_state_t state;
     XXH64_reset(&state, seed);
@@ -2552,42 +2949,40 @@ XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t len, XXH64_hash_t s
 }
 
 /*******   Hash Streaming   *******/
-
-/*! @ingroup xxh64_family*/
+#ifndef XXH_NO_STREAM
+/*! @ingroup XXH64_family */
 XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void)
 {
     return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t));
 }
-/*! @ingroup xxh64_family */
+/*! @ingroup XXH64_family */
 XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr)
 {
     XXH_free(statePtr);
     return XXH_OK;
 }
 
-/*! @ingroup xxh64_family */
-XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dstState, const XXH64_state_t* srcState)
+/*! @ingroup XXH64_family */
+XXH_PUBLIC_API void XXH64_copyState(XXH_NOESCAPE XXH64_state_t* dstState, const XXH64_state_t* srcState)
 {
     XXH_memcpy(dstState, srcState, sizeof(*dstState));
 }
 
-/*! @ingroup xxh64_family */
-XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, XXH64_hash_t seed)
+/*! @ingroup XXH64_family */
+XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH_NOESCAPE XXH64_state_t* statePtr, XXH64_hash_t seed)
 {
-    XXH64_state_t state;   /* use a local state to memcpy() in order to avoid strict-aliasing warnings */
-    memset(&state, 0, sizeof(state));
-    state.v[0] = seed + XXH_PRIME64_1 + XXH_PRIME64_2;
-    state.v[1] = seed + XXH_PRIME64_2;
-    state.v[2] = seed + 0;
-    state.v[3] = seed - XXH_PRIME64_1;
-     /* do not write into reserved64, might be removed in a future version */
-    XXH_memcpy(statePtr, &state, sizeof(state) - sizeof(state.reserved64));
+    XXH_ASSERT(statePtr != NULL);
+    memset(statePtr, 0, sizeof(*statePtr));
+    statePtr->v[0] = seed + XXH_PRIME64_1 + XXH_PRIME64_2;
+    statePtr->v[1] = seed + XXH_PRIME64_2;
+    statePtr->v[2] = seed + 0;
+    statePtr->v[3] = seed - XXH_PRIME64_1;
     return XXH_OK;
 }
 
-/*! @ingroup xxh64_family */
+/*! @ingroup XXH64_family */
 XXH_PUBLIC_API XXH_errorcode
-XXH64_update (XXH64_state_t* state, const void* input, size_t len)
+XXH64_update (XXH_NOESCAPE XXH64_state_t* state, XXH_NOESCAPE const void* input, size_t len)
 {
     if (input==NULL) {
         XXH_ASSERT(len == 0);
@@ -2637,8 +3032,8 @@ XXH64_update (XXH64_state_t* state, const void* input, size_t len)
 }
 
 
-/*! @ingroup xxh64_family */
-XXH_PUBLIC_API XXH64_hash_t XXH64_digest(const XXH64_state_t* state)
+/*! @ingroup XXH64_family */
+XXH_PUBLIC_API XXH64_hash_t XXH64_digest(XXH_NOESCAPE const XXH64_state_t* state)
 {
     xxh_u64 h64;
 
@@ -2656,20 +3051,20 @@ XXH_PUBLIC_API XXH64_hash_t XXH64_digest(const XXH64_state_t* state)
 
     return XXH64_finalize(h64, (const xxh_u8*)state->mem64, (size_t)state->total_len, XXH_aligned);
 }
-
+#endif /* !XXH_NO_STREAM */
 
 /******* Canonical representation   *******/
 
-/*! @ingroup xxh64_family */
-XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash)
+/*! @ingroup XXH64_family */
+XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH_NOESCAPE XXH64_canonical_t* dst, XXH64_hash_t hash)
 {
     XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t));
     if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash);
     XXH_memcpy(dst, &hash, sizeof(*dst));
 }
 
-/*! @ingroup xxh64_family */
-XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src)
+/*! @ingroup XXH64_family */
+XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(XXH_NOESCAPE const XXH64_canonical_t* src)
 {
     return XXH_readBE64(src);
 }
@@ -2682,7 +3077,7 @@ XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src
 ************************************************************************ */
 /*!
  * @}
- * @defgroup xxh3_impl XXH3 implementation
+ * @defgroup XXH3_impl XXH3 implementation
  * @ingroup impl
  * @{
  */
@@ -2690,11 +3085,19 @@ XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src
 /* ===   Compiler specifics   === */
 
 #if ((defined(sun) || defined(__sun)) && __cplusplus) /* Solaris includes __STDC_VERSION__ with C++. Tested with GCC 5.5 */
-#  define XXH_RESTRICT /* disable */
+#  define XXH_RESTRICT   /* disable */
 #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* >= C99 */
 #  define XXH_RESTRICT   restrict
+#elif (defined (__GNUC__) && ((__GNUC__ > 3) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))) \
+   || (defined (__clang__)) \
+   || (defined (_MSC_VER) && (_MSC_VER >= 1400)) \
+   || (defined (__INTEL_COMPILER) && (__INTEL_COMPILER >= 1300))
+/*
+ * There are a LOT more compilers that recognize __restrict but this
+ * covers the major ones.
+ */
+#  define XXH_RESTRICT   __restrict
 #else
-/* Note: it might be useful to define __restrict or __restrict__ for some C++ compilers */
 #  define XXH_RESTRICT   /* disable */
 #endif
 
@@ -2708,17 +3111,24 @@ XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src
 #    define XXH_unlikely(x) (x)
 #endif
 
-#if defined(__GNUC__)
-#  if defined(__AVX2__)
-#    include <immintrin.h>
-#  elif defined(__SSE2__)
-#    include <emmintrin.h>
-#  elif defined(__ARM_NEON__) || defined(__ARM_NEON)
+#if defined(__GNUC__) || defined(__clang__)
+#  if defined(__ARM_FEATURE_SVE)
+#    include <arm_sve.h>
+#  endif
+#  if defined(__ARM_NEON__) || defined(__ARM_NEON) \
+   || (defined(_M_ARM) && _M_ARM >= 7) \
+   || defined(_M_ARM64) || defined(_M_ARM64EC)
 #    define inline __inline__  /* circumvent a clang bug */
 #    include <arm_neon.h>
 #    undef inline
+#  elif defined(__AVX2__)
+#    include <immintrin.h>
+#  elif defined(__SSE2__)
+#    include <emmintrin.h>
 #  endif
-#elif defined(_MSC_VER)
+#endif
+
+#if defined(_MSC_VER)
 #  include <intrin.h>
 #endif
 
@@ -2832,12 +3242,13 @@ enum XXH_VECTOR_TYPE /* fake enum */ {
     XXH_AVX512 = 3,  /*!< AVX512 for Skylake and Icelake */
     XXH_NEON   = 4,  /*!< NEON for most ARMv7-A and all AArch64 */
     XXH_VSX    = 5,  /*!< VSX and ZVector for POWER8/z13 (64-bit) */
+    XXH_SVE    = 6,  /*!< SVE for some ARMv8-A and ARMv9-A */
 };
 /*!
  * @ingroup tuning
  * @brief Selects the minimum alignment for XXH3's accumulators.
  *
- * When using SIMD, this should match the alignment reqired for said vector
+ * When using SIMD, this should match the alignment required for said vector
  * type, so, for example, 32 for AVX2.
  *
  * Default: Auto detected.
@@ -2853,23 +3264,26 @@ enum XXH_VECTOR_TYPE /* fake enum */ {
 #  define XXH_AVX512 3
 #  define XXH_NEON   4
 #  define XXH_VSX    5
+#  define XXH_SVE    6
 #endif
 
 #ifndef XXH_VECTOR    /* can be defined on command line */
-#  if defined(__AVX512F__)
-#    define XXH_VECTOR XXH_AVX512
-#  elif defined(__AVX2__)
-#    define XXH_VECTOR XXH_AVX2
-#  elif defined(__SSE2__) || defined(_M_AMD64) || defined(_M_X64) || (defined(_M_IX86_FP) && (_M_IX86_FP == 2))
-#    define XXH_VECTOR XXH_SSE2
+#  if defined(__ARM_FEATURE_SVE)
+#    define XXH_VECTOR XXH_SVE
 #  elif ( \
         defined(__ARM_NEON__) || defined(__ARM_NEON) /* gcc */ \
-     || defined(_M_ARM64) || defined(_M_ARM_ARMV7VE) /* msvc */ \
+     || defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC) /* msvc */ \
    ) && ( \
         defined(_WIN32) || defined(__LITTLE_ENDIAN__) /* little endian only */ \
     || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) \
    )
 #    define XXH_VECTOR XXH_NEON
+#  elif defined(__AVX512F__)
+#    define XXH_VECTOR XXH_AVX512
+#  elif defined(__AVX2__)
+#    define XXH_VECTOR XXH_AVX2
+#  elif defined(__SSE2__) || defined(_M_AMD64) || defined(_M_X64) || (defined(_M_IX86_FP) && (_M_IX86_FP == 2))
+#    define XXH_VECTOR XXH_SSE2
 #  elif (defined(__PPC64__) && defined(__POWER8_VECTOR__)) \
      || (defined(__s390x__) && defined(__VEC__)) \
      && defined(__GNUC__) /* TODO: IBM XL */
@@ -2879,6 +3293,17 @@ enum XXH_VECTOR_TYPE /* fake enum */ {
 #  endif
 #endif
 
+/* __ARM_FEATURE_SVE is only supported by GCC & Clang. */
+#if (XXH_VECTOR == XXH_SVE) && !defined(__ARM_FEATURE_SVE)
+#  ifdef _MSC_VER
+#    pragma warning(once : 4606)
+#  else
+#    warning "__ARM_FEATURE_SVE isn't supported. Use SCALAR instead."
+#  endif
+#  undef XXH_VECTOR
+#  define XXH_VECTOR XXH_SCALAR
+#endif
+
 /*
  * Controls the alignment of the accumulator,
  * for compatibility with aligned vector loads, which are usually faster.
@@ -2898,16 +3323,26 @@ enum XXH_VECTOR_TYPE /* fake enum */ {
 #     define XXH_ACC_ALIGN 16
 #  elif XXH_VECTOR == XXH_AVX512  /* avx512 */
 #     define XXH_ACC_ALIGN 64
+#  elif XXH_VECTOR == XXH_SVE   /* sve */
+#     define XXH_ACC_ALIGN 64
 #  endif
 #endif
 
 #if defined(XXH_X86DISPATCH) || XXH_VECTOR == XXH_SSE2 \
     || XXH_VECTOR == XXH_AVX2 || XXH_VECTOR == XXH_AVX512
 #  define XXH_SEC_ALIGN XXH_ACC_ALIGN
+#elif XXH_VECTOR == XXH_SVE
+#  define XXH_SEC_ALIGN XXH_ACC_ALIGN
 #else
 #  define XXH_SEC_ALIGN 8
 #endif
 
+#if defined(__GNUC__) || defined(__clang__)
+#  define XXH_ALIASING __attribute__((may_alias))
+#else
+#  define XXH_ALIASING /* nothing */
+#endif
+
 /*
  * UGLY HACK:
  * GCC usually generates the best code with -O3 for xxHash.
@@ -2931,111 +3366,127 @@ enum XXH_VECTOR_TYPE /* fake enum */ {
  */
 #if XXH_VECTOR == XXH_AVX2 /* AVX2 */ \
   && defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \
-  && defined(__OPTIMIZE__) && !defined(__OPTIMIZE_SIZE__) /* respect -O0 and -Os */
+  && defined(__OPTIMIZE__) && XXH_SIZE_OPT <= 0 /* respect -O0 and -Os */
 #  pragma GCC push_options
 #  pragma GCC optimize("-O2")
 #endif
 
-
 #if XXH_VECTOR == XXH_NEON
+
 /*
- * NEON's setup for vmlal_u32 is a little more complicated than it is on
- * SSE2, AVX2, and VSX.
- *
- * While PMULUDQ and VMULEUW both perform a mask, VMLAL.U32 performs an upcast.
- *
- * To do the same operation, the 128-bit 'Q' register needs to be split into
- * two 64-bit 'D' registers, performing this operation::
- *
- *   [                a                 |                 b                ]
- *            |              '---------. .--------'                |
- *            |                         x                          |
- *            |              .---------' '--------.                |
- *   [ a & 0xFFFFFFFF | b & 0xFFFFFFFF ],[    a >> 32     |     b >> 32    ]
- *
- * Due to significant changes in aarch64, the fastest method for aarch64 is
- * completely different than the fastest method for ARMv7-A.
- *
- * ARMv7-A treats D registers as unions overlaying Q registers, so modifying
- * D11 will modify the high half of Q5. This is similar to how modifying AH
- * will only affect bits 8-15 of AX on x86.
- *
- * VZIP takes two registers, and puts even lanes in one register and odd lanes
- * in the other.
+ * UGLY HACK: While AArch64 GCC on Linux does not seem to care, on macOS, GCC -O3
+ * optimizes out the entire hashLong loop because of the aliasing violation.
  *
- * On ARMv7-A, this strangely modifies both parameters in place instead of
- * taking the usual 3-operand form.
+ * However, GCC is also inefficient at load-store optimization with vld1q/vst1q,
+ * so the only option is to mark it as aliasing.
+ */
+typedef uint64x2_t xxh_aliasing_uint64x2_t XXH_ALIASING;
+
+/*!
+ * @internal
+ * @brief `vld1q_u64` but faster and alignment-safe.
  *
- * Therefore, if we want to do this, we can simply use a D-form VZIP.32 on the
- * lower and upper halves of the Q register to end up with the high and low
- * halves where we want - all in one instruction.
+ * On AArch64, unaligned access is always safe, but on ARMv7-a, it is only
+ * *conditionally* safe (`vld1` has an alignment bit like `movdq[ua]` in x86).
  *
- *   vzip.32   d10, d11       @ d10 = { d10[0], d11[0] }; d11 = { d10[1], d11[1] }
+ * GCC for AArch64 sees `vld1q_u8` as an intrinsic instead of a load, so it
+ * prohibits load-store optimizations. Therefore, a direct dereference is used.
  *
- * Unfortunately we need inline assembly for this: Instructions modifying two
- * registers at once is not possible in GCC or Clang's IR, and they have to
- * create a copy.
+ * Otherwise, `vld1q_u8` is used with `vreinterpretq_u8_u64` to do a safe
+ * unaligned load.
+ */
+#if defined(__aarch64__) && defined(__GNUC__) && !defined(__clang__)
+XXH_FORCE_INLINE uint64x2_t XXH_vld1q_u64(void const* ptr) /* silence -Wcast-align */
+{
+    return *(xxh_aliasing_uint64x2_t const *)ptr;
+}
+#else
+XXH_FORCE_INLINE uint64x2_t XXH_vld1q_u64(void const* ptr)
+{
+    return vreinterpretq_u64_u8(vld1q_u8((uint8_t const*)ptr));
+}
+#endif
+
+/*!
+ * @internal
+ * @brief `vmlal_u32` on low and high halves of a vector.
  *
- * aarch64 requires a different approach.
+ * This is a workaround for AArch64 GCC < 11, which implemented arm_neon.h with
+ * inline assembly and was therefore incapable of merging the
+ * `vget_{low, high}_u32` with `vmlal_u32`.
+ */
+#if defined(__aarch64__) && defined(__GNUC__) && !defined(__clang__) && __GNUC__ < 11
+XXH_FORCE_INLINE uint64x2_t
+XXH_vmlal_low_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs)
+{
+    /* Inline assembly is the only way */
+    __asm__("umlal   %0.2d, %1.2s, %2.2s" : "+w" (acc) : "w" (lhs), "w" (rhs));
+    return acc;
+}
+XXH_FORCE_INLINE uint64x2_t
+XXH_vmlal_high_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs)
+{
+    /* This intrinsic works as expected */
+    return vmlal_high_u32(acc, lhs, rhs);
+}
+#else
+/* Portable intrinsic versions */
+XXH_FORCE_INLINE uint64x2_t
+XXH_vmlal_low_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs)
+{
+    return vmlal_u32(acc, vget_low_u32(lhs), vget_low_u32(rhs));
+}
+/*! @copydoc XXH_vmlal_low_u32
+ * Assume the compiler converts this to vmlal_high_u32 on aarch64 */
+XXH_FORCE_INLINE uint64x2_t
+XXH_vmlal_high_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs)
+{
+    return vmlal_u32(acc, vget_high_u32(lhs), vget_high_u32(rhs));
+}
+#endif
+
+/*!
+ * @ingroup tuning
+ * @brief Controls the NEON to scalar ratio for XXH3
  *
- * In order to make it easier to write a decent compiler for aarch64, many
- * quirks were removed, such as conditional execution.
+ * This can be set to 2, 4, 6, or 8.
  *
- * NEON was also affected by this.
+ * ARM Cortex CPUs are _very_ sensitive to how their pipelines are used.
  *
- * aarch64 cannot access the high bits of a Q-form register, and writes to a
- * D-form register zero the high bits, similar to how writes to W-form scalar
- * registers (or DWORD registers on x86_64) work.
+ * For example, the Cortex-A73 can dispatch 3 micro-ops per cycle, but only 2 of those
+ * can be NEON. If you are only using NEON instructions, you are only using 2/3 of the CPU
+ * bandwidth.
  *
- * The formerly free vget_high intrinsics now require a vext (with a few
- * exceptions)
+ * This is even more noticeable on the more advanced cores like the Cortex-A76 which
+ * can dispatch 8 micro-ops per cycle, but still only 2 NEON micro-ops at once.
  *
- * Additionally, VZIP was replaced by ZIP1 and ZIP2, which are the equivalent
- * of PUNPCKL* and PUNPCKH* in SSE, respectively, in order to only modify one
- * operand.
+ * Therefore, to make the most out of the pipeline, it is beneficial to run 6 NEON lanes
+ * and 2 scalar lanes, which is chosen by default.
  *
- * The equivalent of the VZIP.32 on the lower and upper halves would be this
- * mess:
+ * This does not apply to Apple processors or 32-bit processors, which run better with
+ * full NEON. These will default to 8. Additionally, size-optimized builds run 8 lanes.
  *
- *   ext     v2.4s, v0.4s, v0.4s, #2 // v2 = { v0[2], v0[3], v0[0], v0[1] }
- *   zip1    v1.2s, v0.2s, v2.2s     // v1 = { v0[0], v2[0] }
- *   zip2    v0.2s, v0.2s, v1.2s     // v0 = { v0[1], v2[1] }
+ * This change benefits CPUs with large micro-op buffers without negatively affecting
+ * most other CPUs:
  *
- * Instead, we use a literal downcast, vmovn_u64 (XTN), and vshrn_n_u64 (SHRN):
+ *  | Chipset               | Dispatch type       | NEON only | 6:2 hybrid | Diff. |
+ *  |:----------------------|:--------------------|----------:|-----------:|------:|
+ *  | Snapdragon 730 (A76)  | 2 NEON/8 micro-ops  |  8.8 GB/s |  10.1 GB/s |  ~16% |
+ *  | Snapdragon 835 (A73)  | 2 NEON/3 micro-ops  |  5.1 GB/s |   5.3 GB/s |   ~5% |
+ *  | Marvell PXA1928 (A53) | In-order dual-issue |  1.9 GB/s |   1.9 GB/s |    0% |
+ *  | Apple M1              | 4 NEON/8 micro-ops  | 37.3 GB/s |  36.1 GB/s |  ~-3% |
  *
- *   shrn    v1.2s, v0.2d, #32  // v1 = (uint32x2_t)(v0 >> 32);
- *   xtn     v0.2s, v0.2d       // v0 = (uint32x2_t)(v0 & 0xFFFFFFFF);
+ * It also seems to fix some bad codegen on GCC, making it almost as fast as clang.
  *
- * This is available on ARMv7-A, but is less efficient than a single VZIP.32.
+ * @see XXH3_accumulate_512_neon()
  */
-
-/*!
- * Function-like macro:
- * void XXH_SPLIT_IN_PLACE(uint64x2_t &in, uint32x2_t &outLo, uint32x2_t &outHi)
- * {
- *     outLo = (uint32x2_t)(in & 0xFFFFFFFF);
- *     outHi = (uint32x2_t)(in >> 32);
- *     in = UNDEFINED;
- * }
- */
-# if !defined(XXH_NO_VZIP_HACK) /* define to disable */ \
-   && defined(__GNUC__) \
-   && !defined(__aarch64__) && !defined(__arm64__) && !defined(_M_ARM64)
-#  define XXH_SPLIT_IN_PLACE(in, outLo, outHi)                                              \
-    do {                                                                                    \
-      /* Undocumented GCC/Clang operand modifier: %e0 = lower D half, %f0 = upper D half */ \
-      /* https://github.com/gcc-mirror/gcc/blob/38cf91e5/gcc/config/arm/arm.c#L22486 */     \
-      /* https://github.com/llvm-mirror/llvm/blob/2c4ca683/lib/Target/ARM/ARMAsmPrinter.cpp#L399 */ \
-      __asm__("vzip.32  %e0, %f0" : "+w" (in));                                             \
-      (outLo) = vget_low_u32 (vreinterpretq_u32_u64(in));                                   \
-      (outHi) = vget_high_u32(vreinterpretq_u32_u64(in));                                   \
-   } while (0)
-# else
-#  define XXH_SPLIT_IN_PLACE(in, outLo, outHi)                                            \
-    do {                                                                                  \
-      (outLo) = vmovn_u64    (in);                                                        \
-      (outHi) = vshrn_n_u64  ((in), 32);                                                  \
-    } while (0)
+# ifndef XXH3_NEON_LANES
+#  if (defined(__aarch64__) || defined(__arm64__) || defined(_M_ARM64) || defined(_M_ARM64EC)) \
+   && !defined(__APPLE__) && XXH_SIZE_OPT <= 0
+#   define XXH3_NEON_LANES 6
+#  else
+#   define XXH3_NEON_LANES XXH_ACC_NB
+#  endif
 # endif
 #endif  /* XXH_VECTOR == XXH_NEON */
 
@@ -3048,27 +3499,42 @@ enum XXH_VECTOR_TYPE /* fake enum */ {
  * inconsistent intrinsics, spotty coverage, and multiple endiannesses.
  */
 #if XXH_VECTOR == XXH_VSX
+/* Annoyingly, these headers _may_ define three macros: `bool`, `vector`,
+ * and `pixel`. This is a problem for obvious reasons.
+ *
+ * These keywords are unnecessary; the spec literally says they are
+ * equivalent to `__bool`, `__vector`, and `__pixel` and may be undef'd
+ * after including the header.
+ *
+ * We use pragma push_macro/pop_macro to keep the namespace clean. */
+#  pragma push_macro("bool")
+#  pragma push_macro("vector")
+#  pragma push_macro("pixel")
+/* silence potential macro redefined warnings */
+#  undef bool
+#  undef vector
+#  undef pixel
+
 #  if defined(__s390x__)
 #    include <s390intrin.h>
 #  else
-/* gcc's altivec.h can have the unwanted consequence to unconditionally
- * #define bool, vector, and pixel keywords,
- * with bad consequences for programs already using these keywords for other purposes.
- * The paragraph defining these macros is skipped when __APPLE_ALTIVEC__ is defined.
- * __APPLE_ALTIVEC__ is _generally_ defined automatically by the compiler,
- * but it seems that, in some cases, it isn't.
- * Force the build macro to be defined, so that keywords are not altered.
- */
-#    if defined(__GNUC__) && !defined(__APPLE_ALTIVEC__)
-#      define __APPLE_ALTIVEC__
-#    endif
 #    include <altivec.h>
 #  endif
 
+/* Restore the original macro values, if applicable. */
+#  pragma pop_macro("pixel")
+#  pragma pop_macro("vector")
+#  pragma pop_macro("bool")
+
 typedef __vector unsigned long long xxh_u64x2;
 typedef __vector unsigned char xxh_u8x16;
 typedef __vector unsigned xxh_u32x4;
 
+/*
+ * UGLY HACK: Similar to aarch64 macOS GCC, s390x GCC has the same aliasing issue.
+ */
+typedef xxh_u64x2 xxh_aliasing_u64x2 XXH_ALIASING;
+
 # ifndef XXH_VSX_BE
 #  if defined(__BIG_ENDIAN__) \
   || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
@@ -3120,8 +3586,9 @@ XXH_FORCE_INLINE xxh_u64x2 XXH_vec_loadu(const void *ptr)
  /* s390x is always big endian, no issue on this platform */
 #  define XXH_vec_mulo vec_mulo
 #  define XXH_vec_mule vec_mule
-# elif defined(__clang__) && XXH_HAS_BUILTIN(__builtin_altivec_vmuleuw)
+# elif defined(__clang__) && XXH_HAS_BUILTIN(__builtin_altivec_vmuleuw) && !defined(__ibmxl__)
 /* Clang has a better way to control this, we can just use the builtin which doesn't swap. */
+ /* The IBM XL Compiler (which defines __clang__) only implements the vec_* operations */
 #  define XXH_vec_mulo __builtin_altivec_vmulouw
 #  define XXH_vec_mule __builtin_altivec_vmuleuw
 # else
@@ -3142,13 +3609,29 @@ XXH_FORCE_INLINE xxh_u64x2 XXH_vec_mule(xxh_u32x4 a, xxh_u32x4 b)
 # endif /* XXH_vec_mulo, XXH_vec_mule */
 #endif /* XXH_VECTOR == XXH_VSX */
 
+#if XXH_VECTOR == XXH_SVE
+#define ACCRND(acc, offset) \
+do { \
+    svuint64_t input_vec = svld1_u64(mask, xinput + offset);         \
+    svuint64_t secret_vec = svld1_u64(mask, xsecret + offset);       \
+    svuint64_t mixed = sveor_u64_x(mask, secret_vec, input_vec);     \
+    svuint64_t swapped = svtbl_u64(input_vec, kSwap);                \
+    svuint64_t mixed_lo = svextw_u64_x(mask, mixed);                 \
+    svuint64_t mixed_hi = svlsr_n_u64_x(mask, mixed, 32);            \
+    svuint64_t mul = svmad_u64_x(mask, mixed_lo, mixed_hi, swapped); \
+    acc = svadd_u64_x(mask, acc, mul);                               \
+} while (0)
+#endif /* XXH_VECTOR == XXH_SVE */
+
 
 /* prefetch
  * can be disabled, by declaring XXH_NO_PREFETCH build macro */
 #if defined(XXH_NO_PREFETCH)
 #  define XXH_PREFETCH(ptr)  (void)(ptr)  /* disabled */
 #else
-#  if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))  /* _mm_prefetch() not defined outside of x86/x64 */
+#  if XXH_SIZE_OPT >= 1
+#    define XXH_PREFETCH(ptr) (void)(ptr)
+#  elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))  /* _mm_prefetch() not defined outside of x86/x64 */
 #    include <mmintrin.h>   /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
 #    define XXH_PREFETCH(ptr)  _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
 #  elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
@@ -3213,12 +3696,11 @@ XXH_mult32to64(xxh_u64 x, xxh_u64 y)
    return (x & 0xFFFFFFFF) * (y & 0xFFFFFFFF);
 }
 #elif defined(_MSC_VER) && defined(_M_IX86)
-#    include <intrin.h>
 #    define XXH_mult32to64(x, y) __emulu((unsigned)(x), (unsigned)(y))
 #else
 /*
  * Downcast + upcast is usually better than masking on older compilers like
- * GCC 4.2 (especially 32-bit ones), all without affecting newer compilers.
+ * GCC 4.2 (especially 32-bit ones), all without affecting newer compilers.
  *
  * The other method, (x & 0xFFFFFFFF) * (y & 0xFFFFFFFF), will AND both operands
  * and perform a full 64x64 multiply -- entirely redundant on 32-bit.
@@ -3253,7 +3735,7 @@ XXH_mult64to128(xxh_u64 lhs, xxh_u64 rhs)
      * In that case it is best to use the portable one.
      * https://github.com/Cyan4973/xxHash/issues/211#issuecomment-515575677
      */
-#if defined(__GNUC__) && !defined(__wasm__) \
+#if (defined(__GNUC__) || defined(__clang__)) && !defined(__wasm__) \
     && defined(__SIZEOF_INT128__) \
     || (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128)
 
@@ -3270,7 +3752,7 @@ XXH_mult64to128(xxh_u64 lhs, xxh_u64 rhs)
      *
      * This compiles to single operand MUL on x64.
      */
-#elif defined(_M_X64) || defined(_M_IA64)
+#elif (defined(_M_X64) || defined(_M_IA64)) && !defined(_M_ARM64EC)
 
 #ifndef _MSC_VER
 #   pragma intrinsic(_umul128)
@@ -3287,7 +3769,7 @@ XXH_mult64to128(xxh_u64 lhs, xxh_u64 rhs)
      *
      * This compiles to the same MUL + UMULH as GCC/Clang's __uint128_t method.
      */
-#elif defined(_M_ARM64)
+#elif defined(_M_ARM64) || defined(_M_ARM64EC)
 
 #ifndef _MSC_VER
 #   pragma intrinsic(__umulh)
@@ -3377,7 +3859,7 @@ XXH3_mul128_fold64(xxh_u64 lhs, xxh_u64 rhs)
 }
 
 /*! Seems to produce slightly better code on GCC for some reason. */
-XXH_FORCE_INLINE xxh_u64 XXH_xorshift64(xxh_u64 v64, int shift)
+XXH_FORCE_INLINE XXH_CONSTF xxh_u64 XXH_xorshift64(xxh_u64 v64, int shift)
 {
     XXH_ASSERT(0 <= shift && shift < 64);
     return v64 ^ (v64 >> shift);
@@ -3444,7 +3926,7 @@ static XXH64_hash_t XXH3_rrmxmx(xxh_u64 h64, xxh_u64 len)
  *
  * This adds an extra layer of strength for custom secrets.
  */
-XXH_FORCE_INLINE XXH64_hash_t
+XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
 XXH3_len_1to3_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
 {
     XXH_ASSERT(input != NULL);
@@ -3466,7 +3948,7 @@ XXH3_len_1to3_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_h
     }
 }
 
-XXH_FORCE_INLINE XXH64_hash_t
+XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
 XXH3_len_4to8_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
 {
     XXH_ASSERT(input != NULL);
@@ -3482,7 +3964,7 @@ XXH3_len_4to8_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_h
     }
 }
 
-XXH_FORCE_INLINE XXH64_hash_t
+XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
 XXH3_len_9to16_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
 {
     XXH_ASSERT(input != NULL);
@@ -3499,7 +3981,7 @@ XXH3_len_9to16_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_
     }
 }
 
-XXH_FORCE_INLINE XXH64_hash_t
+XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
 XXH3_len_0to16_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
 {
     XXH_ASSERT(len <= 16);
@@ -3569,7 +4051,7 @@ XXH_FORCE_INLINE xxh_u64 XXH3_mix16B(const xxh_u8* XXH_RESTRICT input,
 }
 
 /* For mid range keys, XXH3 uses a Mum-hash variant. */
-XXH_FORCE_INLINE XXH64_hash_t
+XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
 XXH3_len_17to128_64b(const xxh_u8* XXH_RESTRICT input, size_t len,
                      const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
                      XXH64_hash_t seed)
@@ -3577,29 +4059,39 @@ XXH3_len_17to128_64b(const xxh_u8* XXH_RESTRICT input, size_t len,
     XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize;
     XXH_ASSERT(16 < len && len <= 128);
 
-    {   xxh_u64 acc = len * XXH_PRIME64_1;
+    {   xxh_u64 acc = len * XXH_PRIME64_1, acc_end;
+#if XXH_SIZE_OPT >= 1
+        /* Smaller and cleaner, but slightly slower. */
+        unsigned int i = (unsigned int)(len - 1) / 32;
+        do {
+            acc += XXH3_mix16B(input+16 * i, secret+32*i, seed);
+            acc += XXH3_mix16B(input+len-16*(i+1), secret+32*i+16, seed);
+        } while (i-- != 0);
+        acc_end = 0;
+#else
+        acc += XXH3_mix16B(input+0, secret+0, seed);
+        acc_end = XXH3_mix16B(input+len-16, secret+16, seed);
         if (len > 32) {
+            acc += XXH3_mix16B(input+16, secret+32, seed);
+            acc_end += XXH3_mix16B(input+len-32, secret+48, seed);
             if (len > 64) {
+                acc += XXH3_mix16B(input+32, secret+64, seed);
+                acc_end += XXH3_mix16B(input+len-48, secret+80, seed);
+
                 if (len > 96) {
                     acc += XXH3_mix16B(input+48, secret+96, seed);
-                    acc += XXH3_mix16B(input+len-64, secret+112, seed);
+                    acc_end += XXH3_mix16B(input+len-64, secret+112, seed);
                 }
-                acc += XXH3_mix16B(input+32, secret+64, seed);
-                acc += XXH3_mix16B(input+len-48, secret+80, seed);
             }
-            acc += XXH3_mix16B(input+16, secret+32, seed);
-            acc += XXH3_mix16B(input+len-32, secret+48, seed);
         }
-        acc += XXH3_mix16B(input+0, secret+0, seed);
-        acc += XXH3_mix16B(input+len-16, secret+16, seed);
-
-        return XXH3_avalanche(acc);
+#endif
+        return XXH3_avalanche(acc + acc_end);
     }
 }
 
 #define XXH3_MIDSIZE_MAX 240
 
-XXH_NO_INLINE XXH64_hash_t
+XXH_NO_INLINE XXH_PUREF XXH64_hash_t
 XXH3_len_129to240_64b(const xxh_u8* XXH_RESTRICT input, size_t len,
                       const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
                       XXH64_hash_t seed)
@@ -3611,13 +4103,17 @@ XXH3_len_129to240_64b(const xxh_u8* XXH_RESTRICT input, size_t len,
     #define XXH3_MIDSIZE_LASTOFFSET  17
 
     {   xxh_u64 acc = len * XXH_PRIME64_1;
-        int const nbRounds = (int)len / 16;
-        int i;
+        xxh_u64 acc_end;
+        unsigned int const nbRounds = (unsigned int)len / 16;
+        unsigned int i;
+        XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX);
         for (i=0; i<8; i++) {
             acc += XXH3_mix16B(input+(16*i), secret+(16*i), seed);
         }
-        acc = XXH3_avalanche(acc);
+        /* last bytes */
+        acc_end = XXH3_mix16B(input + len - 16, secret + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET, seed);
         XXH_ASSERT(nbRounds >= 8);
+        acc = XXH3_avalanche(acc);
 #if defined(__clang__)                                /* Clang */ \
     && (defined(__ARM_NEON) || defined(__ARM_NEON__)) /* NEON */ \
     && !defined(XXH_ENABLE_AUTOVECTORIZE)             /* Define to disable */
@@ -3644,11 +4140,13 @@ XXH3_len_129to240_64b(const xxh_u8* XXH_RESTRICT input, size_t len,
         #pragma clang loop vectorize(disable)
 #endif
         for (i=8 ; i < nbRounds; i++) {
-            acc += XXH3_mix16B(input+(16*i), secret+(16*(i-8)) + XXH3_MIDSIZE_STARTOFFSET, seed);
+            /*
+             * Prevents clang from unrolling the acc loop and interleaving with this one.
+             */
+            XXH_COMPILER_GUARD(acc);
+            acc_end += XXH3_mix16B(input+(16*i), secret+(16*(i-8)) + XXH3_MIDSIZE_STARTOFFSET, seed);
         }
-        /* last bytes */
-        acc += XXH3_mix16B(input + len - 16, secret + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET, seed);
-        return XXH3_avalanche(acc);
+        return XXH3_avalanche(acc + acc_end);
     }
 }
 
@@ -3664,6 +4162,47 @@ XXH3_len_129to240_64b(const xxh_u8* XXH_RESTRICT input, size_t len,
 #  define ACC_NB XXH_ACC_NB
 #endif
 
+#ifndef XXH_PREFETCH_DIST
+#  ifdef __clang__
+#    define XXH_PREFETCH_DIST 320
+#  else
+#    if (XXH_VECTOR == XXH_AVX512)
+#      define XXH_PREFETCH_DIST 512
+#    else
+#      define XXH_PREFETCH_DIST 384
+#    endif
+#  endif  /* __clang__ */
+#endif  /* XXH_PREFETCH_DIST */
+
+/*
+ * These macros are to generate an XXH3_accumulate() function.
+ * The two arguments select the name suffix and target attribute.
+ *
+ * The name of this symbol is XXH3_accumulate_<name>() and it calls
+ * XXH3_accumulate_512_<name>().
+ *
+ * It may be useful to hand implement this function if the compiler fails to
+ * optimize the inline function.
+ */
+#define XXH3_ACCUMULATE_TEMPLATE(name)                      \
+void                                                        \
+XXH3_accumulate_##name(xxh_u64* XXH_RESTRICT acc,           \
+                       const xxh_u8* XXH_RESTRICT input,    \
+                       const xxh_u8* XXH_RESTRICT secret,   \
+                       size_t nbStripes)                    \
+{                                                           \
+    size_t n;                                               \
+    for (n = 0; n < nbStripes; n++ ) {                      \
+        const xxh_u8* const in = input + n*XXH_STRIPE_LEN;  \
+        XXH_PREFETCH(in + XXH_PREFETCH_DIST);               \
+        XXH3_accumulate_512_##name(                         \
+                 acc,                                       \
+                 in,                                        \
+                 secret + n*XXH_SECRET_CONSUME_RATE);       \
+    }                                                       \
+}
+
+
 XXH_FORCE_INLINE void XXH_writeLE64(void* dst, xxh_u64 v64)
 {
     if (!XXH_CPU_LITTLE_ENDIAN) v64 = XXH_swap64(v64);
@@ -3684,6 +4223,7 @@ XXH_FORCE_INLINE void XXH_writeLE64(void* dst, xxh_u64 v64)
     typedef long long xxh_i64;
 #endif
 
+
 /*
  * XXH3_accumulate_512 is the tightest loop for long inputs, and it is the most optimized.
  *
@@ -3731,7 +4271,7 @@ XXH3_accumulate_512_avx512(void* XXH_RESTRICT acc,
         /* data_key    = data_vec ^ key_vec; */
         __m512i const data_key    = _mm512_xor_si512     (data_vec, key_vec);
         /* data_key_lo = data_key >> 32; */
-        __m512i const data_key_lo = _mm512_shuffle_epi32 (data_key, (_MM_PERM_ENUM)_MM_SHUFFLE(0, 3, 0, 1));
+        __m512i const data_key_lo = _mm512_srli_epi64 (data_key, 32);
         /* product     = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */
         __m512i const product     = _mm512_mul_epu32     (data_key, data_key_lo);
         /* xacc[0] += swap(data_vec); */
@@ -3741,6 +4281,7 @@ XXH3_accumulate_512_avx512(void* XXH_RESTRICT acc,
         *xacc = _mm512_add_epi64(product, sum);
     }
 }
+XXH_FORCE_INLINE XXH_TARGET_AVX512 XXH3_ACCUMULATE_TEMPLATE(avx512)
 
 /*
  * XXH3_scrambleAcc: Scrambles the accumulators to improve mixing.
@@ -3774,13 +4315,12 @@ XXH3_scrambleAcc_avx512(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
         /* xacc[0] ^= (xacc[0] >> 47) */
         __m512i const acc_vec     = *xacc;
         __m512i const shifted     = _mm512_srli_epi64    (acc_vec, 47);
-        __m512i const data_vec    = _mm512_xor_si512     (acc_vec, shifted);
         /* xacc[0] ^= secret; */
         __m512i const key_vec     = _mm512_loadu_si512   (secret);
-        __m512i const data_key    = _mm512_xor_si512     (data_vec, key_vec);
+        __m512i const data_key    = _mm512_ternarylogic_epi32(key_vec, acc_vec, shifted, 0x96 /* key_vec ^ acc_vec ^ shifted */);
 
         /* xacc[0] *= XXH_PRIME32_1; */
-        __m512i const data_key_hi = _mm512_shuffle_epi32 (data_key, (_MM_PERM_ENUM)_MM_SHUFFLE(0, 3, 0, 1));
+        __m512i const data_key_hi = _mm512_srli_epi64 (data_key, 32);
         __m512i const prod_lo     = _mm512_mul_epu32     (data_key, prime32);
         __m512i const prod_hi     = _mm512_mul_epu32     (data_key_hi, prime32);
         *xacc = _mm512_add_epi64(prod_lo, _mm512_slli_epi64(prod_hi, 32));
@@ -3795,7 +4335,8 @@ XXH3_initCustomSecret_avx512(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
     XXH_ASSERT(((size_t)customSecret & 63) == 0);
     (void)(&XXH_writeLE64);
     {   int const nbRounds = XXH_SECRET_DEFAULT_SIZE / sizeof(__m512i);
-        __m512i const seed = _mm512_mask_set1_epi64(_mm512_set1_epi64((xxh_i64)seed64), 0xAA, (xxh_i64)(0U - seed64));
+        __m512i const seed_pos = _mm512_set1_epi64((xxh_i64)seed64);
+        __m512i const seed     = _mm512_mask_sub_epi64(seed_pos, 0xAA, _mm512_set1_epi8(0), seed_pos);
 
         const __m512i* const src  = (const __m512i*) ((const void*) XXH3_kSecret);
               __m512i* const dest = (      __m512i*) customSecret;
@@ -3803,14 +4344,7 @@ XXH3_initCustomSecret_avx512(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
         XXH_ASSERT(((size_t)src & 63) == 0); /* control alignment */
         XXH_ASSERT(((size_t)dest & 63) == 0);
         for (i=0; i < nbRounds; ++i) {
-            /* GCC has a bug, _mm512_stream_load_si512 accepts 'void*', not 'void const*',
-             * this will warn "discards 'const' qualifier". */
-            union {
-                const __m512i* cp;
-                void* p;
-            } remote_const_void;
-            remote_const_void.cp = src + i;
-            dest[i] = _mm512_add_epi64(_mm512_stream_load_si512(remote_const_void.p), seed);
+            dest[i] = _mm512_add_epi64(_mm512_load_si512(src + i), seed);
     }   }
 }
 
@@ -3846,7 +4380,7 @@ XXH3_accumulate_512_avx2( void* XXH_RESTRICT acc,
             /* data_key    = data_vec ^ key_vec; */
             __m256i const data_key    = _mm256_xor_si256     (data_vec, key_vec);
             /* data_key_lo = data_key >> 32; */
-            __m256i const data_key_lo = _mm256_shuffle_epi32 (data_key, _MM_SHUFFLE(0, 3, 0, 1));
+            __m256i const data_key_lo = _mm256_srli_epi64 (data_key, 32);
             /* product     = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */
             __m256i const product     = _mm256_mul_epu32     (data_key, data_key_lo);
             /* xacc[i] += swap(data_vec); */
@@ -3856,6 +4390,7 @@ XXH3_accumulate_512_avx2( void* XXH_RESTRICT acc,
             xacc[i] = _mm256_add_epi64(product, sum);
     }   }
 }
+XXH_FORCE_INLINE XXH_TARGET_AVX2 XXH3_ACCUMULATE_TEMPLATE(avx2)
 
 XXH_FORCE_INLINE XXH_TARGET_AVX2 void
 XXH3_scrambleAcc_avx2(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
@@ -3878,7 +4413,7 @@ XXH3_scrambleAcc_avx2(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
             __m256i const data_key    = _mm256_xor_si256     (data_vec, key_vec);
 
             /* xacc[i] *= XXH_PRIME32_1; */
-            __m256i const data_key_hi = _mm256_shuffle_epi32 (data_key, _MM_SHUFFLE(0, 3, 0, 1));
+            __m256i const data_key_hi = _mm256_srli_epi64 (data_key, 32);
             __m256i const prod_lo     = _mm256_mul_epu32     (data_key, prime32);
             __m256i const prod_hi     = _mm256_mul_epu32     (data_key_hi, prime32);
             xacc[i] = _mm256_add_epi64(prod_lo, _mm256_slli_epi64(prod_hi, 32));
@@ -3910,12 +4445,12 @@ XXH_FORCE_INLINE XXH_TARGET_AVX2 void XXH3_initCustomSecret_avx2(void* XXH_RESTR
         XXH_ASSERT(((size_t)dest & 31) == 0);
 
         /* GCC -O2 need unroll loop manually */
-        dest[0] = _mm256_add_epi64(_mm256_stream_load_si256(src+0), seed);
-        dest[1] = _mm256_add_epi64(_mm256_stream_load_si256(src+1), seed);
-        dest[2] = _mm256_add_epi64(_mm256_stream_load_si256(src+2), seed);
-        dest[3] = _mm256_add_epi64(_mm256_stream_load_si256(src+3), seed);
-        dest[4] = _mm256_add_epi64(_mm256_stream_load_si256(src+4), seed);
-        dest[5] = _mm256_add_epi64(_mm256_stream_load_si256(src+5), seed);
+        dest[0] = _mm256_add_epi64(_mm256_load_si256(src+0), seed);
+        dest[1] = _mm256_add_epi64(_mm256_load_si256(src+1), seed);
+        dest[2] = _mm256_add_epi64(_mm256_load_si256(src+2), seed);
+        dest[3] = _mm256_add_epi64(_mm256_load_si256(src+3), seed);
+        dest[4] = _mm256_add_epi64(_mm256_load_si256(src+4), seed);
+        dest[5] = _mm256_add_epi64(_mm256_load_si256(src+5), seed);
     }
 }
 
@@ -3962,6 +4497,7 @@ XXH3_accumulate_512_sse2( void* XXH_RESTRICT acc,
             xacc[i] = _mm_add_epi64(product, sum);
     }   }
 }
+XXH_FORCE_INLINE XXH_TARGET_SSE2 XXH3_ACCUMULATE_TEMPLATE(sse2)
 
 XXH_FORCE_INLINE XXH_TARGET_SSE2 void
 XXH3_scrambleAcc_sse2(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
@@ -4029,96 +4565,196 @@ XXH_FORCE_INLINE XXH_TARGET_SSE2 void XXH3_initCustomSecret_sse2(void* XXH_RESTR
 
 #if (XXH_VECTOR == XXH_NEON)
 
+/* forward declarations for the scalar routines */
+XXH_FORCE_INLINE void
+XXH3_scalarRound(void* XXH_RESTRICT acc, void const* XXH_RESTRICT input,
+                 void const* XXH_RESTRICT secret, size_t lane);
+
+XXH_FORCE_INLINE void
+XXH3_scalarScrambleRound(void* XXH_RESTRICT acc,
+                         void const* XXH_RESTRICT secret, size_t lane);
+
+/*!
+ * @internal
+ * @brief The bulk processing loop for NEON.
+ *
+ * The NEON code path is actually partially scalar when running on AArch64. This
+ * is to optimize the pipelining and can have up to 15% speedup depending on the
+ * CPU, and it also mitigates some GCC codegen issues.
+ *
+ * @see XXH3_NEON_LANES for configuring this and details about this optimization.
+ *
+ * NEON's 32-bit to 64-bit long multiply takes a half vector of 32-bit
+ * integers instead of the other platforms which mask full 64-bit vectors,
+ * so the setup is more complicated than just shifting right.
+ *
+ * Additionally, there is an optimization for 4 lanes at once noted below.
+ *
+ * Since, as stated, the most optimal amount of lanes for Cortexes is 6,
+ * there needs to be *three* versions of the accumulate operation used
+ * for the remaining 2 lanes.
+ */
 XXH_FORCE_INLINE void
 XXH3_accumulate_512_neon( void* XXH_RESTRICT acc,
                     const void* XXH_RESTRICT input,
                     const void* XXH_RESTRICT secret)
 {
     XXH_ASSERT((((size_t)acc) & 15) == 0);
-    {
-        uint64x2_t* const xacc = (uint64x2_t *) acc;
+    XXH_STATIC_ASSERT(XXH3_NEON_LANES > 0 && XXH3_NEON_LANES <= XXH_ACC_NB && XXH3_NEON_LANES % 2 == 0);
+    {   /* GCC for darwin arm64 does not like aliasing here */
+        xxh_aliasing_uint64x2_t* const xacc = (xxh_aliasing_uint64x2_t*) acc;
         /* We don't use a uint32x4_t pointer because it causes bus errors on ARMv7. */
         uint8_t const* const xinput = (const uint8_t *) input;
         uint8_t const* const xsecret  = (const uint8_t *) secret;
 
         size_t i;
-        for (i=0; i < XXH_STRIPE_LEN / sizeof(uint64x2_t); i++) {
+        /* Scalar lanes use the normal scalarRound routine */
+        for (i = XXH3_NEON_LANES; i < XXH_ACC_NB; i++) {
+            XXH3_scalarRound(acc, input, secret, i);
+        }
+        i = 0;
+        /* 4 NEON lanes at a time. */
+        for (; i+1 < XXH3_NEON_LANES / 2; i+=2) {
             /* data_vec = xinput[i]; */
-            uint8x16_t data_vec    = vld1q_u8(xinput  + (i * 16));
+            uint64x2_t data_vec_1 = XXH_vld1q_u64(xinput  + (i * 16));
+            uint64x2_t data_vec_2 = XXH_vld1q_u64(xinput  + ((i+1) * 16));
             /* key_vec  = xsecret[i];  */
-            uint8x16_t key_vec     = vld1q_u8(xsecret + (i * 16));
-            uint64x2_t data_key;
-            uint32x2_t data_key_lo, data_key_hi;
-            /* xacc[i] += swap(data_vec); */
-            uint64x2_t const data64  = vreinterpretq_u64_u8(data_vec);
-            uint64x2_t const swapped = vextq_u64(data64, data64, 1);
-            xacc[i] = vaddq_u64 (xacc[i], swapped);
+            uint64x2_t key_vec_1  = XXH_vld1q_u64(xsecret + (i * 16));
+            uint64x2_t key_vec_2  = XXH_vld1q_u64(xsecret + ((i+1) * 16));
+            /* data_swap = swap(data_vec) */
+            uint64x2_t data_swap_1 = vextq_u64(data_vec_1, data_vec_1, 1);
+            uint64x2_t data_swap_2 = vextq_u64(data_vec_2, data_vec_2, 1);
             /* data_key = data_vec ^ key_vec; */
-            data_key = vreinterpretq_u64_u8(veorq_u8(data_vec, key_vec));
-            /* data_key_lo = (uint32x2_t) (data_key & 0xFFFFFFFF);
-             * data_key_hi = (uint32x2_t) (data_key >> 32);
-             * data_key = UNDEFINED; */
-            XXH_SPLIT_IN_PLACE(data_key, data_key_lo, data_key_hi);
-            /* xacc[i] += (uint64x2_t) data_key_lo * (uint64x2_t) data_key_hi; */
-            xacc[i] = vmlal_u32 (xacc[i], data_key_lo, data_key_hi);
+            uint64x2_t data_key_1 = veorq_u64(data_vec_1, key_vec_1);
+            uint64x2_t data_key_2 = veorq_u64(data_vec_2, key_vec_2);
 
+            /*
+             * If we reinterpret the 64x2 vectors as 32x4 vectors, we can use a
+             * de-interleave operation for 4 lanes in 1 step with `vuzpq_u32` to
+             * get one vector with the low 32 bits of each lane, and one vector
+             * with the high 32 bits of each lane.
+             *
+             * This compiles to two instructions on AArch64 and has a paired vector
+             * result, which is an artifact from ARMv7a's version which modified both
+             * vectors in place.
+             *
+             *  [ dk11L | dk11H | dk12L | dk12H ] -> [ dk11L | dk12L | dk21L | dk22L ]
+             *  [ dk21L | dk21H | dk22L | dk22H ] -> [ dk11H | dk12H | dk21H | dk22H ]
+             */
+            uint32x4x2_t unzipped = vuzpq_u32(
+                vreinterpretq_u32_u64(data_key_1),
+                vreinterpretq_u32_u64(data_key_2)
+            );
+            /* data_key_lo = data_key & 0xFFFFFFFF */
+            uint32x4_t data_key_lo = unzipped.val[0];
+            /* data_key_hi = data_key >> 32 */
+            uint32x4_t data_key_hi = unzipped.val[1];
+            /*
+             * Then, we can split the vectors horizontally and multiply which, as for most
+             * widening intrinsics, have a variant that works on both high half vectors
+             * for free on AArch64.
+             *
+             * sum = data_swap + (u64x2) data_key_lo * (u64x2) data_key_hi
+             */
+            uint64x2_t sum_1 = XXH_vmlal_low_u32(data_swap_1, data_key_lo, data_key_hi);
+            uint64x2_t sum_2 = XXH_vmlal_high_u32(data_swap_2, data_key_lo, data_key_hi);
+            /*
+             * Clang reorders
+             *    swap += dkl * dkh;  // umlal   swap.2d, dkl.2s, dkh.2s
+             *    acc  += swap;       // add     acc.2d, acc.2d, swap.2d
+             * to
+             *    acc  += swap;       // add     acc.2d, acc.2d, swap.2d
+             *    acc  += dkl * dkh;  // umlal   acc.2d, dkl.2s, dkh.2s
+             *
+             * While it would make sense in theory since the addition is faster,
+             * for reasons likely related to umlal being limited to certain NEON
+             * pipelines, this is worse. A compiler guard fixes this.
+             */
+            XXH_COMPILER_GUARD_W(sum_1);
+            XXH_COMPILER_GUARD_W(sum_2);
+            /* xacc[i] = acc_vec + sum; */
+            xacc[i]   = vaddq_u64(xacc[i], sum_1);
+            xacc[i+1] = vaddq_u64(xacc[i+1], sum_2);
+        }
+        /* Operate on the remaining NEON lanes 2 at a time. */
+        for (; i < XXH3_NEON_LANES / 2; i++) {
+            /* data_vec = xinput[i]; */
+            uint64x2_t data_vec = XXH_vld1q_u64(xinput  + (i * 16));
+            /* key_vec  = xsecret[i];  */
+            uint64x2_t key_vec  = XXH_vld1q_u64(xsecret + (i * 16));
+            /* data_swap = swap(data_vec) */
+            uint64x2_t data_swap = vextq_u64(data_vec, data_vec, 1);
+            /* data_key = data_vec ^ key_vec; */
+            uint64x2_t data_key = veorq_u64(data_vec, key_vec);
+            /* For two lanes, just use VMOVN and VSHRN. */
+            /* data_key_lo = data_key & 0xFFFFFFFF; */
+            uint32x2_t data_key_lo = vmovn_u64(data_key);
+            /* data_key_hi = data_key >> 32; */
+            uint32x2_t data_key_hi = vshrn_n_u64(data_key, 32);
+            /* sum = data_swap + (u64x2) data_key_lo * (u64x2) data_key_hi; */
+            uint64x2_t sum = vmlal_u32(data_swap, data_key_lo, data_key_hi);
+            /* Same Clang workaround as before */
+            XXH_COMPILER_GUARD_W(sum);
+            /* xacc[i] = acc_vec + sum; */
+            xacc[i] = vaddq_u64 (xacc[i], sum);
         }
     }
 }
+XXH_FORCE_INLINE XXH3_ACCUMULATE_TEMPLATE(neon)
 
 XXH_FORCE_INLINE void
 XXH3_scrambleAcc_neon(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
 {
     XXH_ASSERT((((size_t)acc) & 15) == 0);
 
-    {   uint64x2_t* xacc       = (uint64x2_t*) acc;
+    {   xxh_aliasing_uint64x2_t* xacc       = (xxh_aliasing_uint64x2_t*) acc;
         uint8_t const* xsecret = (uint8_t const*) secret;
         uint32x2_t prime       = vdup_n_u32 (XXH_PRIME32_1);
 
         size_t i;
-        for (i=0; i < XXH_STRIPE_LEN/sizeof(uint64x2_t); i++) {
+        /* AArch64 uses both scalar and neon at the same time */
+        for (i = XXH3_NEON_LANES; i < XXH_ACC_NB; i++) {
+            XXH3_scalarScrambleRound(acc, secret, i);
+        }
+        for (i=0; i < XXH3_NEON_LANES / 2; i++) {
             /* xacc[i] ^= (xacc[i] >> 47); */
             uint64x2_t acc_vec  = xacc[i];
-            uint64x2_t shifted  = vshrq_n_u64 (acc_vec, 47);
-            uint64x2_t data_vec = veorq_u64   (acc_vec, shifted);
+            uint64x2_t shifted  = vshrq_n_u64(acc_vec, 47);
+            uint64x2_t data_vec = veorq_u64(acc_vec, shifted);
 
             /* xacc[i] ^= xsecret[i]; */
-            uint8x16_t key_vec  = vld1q_u8    (xsecret + (i * 16));
-            uint64x2_t data_key = veorq_u64   (data_vec, vreinterpretq_u64_u8(key_vec));
+            uint64x2_t key_vec  = XXH_vld1q_u64(xsecret + (i * 16));
+            uint64x2_t data_key = veorq_u64(data_vec, key_vec);
 
             /* xacc[i] *= XXH_PRIME32_1 */
-            uint32x2_t data_key_lo, data_key_hi;
-            /* data_key_lo = (uint32x2_t) (xacc[i] & 0xFFFFFFFF);
-             * data_key_hi = (uint32x2_t) (xacc[i] >> 32);
-             * xacc[i] = UNDEFINED; */
-            XXH_SPLIT_IN_PLACE(data_key, data_key_lo, data_key_hi);
-            {   /*
-                 * prod_hi = (data_key >> 32) * XXH_PRIME32_1;
-                 *
-                 * Avoid vmul_u32 + vshll_n_u32 since Clang 6 and 7 will
-                 * incorrectly "optimize" this:
-                 *   tmp     = vmul_u32(vmovn_u64(a), vmovn_u64(b));
-                 *   shifted = vshll_n_u32(tmp, 32);
-                 * to this:
-                 *   tmp     = "vmulq_u64"(a, b); // no such thing!
-                 *   shifted = vshlq_n_u64(tmp, 32);
-                 *
-                 * However, unlike SSE, Clang lacks a 64-bit multiply routine
-                 * for NEON, and it scalarizes two 64-bit multiplies instead.
-                 *
-                 * vmull_u32 has the same timing as vmul_u32, and it avoids
-                 * this bug completely.
-                 * See https://bugs.llvm.org/show_bug.cgi?id=39967
-                 */
-                uint64x2_t prod_hi = vmull_u32 (data_key_hi, prime);
-                /* xacc[i] = prod_hi << 32; */
-                xacc[i] = vshlq_n_u64(prod_hi, 32);
-                /* xacc[i] += (prod_hi & 0xFFFFFFFF) * XXH_PRIME32_1; */
-                xacc[i] = vmlal_u32(xacc[i], data_key_lo, prime);
-            }
-    }   }
+            uint32x2_t data_key_lo = vmovn_u64(data_key);
+            uint32x2_t data_key_hi = vshrn_n_u64(data_key, 32);
+            /*
+             * prod_hi = (data_key >> 32) * XXH_PRIME32_1;
+             *
+             * Avoid vmul_u32 + vshll_n_u32 since Clang 6 and 7 will
+             * incorrectly "optimize" this:
+             *   tmp     = vmul_u32(vmovn_u64(a), vmovn_u64(b));
+             *   shifted = vshll_n_u32(tmp, 32);
+             * to this:
+             *   tmp     = "vmulq_u64"(a, b); // no such thing!
+             *   shifted = vshlq_n_u64(tmp, 32);
+             *
+             * However, unlike SSE, Clang lacks a 64-bit multiply routine
+             * for NEON, and it scalarizes two 64-bit multiplies instead.
+             *
+             * vmull_u32 has the same timing as vmul_u32, and it avoids
+             * this bug completely.
+             * See https://bugs.llvm.org/show_bug.cgi?id=39967
+             */
+            uint64x2_t prod_hi = vmull_u32 (data_key_hi, prime);
+            /* prod_hi <<= 32; */
+            prod_hi = vshlq_n_u64(prod_hi, 32);
+            /* xacc[i] = prod_hi + data_key_lo * XXH_PRIME32_1; */
+            xacc[i] = vmlal_u32(prod_hi, data_key_lo, prime);
+        }
+    }
 }
-
 #endif
 
 #if (XXH_VECTOR == XXH_VSX)
@@ -4129,23 +4765,23 @@ XXH3_accumulate_512_vsx(  void* XXH_RESTRICT acc,
                     const void* XXH_RESTRICT secret)
 {
     /* presumed aligned */
-    unsigned long long* const xacc = (unsigned long long*) acc;
-    xxh_u64x2 const* const xinput   = (xxh_u64x2 const*) input;   /* no alignment restriction */
-    xxh_u64x2 const* const xsecret  = (xxh_u64x2 const*) secret;    /* no alignment restriction */
+    xxh_aliasing_u64x2* const xacc = (xxh_aliasing_u64x2*) acc;
+    xxh_u8 const* const xinput   = (xxh_u8 const*) input;   /* no alignment restriction */
+    xxh_u8 const* const xsecret  = (xxh_u8 const*) secret;    /* no alignment restriction */
     xxh_u64x2 const v32 = { 32, 32 };
     size_t i;
     for (i = 0; i < XXH_STRIPE_LEN / sizeof(xxh_u64x2); i++) {
         /* data_vec = xinput[i]; */
-        xxh_u64x2 const data_vec = XXH_vec_loadu(xinput + i);
+        xxh_u64x2 const data_vec = XXH_vec_loadu(xinput + 16*i);
         /* key_vec = xsecret[i]; */
-        xxh_u64x2 const key_vec  = XXH_vec_loadu(xsecret + i);
+        xxh_u64x2 const key_vec  = XXH_vec_loadu(xsecret + 16*i);
         xxh_u64x2 const data_key = data_vec ^ key_vec;
         /* shuffled = (data_key << 32) | (data_key >> 32); */
         xxh_u32x4 const shuffled = (xxh_u32x4)vec_rl(data_key, v32);
         /* product = ((xxh_u64x2)data_key & 0xFFFFFFFF) * ((xxh_u64x2)shuffled & 0xFFFFFFFF); */
         xxh_u64x2 const product  = XXH_vec_mulo((xxh_u32x4)data_key, shuffled);
         /* acc_vec = xacc[i]; */
-        xxh_u64x2 acc_vec        = vec_xl(0, xacc + 2 * i);
+        xxh_u64x2 acc_vec        = xacc[i];
         acc_vec += product;
 
         /* swap high and low halves */
@@ -4154,18 +4790,18 @@ XXH3_accumulate_512_vsx(  void* XXH_RESTRICT acc,
 #else
         acc_vec += vec_xxpermdi(data_vec, data_vec, 2);
 #endif
-        /* xacc[i] = acc_vec; */
-        vec_xst(acc_vec, 0, xacc + 2 * i);
+        xacc[i] = acc_vec;
     }
 }
+XXH_FORCE_INLINE XXH3_ACCUMULATE_TEMPLATE(vsx)
 
 XXH_FORCE_INLINE void
 XXH3_scrambleAcc_vsx(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
 {
     XXH_ASSERT((((size_t)acc) & 15) == 0);
 
-    {         xxh_u64x2* const xacc    =       (xxh_u64x2*) acc;
-        const xxh_u64x2* const xsecret = (const xxh_u64x2*) secret;
+    {   xxh_aliasing_u64x2* const xacc = (xxh_aliasing_u64x2*) acc;
+        const xxh_u8* const xsecret = (const xxh_u8*) secret;
         /* constants */
         xxh_u64x2 const v32  = { 32, 32 };
         xxh_u64x2 const v47 = { 47, 47 };
@@ -4177,7 +4813,7 @@ XXH3_scrambleAcc_vsx(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
             xxh_u64x2 const data_vec = acc_vec ^ (acc_vec >> v47);
 
             /* xacc[i] ^= xsecret[i]; */
-            xxh_u64x2 const key_vec  = XXH_vec_loadu(xsecret + i);
+            xxh_u64x2 const key_vec  = XXH_vec_loadu(xsecret + 16*i);
             xxh_u64x2 const data_key = data_vec ^ key_vec;
 
             /* xacc[i] *= XXH_PRIME32_1 */
@@ -4191,40 +4827,233 @@ XXH3_scrambleAcc_vsx(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
 
 #endif
 
+#if (XXH_VECTOR == XXH_SVE)
+
+XXH_FORCE_INLINE void
+XXH3_accumulate_512_sve( void* XXH_RESTRICT acc,
+                   const void* XXH_RESTRICT input,
+                   const void* XXH_RESTRICT secret)
+{
+    uint64_t *xacc = (uint64_t *)acc;
+    const uint64_t *xinput = (const uint64_t *)(const void *)input;
+    const uint64_t *xsecret = (const uint64_t *)(const void *)secret;
+    svuint64_t kSwap = sveor_n_u64_z(svptrue_b64(), svindex_u64(0, 1), 1);
+    uint64_t element_count = svcntd();
+    if (element_count >= 8) {
+        svbool_t mask = svptrue_pat_b64(SV_VL8);
+        svuint64_t vacc = svld1_u64(mask, xacc);
+        ACCRND(vacc, 0);
+        svst1_u64(mask, xacc, vacc);
+    } else if (element_count == 2) {   /* sve128 */
+        svbool_t mask = svptrue_pat_b64(SV_VL2);
+        svuint64_t acc0 = svld1_u64(mask, xacc + 0);
+        svuint64_t acc1 = svld1_u64(mask, xacc + 2);
+        svuint64_t acc2 = svld1_u64(mask, xacc + 4);
+        svuint64_t acc3 = svld1_u64(mask, xacc + 6);
+        ACCRND(acc0, 0);
+        ACCRND(acc1, 2);
+        ACCRND(acc2, 4);
+        ACCRND(acc3, 6);
+        svst1_u64(mask, xacc + 0, acc0);
+        svst1_u64(mask, xacc + 2, acc1);
+        svst1_u64(mask, xacc + 4, acc2);
+        svst1_u64(mask, xacc + 6, acc3);
+    } else {
+        svbool_t mask = svptrue_pat_b64(SV_VL4);
+        svuint64_t acc0 = svld1_u64(mask, xacc + 0);
+        svuint64_t acc1 = svld1_u64(mask, xacc + 4);
+        ACCRND(acc0, 0);
+        ACCRND(acc1, 4);
+        svst1_u64(mask, xacc + 0, acc0);
+        svst1_u64(mask, xacc + 4, acc1);
+    }
+}
+
+XXH_FORCE_INLINE void
+XXH3_accumulate_sve(xxh_u64* XXH_RESTRICT acc,
+               const xxh_u8* XXH_RESTRICT input,
+               const xxh_u8* XXH_RESTRICT secret,
+               size_t nbStripes)
+{
+    if (nbStripes != 0) {
+        uint64_t *xacc = (uint64_t *)acc;
+        const uint64_t *xinput = (const uint64_t *)(const void *)input;
+        const uint64_t *xsecret = (const uint64_t *)(const void *)secret;
+        svuint64_t kSwap = sveor_n_u64_z(svptrue_b64(), svindex_u64(0, 1), 1);
+        uint64_t element_count = svcntd();
+        if (element_count >= 8) {
+            svbool_t mask = svptrue_pat_b64(SV_VL8);
+            svuint64_t vacc = svld1_u64(mask, xacc + 0);
+            do {
+                /* svprfd(svbool_t, void *, enum svfprop); */
+                svprfd(mask, xinput + 128, SV_PLDL1STRM);
+                ACCRND(vacc, 0);
+                xinput += 8;
+                xsecret += 1;
+                nbStripes--;
+           } while (nbStripes != 0);
+
+           svst1_u64(mask, xacc + 0, vacc);
+        } else if (element_count == 2) { /* sve128 */
+            svbool_t mask = svptrue_pat_b64(SV_VL2);
+            svuint64_t acc0 = svld1_u64(mask, xacc + 0);
+            svuint64_t acc1 = svld1_u64(mask, xacc + 2);
+            svuint64_t acc2 = svld1_u64(mask, xacc + 4);
+            svuint64_t acc3 = svld1_u64(mask, xacc + 6);
+            do {
+                svprfd(mask, xinput + 128, SV_PLDL1STRM);
+                ACCRND(acc0, 0);
+                ACCRND(acc1, 2);
+                ACCRND(acc2, 4);
+                ACCRND(acc3, 6);
+                xinput += 8;
+                xsecret += 1;
+                nbStripes--;
+           } while (nbStripes != 0);
+
+           svst1_u64(mask, xacc + 0, acc0);
+           svst1_u64(mask, xacc + 2, acc1);
+           svst1_u64(mask, xacc + 4, acc2);
+           svst1_u64(mask, xacc + 6, acc3);
+        } else {
+            svbool_t mask = svptrue_pat_b64(SV_VL4);
+            svuint64_t acc0 = svld1_u64(mask, xacc + 0);
+            svuint64_t acc1 = svld1_u64(mask, xacc + 4);
+            do {
+                svprfd(mask, xinput + 128, SV_PLDL1STRM);
+                ACCRND(acc0, 0);
+                ACCRND(acc1, 4);
+                xinput += 8;
+                xsecret += 1;
+                nbStripes--;
+           } while (nbStripes != 0);
+
+           svst1_u64(mask, xacc + 0, acc0);
+           svst1_u64(mask, xacc + 4, acc1);
+       }
+    }
+}
+
+#endif
+
 /* scalar variants - universal */
 
+#if defined(__aarch64__) && (defined(__GNUC__) || defined(__clang__))
+/*
+ * In XXH3_scalarRound(), GCC and Clang have a similar codegen issue, where they
+ * emit an excess mask and a full 64-bit multiply-add (MADD X-form).
+ *
+ * While this might not seem like much, as AArch64 is a 64-bit architecture, only
+ * big Cortex designs have a full 64-bit multiplier.
+ *
+ * On the little cores, the smaller 32-bit multiplier is used, and full 64-bit
+ * multiplies expand to 2-3 multiplies in microcode. This has a major penalty
+ * of up to 4 latency cycles and 2 stall cycles in the multiply pipeline.
+ *
+ * Thankfully, AArch64 still provides the 32-bit long multiply-add (UMADDL) which does
+ * not have this penalty and does the mask automatically.
+ */
+XXH_FORCE_INLINE xxh_u64
+XXH_mult32to64_add64(xxh_u64 lhs, xxh_u64 rhs, xxh_u64 acc)
+{
+    xxh_u64 ret;
+    /* note: %x = 64-bit register, %w = 32-bit register */
+    __asm__("umaddl %x0, %w1, %w2, %x3" : "=r" (ret) : "r" (lhs), "r" (rhs), "r" (acc));
+    return ret;
+}
+#else
+XXH_FORCE_INLINE xxh_u64
+XXH_mult32to64_add64(xxh_u64 lhs, xxh_u64 rhs, xxh_u64 acc)
+{
+    return XXH_mult32to64((xxh_u32)lhs, (xxh_u32)rhs) + acc;
+}
+#endif
+
+/*!
+ * @internal
+ * @brief Scalar round for @ref XXH3_accumulate_512_scalar().
+ *
+ * This is extracted to its own function because the NEON path uses a combination
+ * of NEON and scalar.
+ */
+XXH_FORCE_INLINE void
+XXH3_scalarRound(void* XXH_RESTRICT acc,
+                 void const* XXH_RESTRICT input,
+                 void const* XXH_RESTRICT secret,
+                 size_t lane)
+{
+    xxh_u64* xacc = (xxh_u64*) acc;
+    xxh_u8 const* xinput  = (xxh_u8 const*) input;
+    xxh_u8 const* xsecret = (xxh_u8 const*) secret;
+    XXH_ASSERT(lane < XXH_ACC_NB);
+    XXH_ASSERT(((size_t)acc & (XXH_ACC_ALIGN-1)) == 0);
+    {
+        xxh_u64 const data_val = XXH_readLE64(xinput + lane * 8);
+        xxh_u64 const data_key = data_val ^ XXH_readLE64(xsecret + lane * 8);
+        xacc[lane ^ 1] += data_val; /* swap adjacent lanes */
+        xacc[lane] = XXH_mult32to64_add64(data_key /* & 0xFFFFFFFF */, data_key >> 32, xacc[lane]);
+    }
+}
+
+/*!
+ * @internal
+ * @brief Processes a 64 byte block of data using the scalar path.
+ */
 XXH_FORCE_INLINE void
 XXH3_accumulate_512_scalar(void* XXH_RESTRICT acc,
                      const void* XXH_RESTRICT input,
                      const void* XXH_RESTRICT secret)
 {
-    xxh_u64* const xacc = (xxh_u64*) acc; /* presumed aligned */
-    const xxh_u8* const xinput  = (const xxh_u8*) input;  /* no alignment restriction */
-    const xxh_u8* const xsecret = (const xxh_u8*) secret;   /* no alignment restriction */
     size_t i;
-    XXH_ASSERT(((size_t)acc & (XXH_ACC_ALIGN-1)) == 0);
+    /* ARM GCC refuses to unroll this loop, resulting in a 24% slowdown on ARMv6. */
+#if defined(__GNUC__) && !defined(__clang__) \
+  && (defined(__arm__) || defined(__thumb2__)) \
+  && defined(__ARM_FEATURE_UNALIGNED) /* no unaligned access just wastes bytes */ \
+  && XXH_SIZE_OPT <= 0
+#  pragma GCC unroll 8
+#endif
     for (i=0; i < XXH_ACC_NB; i++) {
-        xxh_u64 const data_val = XXH_readLE64(xinput + 8*i);
-        xxh_u64 const data_key = data_val ^ XXH_readLE64(xsecret + i*8);
-        xacc[i ^ 1] += data_val; /* swap adjacent lanes */
-        xacc[i] += XXH_mult32to64(data_key & 0xFFFFFFFF, data_key >> 32);
+        XXH3_scalarRound(acc, input, secret, i);
     }
 }
+XXH_FORCE_INLINE XXH3_ACCUMULATE_TEMPLATE(scalar)
 
+/*!
+ * @internal
+ * @brief Scalar scramble step for @ref XXH3_scrambleAcc_scalar().
+ *
+ * This is extracted to its own function because the NEON path uses a combination
+ * of NEON and scalar.
+ */
 XXH_FORCE_INLINE void
-XXH3_scrambleAcc_scalar(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
+XXH3_scalarScrambleRound(void* XXH_RESTRICT acc,
+                         void const* XXH_RESTRICT secret,
+                         size_t lane)
 {
     xxh_u64* const xacc = (xxh_u64*) acc;   /* presumed aligned */
     const xxh_u8* const xsecret = (const xxh_u8*) secret;   /* no alignment restriction */
-    size_t i;
     XXH_ASSERT((((size_t)acc) & (XXH_ACC_ALIGN-1)) == 0);
-    for (i=0; i < XXH_ACC_NB; i++) {
-        xxh_u64 const key64 = XXH_readLE64(xsecret + 8*i);
-        xxh_u64 acc64 = xacc[i];
+    XXH_ASSERT(lane < XXH_ACC_NB);
+    {
+        xxh_u64 const key64 = XXH_readLE64(xsecret + lane * 8);
+        xxh_u64 acc64 = xacc[lane];
         acc64 = XXH_xorshift64(acc64, 47);
         acc64 ^= key64;
         acc64 *= XXH_PRIME32_1;
-        xacc[i] = acc64;
+        xacc[lane] = acc64;
+    }
+}
+
+/*!
+ * @internal
+ * @brief Scrambles the accumulators after a large chunk has been read
+ */
+XXH_FORCE_INLINE void
+XXH3_scrambleAcc_scalar(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
+{
+    size_t i;
+    for (i=0; i < XXH_ACC_NB; i++) {
+        XXH3_scalarScrambleRound(acc, secret, i);
     }
 }
 
@@ -4239,15 +5068,16 @@ XXH3_initCustomSecret_scalar(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
     const xxh_u8* kSecretPtr = XXH3_kSecret;
     XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 15) == 0);
 
-#if defined(__clang__) && defined(__aarch64__)
+#if defined(__GNUC__) && defined(__aarch64__)
     /*
      * UGLY HACK:
-     * Clang generates a bunch of MOV/MOVK pairs for aarch64, and they are
+     * GCC and Clang generate a bunch of MOV/MOVK pairs for aarch64, and they are
      * placed sequentially, in order, at the top of the unrolled loop.
      *
      * While MOVK is great for generating constants (2 cycles for a 64-bit
-     * constant compared to 4 cycles for LDR), long MOVK chains stall the
-     * integer pipelines:
+     * constant compared to 4 cycles for LDR), it fights for bandwidth with
+     * the arithmetic instructions.
+     *
      *   I   L   S
      * MOVK
      * MOVK
@@ -4256,7 +5086,7 @@ XXH3_initCustomSecret_scalar(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
      * ADD
      * SUB      STR
      *          STR
-     * By forcing loads from memory (as the asm line causes Clang to assume
+     * By forcing loads from memory (as the asm line causes the compiler to assume
      * that XXH3_kSecretPtr has been changed), the pipelines are used more
      * efficiently:
      *   I   L   S
@@ -4264,23 +5094,20 @@ XXH3_initCustomSecret_scalar(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
      *  ADD LDR
      *  SUB     STR
      *          STR
+     *
+     * See XXH3_NEON_LANES for details on the pipeline.
+     *
      * XXH3_64bits_withSeed, len == 256, Snapdragon 835
      *   without hack: 2654.4 MB/s
      *   with hack:    3202.9 MB/s
      */
     XXH_COMPILER_GUARD(kSecretPtr);
 #endif
-    /*
-     * Note: in debug mode, this overrides the asm optimization
-     * and Clang will emit MOVK chains again.
-     */
-    XXH_ASSERT(kSecretPtr == XXH3_kSecret);
-
     {   int const nbRounds = XXH_SECRET_DEFAULT_SIZE / 16;
         int i;
         for (i=0; i < nbRounds; i++) {
             /*
-             * The asm hack causes Clang to assume that kSecretPtr aliases with
+             * The asm hack causes the compiler to assume that kSecretPtr aliases with
              * customSecret, and on aarch64, this prevented LDP from merging two
              * loads together for free. Putting the loads together before the stores
              * properly generates LDP.
@@ -4293,7 +5120,7 @@ XXH3_initCustomSecret_scalar(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
 }
 
 
-typedef void (*XXH3_f_accumulate_512)(void* XXH_RESTRICT, const void*, const void*);
+typedef void (*XXH3_f_accumulate)(xxh_u64* XXH_RESTRICT, const xxh_u8* XXH_RESTRICT, const xxh_u8* XXH_RESTRICT, size_t);
 typedef void (*XXH3_f_scrambleAcc)(void* XXH_RESTRICT, const void*);
 typedef void (*XXH3_f_initCustomSecret)(void* XXH_RESTRICT, xxh_u64);
 
@@ -4301,82 +5128,63 @@ typedef void (*XXH3_f_initCustomSecret)(void* XXH_RESTRICT, xxh_u64);
 #if (XXH_VECTOR == XXH_AVX512)
 
 #define XXH3_accumulate_512 XXH3_accumulate_512_avx512
+#define XXH3_accumulate     XXH3_accumulate_avx512
 #define XXH3_scrambleAcc    XXH3_scrambleAcc_avx512
 #define XXH3_initCustomSecret XXH3_initCustomSecret_avx512
 
 #elif (XXH_VECTOR == XXH_AVX2)
 
 #define XXH3_accumulate_512 XXH3_accumulate_512_avx2
+#define XXH3_accumulate     XXH3_accumulate_avx2
 #define XXH3_scrambleAcc    XXH3_scrambleAcc_avx2
 #define XXH3_initCustomSecret XXH3_initCustomSecret_avx2
 
 #elif (XXH_VECTOR == XXH_SSE2)
 
 #define XXH3_accumulate_512 XXH3_accumulate_512_sse2
+#define XXH3_accumulate     XXH3_accumulate_sse2
 #define XXH3_scrambleAcc    XXH3_scrambleAcc_sse2
 #define XXH3_initCustomSecret XXH3_initCustomSecret_sse2
 
 #elif (XXH_VECTOR == XXH_NEON)
 
 #define XXH3_accumulate_512 XXH3_accumulate_512_neon
+#define XXH3_accumulate     XXH3_accumulate_neon
 #define XXH3_scrambleAcc    XXH3_scrambleAcc_neon
 #define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
 
 #elif (XXH_VECTOR == XXH_VSX)
 
 #define XXH3_accumulate_512 XXH3_accumulate_512_vsx
+#define XXH3_accumulate     XXH3_accumulate_vsx
 #define XXH3_scrambleAcc    XXH3_scrambleAcc_vsx
 #define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
 
+#elif (XXH_VECTOR == XXH_SVE)
+#define XXH3_accumulate_512 XXH3_accumulate_512_sve
+#define XXH3_accumulate     XXH3_accumulate_sve
+#define XXH3_scrambleAcc    XXH3_scrambleAcc_scalar
+#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
+
 #else /* scalar */
 
 #define XXH3_accumulate_512 XXH3_accumulate_512_scalar
+#define XXH3_accumulate     XXH3_accumulate_scalar
 #define XXH3_scrambleAcc    XXH3_scrambleAcc_scalar
 #define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
 
 #endif
 
-
-
-#ifndef XXH_PREFETCH_DIST
-#  ifdef __clang__
-#    define XXH_PREFETCH_DIST 320
-#  else
-#    if (XXH_VECTOR == XXH_AVX512)
-#      define XXH_PREFETCH_DIST 512
-#    else
-#      define XXH_PREFETCH_DIST 384
-#    endif
-#  endif  /* __clang__ */
-#endif  /* XXH_PREFETCH_DIST */
-
-/*
- * XXH3_accumulate()
- * Loops over XXH3_accumulate_512().
- * Assumption: nbStripes will not overflow the secret size
- */
-XXH_FORCE_INLINE void
-XXH3_accumulate(     xxh_u64* XXH_RESTRICT acc,
-                const xxh_u8* XXH_RESTRICT input,
-                const xxh_u8* XXH_RESTRICT secret,
-                      size_t nbStripes,
-                      XXH3_f_accumulate_512 f_acc512)
-{
-    size_t n;
-    for (n = 0; n < nbStripes; n++ ) {
-        const xxh_u8* const in = input + n*XXH_STRIPE_LEN;
-        XXH_PREFETCH(in + XXH_PREFETCH_DIST);
-        f_acc512(acc,
-                 in,
-                 secret + n*XXH_SECRET_CONSUME_RATE);
-    }
-}
+#if XXH_SIZE_OPT >= 1 /* don't do SIMD for initialization */
+#  undef XXH3_initCustomSecret
+#  define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
+#endif
 
 XXH_FORCE_INLINE void
 XXH3_hashLong_internal_loop(xxh_u64* XXH_RESTRICT acc,
                       const xxh_u8* XXH_RESTRICT input, size_t len,
                       const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
-                            XXH3_f_accumulate_512 f_acc512,
+                            XXH3_f_accumulate f_acc,
                             XXH3_f_scrambleAcc f_scramble)
 {
     size_t const nbStripesPerBlock = (secretSize - XXH_STRIPE_LEN) / XXH_SECRET_CONSUME_RATE;
@@ -4388,7 +5196,7 @@ XXH3_hashLong_internal_loop(xxh_u64* XXH_RESTRICT acc,
     XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);
 
     for (n = 0; n < nb_blocks; n++) {
-        XXH3_accumulate(acc, input + n*block_len, secret, nbStripesPerBlock, f_acc512);
+        f_acc(acc, input + n*block_len, secret, nbStripesPerBlock);
         f_scramble(acc, secret + secretSize - XXH_STRIPE_LEN);
     }
 
@@ -4396,12 +5204,12 @@ XXH3_hashLong_internal_loop(xxh_u64* XXH_RESTRICT acc,
     XXH_ASSERT(len > XXH_STRIPE_LEN);
     {   size_t const nbStripes = ((len - 1) - (block_len * nb_blocks)) / XXH_STRIPE_LEN;
         XXH_ASSERT(nbStripes <= (secretSize / XXH_SECRET_CONSUME_RATE));
-        XXH3_accumulate(acc, input + nb_blocks*block_len, secret, nbStripes, f_acc512);
+        f_acc(acc, input + nb_blocks*block_len, secret, nbStripes);
 
         /* last stripe */
         {   const xxh_u8* const p = input + len - XXH_STRIPE_LEN;
 #define XXH_SECRET_LASTACC_START 7  /* not aligned on 8, last secret is different from acc & scrambler */
-            f_acc512(acc, p, secret + secretSize - XXH_STRIPE_LEN - XXH_SECRET_LASTACC_START);
+            XXH3_accumulate_512(acc, p, secret + secretSize - XXH_STRIPE_LEN - XXH_SECRET_LASTACC_START);
     }   }
 }
 
@@ -4446,12 +5254,12 @@ XXH3_mergeAccs(const xxh_u64* XXH_RESTRICT acc, const xxh_u8* XXH_RESTRICT secre
 XXH_FORCE_INLINE XXH64_hash_t
 XXH3_hashLong_64b_internal(const void* XXH_RESTRICT input, size_t len,
                            const void* XXH_RESTRICT secret, size_t secretSize,
-                           XXH3_f_accumulate_512 f_acc512,
+                           XXH3_f_accumulate f_acc,
                            XXH3_f_scrambleAcc f_scramble)
 {
     XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[XXH_ACC_NB] = XXH3_INIT_ACC;
 
-    XXH3_hashLong_internal_loop(acc, (const xxh_u8*)input, len, (const xxh_u8*)secret, secretSize, f_acc512, f_scramble);
+    XXH3_hashLong_internal_loop(acc, (const xxh_u8*)input, len, (const xxh_u8*)secret, secretSize, f_acc, f_scramble);
 
     /* converge into final hash */
     XXH_STATIC_ASSERT(sizeof(acc) == 64);
@@ -4465,13 +5273,15 @@ XXH3_hashLong_64b_internal(const void* XXH_RESTRICT input, size_t len,
  * It's important for performance to transmit secret's size (when it's static)
  * so that the compiler can properly optimize the vectorized loop.
  * This makes a big performance difference for "medium" keys (<1 KB) when using AVX instruction set.
+ * When the secret size is unknown, or on GCC 12 where the mix of NO_INLINE and FORCE_INLINE
+ * breaks -Og, this is XXH_NO_INLINE.
  */
-XXH_FORCE_INLINE XXH64_hash_t
+XXH3_WITH_SECRET_INLINE XXH64_hash_t
 XXH3_hashLong_64b_withSecret(const void* XXH_RESTRICT input, size_t len,
                              XXH64_hash_t seed64, const xxh_u8* XXH_RESTRICT secret, size_t secretLen)
 {
     (void)seed64;
-    return XXH3_hashLong_64b_internal(input, len, secret, secretLen, XXH3_accumulate_512, XXH3_scrambleAcc);
+    return XXH3_hashLong_64b_internal(input, len, secret, secretLen, XXH3_accumulate, XXH3_scrambleAcc);
 }
 
 /*
@@ -4480,12 +5290,12 @@ XXH3_hashLong_64b_withSecret(const void* XXH_RESTRICT input, size_t len,
  * Note that inside this no_inline function, we do inline the internal loop,
  * and provide a statically defined secret size to allow optimization of vector loop.
  */
-XXH_NO_INLINE XXH64_hash_t
+XXH_NO_INLINE XXH_PUREF XXH64_hash_t
 XXH3_hashLong_64b_default(const void* XXH_RESTRICT input, size_t len,
                           XXH64_hash_t seed64, const xxh_u8* XXH_RESTRICT secret, size_t secretLen)
 {
     (void)seed64; (void)secret; (void)secretLen;
-    return XXH3_hashLong_64b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_accumulate_512, XXH3_scrambleAcc);
+    return XXH3_hashLong_64b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_accumulate, XXH3_scrambleAcc);
 }
 
 /*
@@ -4502,18 +5312,20 @@ XXH3_hashLong_64b_default(const void* XXH_RESTRICT input, size_t len,
 XXH_FORCE_INLINE XXH64_hash_t
 XXH3_hashLong_64b_withSeed_internal(const void* input, size_t len,
                                     XXH64_hash_t seed,
-                                    XXH3_f_accumulate_512 f_acc512,
+                                    XXH3_f_accumulate f_acc,
                                     XXH3_f_scrambleAcc f_scramble,
                                     XXH3_f_initCustomSecret f_initSec)
 {
+#if XXH_SIZE_OPT <= 0
     if (seed == 0)
         return XXH3_hashLong_64b_internal(input, len,
                                           XXH3_kSecret, sizeof(XXH3_kSecret),
-                                          f_acc512, f_scramble);
+                                          f_acc, f_scramble);
+#endif
     {   XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE];
         f_initSec(secret, seed);
         return XXH3_hashLong_64b_internal(input, len, secret, sizeof(secret),
-                                          f_acc512, f_scramble);
+                                          f_acc, f_scramble);
     }
 }
 
@@ -4521,12 +5333,12 @@ XXH3_hashLong_64b_withSeed_internal(const void* input, size_t len,
  * It's important for performance that XXH3_hashLong is not inlined.
  */
 XXH_NO_INLINE XXH64_hash_t
-XXH3_hashLong_64b_withSeed(const void* input, size_t len,
-                           XXH64_hash_t seed, const xxh_u8* secret, size_t secretLen)
+XXH3_hashLong_64b_withSeed(const void* XXH_RESTRICT input, size_t len,
+                           XXH64_hash_t seed, const xxh_u8* XXH_RESTRICT secret, size_t secretLen)
 {
     (void)secret; (void)secretLen;
     return XXH3_hashLong_64b_withSeed_internal(input, len, seed,
-                XXH3_accumulate_512, XXH3_scrambleAcc, XXH3_initCustomSecret);
+                XXH3_accumulate, XXH3_scrambleAcc, XXH3_initCustomSecret);
 }
 
 
@@ -4558,37 +5370,37 @@ XXH3_64bits_internal(const void* XXH_RESTRICT input, size_t len,
 
 /* ===   Public entry point   === */
 
-/*! @ingroup xxh3_family */
-XXH_PUBLIC_API XXH64_hash_t XXH3_64bits(const void* input, size_t len)
+/*! @ingroup XXH3_family */
+XXH_PUBLIC_API XXH64_hash_t XXH3_64bits(XXH_NOESCAPE const void* input, size_t length)
 {
-    return XXH3_64bits_internal(input, len, 0, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_default);
+    return XXH3_64bits_internal(input, length, 0, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_default);
 }
 
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
 XXH_PUBLIC_API XXH64_hash_t
-XXH3_64bits_withSecret(const void* input, size_t len, const void* secret, size_t secretSize)
+XXH3_64bits_withSecret(XXH_NOESCAPE const void* input, size_t length, XXH_NOESCAPE const void* secret, size_t secretSize)
 {
-    return XXH3_64bits_internal(input, len, 0, secret, secretSize, XXH3_hashLong_64b_withSecret);
+    return XXH3_64bits_internal(input, length, 0, secret, secretSize, XXH3_hashLong_64b_withSecret);
 }
 
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
 XXH_PUBLIC_API XXH64_hash_t
-XXH3_64bits_withSeed(const void* input, size_t len, XXH64_hash_t seed)
+XXH3_64bits_withSeed(XXH_NOESCAPE const void* input, size_t length, XXH64_hash_t seed)
 {
-    return XXH3_64bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_withSeed);
+    return XXH3_64bits_internal(input, length, seed, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_withSeed);
 }
 
 XXH_PUBLIC_API XXH64_hash_t
-XXH3_64bits_withSecretandSeed(const void* input, size_t len, const void* secret, size_t secretSize, XXH64_hash_t seed)
+XXH3_64bits_withSecretandSeed(XXH_NOESCAPE const void* input, size_t length, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed)
 {
-    if (len <= XXH3_MIDSIZE_MAX)
-        return XXH3_64bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), NULL);
-    return XXH3_hashLong_64b_withSecret(input, len, seed, (const xxh_u8*)secret, secretSize);
+    if (length <= XXH3_MIDSIZE_MAX)
+        return XXH3_64bits_internal(input, length, seed, XXH3_kSecret, sizeof(XXH3_kSecret), NULL);
+    return XXH3_hashLong_64b_withSecret(input, length, seed, (const xxh_u8*)secret, secretSize);
 }
 
 
 /* ===   XXH3 streaming   === */
-
+#ifndef XXH_NO_STREAM
 /*
  * Malloc's a pointer that is always aligned to align.
  *
@@ -4612,7 +5424,7 @@ XXH3_64bits_withSecretandSeed(const void* input, size_t len, const void* secret,
  *
  * Align must be a power of 2 and 8 <= align <= 128.
  */
-static void* XXH_alignedMalloc(size_t s, size_t align)
+static XXH_MALLOCF void* XXH_alignedMalloc(size_t s, size_t align)
 {
     XXH_ASSERT(align <= 128 && align >= 8); /* range check */
     XXH_ASSERT((align & (align-1)) == 0);   /* power of 2 */
@@ -4654,7 +5466,7 @@ static void XXH_alignedFree(void* p)
         XXH_free(base);
     }
 }
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
 XXH_PUBLIC_API XXH3_state_t* XXH3_createState(void)
 {
     XXH3_state_t* const state = (XXH3_state_t*)XXH_alignedMalloc(sizeof(XXH3_state_t), 64);
@@ -4663,16 +5475,16 @@ XXH_PUBLIC_API XXH3_state_t* XXH3_createState(void)
     return state;
 }
 
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
 XXH_PUBLIC_API XXH_errorcode XXH3_freeState(XXH3_state_t* statePtr)
 {
     XXH_alignedFree(statePtr);
     return XXH_OK;
 }
 
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
 XXH_PUBLIC_API void
-XXH3_copyState(XXH3_state_t* dst_state, const XXH3_state_t* src_state)
+XXH3_copyState(XXH_NOESCAPE XXH3_state_t* dst_state, XXH_NOESCAPE const XXH3_state_t* src_state)
 {
     XXH_memcpy(dst_state, src_state, sizeof(*dst_state));
 }
@@ -4704,18 +5516,18 @@ XXH3_reset_internal(XXH3_state_t* statePtr,
     statePtr->nbStripesPerBlock = statePtr->secretLimit / XXH_SECRET_CONSUME_RATE;
 }
 
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
 XXH_PUBLIC_API XXH_errorcode
-XXH3_64bits_reset(XXH3_state_t* statePtr)
+XXH3_64bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr)
 {
     if (statePtr == NULL) return XXH_ERROR;
     XXH3_reset_internal(statePtr, 0, XXH3_kSecret, XXH_SECRET_DEFAULT_SIZE);
     return XXH_OK;
 }
 
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
 XXH_PUBLIC_API XXH_errorcode
-XXH3_64bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t secretSize)
+XXH3_64bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize)
 {
     if (statePtr == NULL) return XXH_ERROR;
     XXH3_reset_internal(statePtr, 0, secret, secretSize);
@@ -4724,9 +5536,9 @@ XXH3_64bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t
     return XXH_OK;
 }
 
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
 XXH_PUBLIC_API XXH_errorcode
-XXH3_64bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed)
+XXH3_64bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH64_hash_t seed)
 {
     if (statePtr == NULL) return XXH_ERROR;
     if (seed==0) return XXH3_64bits_reset(statePtr);
@@ -4736,9 +5548,9 @@ XXH3_64bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed)
     return XXH_OK;
 }
 
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
 XXH_PUBLIC_API XXH_errorcode
-XXH3_64bits_reset_withSecretandSeed(XXH3_state_t* statePtr, const void* secret, size_t secretSize, XXH64_hash_t seed64)
+XXH3_64bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed64)
 {
     if (statePtr == NULL) return XXH_ERROR;
     if (secret == NULL) return XXH_ERROR;
@@ -4748,35 +5560,61 @@ XXH3_64bits_reset_withSecretandSeed(XXH3_state_t* statePtr, const void* secret,
     return XXH_OK;
 }
 
-/* Note : when XXH3_consumeStripes() is invoked,
- * there must be a guarantee that at least one more byte must be consumed from input
- * so that the function can blindly consume all stripes using the "normal" secret segment */
-XXH_FORCE_INLINE void
+/*!
+ * @internal
+ * @brief Processes a large input for XXH3_update() and XXH3_digest_long().
+ *
+ * Unlike XXH3_hashLong_internal_loop(), this can process data that overlaps a block.
+ *
+ * @param acc                Pointer to the 8 accumulator lanes
+ * @param nbStripesSoFarPtr  In/out pointer to the number of stripes already consumed in the current block
+ * @param nbStripesPerBlock  Number of stripes in a block
+ * @param input              Input pointer
+ * @param nbStripes          Number of stripes to process
+ * @param secret             Secret pointer
+ * @param secretLimit        Offset of the last block in @p secret
+ * @param f_acc              Pointer to an XXH3_accumulate implementation
+ * @param f_scramble         Pointer to an XXH3_scrambleAcc implementation
+ * @return                   Pointer past the end of @p input after processing
+ */
+XXH_FORCE_INLINE const xxh_u8 *
 XXH3_consumeStripes(xxh_u64* XXH_RESTRICT acc,
                     size_t* XXH_RESTRICT nbStripesSoFarPtr, size_t nbStripesPerBlock,
                     const xxh_u8* XXH_RESTRICT input, size_t nbStripes,
                     const xxh_u8* XXH_RESTRICT secret, size_t secretLimit,
-                    XXH3_f_accumulate_512 f_acc512,
+                    XXH3_f_accumulate f_acc,
                     XXH3_f_scrambleAcc f_scramble)
 {
-    XXH_ASSERT(nbStripes <= nbStripesPerBlock);  /* can handle max 1 scramble per invocation */
-    XXH_ASSERT(*nbStripesSoFarPtr < nbStripesPerBlock);
-    if (nbStripesPerBlock - *nbStripesSoFarPtr <= nbStripes) {
-        /* need a scrambling operation */
-        size_t const nbStripesToEndofBlock = nbStripesPerBlock - *nbStripesSoFarPtr;
-        size_t const nbStripesAfterBlock = nbStripes - nbStripesToEndofBlock;
-        XXH3_accumulate(acc, input, secret + nbStripesSoFarPtr[0] * XXH_SECRET_CONSUME_RATE, nbStripesToEndofBlock, f_acc512);
-        f_scramble(acc, secret + secretLimit);
-        XXH3_accumulate(acc, input + nbStripesToEndofBlock * XXH_STRIPE_LEN, secret, nbStripesAfterBlock, f_acc512);
-        *nbStripesSoFarPtr = nbStripesAfterBlock;
-    } else {
-        XXH3_accumulate(acc, input, secret + nbStripesSoFarPtr[0] * XXH_SECRET_CONSUME_RATE, nbStripes, f_acc512);
+    const xxh_u8* initialSecret = secret + *nbStripesSoFarPtr * XXH_SECRET_CONSUME_RATE;
+    /* Process full blocks */
+    if (nbStripes >= (nbStripesPerBlock - *nbStripesSoFarPtr)) {
+        /* Process the initial partial block... */
+        size_t nbStripesThisIter = nbStripesPerBlock - *nbStripesSoFarPtr;
+
+        do {
+            /* Accumulate and scramble */
+            f_acc(acc, input, initialSecret, nbStripesThisIter);
+            f_scramble(acc, secret + secretLimit);
+            input += nbStripesThisIter * XXH_STRIPE_LEN;
+            nbStripes -= nbStripesThisIter;
+            /* Then continue the loop with the full block size */
+            nbStripesThisIter = nbStripesPerBlock;
+            initialSecret = secret;
+        } while (nbStripes >= nbStripesPerBlock);
+        *nbStripesSoFarPtr = 0;
+    }
+    /* Process a partial block */
+    if (nbStripes > 0) {
+        f_acc(acc, input, initialSecret, nbStripes);
+        input += nbStripes * XXH_STRIPE_LEN;
         *nbStripesSoFarPtr += nbStripes;
     }
+    /* Return end pointer */
+    return input;
 }
 
 #ifndef XXH3_STREAM_USE_STACK
-# ifndef __clang__ /* clang doesn't need additional stack space */
+# if XXH_SIZE_OPT <= 0 && !defined(__clang__) /* clang doesn't need additional stack space */
 #   define XXH3_STREAM_USE_STACK 1
 # endif
 #endif
@@ -4786,7 +5624,7 @@ XXH3_consumeStripes(xxh_u64* XXH_RESTRICT acc,
 XXH_FORCE_INLINE XXH_errorcode
 XXH3_update(XXH3_state_t* XXH_RESTRICT const state,
             const xxh_u8* XXH_RESTRICT input, size_t len,
-            XXH3_f_accumulate_512 f_acc512,
+            XXH3_f_accumulate f_acc,
             XXH3_f_scrambleAcc f_scramble)
 {
     if (input==NULL) {
@@ -4802,7 +5640,8 @@ XXH3_update(XXH3_state_t* XXH_RESTRICT const state,
          * when operating accumulators directly into state.
          * Operating into stack space seems to enable proper optimization.
          * clang, on the other hand, doesn't seem to need this trick */
-        XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[8]; memcpy(acc, state->acc, sizeof(acc));
+        XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[8];
+        XXH_memcpy(acc, state->acc, sizeof(acc));
 #else
         xxh_u64* XXH_RESTRICT const acc = state->acc;
 #endif
@@ -4810,7 +5649,7 @@ XXH3_update(XXH3_state_t* XXH_RESTRICT const state,
         XXH_ASSERT(state->bufferedSize <= XXH3_INTERNALBUFFER_SIZE);
 
         /* small input : just fill in tmp buffer */
-        if (state->bufferedSize + len <= XXH3_INTERNALBUFFER_SIZE) {
+        if (len <= XXH3_INTERNALBUFFER_SIZE - state->bufferedSize) {
             XXH_memcpy(state->buffer + state->bufferedSize, input, len);
             state->bufferedSize += (XXH32_hash_t)len;
             return XXH_OK;
@@ -4832,57 +5671,20 @@ XXH3_update(XXH3_state_t* XXH_RESTRICT const state,
                                &state->nbStripesSoFar, state->nbStripesPerBlock,
                                 state->buffer, XXH3_INTERNALBUFFER_STRIPES,
                                 secret, state->secretLimit,
-                                f_acc512, f_scramble);
+                                f_acc, f_scramble);
             state->bufferedSize = 0;
         }
         XXH_ASSERT(input < bEnd);
-
-        /* large input to consume : ingest per full block */
-        if ((size_t)(bEnd - input) > state->nbStripesPerBlock * XXH_STRIPE_LEN) {
+        if (bEnd - input > XXH3_INTERNALBUFFER_SIZE) {
             size_t nbStripes = (size_t)(bEnd - 1 - input) / XXH_STRIPE_LEN;
-            XXH_ASSERT(state->nbStripesPerBlock >= state->nbStripesSoFar);
-            /* join to current block's end */
-            {   size_t const nbStripesToEnd = state->nbStripesPerBlock - state->nbStripesSoFar;
-                XXH_ASSERT(nbStripes <= nbStripes);
-                XXH3_accumulate(acc, input, secret + state->nbStripesSoFar * XXH_SECRET_CONSUME_RATE, nbStripesToEnd, f_acc512);
-                f_scramble(acc, secret + state->secretLimit);
-                state->nbStripesSoFar = 0;
-                input += nbStripesToEnd * XXH_STRIPE_LEN;
-                nbStripes -= nbStripesToEnd;
-            }
-            /* consume per entire blocks */
-            while(nbStripes >= state->nbStripesPerBlock) {
-                XXH3_accumulate(acc, input, secret, state->nbStripesPerBlock, f_acc512);
-                f_scramble(acc, secret + state->secretLimit);
-                input += state->nbStripesPerBlock * XXH_STRIPE_LEN;
-                nbStripes -= state->nbStripesPerBlock;
-            }
-            /* consume last partial block */
-            XXH3_accumulate(acc, input, secret, nbStripes, f_acc512);
-            input += nbStripes * XXH_STRIPE_LEN;
-            XXH_ASSERT(input < bEnd);  /* at least some bytes left */
-            state->nbStripesSoFar = nbStripes;
-            /* buffer predecessor of last partial stripe */
-            XXH_memcpy(state->buffer + sizeof(state->buffer) - XXH_STRIPE_LEN, input - XXH_STRIPE_LEN, XXH_STRIPE_LEN);
-            XXH_ASSERT(bEnd - input <= XXH_STRIPE_LEN);
-        } else {
-            /* content to consume <= block size */
-            /* Consume input by a multiple of internal buffer size */
-            if (bEnd - input > XXH3_INTERNALBUFFER_SIZE) {
-                const xxh_u8* const limit = bEnd - XXH3_INTERNALBUFFER_SIZE;
-                do {
-                    XXH3_consumeStripes(acc,
+            input = XXH3_consumeStripes(acc,
                                        &state->nbStripesSoFar, state->nbStripesPerBlock,
-                                        input, XXH3_INTERNALBUFFER_STRIPES,
-                                        secret, state->secretLimit,
-                                        f_acc512, f_scramble);
-                    input += XXH3_INTERNALBUFFER_SIZE;
-                } while (input<limit);
-                /* buffer predecessor of last partial stripe */
-                XXH_memcpy(state->buffer + sizeof(state->buffer) - XXH_STRIPE_LEN, input - XXH_STRIPE_LEN, XXH_STRIPE_LEN);
-            }
-        }
+                                       input, nbStripes,
+                                       secret, state->secretLimit,
+                                       f_acc, f_scramble);
+            XXH_memcpy(state->buffer + sizeof(state->buffer) - XXH_STRIPE_LEN, input - XXH_STRIPE_LEN, XXH_STRIPE_LEN);
 
+        }
         /* Some remaining input (always) : buffer it */
         XXH_ASSERT(input < bEnd);
         XXH_ASSERT(bEnd - input <= XXH3_INTERNALBUFFER_SIZE);
@@ -4891,19 +5693,19 @@ XXH3_update(XXH3_state_t* XXH_RESTRICT const state,
         state->bufferedSize = (XXH32_hash_t)(bEnd-input);
 #if defined(XXH3_STREAM_USE_STACK) && XXH3_STREAM_USE_STACK >= 1
         /* save stack accumulators into state */
-        memcpy(state->acc, acc, sizeof(acc));
+        XXH_memcpy(state->acc, acc, sizeof(acc));
 #endif
     }
 
     return XXH_OK;
 }
 
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
 XXH_PUBLIC_API XXH_errorcode
-XXH3_64bits_update(XXH3_state_t* state, const void* input, size_t len)
+XXH3_64bits_update(XXH_NOESCAPE XXH3_state_t* state, XXH_NOESCAPE const void* input, size_t len)
 {
     return XXH3_update(state, (const xxh_u8*)input, len,
-                       XXH3_accumulate_512, XXH3_scrambleAcc);
+                       XXH3_accumulate, XXH3_scrambleAcc);
 }
 
 
@@ -4912,37 +5714,40 @@ XXH3_digest_long (XXH64_hash_t* acc,
                   const XXH3_state_t* state,
                   const unsigned char* secret)
 {
+    xxh_u8 lastStripe[XXH_STRIPE_LEN];
+    const xxh_u8* lastStripePtr;
+
     /*
      * Digest on a local copy. This way, the state remains unaltered, and it can
      * continue ingesting more input afterwards.
      */
     XXH_memcpy(acc, state->acc, sizeof(state->acc));
     if (state->bufferedSize >= XXH_STRIPE_LEN) {
+        /* Consume remaining stripes then point to remaining data in buffer */
         size_t const nbStripes = (state->bufferedSize - 1) / XXH_STRIPE_LEN;
         size_t nbStripesSoFar = state->nbStripesSoFar;
         XXH3_consumeStripes(acc,
                            &nbStripesSoFar, state->nbStripesPerBlock,
                             state->buffer, nbStripes,
                             secret, state->secretLimit,
-                            XXH3_accumulate_512, XXH3_scrambleAcc);
-        /* last stripe */
-        XXH3_accumulate_512(acc,
-                            state->buffer + state->bufferedSize - XXH_STRIPE_LEN,
-                            secret + state->secretLimit - XXH_SECRET_LASTACC_START);
+                            XXH3_accumulate, XXH3_scrambleAcc);
+        lastStripePtr = state->buffer + state->bufferedSize - XXH_STRIPE_LEN;
     } else {  /* bufferedSize < XXH_STRIPE_LEN */
-        xxh_u8 lastStripe[XXH_STRIPE_LEN];
+        /* Copy to temp buffer */
         size_t const catchupSize = XXH_STRIPE_LEN - state->bufferedSize;
         XXH_ASSERT(state->bufferedSize > 0);  /* there is always some input buffered */
         XXH_memcpy(lastStripe, state->buffer + sizeof(state->buffer) - catchupSize, catchupSize);
         XXH_memcpy(lastStripe + catchupSize, state->buffer, state->bufferedSize);
-        XXH3_accumulate_512(acc,
-                            lastStripe,
-                            secret + state->secretLimit - XXH_SECRET_LASTACC_START);
+        lastStripePtr = lastStripe;
     }
+    /* Last stripe */
+    XXH3_accumulate_512(acc,
+                        lastStripePtr,
+                        secret + state->secretLimit - XXH_SECRET_LASTACC_START);
 }
 
-/*! @ingroup xxh3_family */
-XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_digest (const XXH3_state_t* state)
+/*! @ingroup XXH3_family */
+XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_digest (XXH_NOESCAPE const XXH3_state_t* state)
 {
     const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret;
     if (state->totalLen > XXH3_MIDSIZE_MAX) {
@@ -4958,7 +5763,7 @@ XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_digest (const XXH3_state_t* state)
     return XXH3_64bits_withSecret(state->buffer, (size_t)(state->totalLen),
                                   secret, state->secretLimit + XXH_STRIPE_LEN);
 }
-
+#endif /* !XXH_NO_STREAM */
 
 
 /* ==========================================
@@ -4978,7 +5783,7 @@ XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_digest (const XXH3_state_t* state)
  * fast for a _128-bit_ hash on 32-bit (it usually clears XXH64).
  */
 
-XXH_FORCE_INLINE XXH128_hash_t
+XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t
 XXH3_len_1to3_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
 {
     /* A doubled version of 1to3_64b with different constants. */
@@ -5007,7 +5812,7 @@ XXH3_len_1to3_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_
     }
 }
 
-XXH_FORCE_INLINE XXH128_hash_t
+XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t
 XXH3_len_4to8_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
 {
     XXH_ASSERT(input != NULL);
@@ -5034,7 +5839,7 @@ XXH3_len_4to8_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_
     }
 }
 
-XXH_FORCE_INLINE XXH128_hash_t
+XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t
 XXH3_len_9to16_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
 {
     XXH_ASSERT(input != NULL);
@@ -5109,7 +5914,7 @@ XXH3_len_9to16_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64
 /*
  * Assumption: `secret` size is >= XXH3_SECRET_SIZE_MIN
  */
-XXH_FORCE_INLINE XXH128_hash_t
+XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t
 XXH3_len_0to16_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
 {
     XXH_ASSERT(len <= 16);
@@ -5140,7 +5945,7 @@ XXH128_mix32B(XXH128_hash_t acc, const xxh_u8* input_1, const xxh_u8* input_2,
 }
 
 
-XXH_FORCE_INLINE XXH128_hash_t
+XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t
 XXH3_len_17to128_128b(const xxh_u8* XXH_RESTRICT input, size_t len,
                       const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
                       XXH64_hash_t seed)
@@ -5151,6 +5956,16 @@ XXH3_len_17to128_128b(const xxh_u8* XXH_RESTRICT input, size_t len,
     {   XXH128_hash_t acc;
         acc.low64 = len * XXH_PRIME64_1;
         acc.high64 = 0;
+
+#if XXH_SIZE_OPT >= 1
+        {
+            /* Smaller, but slightly slower. */
+            unsigned int i = (unsigned int)(len - 1) / 32;
+            do {
+                acc = XXH128_mix32B(acc, input+16*i, input+len-16*(i+1), secret+32*i, seed);
+            } while (i-- != 0);
+        }
+#else
         if (len > 32) {
             if (len > 64) {
                 if (len > 96) {
@@ -5161,6 +5976,7 @@ XXH3_len_17to128_128b(const xxh_u8* XXH_RESTRICT input, size_t len,
             acc = XXH128_mix32B(acc, input+16, input+len-32, secret+32, seed);
         }
         acc = XXH128_mix32B(acc, input, input+len-16, secret, seed);
+#endif
         {   XXH128_hash_t h128;
             h128.low64  = acc.low64 + acc.high64;
             h128.high64 = (acc.low64    * XXH_PRIME64_1)
@@ -5173,7 +5989,7 @@ XXH3_len_17to128_128b(const xxh_u8* XXH_RESTRICT input, size_t len,
     }
 }
 
-XXH_NO_INLINE XXH128_hash_t
+XXH_NO_INLINE XXH_PUREF XXH128_hash_t
 XXH3_len_129to240_128b(const xxh_u8* XXH_RESTRICT input, size_t len,
                        const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
                        XXH64_hash_t seed)
@@ -5182,25 +5998,34 @@ XXH3_len_129to240_128b(const xxh_u8* XXH_RESTRICT input, size_t len,
     XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX);
 
     {   XXH128_hash_t acc;
-        int const nbRounds = (int)len / 32;
-        int i;
+        unsigned i;
         acc.low64 = len * XXH_PRIME64_1;
         acc.high64 = 0;
-        for (i=0; i<4; i++) {
+        /*
+         * We set `i` to offset + 32. We do this so that the unchanged
+         * `len` can be used as the upper bound. This reaches a sweet spot
+         * where both x86 and aarch64 get simple agen and good codegen
+         * for the loop.
+         */
+        for (i = 32; i < 160; i += 32) {
             acc = XXH128_mix32B(acc,
-                                input  + (32 * i),
-                                input  + (32 * i) + 16,
-                                secret + (32 * i),
+                                input  + i - 32,
+                                input  + i - 16,
+                                secret + i - 32,
                                 seed);
         }
         acc.low64 = XXH3_avalanche(acc.low64);
         acc.high64 = XXH3_avalanche(acc.high64);
-        XXH_ASSERT(nbRounds >= 4);
-        for (i=4 ; i < nbRounds; i++) {
+        /*
+         * NB: `i <= len` will duplicate the last 32 bytes if
+         * len % 32 is zero. This is an unfortunate necessity to keep
+         * the hash result stable.
+         */
+        for (i=160; i <= len; i += 32) {
             acc = XXH128_mix32B(acc,
-                                input + (32 * i),
-                                input + (32 * i) + 16,
-                                secret + XXH3_MIDSIZE_STARTOFFSET + (32 * (i - 4)),
+                                input + i - 32,
+                                input + i - 16,
+                                secret + XXH3_MIDSIZE_STARTOFFSET + i - 160,
                                 seed);
         }
         /* last bytes */
@@ -5208,7 +6033,7 @@ XXH3_len_129to240_128b(const xxh_u8* XXH_RESTRICT input, size_t len,
                             input + len - 16,
                             input + len - 32,
                             secret + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET - 16,
-                            0ULL - seed);
+                            (XXH64_hash_t)0 - seed);
 
         {   XXH128_hash_t h128;
             h128.low64  = acc.low64 + acc.high64;
@@ -5225,12 +6050,12 @@ XXH3_len_129to240_128b(const xxh_u8* XXH_RESTRICT input, size_t len,
 XXH_FORCE_INLINE XXH128_hash_t
 XXH3_hashLong_128b_internal(const void* XXH_RESTRICT input, size_t len,
                             const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
-                            XXH3_f_accumulate_512 f_acc512,
+                            XXH3_f_accumulate f_acc,
                             XXH3_f_scrambleAcc f_scramble)
 {
     XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[XXH_ACC_NB] = XXH3_INIT_ACC;
 
-    XXH3_hashLong_internal_loop(acc, (const xxh_u8*)input, len, secret, secretSize, f_acc512, f_scramble);
+    XXH3_hashLong_internal_loop(acc, (const xxh_u8*)input, len, secret, secretSize, f_acc, f_scramble);
 
     /* converge into final hash */
     XXH_STATIC_ASSERT(sizeof(acc) == 64);
@@ -5248,47 +6073,50 @@ XXH3_hashLong_128b_internal(const void* XXH_RESTRICT input, size_t len,
 }
 
 /*
- * It's important for performance that XXH3_hashLong is not inlined.
+ * It's important for performance that XXH3_hashLong() is not inlined.
  */
-XXH_NO_INLINE XXH128_hash_t
+XXH_NO_INLINE XXH_PUREF XXH128_hash_t
 XXH3_hashLong_128b_default(const void* XXH_RESTRICT input, size_t len,
                            XXH64_hash_t seed64,
                            const void* XXH_RESTRICT secret, size_t secretLen)
 {
     (void)seed64; (void)secret; (void)secretLen;
     return XXH3_hashLong_128b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret),
-                                       XXH3_accumulate_512, XXH3_scrambleAcc);
+                                       XXH3_accumulate, XXH3_scrambleAcc);
 }
 
 /*
- * It's important for performance to pass @secretLen (when it's static)
+ * It's important for performance to pass @p secretLen (when it's static)
  * to the compiler, so that it can properly optimize the vectorized loop.
+ *
+ * When the secret size is unknown, or on GCC 12 where the mix of NO_INLINE and FORCE_INLINE
+ * breaks -Og, this is XXH_NO_INLINE.
  */
-XXH_FORCE_INLINE XXH128_hash_t
+XXH3_WITH_SECRET_INLINE XXH128_hash_t
 XXH3_hashLong_128b_withSecret(const void* XXH_RESTRICT input, size_t len,
                               XXH64_hash_t seed64,
                               const void* XXH_RESTRICT secret, size_t secretLen)
 {
     (void)seed64;
     return XXH3_hashLong_128b_internal(input, len, (const xxh_u8*)secret, secretLen,
-                                       XXH3_accumulate_512, XXH3_scrambleAcc);
+                                       XXH3_accumulate, XXH3_scrambleAcc);
 }
 
 XXH_FORCE_INLINE XXH128_hash_t
 XXH3_hashLong_128b_withSeed_internal(const void* XXH_RESTRICT input, size_t len,
                                 XXH64_hash_t seed64,
-                                XXH3_f_accumulate_512 f_acc512,
+                                XXH3_f_accumulate f_acc,
                                 XXH3_f_scrambleAcc f_scramble,
                                 XXH3_f_initCustomSecret f_initSec)
 {
     if (seed64 == 0)
         return XXH3_hashLong_128b_internal(input, len,
                                            XXH3_kSecret, sizeof(XXH3_kSecret),
-                                           f_acc512, f_scramble);
+                                           f_acc, f_scramble);
     {   XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE];
         f_initSec(secret, seed64);
         return XXH3_hashLong_128b_internal(input, len, (const xxh_u8*)secret, sizeof(secret),
-                                           f_acc512, f_scramble);
+                                           f_acc, f_scramble);
     }
 }
 
@@ -5301,7 +6129,7 @@ XXH3_hashLong_128b_withSeed(const void* input, size_t len,
 {
     (void)secret; (void)secretLen;
     return XXH3_hashLong_128b_withSeed_internal(input, len, seed64,
-                XXH3_accumulate_512, XXH3_scrambleAcc, XXH3_initCustomSecret);
+                XXH3_accumulate, XXH3_scrambleAcc, XXH3_initCustomSecret);
 }
 
 typedef XXH128_hash_t (*XXH3_hashLong128_f)(const void* XXH_RESTRICT, size_t,
@@ -5331,94 +6159,93 @@ XXH3_128bits_internal(const void* input, size_t len,
 
 /* ===   Public XXH128 API   === */
 
-/*! @ingroup xxh3_family */
-XXH_PUBLIC_API XXH128_hash_t XXH3_128bits(const void* input, size_t len)
+/*! @ingroup XXH3_family */
+XXH_PUBLIC_API XXH128_hash_t XXH3_128bits(XXH_NOESCAPE const void* input, size_t len)
 {
     return XXH3_128bits_internal(input, len, 0,
                                  XXH3_kSecret, sizeof(XXH3_kSecret),
                                  XXH3_hashLong_128b_default);
 }
 
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
 XXH_PUBLIC_API XXH128_hash_t
-XXH3_128bits_withSecret(const void* input, size_t len, const void* secret, size_t secretSize)
+XXH3_128bits_withSecret(XXH_NOESCAPE const void* input, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize)
 {
     return XXH3_128bits_internal(input, len, 0,
                                  (const xxh_u8*)secret, secretSize,
                                  XXH3_hashLong_128b_withSecret);
 }
 
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
 XXH_PUBLIC_API XXH128_hash_t
-XXH3_128bits_withSeed(const void* input, size_t len, XXH64_hash_t seed)
+XXH3_128bits_withSeed(XXH_NOESCAPE const void* input, size_t len, XXH64_hash_t seed)
 {
     return XXH3_128bits_internal(input, len, seed,
                                  XXH3_kSecret, sizeof(XXH3_kSecret),
                                  XXH3_hashLong_128b_withSeed);
 }
 
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
 XXH_PUBLIC_API XXH128_hash_t
-XXH3_128bits_withSecretandSeed(const void* input, size_t len, const void* secret, size_t secretSize, XXH64_hash_t seed)
+XXH3_128bits_withSecretandSeed(XXH_NOESCAPE const void* input, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed)
 {
     if (len <= XXH3_MIDSIZE_MAX)
         return XXH3_128bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), NULL);
     return XXH3_hashLong_128b_withSecret(input, len, seed, secret, secretSize);
 }
 
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
 XXH_PUBLIC_API XXH128_hash_t
-XXH128(const void* input, size_t len, XXH64_hash_t seed)
+XXH128(XXH_NOESCAPE const void* input, size_t len, XXH64_hash_t seed)
 {
     return XXH3_128bits_withSeed(input, len, seed);
 }
 
 
 /* ===   XXH3 128-bit streaming   === */
-
+#ifndef XXH_NO_STREAM
 /*
  * All initialization and update functions are identical to 64-bit streaming variant.
  * The only difference is the finalization routine.
  */
 
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
 XXH_PUBLIC_API XXH_errorcode
-XXH3_128bits_reset(XXH3_state_t* statePtr)
+XXH3_128bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr)
 {
     return XXH3_64bits_reset(statePtr);
 }
 
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
 XXH_PUBLIC_API XXH_errorcode
-XXH3_128bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t secretSize)
+XXH3_128bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize)
 {
     return XXH3_64bits_reset_withSecret(statePtr, secret, secretSize);
 }
 
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
 XXH_PUBLIC_API XXH_errorcode
-XXH3_128bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed)
+XXH3_128bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH64_hash_t seed)
 {
     return XXH3_64bits_reset_withSeed(statePtr, seed);
 }
 
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
 XXH_PUBLIC_API XXH_errorcode
-XXH3_128bits_reset_withSecretandSeed(XXH3_state_t* statePtr, const void* secret, size_t secretSize, XXH64_hash_t seed)
+XXH3_128bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed)
 {
     return XXH3_64bits_reset_withSecretandSeed(statePtr, secret, secretSize, seed);
 }
 
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
 XXH_PUBLIC_API XXH_errorcode
-XXH3_128bits_update(XXH3_state_t* state, const void* input, size_t len)
+XXH3_128bits_update(XXH_NOESCAPE XXH3_state_t* state, XXH_NOESCAPE const void* input, size_t len)
 {
-    return XXH3_update(state, (const xxh_u8*)input, len,
-                       XXH3_accumulate_512, XXH3_scrambleAcc);
+    return XXH3_64bits_update(state, input, len);
 }
 
-/*! @ingroup xxh3_family */
-XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest (const XXH3_state_t* state)
+/*! @ingroup XXH3_family */
+XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest (XXH_NOESCAPE const XXH3_state_t* state)
 {
     const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret;
     if (state->totalLen > XXH3_MIDSIZE_MAX) {
@@ -5442,13 +6269,13 @@ XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest (const XXH3_state_t* state)
     return XXH3_128bits_withSecret(state->buffer, (size_t)(state->totalLen),
                                    secret, state->secretLimit + XXH_STRIPE_LEN);
 }
-
+#endif /* !XXH_NO_STREAM */
 /* 128-bit utility functions */
 
 #include <string.h>   /* memcmp, memcpy */
 
 /* return : 1 is equal, 0 if different */
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
 XXH_PUBLIC_API int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2)
 {
     /* note : XXH128_hash_t is compact, it has no padding byte */
@@ -5456,11 +6283,11 @@ XXH_PUBLIC_API int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2)
 }
 
 /* This prototype is compatible with stdlib's qsort().
- * return : >0 if *h128_1  > *h128_2
- *          <0 if *h128_1  < *h128_2
- *          =0 if *h128_1 == *h128_2  */
-/*! @ingroup xxh3_family */
-XXH_PUBLIC_API int XXH128_cmp(const void* h128_1, const void* h128_2)
+ * @return : >0 if *h128_1  > *h128_2
+ *           <0 if *h128_1  < *h128_2
+ *           =0 if *h128_1 == *h128_2  */
+/*! @ingroup XXH3_family */
+XXH_PUBLIC_API int XXH128_cmp(XXH_NOESCAPE const void* h128_1, XXH_NOESCAPE const void* h128_2)
 {
     XXH128_hash_t const h1 = *(const XXH128_hash_t*)h128_1;
     XXH128_hash_t const h2 = *(const XXH128_hash_t*)h128_2;
@@ -5472,9 +6299,9 @@ XXH_PUBLIC_API int XXH128_cmp(const void* h128_1, const void* h128_2)
 
 
 /*======   Canonical representation   ======*/
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
 XXH_PUBLIC_API void
-XXH128_canonicalFromHash(XXH128_canonical_t* dst, XXH128_hash_t hash)
+XXH128_canonicalFromHash(XXH_NOESCAPE XXH128_canonical_t* dst, XXH128_hash_t hash)
 {
     XXH_STATIC_ASSERT(sizeof(XXH128_canonical_t) == sizeof(XXH128_hash_t));
     if (XXH_CPU_LITTLE_ENDIAN) {
@@ -5485,9 +6312,9 @@ XXH128_canonicalFromHash(XXH128_canonical_t* dst, XXH128_hash_t hash)
     XXH_memcpy((char*)dst + sizeof(hash.high64), &hash.low64, sizeof(hash.low64));
 }
 
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
 XXH_PUBLIC_API XXH128_hash_t
-XXH128_hashFromCanonical(const XXH128_canonical_t* src)
+XXH128_hashFromCanonical(XXH_NOESCAPE const XXH128_canonical_t* src)
 {
     XXH128_hash_t h;
     h.high64 = XXH_readBE64(src);
@@ -5503,26 +6330,34 @@ XXH128_hashFromCanonical(const XXH128_canonical_t* src)
  */
 #define XXH_MIN(x, y) (((x) > (y)) ? (y) : (x))
 
-static void XXH3_combine16(void* dst, XXH128_hash_t h128)
+XXH_FORCE_INLINE void XXH3_combine16(void* dst, XXH128_hash_t h128)
 {
     XXH_writeLE64( dst, XXH_readLE64(dst) ^ h128.low64 );
     XXH_writeLE64( (char*)dst+8, XXH_readLE64((char*)dst+8) ^ h128.high64 );
 }
 
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
 XXH_PUBLIC_API XXH_errorcode
-XXH3_generateSecret(void* secretBuffer, size_t secretSize, const void* customSeed, size_t customSeedSize)
+XXH3_generateSecret(XXH_NOESCAPE void* secretBuffer, size_t secretSize, XXH_NOESCAPE const void* customSeed, size_t customSeedSize)
 {
+#if (XXH_DEBUGLEVEL >= 1)
     XXH_ASSERT(secretBuffer != NULL);
-    if (secretBuffer == NULL) return XXH_ERROR;
     XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);
+#else
+    /* production mode, assert() are disabled */
+    if (secretBuffer == NULL) return XXH_ERROR;
     if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR;
+#endif
+
     if (customSeedSize == 0) {
         customSeed = XXH3_kSecret;
         customSeedSize = XXH_SECRET_DEFAULT_SIZE;
     }
+#if (XXH_DEBUGLEVEL >= 1)
     XXH_ASSERT(customSeed != NULL);
+#else
     if (customSeed == NULL) return XXH_ERROR;
+#endif
 
     /* Fill secretBuffer with a copy of customSeed - repeat as needed */
     {   size_t pos = 0;
@@ -5546,9 +6381,9 @@ XXH3_generateSecret(void* secretBuffer, size_t secretSize, const void* customSee
     return XXH_OK;
 }
 
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
 XXH_PUBLIC_API void
-XXH3_generateSecret_fromSeed(void* secretBuffer, XXH64_hash_t seed)
+XXH3_generateSecret_fromSeed(XXH_NOESCAPE void* secretBuffer, XXH64_hash_t seed)
 {
     XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE];
     XXH3_initCustomSecret(secret, seed);
@@ -5561,7 +6396,7 @@ XXH3_generateSecret_fromSeed(void* secretBuffer, XXH64_hash_t seed)
 /* Pop our optimization override from above */
 #if XXH_VECTOR == XXH_AVX2 /* AVX2 */ \
   && defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \
-  && defined(__OPTIMIZE__) && !defined(__OPTIMIZE_SIZE__) /* respect -O0 and -Os */
+  && defined(__OPTIMIZE__) && XXH_SIZE_OPT <= 0 /* respect -O0 and -Os */
 #  pragma GCC pop_options
 #endif
 
@@ -5576,5 +6411,5 @@ XXH3_generateSecret_fromSeed(void* secretBuffer, XXH64_hash_t seed)
 
 
 #if defined (__cplusplus)
-}
+} /* extern "C" */
 #endif
diff --git a/docs/commands.md b/docs/commands.md
index b619dd4..f1d9df7 100644
--- a/docs/commands.md
+++ b/docs/commands.md
@@ -372,7 +372,7 @@ Return
 Syntax
 
 ```
-TCFG key capacity rate
+TCFG key rate capacity
 ```
 
 Config a token bucket of `key`, which has a fixed `capacity` bucket and has tokens  are added at a fixed `rate`. If `key` does not exist, a new key holding a token bucket is created. If both capacity and rate are 0, the token bucket has infinite tokens.
@@ -389,12 +389,11 @@ Syntax
 TCONSUME key tokens [ NORMAL | FORCE | FLEXIBLE ]
 ```
 
-Consume `tokens` from a token from the token bucket stored at `key`. 
+Consume `tokens` from the token bucket stored at `key`. 
 
 The option modify `TCONSUME` behavior if the token bucket has not enough `tokens`.
 
 - NORMAL -- Return 0. The default behavior. 
-
 - FORCE -- Return the requested tokens, which are consumed forcibly.
 - FLEXIBLE -- Return the available tokens.
 
@@ -410,11 +409,11 @@ Syntax
 TINFO key
 ```
 
-Return information about `key`  token bucket.
+Return information of token bucket stored at `key`.
 
 Return
 
-- Array reply with information of the filter.
+- Array reply with information of the token bucket.
 
 Example
 
@@ -433,8 +432,160 @@ swarmkv-sync> tinfo tb-192.168.0.1
  9) "Available"
 10) (integer) 20000
 ```
+## Fair Token Bucket
 
-## Cluster Management
+### FTCFG
+
+Syntax
+
+```
+FTCFG key rate capacity divisor
+```
+
+Configure a fair token bucket at `key`. Besides `rate` and `capacity`, which behave as in a normal token bucket, it takes a `divisor` that sets the size of the table used to record the deficit of each member. The specified divisor must be a power of two. 
+If `key` does not exist, a new key holding a fair token bucket is created. 
+
+Return
+
+- Simple String Reply: OK if the token bucket was configured. 
+
+### FTCONSUME
+
+Syntax
+
+```
+FTCONSUME key member weight tokens
+```
+
+Consume `tokens` as `member` with `weight` from the fair token bucket stored at `key`. 
+
+The `weight` must be within the range of 1 to 20.
+
+Return
+
+- Integer Reply:  the number of tokens that were allowed to be consumed, or -1 if key does not exist.
+
+### FTINFO
+
+Syntax
+
+```
+FTINFO key
+```
+
+Return information of the fair token bucket stored at `key`. The `ActiveMembers` is estimated by HyperLogLog.
+
+Return
+
+- Array reply with information of the fair token bucket.
+
+Example
+
+```
+swarmkv-2-nodes> ftcfg abc 1000 2000 4096
+OK
+swarmkv-2-nodes> ftconsume abc 1 2 20
+(integer) 20
+swarmkv-2-nodes> ftinfo abc
+ 1) "Rate"
+ 2) (integer) 1000
+ 3) "Capacity"
+ 4) (integer) 2000
+ 5) "Consumed"
+ 6) (integer) 120
+ 7) "Refilled"
+ 8) (integer) 2120
+ 9) "Available"
+10) (integer) 2000
+11) "Divisor"
+12) (integer) 4096
+13) "ActiveMembers"
+14) (integer) 3
+```
+
+## Bulk Token Bucket
+
+### BTCFG
+
+Syntax
+
+```
+BTCFG key rate capacity buckets
+```
+
+Configure a bulk token bucket at `key`. Besides `rate` and `capacity`, which behave as in a normal token bucket, it takes `buckets`, which specifies the number of sub-token buckets. It should be far greater than the number of possible members to ensure higher accuracy.
+If `key` does not exist, a new key holding a bulk token bucket is created. 
+
+Return
+
+- Simple String Reply: OK if the token bucket was configured. 
+
+### BTCONSUME
+
+Syntax
+
+```
+BTCONSUME key member tokens  [ NORMAL | FORCE | FLEXIBLE ]
+```
+
+Consume `tokens` from the sub-token bucket `member` of the bulk token bucket stored at `key`. 
+
+
+Return
+
+- Integer Reply:  the number of tokens that were allowed to be consumed, or -1 if key does not exist.
+
+### BTINFO
+
+Syntax
+
+```
+BTINFO key [member]
+```
+
+Return information of the bulk token bucket stored at `key`. The `ActiveMembers` is estimated by HyperLogLog.
+You can optionally specify a `member` to query its available tokens.
+
+Return
+
+- Array reply with information of the bulk token bucket.
+
+Example
+
+```
+swarmkv-2-nodes> BTCFG bulk 50000 100000 2048
+OK
+swarmkv-2-nodes> btconsume bulk user1 2000
+(integer) 2000
+swarmkv-2-nodes> BTINFO bulk
+ 1) "Rate"
+ 2) (integer) 50000
+ 3) "Capacity"
+ 4) (integer) 100000
+ 5) "Buckets"
+ 6) (integer) 2048
+ 7) "ActiveMembers"
+ 8) (integer) 2
+ 9) "Collisions"
+10) (double) 0.000000
+11) "Query"
+12) (integer) -1
+swarmkv-2-nodes> BTINFO bulk user1
+ 1) "Rate"
+ 2) (integer) 50000
+ 3) "Capacity"
+ 4) (integer) 100000
+ 5) "Buckets"
+ 6) (integer) 2048
+ 7) "ActiveMembers"
+ 8) (integer) 2
+ 9) "Collisions"
+10) (double) 0.000000
+11) "Query"
+12) (integer) 100000
+```
+
+## Debug
 
 ### INFO
 
@@ -507,6 +658,53 @@ instantaneous_input_cps: 0.00
 instantaneous_output_cps: 0.00
 ```
 
+### LATENCY
+
+Syntax
+
+```
+LATENCY <subcommand> [<arg> [value] [opt] ...]
+```
+
+The `LATENCY` command returns latency metrics of command execution.
+Subcommands are:
+* COMMAND [command]
+    - Return time-latency samples for a specified command name.
+* PEER [IP:port]
+    - Return time-latency samples for the specified peer.
+* EVENT [event]
+    - Return time-latency samples for the specified event.
+* RESET [command|event|peer]
+    - Reset data of a specified catalog or all the data if no catalog provided.
+
+### DEBUG
+
+Syntax
+
+```
+DEBUG <subcommand> [<arg> [value] [opt] ...].
+```
+
+Subcommands are:
+* SLEEP <seconds>
+    - Stop the server for <seconds>. Decimals allowed.
+* ASSERT
+    - Crash by assertion failed.
+
+### COMMAND LIST
+
+Syntax
+
+```
+COMMAND LIST
+```
+
+The `COMMAND LIST` command returns an array of the server's command names.
+
+Return
+
+- Array reply: a list of command names.
+
 ## Cluster Management
 
 ### CLUSTER KEYS
@@ -546,10 +744,17 @@ The pattern is exactly same as Redis https://redis.io/commands/keys/ .
 | SCARD                    | key                                  | Integer reply: the cardinality (number of elements) of the set, or 0 if key does not exist. |
 | SISMEMBER                | key member                           | Integer reply, specifically:<br/><br/>1 if the element is a member of the set.<br/>0 if the element is not a member of the set, or if key does not exist. |
 | SMEMBERS                 | key                                  | Array reply: all elements of the set. Empty array if the key does not exist. |
+| HSET                     | key field value [field value ...]    |                                                              |
+| HGET                     |                                      |                                                              |
 | TCFG                     | key capacity rate                    | Simple String Reply: OK if the token bucket was configured. Create the token bucket if key does not exist. |
-| TCONSUM                  | key tokens [NORMAL\|FORCE\|FLEXIBLE] | Integer reply： the number of tokens that were allow to consume, or -1 if key does not exist. |
-| TINFO                    | key                                  | Array Reply: <br/>1) "Capacity"<br/>2) (integer) 1000000<br/>3) "Rate"<br/>4) (integer) 1000<br/>5) "Consumed"<br/>6) (integer) 5239123<br/>7) "Available"<br/>8) (integer) 3113<br/> |
-| HGET                     |                                      |                                                              |
+| TCONSUME                 | key tokens [NORMAL\|FORCE\|FLEXIBLE] | Integer reply: the number of tokens that were allowed to be consumed, or -1 if key does not exist. |
+| TINFO                    | key                                  | Array Reply |
+| FTCFG                    | key rate capacity divisor            | Simple String Reply: OK if the token bucket was configured. Create the token bucket if key does not exist. |
+| FTCONSUME                | key member weight tokens             | Integer reply: the number of tokens that were allowed to be consumed, or -1 if key does not exist. |
+| FTINFO                   | key                                  | Array Reply |
+| BTCFG                    | key rate capacity buckets            | Simple String Reply: OK if the token bucket was configured. Create the token bucket if key does not exist. |
+| BTCONSUME                | key member tokens [NORMAL\|FORCE\|FLEXIBLE] | Integer reply: the number of tokens that were allowed to be consumed, or -1 if key does not exist. |
+| BTINFO                   | key [member]                         | Array Reply |
 | KEYSPACE RADD            | key IP port                          | Add replica to the key, create the key if key does not exist. |
 | KEYSPACE XRADD           | key IP Port                          | Add replica to the key, return NULL if the key does not exist. |
 | KEYSPACE RLIST           | key                                  | Node Array reply:<br/>1) "192.168.1.200:5211"<br/>2) "192.168.1.201:5211"<br/>3) "[2001:db8::1]:5211"<br/> |
diff --git a/docs/crdt.md b/docs/crdt.md
new file mode 100644
index 0000000..9798dc4
--- /dev/null
+++ b/docs/crdt.md
@@ -0,0 +1,64 @@
+## CRDT Explained
+
+Swarm KV uses state-based CRDT types to store values. 
+A Conflict-free Replicated Data Type (CRDT) is a data structure that simplifies distributed data storage systems and multi-user applications. In many systems, copies of some data need to be stored on multiple computers (known as replicas).
+### String
+
+Swarm KV implements Last-Write-Wins Register (LWW Register) to store string values. Swarm KV didn't implement logical clock such as  [Version Vector](https://en.wikipedia.org/wiki/Version_vector) or [Lamport Clock](https://en.wikipedia.org/wiki/Lamport_timestamp),  instead,  it uses real time (man 3 gettimeofday) for simplicity.
+
+### Integer
+
+Swarm KV uses Positive-Negative Counter (PN-Counter) to store integer values.
+
+### Set and Hash
+
+Swarm KV implements Add-Wins Observed-Remove Set (OR Set) to store set and hash values. OR Set is state-based and has no tombstone. Please refer to [An optimized conflict-free replicated set](https://arxiv.org/pdf/1210.3368.pdf) for more details.
+
+### Token Bucket
+
+Swarm KV implements a novel Observe-Consumed Token Bucket (OC Token Bucket), which has a PN-Counter to track consumed tokens and a Last-write-wins register to track refilled tokens. It is initialized with two parameters:
+
+- CIR: Committed Information Rate
+- CBS: Committed Burst Size
+
+At the very beginning, the bucket is full, which means it has *CBS* tokens. You can reconfigure the token bucket at any time. Note that the reconfiguration will not refill the token bucket to full.
+
+If a network partition happens, the token bucket is out-of-sync, and each replica has the entire CIR. And after the partition heals, replicas share the CIR again. The OC Token Bucket is robust to overuse as long as the sync interval is reasonable, i.e., 200ms.
+
+### Fair Split Token Bucket
+Fair Split Token Bucket is implemented with a Count-min Sketch and an OC Token Bucket. It achieves [max-min fairness](https://www.ece.rutgers.edu/~marsic/Teaching/CCN/minmax-fairsh.html) which is defined as follows:
+- Resources are allocated in order of increasing demand
+- No source gets a resource share larger than its demand
+- Sources with unsatisfied demands get an equal share of the resource
+
+# References
+Bieniusa, Annette, et al. "[An optimized conflict-free replicated set](https://arxiv.org/pdf/1210.3368.pdf)." *arXiv preprint arXiv:1210.3368* (2012).
+
+[Conflict-free Replicated Data Type (CRDT)](https://en.wikipedia.org/wiki/Conflict-free_replicated_data_type) 
+
+https://crdt.tech/
+
+https://hermes-protocol.com/
+
+[Redis Active-Active Architecture](https://redis.com/redis-enterprise/technology/active-active-geo-distribution/)
+
+[A fast, fault-tolerant & linearizable replication protocol](https://hermes-protocol.com/)
+
+[Bucket4j 8.1.0 Reference](https://bucket4j.com/8.1.0/toc.html)
+
+[An Introduction to Computer Networks: Chapter 24 - Token Bucket Rate Limiting](https://intronetworks.cs.luc.edu/current/html/tokenbucket.html)
+
+[Deficit round robin](https://en.wikipedia.org/wiki/Deficit_round_robin)
+
+[sfq - Stochastic Fairness Queueing](https://man7.org/linux/man-pages/man8/tc-sfq.8.html)
+
+[Max-min fairness](https://www.ece.rutgers.edu/~marsic/Teaching/CCN/minmax-fairsh.html)
+
+Cormode, Graham, and Shan Muthukrishnan. "[An improved data stream summary: the count-min sketch and its applications.](https://twiki.di.uniroma1.it/pub/Ing_algo/WebHome/p14_Cormode_JAl_05.pdf)" Journal of Algorithms 55.1 (2005): 58-75.
+
+Pitel, Guillaume, and Geoffroy Fouquier. "[Count-min-log sketch: Approximately counting with approximate counters.](https://hal.science/hal-01171276/document)" arXiv preprint arXiv:1502.04885 (2015) .
+
+Flajolet, Philippe, et al. "[Hyperloglog: the analysis of a near-optimal cardinality estimation algorithm.](https://hal.science/file/index/docid/406166/filename/FlFuGaMe07.pdf)" Discrete Mathematics and Theoretical Computer Science. Discrete Mathematics and Theoretical Computer Science, 2007.
+
+Cornacchia, Alessandro, et al. "[Staggered HLL: Near-continuous-time cardinality estimation with no overhead.](https://www.sciencedirect.com/science/article/abs/pii/S0140366422002407)" Computer Communications 193 (2022): 168-175.
+
diff --git a/docs/design.md b/docs/design.md
index 1253cd9..c3a3f0f 100644
--- a/docs/design.md
+++ b/docs/design.md
@@ -47,7 +47,7 @@ The command returns a JSON array.
 	},
 	{
 		"slot": "1023",
-    "owner": "192.168.0.1:5211"
+		"owner": "192.168.0.1:5211"
 	}
 ]
 ```
@@ -153,32 +153,6 @@ When a requestor executes a command, it takes the following steps
 - Other replica executes the command, registers the requestor as one of the replica nodes, and send a reply to the requestor
 - The requestor receives the reply and invokes user callback function.
 
-## CRDT Explained
-
-Swarm KV uses state-based CRDT types to store values. 
-
-### String
-
-Swarm KV implements Last-Write-Wins Register (LWW Register) to store string values. Swarm KV didn't implement logical clock such as  [Version Vector](https://en.wikipedia.org/wiki/Version_vector) or [Lamport Clock](https://en.wikipedia.org/wiki/Lamport_timestamp),  instead,  it uses real time (man 3 gettimeofday) for simplicity.
-
-### Integer
-
-Swarm KV uses Positive-Negative Counter (PN-Counter) to store integer values.
-
-### Set and Hash
-
-Swarm KV implements Add-Wins Observed-Remove Set (OR Set) to store set and hash values. OR Set is state-based and has no tombstone. Please referrer [An optimized conflict-free replicated set](https://arxiv.org/pdf/1210.3368.pdf) for more details.
-
-### Token Bucket
-
-Swarm KV implements Observe-Consumed Token Bucket (OC Token Bucket), which has a PN-Counter to track consumed tokens and a Last-write-wins register to track refilled tokens. It is initialized with two parameters:
-
-- CIR: Committed Information Rate
-- CBS: Committed Burst Size
-
-At the very beginning, the bucket is full, which means it has *CBS* tokens. You can reconfigure the token bucket at any time. Note that the reconfiguration will not refill the token bucket to full.
-
-If a network partition happens, the token bucket will be out-of-sync, and each replica has the entire CIR. And after the partition heals, replicas share the CIR again. The OC Token Bucket is robust to overuse as long as the sync interval is reasonable, i.e., 200ms.
 
 ## Fault tolerance
 
@@ -274,16 +248,14 @@ The source files are organized as follows:
 - src/swarmkv_api.c API implementaions.
 - src/swarmkv_store.c  KV operations, key hash table is implemented with [uthash](https://troydhanson.github.io/uthash/). Use [sds](https://github.com/antirez/sds) as a dynamic string library.
 - src/swarmkv_sync.c batching CRDT syncronization.
-- src/t_string.c  commands of data type string.
-- src/t_set.c commands of data type set.
 - src/swarmkv_keyspace.c: The control plane of SwarmKV. Interact with [HashiCorp Consul](https://www.consul.io/) for node discovery, slot assignment, and leader election.
 - src/swarmkv_message.c: Message is encoded to [MessagePack](https://msgpack.org/index.html) format by [mpack](https://github.com/ludocode/mpack) library.
 - src/swarmkv_net.c: High performance P2P communication via [libevent](https://libevent.org/).
 - src/swarmkv_message.c Collecting performance metrics.
-- src/t_xxx.c Implementing SwarmKV data types.
+- src/t_xxx.c Implementing SwarmKV commands of various data types.
 - src/deps/ Dependencies in source codes format, such as [uthash](https://troydhanson.github.io/uthash/), [sds](https://github.com/antirez/sds), [mpack](https://github.com/ludocode/mpack), [timeout](https://25thandclement.com/~william/projects/timeout.c.html), [cJSON](https://github.com/DaveGamble/cJSON), [HDRHistogram](https://github.com/HdrHistogram/HdrHistogram_c)
 - src/vendor/  Dependencies in archive format.
-- CRDT/ Implementing Conflict-free replicated data type.
+- CRDT/ Implementing Conflict-free replicated data types.
 - test/ Unit tests.
 - tools/swarmkv_cli: CLI implementation which is facilitated by [linenoise](https://github.com/antirez/linenoise).
 
@@ -291,7 +263,7 @@ The source files are organized as follows:
 
 [dqlite](https://dqlite.io/) is a C library that implements an embeddable and replicated SQL database engine with high availability and automatic failover.
 
-*[rqlite](https://github.com/rqlite/rqlite)* is an easy-to-use, lightweight, distributed relational database, which uses [SQLite](https://www.sqlite.org/) as its storage engine. 
+[rqlite](https://github.com/rqlite/rqlite) is an easy-to-use, lightweight, distributed relational database, which uses [SQLite](https://www.sqlite.org/) as its storage engine. 
 
 [Zeppelin](https://github.com/Qihoo360/zeppelin) is a Distributed Key-Value Platform that aims to provide excellent performance, reliability, and scalability.
 
@@ -327,22 +299,9 @@ The source files are organized as follows:
 
 # References
 
-Woo, Shinae, et al. "Elastic scaling of stateful network functions." *15th {USENIX} Symposium on Networked Systems Design and Implementation ({NSDI} 18)*. 2018.
-
-Bieniusa, Annette, et al. "[An optimized conflict-free replicated set](https://arxiv.org/pdf/1210.3368.pdf)." *arXiv preprint arXiv:1210.3368* (2012).
-
-[C implementation of the Raft consensus protocol](https://github.com/willemt/raft) by Willem-Hendrik Thiart.
+Woo, Shinae, et al. "Elastic scaling of stateful network functions." *15th {USENIX} Symposium on Networked Systems Design and Implementation ({NSDI} 18)*. 2018. [Source](https://www.usenix.org/conference/nsdi18/presentation/woo)
 
 [Redis Cluster Specification](https://redis.io/topics/cluster-spec)
 
-[Conflict-free Replicated Data Type (CRDT)](https://en.wikipedia.org/wiki/Conflict-free_replicated_data_type) 
-
-https://crdt.tech/
-
-https://hermes-protocol.com/
-
-[Redis Active-Active Architecture](https://redis.com/redis-enterprise/technology/active-active-geo-distribution/)
-
-[A fast, fault-tolerant & linearizable replication protocol](https://hermes-protocol.com/)
+[C implementation of the Raft consensus protocol](https://github.com/willemt/raft) by Willem-Hendrik Thiart.
 
-https://bucket4j.com/8.1.0/toc.html
-\ No newline at end of file
diff --git a/include/swarmkv/swarmkv.h b/include/swarmkv/swarmkv.h
index 46a6897..9fcd7b6 100644
--- a/include/swarmkv/swarmkv.h
+++ b/include/swarmkv/swarmkv.h
@@ -22,6 +22,7 @@ enum swarmkv_reply_type
 	SWARMKV_REPLY_STRING=0,
 	SWARMKV_REPLY_ARRAY,
 	SWARMKV_REPLY_INTEGER,
+	SWARMKV_REPLY_DOUBLE,
 	SWARMKV_REPLY_NIL,
 	SWARMKV_REPLY_STATUS,
 	SWARMKV_REPLY_ERROR,
@@ -32,6 +33,7 @@ struct swarmkv_reply
 {
 	enum swarmkv_reply_type type;
 	long long integer;	/* The integer when type is SWARMKV_REPLY_INTEGER */
+	double dval;		/* The double when type is SWARMKV_REPLY_DOUBLE */
 	int len; 			/* Length of string */
 	char *str; 			/* Used for SWARMKV_REPLY_ERROR, SWARMKV_REPLY_STATUS, SWARMKV_NODE, SWARMKV_REPLY_STRING and SWARMKV_REPLY_VERBATIM */
 	char vtype[4]; 		/* Used for SWARMKV_REPLY_VERBATIM, contains the null
@@ -78,8 +80,8 @@ struct swarmkv_reply *swarmkv_command_on(struct swarmkv *db, const char *target,
 
 //Non-blocking function
 typedef void swarmkv_on_reply_callback_t(const struct swarmkv_reply *reply, void * arg);
-void swarmkv_async_command_on(struct swarmkv *db, swarmkv_on_reply_callback_t * cb, void *cb_arg, const char *target, const char *format, ...);
-void swarmkv_async_command(struct swarmkv *db, swarmkv_on_reply_callback_t * cb, void *cb_arg, const char *format, ...);
+void swarmkv_async_command_on(struct swarmkv *db, swarmkv_on_reply_callback_t * cb, void *cb_arg, const char *target, const char *format, ...)__attribute__ ((format (printf, 5, 6)));
+void swarmkv_async_command(struct swarmkv *db, swarmkv_on_reply_callback_t * cb, void *cb_arg, const char *format, ...)__attribute__ ((format (printf, 4, 5)));
 
 void swarmkv_get(struct swarmkv * db, 
 	const char * key, size_t keylen, swarmkv_on_reply_callback_t * cb, void * arg);
@@ -102,7 +104,8 @@ void swarmkv_smembers(struct swarmkv *db, const char* key, size_t keylen, swarmk
 void swarmkv_scard(struct swarmkv *db, const char* key, size_t keylen, swarmkv_on_reply_callback_t *cb, void *cb_arg);
 
 void swarmkv_tconsume(struct swarmkv * db, const char * key, size_t keylen, long long tokens, swarmkv_on_reply_callback_t *cb, void *cb_arg);
-
+void swarmkv_ftconsume(struct swarmkv * db, const char * key, size_t keylen, const char * member, size_t member_len, long long weight, long long tokens, swarmkv_on_reply_callback_t *cb, void *cb_arg);
+void swarmkv_btconsume(struct swarmkv * db, const char * key, size_t keylen, const char * member, size_t member_len, long long tokens, swarmkv_on_reply_callback_t *cb, void *cb_arg);
 //Used by swarmkv-cli
 size_t swarmkv_get_possible_command_name(struct swarmkv *db, const char *prefix, const char* cmd_names[], size_t sz);
 char *swarmkv_get_command_hint(struct swarmkv *db, const char* cmd_name);
diff --git a/readme.md b/readme.md
index ea51025..a50c8ef 100644
--- a/readme.md
+++ b/readme.md
@@ -28,8 +28,9 @@ SwarmKV Data Types
 - Integer
 - Set
 - Hash Table
-- Bitmap [todo]
-- Token Bucket
+- Token Bucket, Fair Token Bucket and Bulk Token Bucket
+- Count-min Sketch [Todo]
+- HyperLogLog [Todo]
 
 
 
@@ -147,4 +148,5 @@ int main(int argc, char **argv)
 Here are some specific details about the SwarmKV.
 * [Commands](./docs/commands.md)
 * [Design](./docs/design.md)
-* [CLI](./docs/cli.md)
-\ No newline at end of file
+* [Command-line interface (CLI)](./docs/cli.md)
+* [Conflict-free Replicated Data Type (CRDT)](./docs/crdt.md)
diff --git a/src/inc_internal/swarmkv_common.h b/src/inc_internal/swarmkv_common.h
index cdf6605..e68fa76 100644
--- a/src/inc_internal/swarmkv_common.h
+++ b/src/inc_internal/swarmkv_common.h
@@ -94,6 +94,7 @@ struct swarmkv_reply *swarmkv_reply_new_string_from_integer(long long integer);
 struct swarmkv_reply *swarmkv_reply_new_verbatim(const char *str, size_t sz, const char *ext);
 
 struct swarmkv_reply *swarmkv_reply_new_integer(long long integer);
+struct swarmkv_reply *swarmkv_reply_new_double(double dval);
 struct swarmkv_reply *swarmkv_reply_new_array(size_t n_element);
 struct swarmkv_reply *swarmkv_reply_new_node(node_t *node, int is_ask);
 struct swarmkv_reply *swarmkv_reply_new_nil(void);
diff --git a/src/inc_internal/swarmkv_store.h b/src/inc_internal/swarmkv_store.h
index 1881226..c9b9d8e 100644
--- a/src/inc_internal/swarmkv_store.h
+++ b/src/inc_internal/swarmkv_store.h
@@ -11,6 +11,8 @@
 #include "or_set.h"
 #include "or_map.h"
 #include "oc_token_bucket.h"
+#include "fair_token_bucket.h"
+#include "bulk_token_bucket.h"
 
 enum sobj_type
 {
@@ -19,6 +21,8 @@ enum sobj_type
 	OBJ_TYPE_SET,
 	OBJ_TYPE_HASH,
 	OBJ_TYPE_TOKEN_BUCKET,
+	OBJ_TYPE_FAIR_TOKEN_BUCKET,
+	OBJ_TYPE_BULK_TOKEN_BUCKET,
 	OBJ_TYPE_UNDEFINED,
 	__SWARMKV_OBJ_TYPE_MAX
 };
@@ -34,6 +38,8 @@ struct sobj
 		struct OR_set *set;
 		struct OR_map *hash;
 		struct OC_token_bucket *bucket;
+		struct fair_token_bucket *ftb;
+		struct bulk_token_bucket *btb;
 		void *raw;
 	};
 };
diff --git a/src/inc_internal/t_token_bucket.h b/src/inc_internal/t_token_bucket.h
index 46ca7fd..90f21cc 100644
--- a/src/inc_internal/t_token_bucket.h
+++ b/src/inc_internal/t_token_bucket.h
@@ -4,4 +4,10 @@ enum cmd_exec_result tcfg_command(struct swarmkv_module *mod_store, const struct
 enum cmd_exec_result tinfo_command(struct swarmkv_module *mod_store, const struct swarmkv_cmd *cmd, const node_t *accessing_node, struct swarmkv_reply **reply);
 enum cmd_exec_result tconsume_command(struct swarmkv_module *mod_store, const struct swarmkv_cmd *cmd, const node_t *accessing_node, struct swarmkv_reply **reply);
 
+enum cmd_exec_result ftcfg_command(struct swarmkv_module *mod_store, const struct swarmkv_cmd *cmd, const node_t *accessing_node, struct swarmkv_reply **reply);
+enum cmd_exec_result ftconsume_command(struct swarmkv_module *mod_store, const struct swarmkv_cmd *cmd, const node_t *accessing_node, struct swarmkv_reply **reply);
+enum cmd_exec_result ftinfo_command(struct swarmkv_module *mod_store, const struct swarmkv_cmd *cmd, const node_t *accessing_node, struct swarmkv_reply **reply);
 
+enum cmd_exec_result btcfg_command(struct swarmkv_module *mod_store, const struct swarmkv_cmd *cmd, const node_t *accessing_node, struct swarmkv_reply **reply);
+enum cmd_exec_result btconsume_command(struct swarmkv_module *mod_store, const struct swarmkv_cmd *cmd, const node_t *accessing_node, struct swarmkv_reply **reply);
+enum cmd_exec_result btinfo_command(struct swarmkv_module *mod_store, const struct swarmkv_cmd *cmd, const node_t *accessing_node, struct swarmkv_reply **reply);
+\ No newline at end of file
diff --git a/src/swarmkv.c b/src/swarmkv.c
index 97da504..4adf862 100644
--- a/src/swarmkv.c
+++ b/src/swarmkv.c
@@ -463,6 +463,21 @@ enum cmd_exec_result config_command(struct swarmkv_module *mod_store, const stru
 {
 	return FINISHED;
 }
+enum cmd_exec_result command_list_command(struct swarmkv_module *mod_db, const struct swarmkv_cmd *cmd, const node_t *accessing_node, struct swarmkv_reply **reply)
+{	/* Sets *reply to an array holding the name of every entry in db->command_table; cmd and accessing_node are not read. */
+	struct swarmkv *db=module2db(mod_db);
+	size_t cnt=HASH_COUNT(db->command_table);	/* number of entries; used to size the reply array up front */
+	struct swarmkv_cmd_spec *spec=NULL, *tmp_spec=NULL;
+	int i=0;
+	*reply=swarmkv_reply_new_array(cnt);
+	HASH_ITER(hh, db->command_table, spec, tmp_spec)
+	{
+		(*reply)->elements[i]=swarmkv_reply_new_string(spec->name, strlen(spec->name));	/* one string element per spec, in iteration order */
+		i++;
+	}
+	assert(i==cnt);	/* the number of elements written must equal the count taken before iterating */
+	return FINISHED;
+}
 struct swarmkv_cmd_spec *get_spec_by_argv(struct swarmkv *db, size_t argc, char* const argv[])
 {
 	struct swarmkv_cmd_spec *spec=NULL;
@@ -963,7 +978,7 @@ void command_spec_init(struct swarmkv *db)
 					hincrby_command, db->mod_store);
 
 	/* Token bucket commands */
-	command_register(&(db->command_table), "TCFG", "key capacity rate",
+	command_register(&(db->command_table), "TCFG", "key rate capacity",
 					3, 1, CMD_KEY_OW, REPLY_ERROR, AUTO_ROUTE, 
 					tcfg_command, db->mod_store);
 	command_register(&(db->command_table), "TCONSUME", "key tokens [NORMAL|FORCE|FLEXIBLE]",
@@ -972,8 +987,26 @@ void command_spec_init(struct swarmkv *db)
 	command_register(&(db->command_table), "TINFO", "key",
 					1, 1, CMD_KEY_RO, REPLY_EMPTY_ARRAY, AUTO_ROUTE,
 					tinfo_command, db->mod_store);
+	command_register(&(db->command_table), "FTCFG", "key rate capacity divisor",
+					4, 1, CMD_KEY_OW, REPLY_ERROR, AUTO_ROUTE, 
+					ftcfg_command, db->mod_store);
+	command_register(&(db->command_table), "FTCONSUME", "key member weight tokens",
+					4, 1, CMD_KEY_RW, REPLY_INT_MINORS1, AUTO_ROUTE,
+					ftconsume_command, db->mod_store);
+	command_register(&(db->command_table), "FTINFO", "key",
+					1, 1, CMD_KEY_RO, REPLY_EMPTY_ARRAY, AUTO_ROUTE,
+					ftinfo_command, db->mod_store);
+	command_register(&(db->command_table), "BTCFG", "key rate capacity number-of-buckets",
+					4, 1, CMD_KEY_OW, REPLY_ERROR, AUTO_ROUTE, 
+					btcfg_command, db->mod_store);
+	command_register(&(db->command_table), "BTCONSUME", "key member tokens [NORMAL|FORCE|FLEXIBLE]",
+					3, 1, CMD_KEY_RW, REPLY_INT_MINORS1, AUTO_ROUTE,
+					btconsume_command, db->mod_store);
+	command_register(&(db->command_table), "BTINFO", "key [member]",
+					1, 1, CMD_KEY_RO, REPLY_EMPTY_ARRAY, AUTO_ROUTE,
+					btinfo_command, db->mod_store);
 
-	/* Debugging Commands */
+	/* Debug Commands */
 	command_register(&(db->command_table), "INFO", "[section]",
 					0, -1, CMD_KEY_NA, REPLY_NA, AUTO_ROUTE,
 					info_command, &db->module);
@@ -983,6 +1016,9 @@ void command_spec_init(struct swarmkv *db)
 	command_register(&(db->command_table), "PING", "IP:port",
 					1, -1, CMD_KEY_NA, REPLY_NA, AUTO_ROUTE,
 					ping_command, &db->module);
+	command_register(&(db->command_table), "COMMAND LIST", "",
+					0, -1, CMD_KEY_NA, REPLY_NA, AUTO_ROUTE,
+					command_list_command, &db->module);	
 	command_register(&(db->command_table), "TUNNEL", "IP:port command ...",
 					2, -1, CMD_KEY_NA, REPLY_NA, AUTO_ROUTE,
 					tunnel_command, &db->module);
@@ -991,6 +1027,9 @@ void command_spec_init(struct swarmkv *db)
 					1, -1, CMD_KEY_NA, REPLY_NA, NOT_AUTO_ROUTE,
 					latency_command, db->mod_monitor);
 
+
+					
+
 	/* low-level state-based CRDT synchronization commands*/
 	command_register(&(db->command_table), "CRDT PULL", "key",
 					1, 2, CMD_KEY_NA, REPLY_NA, NOT_AUTO_ROUTE,
diff --git a/src/swarmkv_api.c b/src/swarmkv_api.c
index 92731f7..46bd07c 100644
--- a/src/swarmkv_api.c
+++ b/src/swarmkv_api.c
@@ -284,6 +284,31 @@ void swarmkv_tconsume(struct swarmkv * db, const char * key, size_t keylen, long
 	swarmkv_cmd_free(cmd);
 	return;
 }
+/* Build the five-argument command {"ftconsume", key, member, weight, tokens} and pass it to exec_for_local() with cb/cb_arg. */
+void swarmkv_ftconsume(struct swarmkv * db, const char * key, size_t keylen, const char * member, size_t member_len, long long weight, long long tokens, swarmkv_on_reply_callback_t *cb, void *cb_arg)
+{
+	struct swarmkv_cmd *req=swarmkv_cmd_new(5);
+	int slot=0;
+	req->argv[slot++]=sdsnew("ftconsume");
+	req->argv[slot++]=sdsnewlen(key, keylen);
+	req->argv[slot++]=sdsnewlen(member, member_len);
+	req->argv[slot++]=sdsfromlonglong(weight);
+	req->argv[slot++]=sdsfromlonglong(tokens);
+	exec_for_local(db, req, NULL, cb, cb_arg);
+	swarmkv_cmd_free(req);	/* the command is released here, right after exec_for_local() returns */
+}
+/* Build the four-argument command {"btconsume", key, member, tokens} and pass it to exec_for_local() with cb/cb_arg. */
+void swarmkv_btconsume(struct swarmkv * db, const char * key, size_t keylen, const char * member, size_t member_len, long long tokens, swarmkv_on_reply_callback_t *cb, void *cb_arg)
+{
+	struct swarmkv_cmd *cmd=swarmkv_cmd_new(4);
+	/* Must name the bulk-bucket command: "ftconsume" is the fair-bucket command, registered as "key member weight tokens". */
+	cmd->argv[0]=sdsnew("btconsume");
+	cmd->argv[1]=sdsnewlen(key, keylen);
+	cmd->argv[2]=sdsnewlen(member, member_len);
+	cmd->argv[3]=sdsfromlonglong(tokens);
+	exec_for_local(db, cmd, NULL, cb, cb_arg);
+	swarmkv_cmd_free(cmd);	/* the command is released here, right after exec_for_local() returns */
+}
 struct blocking_query_ctx
 {
 	pthread_cond_t cond;
diff --git a/src/swarmkv_common.c b/src/swarmkv_common.c
index 553df92..d467ded 100644
--- a/src/swarmkv_common.c
+++ b/src/swarmkv_common.c
@@ -70,6 +70,9 @@ sds swarmkv_reply_format(const struct swarmkv_reply *r, char *prefix) {
     case SWARMKV_REPLY_INTEGER:
         out = sdscatprintf(out,"(integer) %lld\n", r->integer);
     	break;
+	case SWARMKV_REPLY_DOUBLE:
+		out = sdscatprintf(out,"(double) %f\n", r->dval);
+		break;
     case SWARMKV_REPLY_STRING:
     case SWARMKV_REPLY_VERBATIM:
 	case SWARMKV_REPLY_NODE:
@@ -208,6 +211,13 @@ struct swarmkv_reply *swarmkv_reply_new_integer(long long integer)
 	reply->integer=integer;
 	return reply;
 }
+struct swarmkv_reply *swarmkv_reply_new_double(double dval)
+{	/* Allocate one reply via ALLOC and tag it SWARMKV_REPLY_DOUBLE with dval as its value. */
+	struct swarmkv_reply *dbl_reply=ALLOC(struct swarmkv_reply, 1);
+	dbl_reply->dval=dval;
+	dbl_reply->type=SWARMKV_REPLY_DOUBLE;
+	return dbl_reply;
+}
 struct swarmkv_reply *swarmkv_reply_new_nil(void)
 {
 	struct swarmkv_reply *reply=ALLOC(struct swarmkv_reply, 1);
diff --git a/src/swarmkv_message.c b/src/swarmkv_message.c
index 4fee1dd..184ce80 100644
--- a/src/swarmkv_message.c
+++ b/src/swarmkv_message.c
@@ -7,25 +7,29 @@ struct swarmkv_reply* deserialize_reply(mpack_node_t reply_node)
 {
 	struct swarmkv_reply* reply=NULL;
 	mpack_node_t item, array_node, vtype_item;
-	item=mpack_node_map_cstr(reply_node, "reply_type");
+	item=mpack_node_map_cstr(reply_node, "t");
 	size_t i=0;
 	enum swarmkv_reply_type type=mpack_node_int(item);
 	switch(type)
 	{
 		case SWARMKV_REPLY_INTEGER:
-			item=mpack_node_map_cstr(reply_node, "integer");
+			item=mpack_node_map_cstr(reply_node, "int");
 			reply=swarmkv_reply_new_integer(mpack_node_i64(item));
 			break;
+		case SWARMKV_REPLY_DOUBLE:
+			item=mpack_node_map_cstr(reply_node, "dval");
+			reply=swarmkv_reply_new_double(mpack_node_double(item));
+			break;
 		case SWARMKV_REPLY_STRING:
 		case SWARMKV_REPLY_ERROR:
 		case SWARMKV_REPLY_STATUS:
 		case SWARMKV_REPLY_NODE:
-			item=mpack_node_map_cstr(reply_node, "string");
+			item=mpack_node_map_cstr(reply_node, "str");
 			reply=swarmkv_reply_new_string(mpack_node_str(item), mpack_node_strlen(item));
 			reply->type=type;//dirty here
 			break;
 		case SWARMKV_REPLY_VERBATIM:
-			item=mpack_node_map_cstr(reply_node, "string");
+			item=mpack_node_map_cstr(reply_node, "str");
 			vtype_item=mpack_node_map_cstr(reply_node, "vtype");
 			reply=swarmkv_reply_new_verbatim(mpack_node_str(item), mpack_node_strlen(item), mpack_node_str(vtype_item));
 			break;
@@ -33,7 +37,7 @@ struct swarmkv_reply* deserialize_reply(mpack_node_t reply_node)
 			reply=swarmkv_reply_new_nil();
 			break;
 		case SWARMKV_REPLY_ARRAY:		
-			array_node=mpack_node_map_cstr(reply_node, "elements");
+			array_node=mpack_node_map_cstr(reply_node, "arr");
 			reply=swarmkv_reply_new_array(mpack_node_array_length(array_node));
 			for(i=0; i<reply->n_element; i++)
 			{
@@ -102,23 +106,27 @@ void swarmkv_msg_serialize_reply(const struct swarmkv_reply* reply, char **blob,
 	mpack_writer_t writer;
 	mpack_writer_init_growable(&writer, &root_mpack_buff, &root_mpack_sz);
 	mpack_build_map(&writer);
-	mpack_write_cstr(&writer, "reply_type");
+	mpack_write_cstr(&writer, "t");
 	mpack_write_int(&writer, reply->type);
 	switch(reply->type)
 	{
 		case SWARMKV_REPLY_INTEGER:
-			mpack_write_cstr(&writer, "integer");
+			mpack_write_cstr(&writer, "int");
 			mpack_write_i64(&writer, reply->integer);
 			break;
+		case SWARMKV_REPLY_DOUBLE:
+			mpack_write_cstr(&writer, "dval");
+			mpack_write_double(&writer, reply->dval);
+			break;
 		case SWARMKV_REPLY_STRING:
 		case SWARMKV_REPLY_STATUS:
 		case SWARMKV_REPLY_ERROR:
 		case SWARMKV_REPLY_NODE:
-			mpack_write_cstr(&writer, "string");
+			mpack_write_cstr(&writer, "str");
 			mpack_write_str(&writer, reply->str, reply->len);
 			break;
 		case SWARMKV_REPLY_VERBATIM:
-			mpack_write_cstr(&writer, "string");
+			mpack_write_cstr(&writer, "str");
 			mpack_write_str(&writer, reply->str, reply->len);
 			mpack_write_cstr(&writer, "vtype");
 			mpack_write_cstr(&writer, reply->vtype);			
@@ -126,7 +134,7 @@ void swarmkv_msg_serialize_reply(const struct swarmkv_reply* reply, char **blob,
 		case SWARMKV_REPLY_NIL:
 			break;
 		case SWARMKV_REPLY_ARRAY:
-			mpack_write_cstr(&writer, "elements");
+			mpack_write_cstr(&writer, "arr");
 			mpack_build_array(&writer);
 			for(i=0; i<reply->n_element; i++)
 			{
diff --git a/src/swarmkv_store.c b/src/swarmkv_store.c
index 2a0b5e9..3620f16 100644
--- a/src/swarmkv_store.c
+++ b/src/swarmkv_store.c
@@ -17,7 +17,124 @@
 #include <pthread.h>
 #include <sys/time.h>//timercmp
 
-
+struct swarmkv_obj_specs	/* Per-type operations of a stored object; entries are looked up as sobj_specs[obj->type]. */
+{
+	enum sobj_type type;	/* object type this entry describes */
+	const char *type_name;	/* name returned by the TYPE command and shown as "Type" by crdt_info_command */
+	void *(*obj_new) (uuid_t uuid);	/* constructor; sobj_merge_blob calls it when obj->raw is still NULL */
+	void (*obj_free) (void *obj);	/* destructor; called by scontainer_free on obj.raw */
+	void (*obj_serialize) (const void *obj, char **blob, size_t *blob_sz);	/* used by scontainer_serialize to produce the value blob */
+	void (*obj_merge_blob) (void *obj, const char *blob, size_t blob_sz);	/* merges a received value blob into obj (sobj_merge_blob) */
+	size_t (*obj_size)(const void *obj);	/* size contribution added to "Size" by crdt_info_command */
+};
+/* Adapter giving OC_token_bucket_new() the obj_new(uuid) shape required by sobj_specs. */
+static void *__wrap_OC_token_bucket_new(uuid_t uuid)
+{
+	/* All-zero timestamp; the two remaining arguments are passed as 0. */
+	struct timeval zero_time;
+	memset(&zero_time, 0, sizeof(zero_time));
+	return OC_token_bucket_new(uuid, zero_time, 0, 0);
+}
+/* Adapter giving fair_token_bucket_new() the obj_new(uuid) shape required by sobj_specs. */
+static void *__wrap_fair_token_bucket_new(uuid_t uuid)
+{
+	/* All-zero timestamp; the three remaining arguments are passed as 0. */
+	struct timeval zero_time;
+	memset(&zero_time, 0, sizeof(zero_time));
+	return fair_token_bucket_new(uuid, zero_time, 0, 0, 0);
+}
+/* Adapter giving bulk_token_bucket_new() the obj_new(uuid) shape required by sobj_specs. */
+static void *__wrap_bulk_token_bucket_new(uuid_t uuid)
+{
+	/* All-zero timestamp; the three remaining arguments are passed as 0. */
+	struct timeval zero_time;
+	memset(&zero_time, 0, sizeof(zero_time));
+	return bulk_token_bucket_new(uuid, zero_time, 0, 0, 0);
+}
+size_t undefined_obj_mem_size(void *obj)	/* obj_size hook of the "undefined" entry in sobj_specs */
+{
+	return 0;	/* obj is ignored; the reported size is always 0 */
+}
+void undefined_obj_free(void *obj)
+{
+	/* obj_free hook of the "undefined" entry: nothing is released, the slot is only checked to be empty. */
+	assert(obj==NULL);
+}
+struct swarmkv_obj_specs sobj_specs[__SWARMKV_OBJ_TYPE_MAX] =
+{	/* Looked up as sobj_specs[obj->type]; index designators pin each entry to its enum value instead of relying on declaration order. */
+	[OBJ_TYPE_STRING]={
+		.type=OBJ_TYPE_STRING,
+		.type_name="string",
+		.obj_new=(void * (*)(unsigned char *))LWW_register_new,
+		.obj_free=(void (*)(void *))LWW_register_free,
+		.obj_serialize=(void (*)(const void *, char **, size_t *))LWW_register_serialize,
+		.obj_merge_blob=(void (*)(void *, const char *, size_t))LWW_register_merge_blob,
+		.obj_size=(size_t (*)(const void *))LWW_regeister_mem_size
+	},
+	[OBJ_TYPE_INTEGER]={
+		.type=OBJ_TYPE_INTEGER,
+		.type_name="integer",
+		.obj_new=(void * (*)(unsigned char *))PN_counter_new,
+		.obj_free=(void (*)(void *))PN_counter_free,
+		.obj_serialize=(void (*)(const void *, char **, size_t *))PN_counter_serialize,
+		.obj_merge_blob=(void (*)(void *, const char *, size_t))PN_counter_merge_blob,
+		.obj_size=(size_t (*)(const void *))PN_counter_mem_size
+	},
+	[OBJ_TYPE_SET]={
+		.type=OBJ_TYPE_SET,
+		.type_name="set",
+		.obj_new=(void * (*)(unsigned char *))OR_set_new,
+		.obj_free=(void (*)(void *))OR_set_free,
+		.obj_serialize=(void (*)(const void *, char **, size_t *))OR_set_serialize,
+		.obj_merge_blob=(void (*)(void *, const char *, size_t))OR_set_merge_blob,
+		.obj_size=(size_t (*)(const void *))OR_set_mem_size
+	},
+	[OBJ_TYPE_HASH]={
+		.type=OBJ_TYPE_HASH,
+		.type_name="hash",
+		.obj_new=(void * (*)(unsigned char *))OR_map_new,
+		.obj_free=(void (*)(void *))OR_map_free,
+		.obj_serialize=(void (*)(const void *, char **, size_t *))OR_map_serialize,
+		.obj_merge_blob=(void (*)(void *, const char *, size_t))OR_map_merge_blob,
+		.obj_size=(size_t (*)(const void *))OR_map_mem_size
+	},
+	[OBJ_TYPE_TOKEN_BUCKET]={
+		.type=OBJ_TYPE_TOKEN_BUCKET,
+		.type_name="token-bucket",
+		.obj_new=__wrap_OC_token_bucket_new,
+		.obj_free=(void (*)(void *))OC_token_bucket_free,
+		.obj_serialize=(void (*)(const void *, char **, size_t *))OC_token_bucket_serialize,
+		.obj_merge_blob=(void (*)(void *, const char *, size_t))OC_token_bucket_merge_blob,
+		.obj_size=(size_t (*)(const void *))OC_token_bucket_mem_size,
+	},
+	[OBJ_TYPE_FAIR_TOKEN_BUCKET]={
+		.type=OBJ_TYPE_FAIR_TOKEN_BUCKET,
+		.type_name="fair-token-bucket",
+		.obj_new=__wrap_fair_token_bucket_new,
+		.obj_free=(void (*)(void *))fair_token_bucket_free,
+		.obj_serialize=(void (*)(const void *, char **, size_t *))fair_token_bucket_serialize,
+		.obj_merge_blob=(void (*)(void *, const char *, size_t))fair_token_bucket_merge_blob,
+		.obj_size=(size_t (*)(const void *))fair_token_bucket_mem_size,
+	},
+	[OBJ_TYPE_BULK_TOKEN_BUCKET]={
+		.type=OBJ_TYPE_BULK_TOKEN_BUCKET,
+		.type_name="bulk-token-bucket",
+		.obj_new=__wrap_bulk_token_bucket_new,
+		.obj_free=(void (*)(void *))bulk_token_bucket_free,
+		.obj_serialize=(void (*)(const void *, char **, size_t *))bulk_token_bucket_serialize,
+		.obj_merge_blob=(void (*)(void *, const char *, size_t))bulk_token_bucket_merge_blob,
+		.obj_size=(size_t (*)(const void *))bulk_token_bucket_mem_size
+	},
+	[OBJ_TYPE_UNDEFINED]={	/* no constructor, serializer or merger: only free and size are callable for this type */
+		.type=OBJ_TYPE_UNDEFINED,
+		.type_name="undefined",
+		.obj_new=NULL,
+		.obj_free=undefined_obj_free,
+		.obj_serialize=NULL,
+		.obj_merge_blob=NULL,
+		.obj_size=(size_t (*)(const void *))undefined_obj_mem_size
+	}
+};
 #define	MODULE_SWAMRKV_STORE	module_name_str("swarmkv.store")
 #define STORE_SHARD_NUMBER	8
 struct swarmkv_store
@@ -109,35 +226,8 @@ static void scontainer_free(struct scontainer *ctr)
 		utarray_free(ctr->replica_node_list);
 		ctr->replica_node_list=NULL;
 	}
-	switch(ctr->obj.type)
-	{
-		case OBJ_TYPE_STRING:
-			LWW_register_free(ctr->obj.string);
-			ctr->obj.string=NULL;
-			break;
-		case OBJ_TYPE_INTEGER:
-			PN_counter_free(ctr->obj.counter);
-			ctr->obj.counter=NULL;
-			break;
-		case OBJ_TYPE_SET:
-			OR_set_free(ctr->obj.set);
-			ctr->obj.set=NULL;
-			break;
-		case OBJ_TYPE_HASH:
-			OR_map_free(ctr->obj.hash);
-			ctr->obj.hash=NULL;
-			break;
-		case OBJ_TYPE_TOKEN_BUCKET:
-			OC_token_bucket_free(ctr->obj.bucket);
-			ctr->obj.bucket=NULL;
-			break;
-		case OBJ_TYPE_UNDEFINED:
-			assert(ctr->obj.raw==NULL);
-			break;
-		default:
-			assert(0);
-			break;
-	}
+	assert(ctr->obj.type<__SWARMKV_OBJ_TYPE_MAX);
+	sobj_specs[ctr->obj.type].obj_free(ctr->obj.raw);
 	sdsfree(ctr->obj.key);
 	free(ctr);
 	return;
@@ -288,27 +378,7 @@ void scontainer_serialize(struct scontainer *ctr, char **blob, size_t *blob_sz)
 	char *value_blob=NULL;
 	size_t value_blob_sz=0;
 	struct sobj *obj=&ctr->obj;
-	switch(obj->type)
-	{
-		case OBJ_TYPE_STRING:
-			LWW_register_serialize(obj->string, &value_blob, &value_blob_sz);
-			break;
-		case OBJ_TYPE_INTEGER:
-			PN_counter_serialize(obj->counter, &value_blob, &value_blob_sz);
-			break;
-		case OBJ_TYPE_SET:
-			OR_set_serialize(obj->set, &value_blob, &value_blob_sz);
-			break;
-		case OBJ_TYPE_HASH:
-			OR_map_serialize(obj->hash, &value_blob, &value_blob_sz);
-			break;
-		case OBJ_TYPE_TOKEN_BUCKET:
-			OC_token_bucket_serialize(obj->bucket, &value_blob, &value_blob_sz);
-			break;
-		default:
-			assert(0);
-			break;
-	}
+	sobj_specs[obj->type].obj_serialize(obj->raw, &value_blob, &value_blob_sz);
 	char *mpack_buff=NULL;
 	size_t mpack_sz=0;	
 	mpack_sz=sizeof(obj->type)+sizeof(ctr->op_timestamp);
@@ -360,50 +430,11 @@ void sobj_merge_blob(struct sobj *obj, const char *blob, size_t blob_sz, uuid_t
 	offset+=sizeof(size_t);
 	assert(offset+value_blob_sz==blob_sz);
 	const char *value_blob=blob+offset;
-
-	switch(ctr->obj.type)
+	if(!obj->raw)
 	{
-		case OBJ_TYPE_STRING:
-			if(!obj->string)
-			{				
-				obj->string=LWW_register_new(uuid);
-			}
-			LWW_register_merge_blob(obj->string, value_blob, value_blob_sz);
-			break;
-		case OBJ_TYPE_INTEGER:
-			if(!obj->counter)
-			{
-				obj->counter=PN_counter_new(uuid);
-			}
-			PN_counter_merge_blob(obj->counter, value_blob, value_blob_sz);
-			break;
-		case OBJ_TYPE_SET:
-			if(!obj->set)
-			{
-				obj->set=OR_set_new(uuid);
-			}
-			OR_set_merge_blob(obj->set, value_blob, value_blob_sz);
-			break;
-		case OBJ_TYPE_HASH:
-			if(!obj->hash)
-			{
-				obj->hash=OR_map_new(uuid);
-			}
-			OR_map_merge_blob(obj->hash, value_blob, value_blob_sz);
-			break;
-		case OBJ_TYPE_TOKEN_BUCKET:
-			if(!obj->bucket)
-			{
-				struct timeval beginning_of_history;
-				memset(&beginning_of_history, 0, sizeof(beginning_of_history));
-				obj->bucket=OC_token_bucket_new(uuid, beginning_of_history, 0, 0);
-			}
-			OC_token_bucket_merge_blob(obj->bucket, value_blob, value_blob_sz);
-			break;
-		default:
-			assert(0);
-			break;
+		obj->raw=sobj_specs[obj->type].obj_new(uuid);
 	}
+	sobj_specs[obj->type].obj_merge_blob(obj->raw, value_blob, value_blob_sz);
 	return;
 
 }
@@ -570,7 +601,7 @@ void swarmkv_store_periodic(struct swarmkv_module * mod_store)
 }
 struct swarmkv_module *swarmkv_store_new(const struct swarmkv_options *opts, exec_cmd_func *send_cmd, void *handle_send_cmd, uuid_t node_uuid)
 {
-	struct swarmkv_store* store=ALLOC(struct swarmkv_store, 1);
+	struct swarmkv_store *store=ALLOC(struct swarmkv_store, 1);
 	strncpy(store->module.name, "store", sizeof(store->module.name));
 	store->module.mod_ctx=store;
 	store->module.lock=store_lock;
@@ -690,35 +721,6 @@ struct pattern_match_arg
 	sds pattern;
     UT_array *matched_replies;
 };
-const char *type2string(enum sobj_type type)
-{
-	const char *ret_str=NULL;
-	switch(type)
-	{
-		case OBJ_TYPE_STRING:
-			ret_str="string";
-			break;
-		case OBJ_TYPE_INTEGER:
-			ret_str="integer";
-			break;
-		case OBJ_TYPE_SET:
-			ret_str="set";
-			break;
-		case OBJ_TYPE_HASH:
-			ret_str="hash";
-			break;
-		case OBJ_TYPE_TOKEN_BUCKET:
-			ret_str="token-bucket";
-			break;
-		case OBJ_TYPE_UNDEFINED:
-			ret_str="undefined";
-			break;
-		default:
-			assert(0);
-			break;
-	}
-	return ret_str;
-}
 enum cmd_exec_result type_command(struct swarmkv_module *mod_store, const struct swarmkv_cmd *cmd, const node_t *accessing_node, struct swarmkv_reply **reply)
 {
 /*TYPE key*/
@@ -729,8 +731,7 @@ enum cmd_exec_result type_command(struct swarmkv_module *mod_store, const struct
 	{
 		return NEED_KEY_ROUTE;
 	}
-	const char *type_str=type2string(obj->type);
-	*reply=swarmkv_reply_new_string_fmt(type_str);
+	*reply=swarmkv_reply_new_string_fmt(sobj_specs[obj->type].type_name);
 	return FINISHED;
 }
 enum cmd_exec_result crdt_pull_command(struct swarmkv_module *mod_store, const struct swarmkv_cmd *cmd, const node_t *accessing_node, struct swarmkv_reply **reply)
@@ -931,30 +932,7 @@ enum cmd_exec_result crdt_info_command(struct swarmkv_module *mod_store, const s
 		return FINISHED;
 	}
 	size_t sz=0;
-	switch(ctr->obj.type)
-	{
-		case OBJ_TYPE_STRING:
-			sz+=LWW_regeister_size(ctr->obj.string);
-			break;
-		case OBJ_TYPE_INTEGER:
-			sz+=PN_counter_size(ctr->obj.counter);
-			break;
-		case OBJ_TYPE_SET:
-			sz+=OR_set_size(ctr->obj.set);
-			break;
-		case OBJ_TYPE_HASH:
-			sz+=OR_map_size(ctr->obj.hash);
-			break;
-		case OBJ_TYPE_TOKEN_BUCKET:
-			sz+=OC_token_bucket_size(ctr->obj.bucket);
-			break;
-		case OBJ_TYPE_UNDEFINED:
-			sz+=0;
-			break;
-		default:
-			assert(0);
-			break;
-	}
+	sz+=sobj_specs[ctr->obj.type].obj_size(ctr->obj.raw);
 	sz+=sizeof(struct scontainer)+sdslen(ctr->obj.key);
 	size_t n_replica=0;
 	n_replica=ctr->replica_node_list?utarray_len(ctr->replica_node_list):0;
@@ -962,7 +940,7 @@ enum cmd_exec_result crdt_info_command(struct swarmkv_module *mod_store, const s
 	int i=0;
 	*reply=swarmkv_reply_new_array(8);
 	(*reply)->elements[i++]=swarmkv_reply_new_string_fmt("Type");
-	(*reply)->elements[i++]=swarmkv_reply_new_string_fmt(type2string(ctr->obj.type));
+	(*reply)->elements[i++]=swarmkv_reply_new_string_fmt(sobj_specs[ctr->obj.type].type_name);
 	(*reply)->elements[i++]=swarmkv_reply_new_string_fmt("Size");
 	(*reply)->elements[i++]=swarmkv_reply_new_integer(sz);
 	(*reply)->elements[i++]=swarmkv_reply_new_string_fmt("Replicas");
diff --git a/src/t_token_bucket.c b/src/t_token_bucket.c
index db97c96..f710639 100644
--- a/src/t_token_bucket.c
+++ b/src/t_token_bucket.c
@@ -6,6 +6,8 @@
 
 #include <stdlib.h>
 #include <assert.h>
+#include <stdbool.h>
+//Unlike string, set and hash, XTCONSUME and XTINFO can only operate on an initialized token bucket.
 enum cmd_exec_result handle_undefined_object(struct sobj *obj, struct swarmkv_reply **reply)
 {
 	assert(obj->type==OBJ_TYPE_UNDEFINED);
@@ -19,6 +21,26 @@ enum cmd_exec_result handle_undefined_object(struct sobj *obj, struct swarmkv_re
 	}
 	return NEED_KEY_ROUTE;	
 }
+static int get_consume_type(sds s, enum tb_consume_type *consume_type)
+{	/* Map a case-insensitive NORMAL|FORCE|FLEXIBLE keyword to *consume_type. Returns 0 on a match, -1 (output untouched) otherwise. The whole keyword must match: prefixes and the empty string are rejected. */
+	size_t len=sdslen(s);
+	if(len==strlen("NORMAL") && 0==strncasecmp(s, "NORMAL", len))
+	{
+		*consume_type=TB_CONSUME_NORMAL;
+		return 0;
+	}
+	if(len==strlen("FORCE") && 0==strncasecmp(s, "FORCE", len))
+	{
+		*consume_type=TB_CONSUME_FORCE;
+		return 0;
+	}
+	if(len==strlen("FLEXIBLE") && 0==strncasecmp(s, "FLEXIBLE", len))
+	{
+		*consume_type=TB_CONSUME_FLEXIBLE;
+		return 0;
+	}
+	return -1;
+}
 enum cmd_exec_result tcfg_command(struct swarmkv_module *mod_store, const struct swarmkv_cmd *cmd, const node_t *accessing_node, struct swarmkv_reply **reply)
 {
 /*TCFG key rate capacity*/
@@ -27,21 +49,21 @@ enum cmd_exec_result tcfg_command(struct swarmkv_module *mod_store, const struct
 	
 	char *endptr=NULL;
 
-	long long capacity=0, rate=0;	
+	long long rate=0, capacity=0;
 	rate=strtol(cmd->argv[2], &endptr, 10);
-	if(*endptr!='\0' || capacity<0)
+	if(*endptr!='\0' || rate<0)
 	{
 		*reply=swarmkv_reply_new_error(error_arg_not_valid_integer, cmd->argv[2]);
 		return FINISHED; 		
 	}
 
 	capacity=strtol(cmd->argv[3], &endptr, 10);
-	if(*endptr!='\0' || rate<0)
+	if(*endptr!='\0' || capacity<0)
 	{
 		*reply=swarmkv_reply_new_error(error_arg_not_valid_integer, cmd->argv[3]);
 		return FINISHED; 		
 	}
-	
+
 	obj=store_lookup(mod_store, key);
 	if(!obj)
 	{
@@ -55,17 +77,14 @@ enum cmd_exec_result tcfg_command(struct swarmkv_module *mod_store, const struct
 		uuid_t uuid;
 		assert(obj->raw==NULL);
 		store_get_uuid(mod_store, uuid);
-	
+		obj->bucket=OC_token_bucket_new(uuid, now, rate, capacity);
 		obj->type=OBJ_TYPE_TOKEN_BUCKET;
-		obj->bucket=OC_token_bucket_new(uuid, now, capacity, rate);
-		
 		*reply=swarmkv_reply_new_status("OK");
 	}
 	else if(obj->type==OBJ_TYPE_TOKEN_BUCKET)
 	{
-		OC_token_bucket_configure(obj->bucket, now, capacity, rate, 0);
+		OC_token_bucket_configure(obj->bucket, now, rate, capacity);
 		sobj_need_sync(mod_store, obj);
-		
 		*reply=swarmkv_reply_new_status("OK");
 	}
 	else
@@ -85,38 +104,41 @@ enum cmd_exec_result tinfo_command(struct swarmkv_module *mod_store, const struc
 	{
 		return NEED_KEY_ROUTE;		
 	}
-	//Unlike string, set and hash, TCONSUME and TINFO can only operate on an initialized token bucket.
 	if(obj->type==OBJ_TYPE_UNDEFINED)
 	{
 		return handle_undefined_object(obj, reply);
 	}
 	if(obj->type!=OBJ_TYPE_TOKEN_BUCKET)
 	{
-		*reply=swarmkv_reply_new_error(error_wrong_type);		
+		*reply=swarmkv_reply_new_error(error_wrong_type);
 		return FINISHED;
 	}
-	struct OC_token_bucket_info info;
-	memset(&info, 0, sizeof(info));
-	OC_token_bucket_info(obj->bucket,&info);
+
+	struct timeval now;
+	gettimeofday(&now, NULL);
+	
+	
+	struct OC_token_bucket_info oc_info;
+	memset(&oc_info, 0, sizeof(oc_info));
+	OC_token_bucket_info(obj->bucket, now, &oc_info);
 	int i=0;
 	*reply=swarmkv_reply_new_array(10);
-	(*reply)->elements[i++]=swarmkv_reply_new_string_fmt("Capacity");
-	(*reply)->elements[i++]=swarmkv_reply_new_integer(info.CBS);
 	(*reply)->elements[i++]=swarmkv_reply_new_string_fmt("Rate");
-	(*reply)->elements[i++]=swarmkv_reply_new_integer(info.CIR);
+	(*reply)->elements[i++]=swarmkv_reply_new_integer(oc_info.CIR);
+	(*reply)->elements[i++]=swarmkv_reply_new_string_fmt("Capacity");
+	(*reply)->elements[i++]=swarmkv_reply_new_integer(oc_info.CBS);
 	(*reply)->elements[i++]=swarmkv_reply_new_string_fmt("Consumed");
-	(*reply)->elements[i++]=swarmkv_reply_new_integer(info.consumed);
+	(*reply)->elements[i++]=swarmkv_reply_new_integer(oc_info.consumed);
 	(*reply)->elements[i++]=swarmkv_reply_new_string_fmt("Refilled");
-	(*reply)->elements[i++]=swarmkv_reply_new_integer(info.refilled);
+	(*reply)->elements[i++]=swarmkv_reply_new_integer(oc_info.refilled);
 	(*reply)->elements[i++]=swarmkv_reply_new_string_fmt("Available");
-	(*reply)->elements[i++]=swarmkv_reply_new_integer(info.available);
+	(*reply)->elements[i++]=swarmkv_reply_new_integer(oc_info.available);
 	assert(i==10);
 	return FINISHED;
 }
 enum cmd_exec_result tconsume_command(struct swarmkv_module *mod_store, const struct swarmkv_cmd *cmd, const node_t *accessing_node, struct swarmkv_reply **reply)
 {
 /*TCONSUME key tokens [NORMAL|FORCE|FLEXIBLE]*/
-	
 	struct sobj *obj=NULL;
 	const sds key=cmd->argv[1];
 	obj=store_lookup(mod_store, key);
@@ -128,45 +150,342 @@ enum cmd_exec_result tconsume_command(struct swarmkv_module *mod_store, const st
 	{
 		return handle_undefined_object(obj, reply);
 	}
+
+	long long request=0, allocated=0;
+	char *endptr=NULL;
+	request=strtol(cmd->argv[2], &endptr, 10);
+	if(*endptr!='\0' || request<0)
+	{
+		*reply=swarmkv_reply_new_error(error_arg_not_valid_integer, cmd->argv[2]);
+		return FINISHED;
+	}
+
+	enum tb_consume_type consume_type=TB_CONSUME_NORMAL;
+	if(cmd->argc>3)
+	{
+		if(0>get_consume_type(cmd->argv[3], &consume_type))
+		{
+			*reply=swarmkv_reply_new_error(error_arg_string_should_be, cmd->argv[3], "NORMAL|FORCE|FLEXIBLE");
+			return FINISHED;
+		}
+	}
+
 	if(obj->type!=OBJ_TYPE_TOKEN_BUCKET)
 	{
 		*reply=swarmkv_reply_new_error(error_wrong_type);		
 		return FINISHED;
 	}
-	long long request=0, got=0;
+	struct timeval now;
+	gettimeofday(&now, NULL);
+	allocated=OC_token_bucket_consume(obj->bucket, now, consume_type, request);
+	*reply=swarmkv_reply_new_integer(allocated);
+	sobj_need_sync(mod_store, obj);
+	return FINISHED;
+}
+/* True iff num is strictly positive and has exactly one bit set. */
+bool is_power_of_2(long long num)
+{
+    const bool positive = num > 0;
+    /* Clearing the lowest set bit leaves zero only when a single bit was set. */
+    return positive && (num & (num - 1)) == 0;
+}
+enum cmd_exec_result ftcfg_command(struct swarmkv_module *mod_store, const struct swarmkv_cmd *cmd, const node_t *accessing_node, struct swarmkv_reply **reply)
+{
+/* FTCFG key rate capacity divisor
+ * Creates the fair token bucket at key, or reconfigures it when it already exists.
+ * rate and capacity must be non-negative integers; divisor must be a power of two.
+ * Yields NEED_KEY_ROUTE when store_lookup() has no object for key, otherwise FINISHED. */
+	const sds key=cmd->argv[1];
+	char *tail=NULL;
+	long long rate=strtol(cmd->argv[2], &tail, 10);
+	if(rate<0 || *tail!='\0')
+	{
+		*reply=swarmkv_reply_new_error(error_arg_not_valid_integer, cmd->argv[2]);
+		return FINISHED;
+	}
+	long long capacity=strtol(cmd->argv[3], &tail, 10);
+	if(capacity<0 || *tail!='\0')
+	{
+		*reply=swarmkv_reply_new_error(error_arg_not_valid_integer, cmd->argv[3]);
+		return FINISHED;
+	}
+	long long divisor=strtol(cmd->argv[4], &tail, 10);
+	if(*tail!='\0' || !is_power_of_2(divisor))
+	{
+		*reply=swarmkv_reply_new_error(error_arg_not_valid_integer, cmd->argv[4]);
+		return FINISHED;
+	}
+	struct sobj *obj=store_lookup(mod_store, key);
+	if(obj==NULL)
+	{
+		return NEED_KEY_ROUTE;
+	}
+	struct timeval now;
+	gettimeofday(&now, NULL);
+
+	switch(obj->type)
+	{
+		case OBJ_TYPE_UNDEFINED:
+		{
+			/* First configuration: turn the empty slot into a fair token bucket. */
+			uuid_t uuid;
+			assert(obj->raw==NULL);
+			store_get_uuid(mod_store, uuid);
+			obj->type=OBJ_TYPE_FAIR_TOKEN_BUCKET;
+			obj->ftb=fair_token_bucket_new(uuid, now, rate, capacity, divisor);
+			*reply=swarmkv_reply_new_status("OK");
+			break;
+		}
+		case OBJ_TYPE_FAIR_TOKEN_BUCKET:
+			fair_token_bucket_configure(obj->ftb, now, rate, capacity, divisor);
+			sobj_need_sync(mod_store, obj);
+			*reply=swarmkv_reply_new_status("OK");
+			break;
+		default:
+			*reply=swarmkv_reply_new_error(error_wrong_type);
+			break;
+	}
+	return FINISHED;
+}
+enum cmd_exec_result ftconsume_command(struct swarmkv_module *mod_store, const struct swarmkv_cmd *cmd, const node_t *accessing_node, struct swarmkv_reply **reply)
+{
+/* FTCONSUME key member weight tokens
+ * Consumes tokens for member from the fair token bucket at key and replies with the integer
+ * returned by fair_token_bucket_consume(). tokens is validated before weight (accepted range 1..20),
+ * and both are validated before the object's type is checked. */
+	const sds key=cmd->argv[1];
+	const sds member=cmd->argv[2];
+	struct sobj *obj=store_lookup(mod_store, key);
+	if(obj==NULL)
+	{
+		return NEED_KEY_ROUTE;
+	}
+	if(obj->type==OBJ_TYPE_UNDEFINED)
+	{
+		return handle_undefined_object(obj, reply);
+	}
+
+	char *tail=NULL;
+	long long request=strtol(cmd->argv[4], &tail, 10);
+	if(request<0 || *tail!='\0')
+	{
+		*reply=swarmkv_reply_new_error(error_arg_not_valid_integer, cmd->argv[4]);
+		return FINISHED;
+	}
+	long long weight=strtol(cmd->argv[3], &tail, 10);
+	if(*tail!='\0' || !(weight>=1 && weight<=20))
+	{
+		*reply=swarmkv_reply_new_error(error_arg_not_valid_integer, cmd->argv[3]);
+		return FINISHED;
+	}
+	if(obj->type!=OBJ_TYPE_FAIR_TOKEN_BUCKET)
+	{
+		*reply=swarmkv_reply_new_error(error_wrong_type);
+		return FINISHED;
+	}
+
+	struct timeval now;
+	gettimeofday(&now, NULL);
+	long long allocated=fair_token_bucket_consume(obj->ftb, now, member, sdslen(member), weight, TB_CONSUME_NORMAL, request);
+	*reply=swarmkv_reply_new_integer(allocated);
+	sobj_need_sync(mod_store, obj);
+	return FINISHED;
+}
+enum cmd_exec_result ftinfo_command(struct swarmkv_module *mod_store, const struct swarmkv_cmd *cmd, const node_t *accessing_node, struct swarmkv_reply **reply)
+{
+/* FTINFO key
+ * Replies with a 14-element array of label/value pairs for the fair token bucket at key:
+ * Rate, Capacity, Consumed, Refilled, Available, Divisor, ActiveMembers. */
+	const sds key=cmd->argv[1];
+	struct sobj *obj=store_lookup(mod_store, key);
+	if(obj==NULL)
+	{
+		return NEED_KEY_ROUTE;
+	}
+	if(obj->type==OBJ_TYPE_UNDEFINED)
+	{
+		return handle_undefined_object(obj, reply);
+	}
+	if(obj->type!=OBJ_TYPE_FAIR_TOKEN_BUCKET)
+	{
+		*reply=swarmkv_reply_new_error(error_wrong_type);
+		return FINISHED;
+	}
+	struct timeval now;
+	gettimeofday(&now, NULL);
+	struct fair_token_bucket_info snapshot;
+	memset(&snapshot, 0, sizeof(snapshot));
+	fair_token_bucket_info(obj->ftb, now, &snapshot);
+	struct swarmkv_reply *arr=swarmkv_reply_new_array(14);
+	int n=0;
+	/* Alternating label/value pairs. */
+	arr->elements[n++]=swarmkv_reply_new_string_fmt("Rate");
+	arr->elements[n++]=swarmkv_reply_new_integer(snapshot.bucket_info.CIR);
+	arr->elements[n++]=swarmkv_reply_new_string_fmt("Capacity");
+	arr->elements[n++]=swarmkv_reply_new_integer(snapshot.bucket_info.CBS);
+	arr->elements[n++]=swarmkv_reply_new_string_fmt("Consumed");
+	arr->elements[n++]=swarmkv_reply_new_integer(snapshot.bucket_info.consumed);
+	arr->elements[n++]=swarmkv_reply_new_string_fmt("Refilled");
+	arr->elements[n++]=swarmkv_reply_new_integer(snapshot.bucket_info.refilled);
+	arr->elements[n++]=swarmkv_reply_new_string_fmt("Available");
+	arr->elements[n++]=swarmkv_reply_new_integer(snapshot.bucket_info.available);
+	arr->elements[n++]=swarmkv_reply_new_string_fmt("Divisor");
+	arr->elements[n++]=swarmkv_reply_new_integer(snapshot.divisor);
+	arr->elements[n++]=swarmkv_reply_new_string_fmt("ActiveMembers");
+	arr->elements[n++]=swarmkv_reply_new_integer(snapshot.active_key_number);
+	assert(n==14);
+	*reply=arr;
+	return FINISHED;
+}
+enum cmd_exec_result btcfg_command(struct swarmkv_module *mod_store, const struct swarmkv_cmd *cmd, const node_t *accessing_node, struct swarmkv_reply **reply)
+{
+/*BTCFG key rate capacity buckets*/
+	struct sobj *obj=NULL;
+	const sds key=cmd->argv[1];
+	
+	char *endptr=NULL;
+
+	long long rate=0, capacity=0, buckets=0;
+	rate=strtol(cmd->argv[2], &endptr, 10);
+	if(*endptr!='\0' || rate<0)
+	{
 		*reply=swarmkv_reply_new_error(error_arg_not_valid_integer, cmd->argv[2]);
 		return FINISHED; 		
 	}
+	capacity=strtol(cmd->argv[3], &endptr, 10);
+	if(*endptr!='\0' || capacity<0)
+	{
+		*reply=swarmkv_reply_new_error(error_arg_not_valid_integer, cmd->argv[3]);
+		return FINISHED; 		
+	}
+	buckets=strtol(cmd->argv[4], &endptr, 10);
+	if(*endptr!='\0' || !is_power_of_2(buckets))
+	{
+		*reply=swarmkv_reply_new_error(error_arg_not_valid_integer, cmd->argv[4]);
+		return FINISHED; 		
+	}
+	obj=store_lookup(mod_store, key);
+	if(!obj)
+	{
+		return NEED_KEY_ROUTE;	
+	}
 	struct timeval now;
 	gettimeofday(&now, NULL);
-	enum OC_token_bucket_command oc_cmd=OCTB_CMD_CONSUME_NORMAL;
-	if(cmd->argc==4)
+	
+	if(obj->type==OBJ_TYPE_UNDEFINED)
 	{
-		const sds mode=cmd->argv[3];
-		if(0==strncasecmp(mode, "NORMAL", sdslen(mode)))
-		{
-			oc_cmd=OCTB_CMD_CONSUME_NORMAL;
-		}
-		if(0==strncasecmp(mode, "FORCE", sdslen(mode)))
-		{
-			oc_cmd=OCTB_CMD_CONSUME_FORCE;
-		}
-		else if(0==strncasecmp(mode, "FLEXIBLE", sdslen(mode)))
-		{
-			oc_cmd=OCTB_CMD_CONSUME_FLEXIBLE;
-		}
-		else
+		uuid_t uuid;
+		assert(obj->raw==NULL);
+		store_get_uuid(mod_store, uuid);
+	
+		obj->type=OBJ_TYPE_BULK_TOKEN_BUCKET;
+		obj->btb=bulk_token_bucket_new(uuid, now, rate, capacity, buckets);
+		*reply=swarmkv_reply_new_status("OK");
+	}
+	else if(obj->type==OBJ_TYPE_BULK_TOKEN_BUCKET)
+	{
+		bulk_token_bucket_configure(obj->btb, now, rate, capacity, buckets);
+		sobj_need_sync(mod_store, obj);
+		*reply=swarmkv_reply_new_status("OK");
+	}
+	else
+	{
+		*reply=swarmkv_reply_new_error(error_wrong_type);		
+	}	
+	return FINISHED;
+}
+enum cmd_exec_result btconsume_command(struct swarmkv_module *mod_store, const struct swarmkv_cmd *cmd, const node_t *accessing_node, struct swarmkv_reply **reply)
+{
+/*BTCONSUME key member tokens [NORMAL|FORCE|FLEXIBLE]*/	
+	struct sobj *obj=NULL;
+	const sds key=cmd->argv[1];
+	const sds member=cmd->argv[2];
+	obj=store_lookup(mod_store, key);
+	if(!obj)
+	{
+		return NEED_KEY_ROUTE;		
+	}
+	if(obj->type==OBJ_TYPE_UNDEFINED)
+	{
+		return handle_undefined_object(obj, reply);
+	}
+
+	long long request=0, allocated=0;
+	char *endptr=NULL;
+	request=strtol(cmd->argv[3], &endptr, 10);
+	if(*endptr!='\0' || request<0)
+	{
+		*reply=swarmkv_reply_new_error(error_arg_not_valid_integer, cmd->argv[3]);
+		return FINISHED;
+	}
+
+	enum tb_consume_type consume_type=TB_CONSUME_NORMAL;
+	if(cmd->argc>4)
+	{
+		if(0>get_consume_type(cmd->argv[4], &consume_type))
 		{
-			*reply=swarmkv_reply_new_error(error_arg_string_should_be, mode, "NORMAL|FORCE|FLEXIBLE");
+			*reply=swarmkv_reply_new_error(error_arg_string_should_be, cmd->argv[4], "NORMAL|FORCE|FLEXIBLE");
 			return FINISHED;
 		}
 	}
-	got=OC_token_bucket_control(obj->bucket, now, oc_cmd, request);
-	*reply=swarmkv_reply_new_integer(got);
+
+	if(obj->type!=OBJ_TYPE_BULK_TOKEN_BUCKET)
+	{
+		*reply=swarmkv_reply_new_error(error_wrong_type);	
+		return FINISHED;
+	}
+	struct timeval now;
+	gettimeofday(&now, NULL);
+	allocated=bulk_token_bucket_consume(obj->btb, now, member, sdslen(member), consume_type, request);
+
+	*reply=swarmkv_reply_new_integer(allocated);
 	sobj_need_sync(mod_store, obj);
 	return FINISHED;
 }
+enum cmd_exec_result btinfo_command(struct swarmkv_module *mod_store, const struct swarmkv_cmd *cmd, const node_t *accessing_node, struct swarmkv_reply **reply)
+{
+/* BTINFO key [member]
+ * Replies with a 12-element array of label/value pairs for the bulk token bucket at key.
+ * "Query" is bulk_token_bucket_read_available() for member when one is given, -1 otherwise. */
+	const sds key=cmd->argv[1];
+	struct sobj *obj=store_lookup(mod_store, key);
+	if(obj==NULL)
+	{
+		return NEED_KEY_ROUTE;
+	}
+	if(obj->type==OBJ_TYPE_UNDEFINED)
+	{
+		return handle_undefined_object(obj, reply);
+	}
+	if(obj->type!=OBJ_TYPE_BULK_TOKEN_BUCKET)
+	{
+		*reply=swarmkv_reply_new_error(error_wrong_type);
+		return FINISHED;
+	}
+	struct timeval now;
+	gettimeofday(&now, NULL);
+	struct bulk_token_bucket_info snapshot;
+	memset(&snapshot, 0, sizeof(snapshot));
+	bulk_token_bucket_info(obj->btb, now, &snapshot);
+	long long available=-1;
+	if(cmd->argc>2)
+	{
+		available=bulk_token_bucket_read_available(obj->btb, now, cmd->argv[2], sdslen(cmd->argv[2]));
+	}
+	struct swarmkv_reply *arr=swarmkv_reply_new_array(12);
+	int n=0;
+	arr->elements[n++]=swarmkv_reply_new_string_fmt("Rate");
+	arr->elements[n++]=swarmkv_reply_new_integer(snapshot.CIR);
+	arr->elements[n++]=swarmkv_reply_new_string_fmt("Capacity");
+	arr->elements[n++]=swarmkv_reply_new_integer(snapshot.CBS);
+	arr->elements[n++]=swarmkv_reply_new_string_fmt("Buckets");
+	arr->elements[n++]=swarmkv_reply_new_integer(snapshot.bucket_number);
+	arr->elements[n++]=swarmkv_reply_new_string_fmt("ActiveMembers");
+	arr->elements[n++]=swarmkv_reply_new_integer(snapshot.estimate_keys);
+	arr->elements[n++]=swarmkv_reply_new_string_fmt("Collisions");
+	arr->elements[n++]=swarmkv_reply_new_double(snapshot.collision_rate);
+	arr->elements[n++]=swarmkv_reply_new_string_fmt("Query");
+	arr->elements[n++]=swarmkv_reply_new_integer(available);
+	assert(n==12);
+	*reply=arr;
+	return FINISHED;
+}
+\ No newline at end of file
diff --git a/test/consul.d/server.hcl b/test/consul.d/server.hcl
index ee8dcf5..1a1b81d 100644
--- a/test/consul.d/server.hcl
+++ b/test/consul.d/server.hcl
@@ -1,7 +1,7 @@
-bind_addr = "172.17.93.178"
+bind_addr = "127.0.0.1"
 client_addr = "0.0.0.0"
 server = true
 bootstrap_expect = 1
 ui_config {
   enabled = true
-}
-\ No newline at end of file
+}
diff --git a/test/swarmkv_gtest.cpp b/test/swarmkv_gtest.cpp
index 042f1a0..8cd8a19 100644
--- a/test/swarmkv_gtest.cpp
+++ b/test/swarmkv_gtest.cpp
@@ -309,9 +309,9 @@ TEST_F(SwarmkvBasicTest, TypeTokenBucket)
 	const char *key="tb-192.168.0.1";
 	int exec_successful=0;
 	arg=cmd_exec_arg_new();
-	long long capacity=1024*4, rate=1024*2, request_tokens=0, got_tokens=0;
+	long long capacity=1024*4, rate=1024*2, request_tokens=0, allocated_tokens=0;
 	cmd_exec_arg_expect_OK(arg);	
-	swarmkv_async_command(db, generic_callback, arg, "TCFG %s %lld %lld", key, capacity, rate);	
+	swarmkv_async_command(db, generic_callback, arg, "TCFG %s %lld %lld", key, rate, capacity);	
 	exec_successful=cmd_exec_arg_wait(arg, CMD_EXEC_TIMEOUT_MS);
 	cmd_exec_arg_clear(arg);
 	EXPECT_TRUE(exec_successful);
@@ -327,13 +327,13 @@ TEST_F(SwarmkvBasicTest, TypeTokenBucket)
 		reply=swarmkv_command(db, "TCONSUME %s %lld FLEXIBLE", key, request_tokens);
 		if(reply->type==SWARMKV_REPLY_INTEGER)
 		{
-			got_tokens+=reply->integer;
+			allocated_tokens+=reply->integer;
 		}
 		swarmkv_reply_free(reply);
 		gettimeofday(&now, NULL);
 		i++;
 	}
-	EXPECT_LE(got_tokens, (now.tv_sec -start.tv_sec)*rate+capacity);	
+	EXPECT_LE(allocated_tokens, (now.tv_sec -start.tv_sec)*rate+capacity);	
 	cmd_exec_arg_free(arg);
 
 	//Infinite tokens
@@ -350,7 +350,109 @@ TEST_F(SwarmkvBasicTest, TypeTokenBucket)
 	EXPECT_EQ(t, 10000*i);
 	reply=swarmkv_command(db, "TINFO %s", key);
 	ASSERT_EQ(reply->n_element, 10);
-	EXPECT_EQ(reply->elements[5]->integer, got_tokens+t);
+	EXPECT_EQ(reply->elements[5]->integer, allocated_tokens+t);
+	swarmkv_reply_free(reply);
+}
+TEST_F(SwarmkvBasicTest, TypeFairTokenBucket) // single node: FTCFG / FTCONSUME / FTINFO on one fair token bucket
+{
+	struct cmd_exec_arg *arg=NULL;
+	struct swarmkv *db=SwarmkvBasicTest::db;
+	const char *key="3-floor-bandwidth-100Mbps";
+	int exec_successful=0;
+	arg=cmd_exec_arg_new();
+	long long capacity=200*1024*1024, rate=100*1024*1024, request_tokens=0, allocated_tokens=0;
+	cmd_exec_arg_expect_OK(arg);	
+	swarmkv_async_command(db, generic_callback, arg, "FTCFG %s %lld %lld 128", key, rate, capacity);	
+	exec_successful=cmd_exec_arg_wait(arg, CMD_EXEC_TIMEOUT_MS);
+	cmd_exec_arg_clear(arg);
+	EXPECT_TRUE(exec_successful);
+	struct timeval start, now;
+	gettimeofday(&start, NULL);
+	gettimeofday(&now, NULL);
+	srandom(171); // random() is seeded by srandom(), not srand(); matches the two-node tests
+	struct swarmkv_reply *reply=NULL;
+	int i=0;
+	while(now.tv_sec - start.tv_sec<3) // keep consuming until the whole-second clock has advanced by 3
+	{
+		request_tokens=random()%(2*rate);
+		reply=swarmkv_command(db, "FTCONSUME %s user-001 5 %lld", key, request_tokens);
+		if(reply->type==SWARMKV_REPLY_INTEGER)
+		{
+			allocated_tokens+=reply->integer;
+		}
+		swarmkv_reply_free(reply);
+		gettimeofday(&now, NULL);
+		i++;
+	}
+	EXPECT_LE(allocated_tokens, (now.tv_sec -start.tv_sec)*rate+capacity); // grants must not exceed elapsed*rate plus capacity	
+	cmd_exec_arg_free(arg);
+
+	//Infinite tokens
+	reply=swarmkv_command(db, "FTCFG %s 0 0 256", key);
+	EXPECT_EQ(reply->type, SWARMKV_REPLY_STATUS);
+	swarmkv_reply_free(reply);
+	long long t=0;
+	for(i=0; i<100; i++)
+	{
+		reply=swarmkv_command(db, "FTCONSUME %s user-001 5 10000", key);
+		t+=reply->integer;
+		swarmkv_reply_free(reply);
+	}
+	EXPECT_EQ(t, 10000*i); // every one of the 100 requests is expected to be granted in full
+	reply=swarmkv_command(db, "FTINFO %s", key);
+	ASSERT_EQ(reply->n_element, 14);
+	EXPECT_EQ(reply->elements[5]->integer, allocated_tokens+t); // element 5 must equal everything granted by both phases
+	swarmkv_reply_free(reply);
+}
+TEST_F(SwarmkvBasicTest, TypeBulkTokenBucket) // single node: BTCFG / BTCONSUME / BTINFO on one bulk token bucket
+{
+	struct cmd_exec_arg *arg=NULL;
+	struct swarmkv *db=SwarmkvBasicTest::db;
+	const char *key="everyone-has-1Mbps";
+	int exec_successful=0;
+	arg=cmd_exec_arg_new();
+	long long capacity=2*1024*1024, rate=1*1024*1024, request_tokens=0, allocated_tokens=0;
+	cmd_exec_arg_expect_OK(arg);	
+	swarmkv_async_command(db, generic_callback, arg, "BTCFG %s %lld %lld 128", key, rate, capacity);	
+	exec_successful=cmd_exec_arg_wait(arg, CMD_EXEC_TIMEOUT_MS);
+	cmd_exec_arg_clear(arg);
+	EXPECT_TRUE(exec_successful);
+	struct timeval start, now;
+	gettimeofday(&start, NULL);
+	gettimeofday(&now, NULL);
+	srandom(171); // random() is seeded by srandom(), not srand(); matches the two-node tests
+	struct swarmkv_reply *reply=NULL;
+	int i=0, n_member=120;
+	while(now.tv_sec - start.tv_sec<3) // keep consuming until the whole-second clock has advanced by 3
+	{
+		request_tokens=random()%(2*rate);
+		reply=swarmkv_command(db, "BTCONSUME %s user-%d %lld", key, i%n_member, request_tokens); // rotates through user-0 .. user-119
+		if(reply->type==SWARMKV_REPLY_INTEGER)
+		{
+			allocated_tokens+=reply->integer;
+		}
+		swarmkv_reply_free(reply);
+		gettimeofday(&now, NULL);
+		i++;
+	}
+	EXPECT_LE(allocated_tokens/n_member, (now.tv_sec -start.tv_sec)*rate+capacity); // the bound is applied to the per-member average	
+	cmd_exec_arg_free(arg);
+
+	//Infinite tokens
+	reply=swarmkv_command(db, "BTCFG %s 0 0 256", key);
+	EXPECT_EQ(reply->type, SWARMKV_REPLY_STATUS);
+	swarmkv_reply_free(reply);
+	long long t=0;
+	for(i=0; i<100; i++)
+	{
+		reply=swarmkv_command(db, "BTCONSUME %s user-001 10000", key);
+		t+=reply->integer;
+		swarmkv_reply_free(reply);
+	}
+	EXPECT_EQ(t, 10000*i); // every one of the 100 requests is expected to be granted in full
+	reply=swarmkv_command(db, "BTINFO %s", key);
+	ASSERT_EQ(reply->n_element, 12);
+	EXPECT_NEAR(reply->elements[7]->integer, n_member, n_member/5); // element 7 is the value that follows the "ActiveMembers" label
 	swarmkv_reply_free(reply);
 }
 TEST_F(SwarmkvBasicTest, TypeHash)
@@ -1110,6 +1212,7 @@ TEST_F(SwarmkvTwoNodes, TypeSet)
 	free(members);
 	free(member_len);
 }
+
 TEST_F(SwarmkvTwoNodes, TypeTokenBucket)
 {
 	struct cmd_exec_arg *arg=NULL;
@@ -1121,7 +1224,7 @@ TEST_F(SwarmkvTwoNodes, TypeTokenBucket)
 	arg=cmd_exec_arg_new();
 	long long capacity=1024*4, rate=1024*2;
 	cmd_exec_arg_expect_OK(arg);	
-	swarmkv_async_command(db[0], generic_callback, arg, "TCFG %s %lld %lld", key, capacity, rate);	
+	swarmkv_async_command(db[0], generic_callback, arg, "TCFG %s %lld %lld", key, rate, capacity);	
 	exec_successful=cmd_exec_arg_wait(arg, CMD_EXEC_TIMEOUT_MS);
 	cmd_exec_arg_clear(arg);
 	EXPECT_TRUE(exec_successful);
@@ -1133,11 +1236,11 @@ TEST_F(SwarmkvTwoNodes, TypeTokenBucket)
 	gettimeofday(&now, NULL);
 	long long token=0, requested_tokens=0, got_tokens=0;
 	//Two heavy consumers
-	while(now.tv_sec - start.tv_sec<5)
+	while(now.tv_sec - start.tv_sec<10)
 	{
 		token=random()%(4*rate);
 		requested_tokens+=token;
-		reply=swarmkv_command_on(db[i%2], NULL, "TCONSUME %s %lld FLEXIBLE", key, token);
+		reply=swarmkv_command(db[i%2], "TCONSUME %s %lld FLEXIBLE", key, token);
 		if(reply->type==SWARMKV_REPLY_INTEGER)
 		{
 			got_tokens+=reply->integer;
@@ -1147,7 +1250,7 @@ TEST_F(SwarmkvTwoNodes, TypeTokenBucket)
 		i++;
 	}
 	//Light consumers
-	while(now.tv_sec - start.tv_sec<10)
+	while(0 && now.tv_sec - start.tv_sec<10)
 	{
 		token=rate*3/4+random()%(rate/2);
 		if(i%2==0)
@@ -1160,7 +1263,7 @@ TEST_F(SwarmkvTwoNodes, TypeTokenBucket)
 		}
 		token=token*6/10;
 		requested_tokens+=token;
-		reply=swarmkv_command_on(db[i%2], NULL, "TCONSUME %s %lld FLEXIBLE", key, token);
+		reply=swarmkv_command(db[0], "TCONSUME %s %lld FLEXIBLE", key, token);
 		if(reply->type==SWARMKV_REPLY_INTEGER)
 		{
 			got_tokens+=reply->integer;
@@ -1170,11 +1273,11 @@ TEST_F(SwarmkvTwoNodes, TypeTokenBucket)
 		i++;
 	}
 	//One heavy consumer
-	while(now.tv_sec - start.tv_sec<15)
+	while(0 && now.tv_sec - start.tv_sec<15)
 	{
 		token=rate*3/4+random()%(rate);
 		requested_tokens+=token;
-		reply=swarmkv_command_on(db[0], NULL, "TCONSUME %s %lld", key, token);
+		reply=swarmkv_command(db[0], "TCONSUME %s %lld", key, token);
 		if(reply->type==SWARMKV_REPLY_INTEGER)
 		{
 			got_tokens+=reply->integer;
@@ -1198,7 +1301,138 @@ TEST_F(SwarmkvTwoNodes, TypeTokenBucket)
 	wait_for_sync();
 
 	cmd_exec_arg_expect_integer(arg, 0);
-	swarmkv_async_command(db[1], generic_callback, arg, "TCONSUME %s %lld FLEXIBLE", key, 4096);
+	swarmkv_async_command(db[1], generic_callback, arg, "TCONSUME %s 4096 FLEXIBLE", key);
+	exec_successful=cmd_exec_arg_wait(arg, CMD_EXEC_TIMEOUT_MS);
+	cmd_exec_arg_clear(arg);
+
+	cmd_exec_arg_free(arg);
+	
+}
+TEST_F(SwarmkvTwoNodes, TypeFairTokenBucket) // two nodes alternately consume from one fair token bucket, then the key is deleted
+{
+	struct cmd_exec_arg *arg=NULL;
+	struct swarmkv *db[2];
+	db[0]=SwarmkvTwoNodes::db1;
+	db[1]=SwarmkvTwoNodes::db2;
+	const char *key="shaping-profile-with-fairness";
+	int exec_successful=0;
+	arg=cmd_exec_arg_new();
+	long long capacity=1024*4, rate=1024*2, divisor=1024;
+	cmd_exec_arg_expect_OK(arg);	
+	swarmkv_async_command(db[0], generic_callback, arg, "FTCFG %s %lld %lld %lld", key, rate, capacity, divisor);	
+	exec_successful=cmd_exec_arg_wait(arg, CMD_EXEC_TIMEOUT_MS);
+	cmd_exec_arg_clear(arg);
+	EXPECT_TRUE(exec_successful);
+	srandom(171); // fixed seed for the random() request sizes below
+	struct swarmkv_reply *reply=NULL;
+	int round=0;
+	struct timeval start, now;
+	gettimeofday(&start, NULL);
+	gettimeofday(&now, NULL);
+	long long token=0, requested_tokens=0, allocated_tokens=0;
+	long long member_id=0, weight=0, n_member=100;
+
+	while(now.tv_sec - start.tv_sec<20) // consume until the whole-second clock has advanced by 20
+	{
+		token=random()%(2*rate/n_member);
+		requested_tokens+=token;
+		
+		weight=1+(member_id%20); // weights cycle through 1..20
+		reply=swarmkv_command(db[round%2], "FTCONSUME %s user-%lld %lld %lld", key, member_id, weight, token); // alternate between the two nodes
+		if(reply->type==SWARMKV_REPLY_INTEGER)
+		{
+			allocated_tokens+=reply->integer;
+		}
+		swarmkv_reply_free(reply);
+		gettimeofday(&now, NULL);
+		member_id=(member_id+1)%n_member;
+		round++;
+	}
+//	printf("consume round %d, speed %d ops\n", round, round/(int)(now.tv_sec-start.tv_sec));
+	EXPECT_GE(round/(int)(now.tv_sec-start.tv_sec), 100000); // requires at least 100000 commands per elapsed second
+	long long upper_limit=(now.tv_sec-start.tv_sec)*rate+capacity;
+	double accuracy=(double)allocated_tokens/(upper_limit<requested_tokens?upper_limit:requested_tokens); // granted / min(upper_limit, requested)
+	EXPECT_NEAR(accuracy, 1, 0.035);
+
+	wait_for_sync();
+
+	reply=swarmkv_command(db[0], "FTINFO %s", key);
+	EXPECT_NEAR(reply->elements[13]->integer, n_member, n_member/5); // presumably the active-member estimate; confirm against FTINFO's reply layout
+	swarmkv_reply_free(reply);
+
+	cmd_exec_arg_expect_integer(arg, 1);
+	swarmkv_del(db[0], key, strlen(key), generic_callback, arg);
+	exec_successful=cmd_exec_arg_wait(arg, CMD_EXEC_TIMEOUT_MS);
+	cmd_exec_arg_clear(arg);
+	wait_for_sync();
+
+	cmd_exec_arg_expect_integer(arg, -1); // after the delete, the other node is expected to answer -1
+	swarmkv_async_command(db[1], generic_callback, arg, "FTCONSUME %s user-001 20 1000", key);
+	exec_successful=cmd_exec_arg_wait(arg, CMD_EXEC_TIMEOUT_MS);
+	cmd_exec_arg_clear(arg);
+
+	cmd_exec_arg_free(arg);
+}
+TEST_F(SwarmkvTwoNodes, TypeBulkTokenBucket)
+{
+	struct cmd_exec_arg *arg=NULL;
+	struct swarmkv *db[2];
+	db[0]=SwarmkvTwoNodes::db1;
+	db[1]=SwarmkvTwoNodes::db2;
+	const char *key="shaping-profile-everyone-has-10Mbps";
+	int exec_successful=0;
+	arg=cmd_exec_arg_new();
+	long long capacity=15*1024*1024, rate=10*1024*1024, buckets=8192;
+	cmd_exec_arg_expect_OK(arg);	
+	swarmkv_async_command(db[0], generic_callback, arg, "BTCFG %s %lld %lld %lld", key, rate, capacity, buckets);	
+	exec_successful=cmd_exec_arg_wait(arg, CMD_EXEC_TIMEOUT_MS);
+	cmd_exec_arg_clear(arg);
+	EXPECT_TRUE(exec_successful);
+	srandom(171);
+	struct swarmkv_reply *reply=NULL;
+	int round=0;
+	struct timeval start, now;
+	gettimeofday(&start, NULL);
+	gettimeofday(&now, NULL);
+	long long token=0, requested_tokens=0, allocated_tokens=0;
+	long long member_id=0, n_member=100;
+
+	while(now.tv_sec - start.tv_sec<15)
+	{
+		token=random()%(2*rate/n_member);
+		requested_tokens+=token;
+		
+		reply=swarmkv_command(db[round%2], "BTCONSUME %s user-%lld %lld", key, member_id, token);
+		if(reply->type==SWARMKV_REPLY_INTEGER)
+		{
+			allocated_tokens+=reply->integer;
+		}
+		swarmkv_reply_free(reply);
+		gettimeofday(&now, NULL);
+		member_id=(member_id+1)%n_member;
+		round++;
+	}
+//	printf("consume round %d, speed %d ops\n", i, i/(int)(now.tv_sec-start.tv_sec));
+	EXPECT_GE(round/(int)(now.tv_sec-start.tv_sec), 100000);
+	long long upper_limit=(now.tv_sec-start.tv_sec)*rate+capacity;
+	upper_limit=upper_limit*n_member;
+	double accuracy=(double)allocated_tokens/(upper_limit<requested_tokens?upper_limit:requested_tokens);
+	EXPECT_NEAR(accuracy, 1, 0.035);
+
+	wait_for_sync();
+
+	reply=swarmkv_command(db[0], "BTINFO %s", key);
+	EXPECT_NEAR(reply->elements[7]->integer, n_member, n_member/5);
+	swarmkv_reply_free(reply);
+
+	cmd_exec_arg_expect_integer(arg, 1);
+	swarmkv_del(db[0], key, strlen(key), generic_callback, arg);
+	exec_successful=cmd_exec_arg_wait(arg, CMD_EXEC_TIMEOUT_MS);
+	cmd_exec_arg_clear(arg);
+	wait_for_sync();
+
+	cmd_exec_arg_expect_integer(arg, -1);
+	swarmkv_async_command(db[1], generic_callback, arg, "BTCONSUME %s user-001 1000", key);
 	exec_successful=cmd_exec_arg_wait(arg, CMD_EXEC_TIMEOUT_MS);
 	cmd_exec_arg_clear(arg);
 
@@ -1261,7 +1495,7 @@ TEST_F(SwarmkvTwoNodes, TypeHash)
 }
 TEST_F(SwarmkvTwoNodes, Wait)
 {
-	return;
+	//return; NOTE(review): with the early return commented out, this test now runs the two-hour sleep below on every run; confirm that is intended
 	sleep(3600*2);
 }
 TEST(CloudNative, AnnounceIPPort)
@@ -1273,7 +1507,7 @@ TEST(CloudNative, AnnounceIPPort)
 	struct swarmkv_options *opts[NODE_NUMBER];
 	const char *cluster_name="skv-in-k8s";
 	char *err=NULL;
-	const char *annouce_ip="172.17.93.178";//eth0 of GDNT-BJ-DEV1 of AliCloud
+	const char *annouce_ip="172.17.58.172";//eth0 of GDNT-BJ-DEV1 of AliCloud
 	const char *bind_ip="0.0.0.0";
 	char node_list_str[1024]={0};
 	for(i=0; i<NODE_NUMBER; i++)
author	郑超 <[email protected]>	2023-05-05 12:16:36 +0000
committer	郑超 <[email protected]>	2023-05-05 12:16:36 +0000
commit	d698baf916c37bd831aa08440f9898fee328725c (patch)
tree	ca1c28c451f42209b11ff41bfdd17e15b5aec46d
parent	7614cf86377091b8f43649e7432c038d2daa96f9 (diff)