summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorchenzizhan <[email protected]>2024-10-17 15:04:58 +0800
committerchenzizhan <[email protected]>2024-10-17 15:04:58 +0800
commit06153e396729e3091db544ce1217b5f737d2cd0b (patch)
tree8505a83ea6a7c060ad6927b6cfc996e78bfcd2f4
parent0505aba853c13e555306626a20ce0701cabb406d (diff)
use murmurhash as uthash functionv4.6.7
-rw-r--r--CMakeLists.txt1
-rw-r--r--src/cells/hash_table.c6
-rw-r--r--src/cells/heavy_keeper.c7
-rw-r--r--src/cells/spread_sketch.c6
-rw-r--r--test/test_fuzz_test.cpp2
-rw-r--r--vendors/murmurhash/LICENSE21
-rw-r--r--vendors/murmurhash/murmurhash.c73
-rw-r--r--vendors/murmurhash/murmurhash.h30
8 files changed, 142 insertions, 4 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9843401..45aa8a7 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -114,6 +114,7 @@ file(GLOB SRC
"vendors/mpack/*.c"
"vendors/xxhash/*.c"
"vendors/base64/*.c"
+ "vendors/murmurhash/*.c"
)
add_subdirectory(test/deps)
diff --git a/src/cells/hash_table.c b/src/cells/hash_table.c
index 255a3ef..4a3668d 100644
--- a/src/cells/hash_table.c
+++ b/src/cells/hash_table.c
@@ -5,6 +5,12 @@
#include <stdlib.h>
#include <string.h>
+// uthash use faster murmurhash
+#include "murmurhash/murmurhash.h"
+#define HASH_FUNCTION(keyptr, keylen, hashv) \
+ do { \
+ hashv = murmurhash(keyptr, keylen, 0); \
+ } while (0)
#include "uthash.h"
#include "fieldstat.h"
diff --git a/src/cells/heavy_keeper.c b/src/cells/heavy_keeper.c
index 82fe4be..c17ef3b 100644
--- a/src/cells/heavy_keeper.c
+++ b/src/cells/heavy_keeper.c
@@ -13,12 +13,13 @@
#include "minheap/heap.h"
#include "mpack/mpack.h"
#include "xxhash/xxhash.h"
-// XXHASH is faster
+
+// uthash use faster murmurhash
+#include "murmurhash/murmurhash.h"
#define HASH_FUNCTION(keyptr, keylen, hashv) \
do { \
- hashv = XXH3_64bits(keyptr, keylen); \
+ hashv = murmurhash(keyptr, keylen, 0); \
} while (0)
-
#include "uthash.h"
#include "exdata.h"
diff --git a/src/cells/spread_sketch.c b/src/cells/spread_sketch.c
index 9618771..833e97a 100644
--- a/src/cells/spread_sketch.c
+++ b/src/cells/spread_sketch.c
@@ -6,6 +6,12 @@
#include <assert.h>
#include "xxhash/xxhash.h"
+// uthash use faster murmurhash
+#include "murmurhash/murmurhash.h"
+#define HASH_FUNCTION(keyptr, keylen, hashv) \
+ do { \
+ hashv = murmurhash(keyptr, keylen, 0); \
+ } while (0)
#include "uthash.h"
#include "spread_sketch.h"
diff --git a/test/test_fuzz_test.cpp b/test/test_fuzz_test.cpp
index 0f5a0ba..2c9d9ba 100644
--- a/test/test_fuzz_test.cpp
+++ b/test/test_fuzz_test.cpp
@@ -475,7 +475,7 @@ TEST(Fuzz_test, many_instance_random_flow_unregister_calibrate_reset_fork_merge_
est_total += value_est;
true_total += value_true;
}
- EXPECT_LE(abs(est_total - true_total) / true_total, 0.2);
+ EXPECT_LE(abs(est_total - true_total) / true_total, 0.25);
// printf("spreadsketch Mean ratio e: %f\n", abs(est_total - true_total) / true_total);
for (size_t j = 0; j < cell_num; j++) {
diff --git a/vendors/murmurhash/LICENSE b/vendors/murmurhash/LICENSE
new file mode 100644
index 0000000..3bab1f7
--- /dev/null
+++ b/vendors/murmurhash/LICENSE
@@ -0,0 +1,21 @@
+The MIT License (MIT)
+
+Copyright (c) 2014 Joseph Werle
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE. \ No newline at end of file
diff --git a/vendors/murmurhash/murmurhash.c b/vendors/murmurhash/murmurhash.c
new file mode 100644
index 0000000..0c631c4
--- /dev/null
+++ b/vendors/murmurhash/murmurhash.c
@@ -0,0 +1,73 @@
+/**
+ * `murmurhash.h' - murmurhash
+ *
+ * copyright (c) 2014-2022 joseph werle <[email protected]>
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+#include "murmurhash.h"
+
+uint32_t
+murmurhash (const char *key, uint32_t len, uint32_t seed) {
+ uint32_t c1 = 0xcc9e2d51;
+ uint32_t c2 = 0x1b873593;
+ uint32_t r1 = 15;
+ uint32_t r2 = 13;
+ uint32_t m = 5;
+ uint32_t n = 0xe6546b64;
+ uint32_t h = 0;
+ uint32_t k = 0;
+ uint8_t *d = (uint8_t *) key; // 32 bit extract from `key'
+ const uint32_t *chunks = NULL;
+ const uint8_t *tail = NULL; // tail - last 8 bytes
+ int i = 0;
+ int l = len / 4; // chunk length
+
+ h = seed;
+
+ chunks = (const uint32_t *) (d + l * 4); // body
+ tail = (const uint8_t *) (d + l * 4); // last 8 byte chunk of `key'
+
+ // for each 4 byte chunk of `key'
+ for (i = -l; i != 0; ++i) {
+ // next 4 byte chunk of `key'
+ k = chunks[i];
+
+ // encode next 4 byte chunk of `key'
+ k *= c1;
+ k = (k << r1) | (k >> (32 - r1));
+ k *= c2;
+
+ // append to hash
+ h ^= k;
+ h = (h << r2) | (h >> (32 - r2));
+ h = h * m + n;
+ }
+
+ k = 0;
+
+ // remainder
+ switch (len & 3) { // `len % 4'
+ case 3: k ^= (tail[2] << 16);
+ case 2: k ^= (tail[1] << 8);
+
+ case 1:
+ k ^= tail[0];
+ k *= c1;
+ k = (k << r1) | (k >> (32 - r1));
+ k *= c2;
+ h ^= k;
+ }
+
+ h ^= len;
+
+ h ^= (h >> 16);
+ h *= 0x85ebca6b;
+ h ^= (h >> 13);
+ h *= 0xc2b2ae35;
+ h ^= (h >> 16);
+
+ return h;
+}
diff --git a/vendors/murmurhash/murmurhash.h b/vendors/murmurhash/murmurhash.h
new file mode 100644
index 0000000..b6b6e43
--- /dev/null
+++ b/vendors/murmurhash/murmurhash.h
@@ -0,0 +1,30 @@
+/**
+ * `murmurhash.h' - murmurhash
+ *
+ * copyright (c) 2014-2022 joseph werle <[email protected]>
+ */
+
+#ifndef MURMURHASH_H
+#define MURMURHASH_H
+
+#include <stdint.h>
+
+#define MURMURHASH_VERSION "0.1.0"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Returns a murmur hash of `key' based on `seed'
+ * using the MurmurHash3 algorithm
+ */
+
+uint32_t
+murmurhash (const char *, uint32_t, uint32_t);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif