diff options
| author | chenzizhan <[email protected]> | 2024-10-17 15:04:58 +0800 |
|---|---|---|
| committer | chenzizhan <[email protected]> | 2024-10-17 15:04:58 +0800 |
| commit | 06153e396729e3091db544ce1217b5f737d2cd0b (patch) | |
| tree | 8505a83ea6a7c060ad6927b6cfc996e78bfcd2f4 | |
| parent | 0505aba853c13e555306626a20ce0701cabb406d (diff) | |
use murmurhash as uthash function (v4.6.7)
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | CMakeLists.txt | 1 |
| -rw-r--r-- | src/cells/hash_table.c | 6 |
| -rw-r--r-- | src/cells/heavy_keeper.c | 7 |
| -rw-r--r-- | src/cells/spread_sketch.c | 6 |
| -rw-r--r-- | test/test_fuzz_test.cpp | 2 |
| -rw-r--r-- | vendors/murmurhash/LICENSE | 21 |
| -rw-r--r-- | vendors/murmurhash/murmurhash.c | 73 |
| -rw-r--r-- | vendors/murmurhash/murmurhash.h | 30 |
8 files changed, 142 insertions, 4 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 9843401..45aa8a7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -114,6 +114,7 @@ file(GLOB SRC "vendors/mpack/*.c"
"vendors/xxhash/*.c"
"vendors/base64/*.c"
+ "vendors/murmurhash/*.c"
)
add_subdirectory(test/deps)
diff --git a/src/cells/hash_table.c b/src/cells/hash_table.c index 255a3ef..4a3668d 100644 --- a/src/cells/hash_table.c +++ b/src/cells/hash_table.c @@ -5,6 +5,12 @@ #include <stdlib.h> #include <string.h> +// uthash use faster murmurhash +#include "murmurhash/murmurhash.h" +#define HASH_FUNCTION(keyptr, keylen, hashv) \ + do { \ + hashv = murmurhash(keyptr, keylen, 0); \ + } while (0) #include "uthash.h" #include "fieldstat.h" diff --git a/src/cells/heavy_keeper.c b/src/cells/heavy_keeper.c index 82fe4be..c17ef3b 100644 --- a/src/cells/heavy_keeper.c +++ b/src/cells/heavy_keeper.c @@ -13,12 +13,13 @@ #include "minheap/heap.h" #include "mpack/mpack.h" #include "xxhash/xxhash.h" -// XXHASH is faster + +// uthash use faster murmurhash +#include "murmurhash/murmurhash.h" #define HASH_FUNCTION(keyptr, keylen, hashv) \ do { \ - hashv = XXH3_64bits(keyptr, keylen); \ + hashv = murmurhash(keyptr, keylen, 0); \ } while (0) - #include "uthash.h" #include "exdata.h" diff --git a/src/cells/spread_sketch.c b/src/cells/spread_sketch.c index 9618771..833e97a 100644 --- a/src/cells/spread_sketch.c +++ b/src/cells/spread_sketch.c @@ -6,6 +6,12 @@ #include <assert.h> #include "xxhash/xxhash.h" +// uthash use faster murmurhash +#include "murmurhash/murmurhash.h" +#define HASH_FUNCTION(keyptr, keylen, hashv) \ + do { \ + hashv = murmurhash(keyptr, keylen, 0); \ + } while (0) #include "uthash.h" #include "spread_sketch.h" diff --git a/test/test_fuzz_test.cpp b/test/test_fuzz_test.cpp index 0f5a0ba..2c9d9ba 100644 --- a/test/test_fuzz_test.cpp +++ b/test/test_fuzz_test.cpp @@ -475,7 +475,7 @@ TEST(Fuzz_test, many_instance_random_flow_unregister_calibrate_reset_fork_merge_ est_total += value_est; true_total += value_true; } - EXPECT_LE(abs(est_total - true_total) / true_total, 0.2); + EXPECT_LE(abs(est_total - true_total) / true_total, 0.25); // printf("spreadsketch Mean ratio e: %f\n", abs(est_total - true_total) / true_total); for (size_t j = 0; j < cell_num; j++) { diff --git 
a/vendors/murmurhash/LICENSE b/vendors/murmurhash/LICENSE new file mode 100644 index 0000000..3bab1f7 --- /dev/null +++ b/vendors/murmurhash/LICENSE @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2014 Joseph Werle + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE.
\ No newline at end of file diff --git a/vendors/murmurhash/murmurhash.c b/vendors/murmurhash/murmurhash.c new file mode 100644 index 0000000..0c631c4 --- /dev/null +++ b/vendors/murmurhash/murmurhash.c @@ -0,0 +1,73 @@ +/** + * `murmurhash.h' - murmurhash + * + * copyright (c) 2014-2022 joseph werle <[email protected]> + */ + +#include <stdlib.h> +#include <stdio.h> +#include <stdint.h> +#include "murmurhash.h" + +uint32_t +murmurhash (const char *key, uint32_t len, uint32_t seed) { + uint32_t c1 = 0xcc9e2d51; + uint32_t c2 = 0x1b873593; + uint32_t r1 = 15; + uint32_t r2 = 13; + uint32_t m = 5; + uint32_t n = 0xe6546b64; + uint32_t h = 0; + uint32_t k = 0; + uint8_t *d = (uint8_t *) key; // 32 bit extract from `key' + const uint32_t *chunks = NULL; + const uint8_t *tail = NULL; // tail - last 8 bytes + int i = 0; + int l = len / 4; // chunk length + + h = seed; + + chunks = (const uint32_t *) (d + l * 4); // body + tail = (const uint8_t *) (d + l * 4); // last 8 byte chunk of `key' + + // for each 4 byte chunk of `key' + for (i = -l; i != 0; ++i) { + // next 4 byte chunk of `key' + k = chunks[i]; + + // encode next 4 byte chunk of `key' + k *= c1; + k = (k << r1) | (k >> (32 - r1)); + k *= c2; + + // append to hash + h ^= k; + h = (h << r2) | (h >> (32 - r2)); + h = h * m + n; + } + + k = 0; + + // remainder + switch (len & 3) { // `len % 4' + case 3: k ^= (tail[2] << 16); + case 2: k ^= (tail[1] << 8); + + case 1: + k ^= tail[0]; + k *= c1; + k = (k << r1) | (k >> (32 - r1)); + k *= c2; + h ^= k; + } + + h ^= len; + + h ^= (h >> 16); + h *= 0x85ebca6b; + h ^= (h >> 13); + h *= 0xc2b2ae35; + h ^= (h >> 16); + + return h; +} diff --git a/vendors/murmurhash/murmurhash.h b/vendors/murmurhash/murmurhash.h new file mode 100644 index 0000000..b6b6e43 --- /dev/null +++ b/vendors/murmurhash/murmurhash.h @@ -0,0 +1,30 @@ +/** + * `murmurhash.h' - murmurhash + * + * copyright (c) 2014-2022 joseph werle <[email protected]> + */ + +#ifndef MURMURHASH_H +#define MURMURHASH_H + 
+#include <stdint.h> + +#define MURMURHASH_VERSION "0.1.0" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Returns a murmur hash of `key' based on `seed' + * using the MurmurHash3 algorithm + */ + +uint32_t +murmurhash (const char *, uint32_t, uint32_t); + +#ifdef __cplusplus +} +#endif + +#endif |
