summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorzy <[email protected]>2023-09-27 09:25:34 +0000
committerzy <[email protected]>2023-09-27 09:25:34 +0000
commitc175a0abc8eb6f1a4885abe1f981e649456225dd (patch)
treeb1a45b2f671d3213347be268c5787eebba1ff5b8
parent48bb368877c04e36308f6f52c680181c2fbff076 (diff)
parent36450f5dfa230ef806151101ee8047375915ad73 (diff)
Merge branch 'main' into feature-rs-timeoutfeature-rs-timeout
-rw-r--r--Cargo.toml4
-rw-r--r--bindings/rs-dablooms/.gitignore2
-rw-r--r--bindings/rs-dablooms/Cargo.toml10
-rw-r--r--bindings/rs-dablooms/README.md67
-rw-r--r--bindings/rs-dablooms/README.zh_cn.md67
-rw-r--r--bindings/rs-dablooms/build.rs37
-rw-r--r--bindings/rs-dablooms/dablooms/CMakeLists.txt5
-rw-r--r--bindings/rs-dablooms/dablooms/LICENSE17
-rw-r--r--bindings/rs-dablooms/dablooms/Makefile179
-rw-r--r--bindings/rs-dablooms/dablooms/README.md259
-rw-r--r--bindings/rs-dablooms/dablooms/godablooms/README.md15
-rw-r--r--bindings/rs-dablooms/dablooms/godablooms/dablooms.go73
-rw-r--r--bindings/rs-dablooms/dablooms/godablooms/dablooms_test.go40
-rw-r--r--bindings/rs-dablooms/dablooms/pydablooms/README.md28
-rw-r--r--bindings/rs-dablooms/dablooms/pydablooms/modpath.py38
-rw-r--r--bindings/rs-dablooms/dablooms/pydablooms/pydablooms.c225
-rw-r--r--bindings/rs-dablooms/dablooms/pydablooms/setup.py35
-rw-r--r--bindings/rs-dablooms/dablooms/pydablooms/test_pydablooms.py104
-rw-r--r--bindings/rs-dablooms/dablooms/src/dablooms.c649
-rw-r--r--bindings/rs-dablooms/dablooms/src/dablooms.h92
-rw-r--r--bindings/rs-dablooms/dablooms/src/murmur.c120
-rw-r--r--bindings/rs-dablooms/dablooms/src/murmur.h12
-rw-r--r--bindings/rs-dablooms/dablooms/src/test_dablooms.c342
-rw-r--r--bindings/rs-dablooms/dablooms/test/test_dabloombin0 -> 21280 bytes
-rw-r--r--bindings/rs-dablooms/dablooms/test/test_dabloom.c152
-rw-r--r--bindings/rs-dablooms/src/dablooms.rs321
-rw-r--r--bindings/rs-dablooms/src/dablooms_bind.rs156
-rw-r--r--bindings/rs-dablooms/src/dablooms_bind_test.rs458
-rw-r--r--bindings/rs-dablooms/src/lib.rs4
29 files changed, 3510 insertions, 1 deletions
diff --git a/Cargo.toml b/Cargo.toml
index d9d139a..178679d 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -7,10 +7,12 @@ edition = "2021"
[workspace]
members = [
- "bindings/rs-timeout"
+ "bindings/rs-timeout",
+ "bindings/rs-dablooms",
]
[dependencies]
+rs-dablooms = {path="bindings/rs-dablooms"}
rs-timeout = {path="bindings/rs-timeout"}
nom = "7"
chrono = "0.4"
diff --git a/bindings/rs-dablooms/.gitignore b/bindings/rs-dablooms/.gitignore
new file mode 100644
index 0000000..a0fb2cd
--- /dev/null
+++ b/bindings/rs-dablooms/.gitignore
@@ -0,0 +1,2 @@
+dablooms/build/
+/target
diff --git a/bindings/rs-dablooms/Cargo.toml b/bindings/rs-dablooms/Cargo.toml
new file mode 100644
index 0000000..65e0441
--- /dev/null
+++ b/bindings/rs-dablooms/Cargo.toml
@@ -0,0 +1,10 @@
+[package]
+name = "rs-dablooms"
+version = "0.1.0"
+edition = "2021"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+libc = "0.2.0"
+bindgen = "0.68.1" \ No newline at end of file
diff --git a/bindings/rs-dablooms/README.md b/bindings/rs-dablooms/README.md
new file mode 100644
index 0000000..22f6a9b
--- /dev/null
+++ b/bindings/rs-dablooms/README.md
@@ -0,0 +1,67 @@
+# README
+
+Rust binding for [dablooms](https://git.mesalab.cn/tango/dablooms).
+
+Provides 3 types of Bloom filters: CountingBloomFilter, ScalingBloomFilter, ExpiryBloomFilter.
+
+Each type of Bloom filter supports 3 basic operations: new, add, remove, and check.
+- ExpiryBloomFilter does not provide remove, and relies on expiration time to clean up data.
+
+The key value is required to implement the `AsRef<[u8]>` trait.
+- In Rust, a `&str` like "hello world" implements `AsRef<[u8]>` by default, so it can be used directly.
+
+Usage example
+
+```rust
+// CountingBloomFilter
+let mut bloom = CountingBloomFilter::new(1000, 0.01).unwrap();
+let key1 = "hello world";
+
+assert!(bloom.check(key1) == false);
+bloom.add(key1).unwrap();
+assert!(bloom.check(key) == true);
+bloom.remove(key1);
+assert!(bloom.check(key1) == false);
+
+// ScalingBloomFilter
+let mut bloom = ScalingBloomFilter::new(100, 0.05).unwrap();
+let key1 = "aaa";
+let id1 = 1;
+
+assert!(bloom.check(key1) == false);
+bloom.add(key1, id1).unwrap();
+assert!(bloom.check(key1) == true);
+bloom.remove(key1, id1).unwrap();
+assert!(bloom.check(key1) == false);
+
+// ExpiryBloomFilter
+let capacity: u32 = 100;
+let error_rate: f64 = 0.05;
+let expiry_secs: i32 = 3; // expiry time 3s
+let key1 = "aaa";
+
+let mut bloom = ExpiryBloomFilter::new(capacity, error_rate, expiry_secs).unwrap();
+
+bloom.add(key1).unwrap();
+assert!(bloom.count() == 1);
+assert!(bloom.check(key1));
+
+unsafe {
+ sleep(2); // sleep 2 sec | all key1's value not expired
+}
+assert!(bloom.check(key1));
+unsafe {
+ sleep(2); // sleep 4 sec | all key1's value expired
+}
+assert!(!bloom.check(key1));
+```
+
+## Oher API
+
+ScalingBloomFilter
+- flush:
+- mem_seqnum
+- disk_seqnum
+
+ExpiryBloomFilter
+- count
diff --git a/bindings/rs-dablooms/README.zh_cn.md b/bindings/rs-dablooms/README.zh_cn.md
new file mode 100644
index 0000000..a6a6e79
--- /dev/null
+++ b/bindings/rs-dablooms/README.zh_cn.md
@@ -0,0 +1,67 @@
+# README
+
+[dablooms](https://git.mesalab.cn/tango/dablooms) 的 Rust binding.
+
+提供了 3 种 布隆过滤器: CountingBloomFilter ScalingBloomFilter ExpiryBloomFilter
+
+每种布隆过滤器 分别支持 new add remove check 三种基本操作.
+- ExpiryBloomFilter 不提供 remove, 其依靠 过期时间清理数据.
+
+key 值要求实现了 `AsRef<[u8]>` trait.
+- rust 中类似 "hello world" 的 `&str` 默认实现了 `AsRef<[u8]>`,因此可以直接使用
+
+使用示例
+
+```rust
+// CountingBloomFilter
+let mut bloom = CountingBloomFilter::new(1000, 0.01).unwrap();
+let key1 = "hello world";
+
+assert!(bloom.check(key1) == false);
+bloom.add(key1).unwrap();
+assert!(bloom.check(key) == true);
+bloom.remove(key1);
+assert!(bloom.check(key1) == false);
+
+// ScalingBloomFilter
+let mut bloom = ScalingBloomFilter::new(100, 0.05).unwrap();
+let key1 = "aaa";
+let id1 = 1;
+
+assert!(bloom.check(key1) == false);
+bloom.add(key1, id1).unwrap();
+assert!(bloom.check(key1) == true);
+bloom.remove(key1, id1).unwrap();
+assert!(bloom.check(key1) == false);
+
+// ExpiryBloomFilter
+let capacity: u32 = 100;
+let error_rate: f64 = 0.05;
+let expiry_secs: i32 = 3; // expiry time 3s
+let key1 = "aaa";
+
+let mut bloom = ExpiryBloomFilter::new(capacity, error_rate, expiry_secs).unwrap();
+
+bloom.add(key1).unwrap();
+assert!(bloom.count() == 1);
+assert!(bloom.check(key1));
+
+unsafe {
+ sleep(2); // sleep 2 sec | all key1's value not expired
+}
+assert!(bloom.check(key1));
+unsafe {
+ sleep(2); // sleep 4 sec | all key1's value expired
+}
+assert!(!bloom.check(key1));
+```
+
+## 其他API
+
+ScalingBloomFilter
+- flush:
+- mem_seqnum
+- disk_seqnum
+
+ExpiryBloomFilter
+- count
diff --git a/bindings/rs-dablooms/build.rs b/bindings/rs-dablooms/build.rs
new file mode 100644
index 0000000..9d9f1d2
--- /dev/null
+++ b/bindings/rs-dablooms/build.rs
@@ -0,0 +1,37 @@
+use std::process::Command;
+
+fn main() {
+ // Compile C source file
+ let output = Command::new("make")
+ // .arg("so") // 生成动态库
+ .arg("install") // install
+ .current_dir("./dablooms")
+ .output()
+ .expect("Failed to compile C source file");
+
+ if !output.status.success() {
+ panic!(
+ "Failed to compile C source file: {}",
+ String::from_utf8_lossy(&output.stderr)
+ );
+ }
+
+ let output = Command::new("bash")
+ .arg("-c")
+ .arg("ldconfig")
+ .output()
+ .expect("Failed to execute command");
+
+ if output.status.success() {
+ println!(
+ "Command output: {}",
+ String::from_utf8_lossy(&output.stdout)
+ );
+ } else {
+ println!("Command error: {}", String::from_utf8_lossy(&output.stderr));
+ }
+
+ /* set the library search path for marsio */
+ // println!("cargo:rustc-link-search=native=/opt/tsg/mrzcpd/znver1/lib/");
+ println!("cargo:rustc-link-lib=static=dablooms");
+}
diff --git a/bindings/rs-dablooms/dablooms/CMakeLists.txt b/bindings/rs-dablooms/dablooms/CMakeLists.txt
new file mode 100644
index 0000000..f9010c4
--- /dev/null
+++ b/bindings/rs-dablooms/dablooms/CMakeLists.txt
@@ -0,0 +1,5 @@
+cmake_minimum_required(VERSION 3.10)
+project(dabloom)
+add_definitions(-fPIC)
+add_library(libdabloom-static STATIC src/dablooms.c src/murmur.c)
+add_library(libdabloom SHARED src/dablooms.c src/murmur.c) \ No newline at end of file
diff --git a/bindings/rs-dablooms/dablooms/LICENSE b/bindings/rs-dablooms/dablooms/LICENSE
new file mode 100644
index 0000000..89de354
--- /dev/null
+++ b/bindings/rs-dablooms/dablooms/LICENSE
@@ -0,0 +1,17 @@
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
diff --git a/bindings/rs-dablooms/dablooms/Makefile b/bindings/rs-dablooms/dablooms/Makefile
new file mode 100644
index 0000000..51cec38
--- /dev/null
+++ b/bindings/rs-dablooms/dablooms/Makefile
@@ -0,0 +1,179 @@
+HELPTEXT = "\
+\n dablooms Makefile usage \
+\n \
+\n Options (current value) \
+\n \
+\n BLDDIR ($(BLDDIR)) \
+\n DESTDIR ($(DESTDIR)) \
+\n prefix ($(prefix)) \
+\n libdir ($(libdir)) \
+\n includedir ($(includedir)) \
+\n \
+\n CC ($(CC)) \
+\n CFLAGS ($(ALL_CFLAGS)) \
+\n LDFLAGS ($(LDFLAGS)) \
+\n INSTALL ($(INSTALL)) \
+\n \
+\n PYTHON ($(PYTHON)) \
+\n PY_MOD_DIR ($(PY_MOD_DIR)) \
+\n \
+\n Targets \
+\n \
+\n all (c libdablooms) \
+\n install \
+\n test \
+\n clean \
+\n help \
+\n \
+\n\n"
+# \n pydablooms \
+# \n install_pydablooms \
+# \n test_pydablooms \
+\n\n"
+
+prefix = /usr/local
+libdir = $(prefix)/lib
+includedir = $(prefix)/include
+DESTDIR =
+BLDDIR = build
+
+CFLAGS = -g -Wall -O2
+LDFLAGS =
+ALL_CFLAGS = -fPIC $(CFLAGS)
+LIBS = -lm
+
+INSTALL = install
+CC = gcc
+AR = ar
+
+### dynamic shared object ###
+
+# shared-object version does not follow software release version
+SO_VER_MAJOR = 1
+SO_VER_MINOR = 1
+
+SO_VER = $(SO_VER_MAJOR).$(SO_VER_MINOR)
+SO_NAME = so
+SO_CMD = -soname
+SO_EXT_MAJOR = $(SO_NAME).$(SO_VER_MAJOR)
+SO_EXT = $(SO_NAME).$(SO_VER)
+UNAME := $(shell uname -s)
+ifeq ($(UNAME),Darwin)
+ SO_NAME = dylib
+ SO_CMD = -install_name
+ SO_EXT_MAJOR = $(SO_VER_MAJOR).$(SO_NAME)
+ SO_EXT = $(SO_VER).$(SO_NAME)
+endif
+SHARED_LDFLAGS = -shared -Wl,$(SO_CMD),libdablooms.$(SO_EXT_MAJOR)
+
+### sources and outputs ###
+
+SRCS_LIBDABLOOMS = dablooms.c murmur.c
+SRCS_TESTS = test_dablooms.c
+
+OBJS_LIBDABLOOMS = $(patsubst %.c, $(BLDDIR)/%.o, $(SRCS_LIBDABLOOMS))
+OBJS_TESTS = $(patsubst %.c, $(BLDDIR)/%.o, $(SRCS_TESTS))
+
+LIB_SYMLNKS = libdablooms.$(SO_NAME) libdablooms.$(SO_EXT_MAJOR)
+LIB_FILES = libdablooms.a libdablooms.$(SO_EXT) $(LIB_SYMLNKS)
+
+# for tests
+WORDS = /usr/share/dict/words
+
+### rules ###
+
+# default target (needs to be first target)
+all: libdablooms
+
+# sort removes duplicates
+DEPS := $(sort $(patsubst %.o, %.o.deps, $(OBJS_LIBDABLOOMS) $(OBJS_TESTS)))
+-include $(DEPS)
+
+libdablooms: $(patsubst %, $(BLDDIR)/%, $(LIB_FILES))
+
+install: install_libdablooms
+
+install_libdablooms: $(patsubst %, $(DESTDIR)$(libdir)/%, $(LIB_FILES)) $(DESTDIR)$(includedir)/dablooms.h
+
+$(DESTDIR)$(libdir)/libdablooms.a: $(BLDDIR)/libdablooms.a
+
+$(DESTDIR)$(libdir)/libdablooms.$(SO_EXT): $(BLDDIR)/libdablooms.$(SO_EXT)
+
+$(patsubst %, $(DESTDIR)$(libdir)/%, $(LIB_SYMLNKS)): %: $(DESTDIR)$(libdir)/libdablooms.$(SO_EXT)
+ @echo " SYMLNK " $@
+ @$(INSTALL) -d $(dir $@)
+ @ln -fs $(notdir $<) $@
+
+$(DESTDIR)$(includedir)/dablooms.h: src/dablooms.h
+
+$(DESTDIR)$(prefix)/%:
+ @echo " INSTALL " $@
+ @$(INSTALL) -d $(dir $@)
+ @$(INSTALL) $< $@
+
+$(BLDDIR)/%.o: src/%.c
+ @echo " CC " $@
+ @mkdir -p $(dir $@)
+ @$(CC) -o $@ -c $< $(ALL_CFLAGS) -MMD -MF [email protected]
+
+$(BLDDIR)/libdablooms.a: $(OBJS_LIBDABLOOMS)
+ @echo " AR " $@
+ @rm -f $@
+ @$(AR) rcs $@ $^
+
+$(BLDDIR)/libdablooms.$(SO_EXT): $(OBJS_LIBDABLOOMS)
+ @echo " SO " $@
+ @$(CC) -o $@ $(ALL_CFLAGS) $(SHARED_LDFLAGS) $(LDFLAGS) $^ $(LIBS)
+
+$(patsubst %, $(BLDDIR)/%, $(LIB_SYMLNKS)): %: $(BLDDIR)/libdablooms.$(SO_EXT)
+ @echo " SYMLNK " $@
+ @mkdir -p $(dir $@)
+ @ln -fs $(notdir $<) $@
+
+$(BLDDIR)/test_dablooms: $(OBJS_TESTS) $(BLDDIR)/libdablooms.a
+ @echo " LD " $@
+ @$(CC) -o $@ $(ALL_CFLAGS) $(LDFLAGS) $(OBJS_TESTS) $(BLDDIR)/libdablooms.a $(LIBS)
+
+test: $(BLDDIR)/test_dablooms
+ @$(BLDDIR)/test_dablooms $(BLDDIR)/testbloom.bin $(WORDS)
+
+help:
+ @printf $(HELPTEXT)
+
+clean:
+ rm -f $(DEPS) $(OBJS_LIBDABLOOMS) $(patsubst %, $(BLDDIR)/%, $(LIB_FILES)) $(OBJS_TESTS) $(BLDDIR)/test_dablooms $(BLDDIR)/testbloom.bin
+ -rmdir $(BLDDIR)
+
+# .PHONY: all clean help install test libdablooms install_libdablooms
+
+### pydablooms ###
+
+# PYTHON = python
+# PY_BLDDIR = $(BLDDIR)/python
+# PY_MOD_DIR_ARG = # optional: --user or --system
+# PY_MOD_DIR := $(shell $(PYTHON) pydablooms/modpath.py $(PY_MOD_DIR_ARG))
+# PY_FLAGS = --build-lib=$(PY_BLDDIR) --build-temp=$(PY_BLDDIR)
+# PY_BLD_ENV = BLDDIR="$(BLDDIR)"
+
+# pydablooms: $(PY_BLDDIR)/pydablooms.so
+
+# install_pydablooms: $(DESTDIR)$(PY_MOD_DIR)/pydablooms.so
+
+# $(DESTDIR)$(PY_MOD_DIR)/pydablooms.so: $(PY_BLDDIR)/pydablooms.so
+# @echo " PY_INSTALL " $@
+# @$(INSTALL) -d $(dir $@)
+# @$(INSTALL) $< $@
+
+# $(PY_BLDDIR)/pydablooms.so: pydablooms/pydablooms.c src/dablooms.c src/murmur.c
+# @echo " PY_BUILD" $@
+# @$(PY_BLD_ENV) $(PYTHON) pydablooms/setup.py build $(PY_FLAGS) >/dev/null
+
+# test_pydablooms: pydablooms
+# @PYTHONPATH=$(PY_BLDDIR) $(PYTHON) pydablooms/test_pydablooms.py $(BLDDIR)/testbloom_py.bin $(WORDS)
+
+# clean: clean_pydablooms
+# clean_pydablooms:
+# rm -f $(BLDDIR)/pydablooms.so $(BLDDIR)/testbloom_py.bin
+# $(PYTHON) pydablooms/setup.py clean $(PY_FLAGS)
+
+# .PHONY: pydablooms install_pydablooms test_pydablooms clean_pydablooms
diff --git a/bindings/rs-dablooms/dablooms/README.md b/bindings/rs-dablooms/dablooms/README.md
new file mode 100644
index 0000000..d606233
--- /dev/null
+++ b/bindings/rs-dablooms/dablooms/README.md
@@ -0,0 +1,259 @@
+Dablooms: A Scalable, Counting, Bloom Filter
+----------------------------------
+
+_Note_: this project has been mostly unmaintained for a while.
+
+### Overview
+This project aims to demonstrate a novel Bloom filter implementation that can
+scale, and provide not only the addition of new members, but reliable removal
+of existing members.
+
+Bloom filters are a probabilistic data structure that provide space-efficient
+storage of elements at the cost of possible false positive on membership
+queries.
+
+**dablooms** implements such a structure that takes additional metadata to classify
+elements in order to make an intelligent decision as to which Bloom filter an element
+should belong.
+
+### Features
+**dablooms**, in addition to the above, has several features.
+
+* Implemented as a static C library
+* Memory mapped
+* 4 bit counters
+* Sequence counters for clean/dirty checks
+* Python wrapper
+
+For performance, the low-level operations are implemented in C. It is also
+memory mapped which provides async flushing and persistence at low cost.
+In an effort to maintain memory efficiency, rather than using integers, or
+even whole bytes as counters, we use only four bit counters. These four bit
+counters allow up to 15 items to share a counter in the map. If more than a
+small handful are sharing said counter, the Bloom filter would be overloaded
+(resulting in excessive false positives anyway) at any sane error rate, so
+there is no benefit in supporting larger counters.
+
+The Bloom filter also employs change sequence numbers to track operations performed
+on the Bloom filter. These allow the application to determine if a write might have
+only partially completed (perhaps due to a crash), leaving the filter in an
+inconsistent state. The application can thus determine if a filter is ok or needs
+to be recreated. The sequence number can be used to determine what a consistent but
+out-of-date filter missed, and bring it up-to-date.
+
+There are two sequence numbers (and helper functions to get them): "mem_seqnum" and
+"disk_seqnum". The "mem" variant is useful if the user is sure the OS didn't crash,
+and the "disk" variant is useful if the OS might have crashed since the Bloom filter
+was last changed. Both values could be "0", meaning the filter is possibly
+inconsistent from their point of view, or a non-zero sequence number that the filter
+is consistent with. The "mem" variant is often non-zero, but the "disk" variant only
+becomes non-zero right after a (manual) flush. This can be expensive (it's an fsync),
+so the value can be ignored if not relevant for the application. For example, if the
+Bloom file exists in a directory which is cleared at boot (like `/tmp`), then the
+application can safely assume that any existing file was not affected by an OS crash,
+and never bother to flush or check disk_seqnum. Schemes involving batching up changes
+are also possible.
+
+The dablooms library is not inherently thread safe, this is the clients responsibility.
+Bindings are also not thread safe, unless they state otherwise.
+
+### Installing
+Clone the repo, or download and extract a tarball of a tagged version
+[from github](https://github.com/bitly/dablooms/tags).
+In the source tree, type `make`, `make install` (`sudo` may be needed).
+This will only install static and dynamic versions of the C dablooms library "libdablooms".
+
+To use a specific build directory, install prefix, or destination directory for packaging,
+specify `BLDDIR`, `prefix`, or `DESTDIR` to make. For example:
+`make install BLDDIR=/tmp/dablooms/bld DESTDIR=/tmp/dablooms/pkg prefix=/usr`
+
+Look at the output of `make help` for more options and targets.
+
+Also available are bindings for various other languages:
+
+#### Python (pydablooms)
+To install the Python bindings "pydablooms" (currently only compatibly with python 2.x)
+run `make pydablooms`, `make install_pydablooms` (`sudo` may be needed).
+
+To use and install for a specific version of Python installed on your system,
+use the `PYTHON` option to make. For example: `make install_pydablooms PYTHON=python2.7`.
+You can override the module install location with the `PY_MOD_DIR` option to make,
+and the `BLDDIR` and `DESTDIR` options also affect pydablooms.
+
+The Makefile attempts to determine the python module location `PY_MOD_DIR`
+automatically. It prefers a location in `/usr/local`, but you can specify
+`PY_MOD_DIR_ARG=--user` to try to use the location which `pip install --user`
+would use in your HOME dir. You can instead specify `PY_MOD_DIR_ARG=--system`
+to prefer the normal/central system python module dir.
+
+See pydablooms/README.md for more info.
+
+#### Go (godablooms)
+The Go bindings "godablooms" are not integrated into the Makefile.
+Install libdablooms first, then look at `godablooms/README.md`
+
+### Contributing
+If you make changes to C portions of dablooms which you would like merged into the
+upstream repository, it would help to have your code match our C coding style. We use
+[astyle](http://astyle.sourceforge.net/), svn rev 353 or later, on our code, with the
+following options:
+
+ astyle --style=1tbs --lineend=linux --convert-tabs --preserve-date \
+ --fill-empty-lines --pad-header --indent-switches \
+ --align-pointer=name --align-reference=name --pad-oper -n <files>
+
+### Testing
+To run a quick and dirty test, type `make test`. This test uses a list of words
+and defaults to `/usr/share/dict/words`. If your path differs, you can use the
+`WORDS` flag to specific its location, such as `make test WORDS=/usr/dict/words`.
+
+This will run a simple test that iterates through a word list and
+adds each word to dablooms. It iterates again, removing every fifth
+element. Lastly, it saves the file, opens a new filter, and iterates a third time
+checking the existence of each word. It prints results of the true negatives,
+false positives, true positives, and false negatives, and the false positive rate.
+
+The false positive rate is calculated by "false positives / (false positivies + true negatives)".
+That is, what rate of real negatives are false positives. This is the interesting
+statistic because the rate of false negatives should always be zero.
+
+The test uses a maximum error rate of .05 (5%) and an initial capacity of 100k. If
+the dictionary is near 500k, we should have created 4 new filters in order to scale to size.
+
+A second test adds every other word in the list, and removes no words, causing each
+used filter to stay at maximum capacity, which is a worse case for accuracy.
+
+Check out the performance yourself, and checkout the size of the resulting file!
+
+## Bloom Filter Basics
+Bloom filters are probabilistic data structures that provide
+space-efficient storage of elements at the cost of occasional false positives on
+membership queries, i.e. a Bloom filter may state true on query when it in fact does
+not contain said element. A Bloom filter is traditionally implemented as an array of
+`M` bits, where `M` is the size of the Bloom filter. On initialization all bits are
+set to zero. A filter is also parameterized by a constant `k` that defines the number
+of hash functions used to set and test bits in the filter. Each hash function should
+output one index in `M`. When inserting an element `x` into the filter, the bits
+in the `k` indices `h1(x), h2(x), ..., hk(X)` are set.
+
+In order to query a Bloom filter, say for element `x`, it suffices to verify if
+all bits in indices `h1(x), h2(x), ..., hk(x)` are set. If one or more of these
+bits is not set then the queried element is definitely not present in the
+filter. However, if all these bits are set, then the element is considered to
+be in the filter. Given this procedure, an error probability exists for positive
+matches, since the tested indices might have been set by the insertion of other
+elements.
+
+### Counting Bloom Filters: Solving Removals
+The same property that results in false positives *also* makes it
+difficult to remove an element from the filter as there is no
+easy means of discerning if another element is hashed to the same bit.
+Unsetting a bit that is hashed by multiple elements can cause **false
+negatives**. Using a counter, instead of a bit, can circumvent this issue.
+The bit can be incremented when an element is hashed to a
+given location, and decremented upon removal. Membership queries rely on whether a
+given counter is greater than zero. This reduces the exceptional
+space-efficiency provided by the standard Bloom filter.
+
+### Scalable Bloom Filters: Solving Scale
+Another important property of a Bloom filter is its linear relationship between size
+and storage capacity. If the maximum allowable error probability and the number of elements to store
+are both known, it is relatively straightforward to dimension an appropriate
+filter. However, it is not always possible to know how many elements
+will need to be stored a priori. There is a trade off between over-dimensioning filters or
+suffering from a ballooning error probability as it fills.
+
+Almeida, Baquero, Preguiça, Hutchison published a paper in 2006, on
+[Scalable Bloom Filters](http://www.sciencedirect.com/science/article/pii/S0020019006003127),
+which suggested a means of scalable Bloom filters by creating essentially
+a list of Bloom filters that act as one large Bloom filter. When greater
+capacity is desired, a new filter is added to the list.
+
+Membership queries are conducted on each filter with the positives
+evaluated if the element is found in any one of the filters. Naively, this
+leads to an increasing compounding error probability since the probability
+of the given structure evaluates to:
+
+ 1 - 𝚺(1 - P)
+
+It is possible to bound this error probability by adding a reducing tightening
+ratio, `r`. As a result, the bounded error probability is represented as:
+
+ 1 - 𝚺(1 - P0 * r^i) where r is chosen as 0 < r < 1
+
+Since size is simply a function of an error probability and capacity, any
+array of growth functions can be applied to scale the size of the Bloom filter
+as necessary. We found it sufficient to pick .9 for `r`.
+
+## Problems with Mixing Scalable and Counting Bloom Filters
+Scalable Bloom filters do not allow for the removal of elements from the filter.
+In addition, simply converting each Bloom filter in a scalable Bloom filter into
+a counting filter also poses problems. Since an element can be in any filter, and
+Bloom filters inherently allow for false positives, a given element may appear to
+be in two or more filters. If an element is inadvertently removed from a filter
+which did not contain it, it would introduce the possibility of **false negatives**.
+
+If however, an element can be removed from the correct filter, it maintains
+the integrity of said filter, i.e. prevents the possibility of false negatives. Thus,
+a scaling, counting, Bloom filter is possible if upon additions and deletions
+one can correctly decide which Bloom filter contains the element.
+
+There are several advantages to using a Bloom filter. A Bloom filter gives the
+application cheap, memory efficient set operations, with no actual data stored
+about the given element. Rather, Bloom filters allow the application to test,
+with some given error probability, the membership of an item. This leads to the
+conclusion that the majority of operations performed on Bloom filters are the
+queries of membership, rather than the addition and removal of elements. Thus,
+for a scaling, counting, Bloom filter, we can optimize for membership queries at
+the expense of additions and removals. This expense comes not in performance,
+but in the addition of more metadata concerning an element and its relation to
+the Bloom filter. With the addition of some sort of identification of an
+element, which does not need to be unique as long as it is fairly distributed, it
+is possible to correctly determine which filter an element belongs to, thereby able
+to maintain the integrity of a given Bloom filter with accurate additions
+and removals.
+
+## Enter dablooms
+dablooms is one such implementation of a scaling, counting, Bloom filter that takes
+additional metadata during additions and deletions in the form of a (generally)
+monotonically increasing integer to classify elements (possibly a timestamp).
+This is used during additions/removals to easily determine the correct Bloom filter
+for an element (each filter is assigned a range). Checking an item against the Bloom
+filter, which is assumed to be the dominant activity, does not use the id (it works
+like a normal scaling Bloom filter).
+
+dablooms is designed to scale itself using these identifiers and the given capacity.
+When a Bloom filter is at capacity, dablooms will create a new Bloom filter which
+starts at the next id after the greatest id of the previous Bloom filter. Given the
+fact that the identifiers monotonically increase, new elements will be added to the
+newest Bloom filter. Note, in theory and as implemented, nothing prevents one from
+adding an element to any "older" filter. You just run the increasing risk of the
+error probability growing beyond the bound as it becomes "overfilled".
+
+You can then remove any element from any Bloom filter using the identifier to intelligently
+pick which Bloom filter to remove from. Consequently, as you continue to remove elements
+from Bloom filters that you are not continuing to add to, these Bloom filters will become
+more accurate.
+
+The "id" of an element does not need to be known to check the Bloom filter, but does need
+to be known when the element is removed (and the same as when it was added). This might
+be convenient if the item already has an appropriate id (almost always increasing for new
+items) associated with it.
+
+### Example use case
+There is a database with a collection of entries. There is a series of items, each of which
+you want to look up in the database; most will have no entry in the database, but some
+will. Perhaps it's a database of spam links. If you use dablooms in front of the database,
+you can avoid needing to check the database for almost all items which won't be found in
+it anyway, and save a lot of time and effort. It's also much easier to distribute the
+Bloom filter than the entire database. But to make it work, you need to determine an "id"
+whenever you add to or remove from the Bloom filter. You could store the timestamp when
+you add the item to the database as another column in the database, and give it to
+`scaling_bloom_add()` as well. When you remove the item, you look it up in the database
+first and pass the timestamp stored there to `scaling_bloom_remove()`. The timestamps for
+new items will be equal or greater, and definitely greater over time. Instead of
+timestamps, you could also use an auto-incrementing index. Checks against the Bloom
+don't need to know the id and should be quick. If a check comes back negative, you can be
+sure the item isn't in the database, and skip that query completely. If a check comes
+back positive, you have to query the database, because there's a slight chance that the
+item isn't actually in there.
diff --git a/bindings/rs-dablooms/dablooms/godablooms/README.md b/bindings/rs-dablooms/dablooms/godablooms/README.md
new file mode 100644
index 0000000..971e6cc
--- /dev/null
+++ b/bindings/rs-dablooms/dablooms/godablooms/README.md
@@ -0,0 +1,15 @@
+godablooms
+==========
+
+For the Go package you can install outside of `make` via:
+
+ $ go get github.com/bitly/dablooms/godablooms
+
+However, we recommend using [go-install-as](https://github.com/mreiferson/go-install-as):
+
+ $ go tool install_as --import-as=bitly/dablooms
+
+To run tests:
+
+ $ go test
+
diff --git a/bindings/rs-dablooms/dablooms/godablooms/dablooms.go b/bindings/rs-dablooms/dablooms/godablooms/dablooms.go
new file mode 100644
index 0000000..14fb63d
--- /dev/null
+++ b/bindings/rs-dablooms/dablooms/godablooms/dablooms.go
@@ -0,0 +1,73 @@
+package dablooms
+
+/*
+#cgo LDFLAGS: -ldablooms
+
+#include <stdlib.h>
+#include <dablooms.h>
+*/
+import "C"
+
+import (
+ "unsafe"
+)
+
+func Version() string {
+ return "0.9.0"
+}
+
+type ScalingBloom struct {
+ cfilter *C.scaling_bloom_t
+}
+
+func NewScalingBloom(capacity C.uint, errorRate C.double, filename string) *ScalingBloom {
+ cFilename := C.CString(filename)
+ defer C.free(unsafe.Pointer(cFilename))
+ sb := &ScalingBloom{
+ cfilter: C.new_scaling_bloom(capacity, errorRate, cFilename),
+ }
+ return sb
+}
+
+func NewScalingBloomFromFile(capacity C.uint, errorRate C.double, filename string) *ScalingBloom {
+ cFilename := C.CString(filename)
+ defer C.free(unsafe.Pointer(cFilename))
+ sb := &ScalingBloom{
+ cfilter: C.new_scaling_bloom_from_file(capacity, errorRate, cFilename),
+ }
+ return sb
+}
+
+// apparently this is an unsupported feature of cgo
+// we should probably use runtime.SetFinalizer
+// see: https://groups.google.com/forum/?fromgroups#!topic/golang-dev/5cD0EmU2voI
+func (sb *ScalingBloom) Destroy() {
+ C.free_scaling_bloom(sb.cfilter)
+}
+
+func (sb *ScalingBloom) Check(key []byte) bool {
+ cKey := (*C.char)(unsafe.Pointer(&key[0]))
+ return C.scaling_bloom_check(sb.cfilter, cKey, C.size_t(len(key))) == 1
+}
+
+func (sb *ScalingBloom) Add(key []byte, id C.uint64_t) bool {
+ cKey := (*C.char)(unsafe.Pointer(&key[0]))
+ return C.scaling_bloom_add(sb.cfilter, cKey, C.size_t(len(key)), id) == 1
+}
+
+func (sb *ScalingBloom) Remove(key []byte, id C.uint64_t) bool {
+ cKey := (*C.char)(unsafe.Pointer(&key[0]))
+ return C.scaling_bloom_remove(sb.cfilter, cKey, C.size_t(len(key)), id) == 1
+}
+
+func (sb *ScalingBloom) Flush() bool {
+ return C.scaling_bloom_flush(sb.cfilter) == 1
+}
+
+func (sb *ScalingBloom) MemSeqNum() C.uint64_t {
+ return C.scaling_bloom_mem_seqnum(sb.cfilter)
+}
+
+func (sb *ScalingBloom) DiskSeqNum() C.uint64_t {
+ return C.scaling_bloom_disk_seqnum(sb.cfilter)
+}
diff --git a/bindings/rs-dablooms/dablooms/godablooms/dablooms_test.go b/bindings/rs-dablooms/dablooms/godablooms/dablooms_test.go
new file mode 100644
index 0000000..826124f
--- /dev/null
+++ b/bindings/rs-dablooms/dablooms/godablooms/dablooms_test.go
@@ -0,0 +1,40 @@
+package dablooms
+
+import (
+ "io/ioutil"
+ "log"
+ "os"
+ "testing"
+)
+
+func TestPutMessage(t *testing.T) {
+ log.SetOutput(ioutil.Discard)
+ defer log.SetOutput(os.Stdout)
+
+ sb := NewScalingBloom(1000, 0.5, "testbloom.bin")
+ if sb == nil {
+ t.Fatalf("NewScalingBloom failed")
+ }
+
+ key := []byte("test")
+
+ if sb.Check(key) != false {
+ t.Fatalf("Check failed")
+ }
+
+ if sb.Add(key, 1) != true {
+ t.Fatalf("Add failed")
+ }
+
+ if sb.Check(key) != true {
+ t.Fatalf("'%s' not found", key)
+ }
+
+ if sb.Remove(key, 1) != true {
+ t.Fatalf("Remove failed")
+ }
+
+ if sb.Check(key) != false {
+ t.Fatalf("'%s' was found (after remove)", key)
+ }
+}
diff --git a/bindings/rs-dablooms/dablooms/pydablooms/README.md b/bindings/rs-dablooms/dablooms/pydablooms/README.md
new file mode 100644
index 0000000..c3534e4
--- /dev/null
+++ b/bindings/rs-dablooms/dablooms/pydablooms/README.md
@@ -0,0 +1,28 @@
+pydablooms
+==========
+
+Python language bindings for dablooms. See the main dablooms `README.md`
+for build and install instructions. There is also a `test_pydablooms`
+target in the Makefile (remember to specify any options to make you used
+during the build).
+
+
+### Example usage
+
+ >>> import pydablooms
+ >>> bloom = pydablooms.Dablooms(capacity=1000,
+ ... error_rate=.05,
+ ... filepath='/tmp/bloom.bin')
+ >>> bloom.add('foo', 2)
+ 1
+ >>> bloom.check('bar')
+ 0
+ >>> bloom.delete('foo', 2)
+ 0
+ >>> bloom.check('foo')
+ 0
+ >>> del bloom
+ >>> bloom = pydablooms.load_dabloom(capacity=1000,
+ ... error_rate=.05,
+ ... filepath='/tmp/bloom.bin')
+ >>>
diff --git a/bindings/rs-dablooms/dablooms/pydablooms/modpath.py b/bindings/rs-dablooms/dablooms/pydablooms/modpath.py
new file mode 100644
index 0000000..bf9faee
--- /dev/null
+++ b/bindings/rs-dablooms/dablooms/pydablooms/modpath.py
@@ -0,0 +1,38 @@
+#!/usr/bin/env python
+import os
+import sys
+import distutils.sysconfig
+
+user = False
+system = False
+
+def err_exit(msg):
+ sys.stderr.write(msg + "\n")
+ exit(1)
+
+for arg in sys.argv[1:]:
+ if arg == "--user": user = True
+ elif arg == "--system": system = True
+ else:
+ err_exit("invalid argument '%s'" % arg)
+
+mod_paths = sys.path
+mod_paths.reverse()
+
+if user:
+ home = os.environ.get('HOME')
+ if not home:
+ err_exit("environment variable 'HOME' not set")
+ for p in mod_paths:
+ if p.startswith(home):
+ print(p)
+ exit(0)
+ err_exit("no user python module path found")
+
+if not system:
+ for p in mod_paths:
+ if p.startswith("/usr/local/"):
+ print(p)
+ exit(0)
+
+print(distutils.sysconfig.get_python_lib())
diff --git a/bindings/rs-dablooms/dablooms/pydablooms/pydablooms.c b/bindings/rs-dablooms/dablooms/pydablooms/pydablooms.c
new file mode 100644
index 0000000..c101fc2
--- /dev/null
+++ b/bindings/rs-dablooms/dablooms/pydablooms/pydablooms.c
@@ -0,0 +1,225 @@
+#include <Python.h>
+#include "dablooms.h"
+#include "structmember.h"
+
+int Py_ModuleVersion = 1;
+
+typedef struct {
+ PyObject_HEAD
+ scaling_bloom_t *filter; /* Type-specific fields go here. */
+} Dablooms;
+
+static void Dablooms_dealloc(Dablooms *self)
+{
+ free_scaling_bloom(self->filter);
+ self->ob_type->tp_free((PyObject *)self);
+}
+
+static PyObject *Dablooms_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
+{
+ Dablooms *self;
+
+ if ((self = (Dablooms *)type->tp_alloc(type, 0)) == NULL) {
+ return NULL;
+ }
+
+ self->filter = NULL;
+
+ return (PyObject *) self;
+}
+
+static int Dablooms_init(Dablooms *self, PyObject *args, PyObject *kwds)
+{
+ double error_rate;
+ const char *filepath;
+ unsigned int capacity;
+ static char *kwlist[] = {"capacity", "error_rate", "filepath", NULL};
+
+ if (! PyArg_ParseTupleAndKeywords(args, kwds, "|ids", kwlist,
+ &capacity, &error_rate, &filepath)) {
+ return -1;
+ }
+
+ self->filter = new_scaling_bloom(capacity, error_rate, filepath);
+
+ return 0;
+}
+
+
+static int contains(Dablooms *self, PyObject *key)
+{
+ const char *hash;
+ int len;
+
+ if (!PyArg_Parse(key, "s#", &hash, &len)) {
+ return -1;
+ }
+ return scaling_bloom_check(self->filter, hash, len);
+}
+
+static PyObject *check(Dablooms *self, PyObject *args)
+{
+ const char *hash;
+ int len;
+
+ if (!PyArg_ParseTuple(args, "s#", &hash, &len)) {
+ return NULL;
+ }
+ return Py_BuildValue("i", scaling_bloom_check(self->filter, hash, len));
+}
+
+static PyObject *add(Dablooms *self, PyObject *args, PyObject *kwds)
+{
+ const char *hash;
+ int len;
+ unsigned long long id;
+ static char *kwlist[] = {"hash", "id", NULL};
+
+ if (! PyArg_ParseTupleAndKeywords(args, kwds, "|s#K", kwlist, &hash, &len, &id)) {
+ return NULL;
+ }
+
+ return Py_BuildValue("i", scaling_bloom_add(self->filter, hash, len, id));
+}
+
+static PyObject *delete(Dablooms *self, PyObject *args, PyObject *kwds)
+{
+ const char *hash;
+ int len;
+ unsigned long long id;
+ static char *kwlist[] = {"hash", "id", NULL};
+
+ if (! PyArg_ParseTupleAndKeywords(args, kwds, "|s#K", kwlist, &hash, &len, &id)) {
+ return NULL;
+ }
+
+ return Py_BuildValue("i", scaling_bloom_remove(self->filter, hash, len, id));
+}
+
+static PyObject *flush(Dablooms *self, PyObject *args, PyObject *kwds)
+{
+ return Py_BuildValue("i", scaling_bloom_flush(self->filter));
+}
+
+static PyObject *mem_seqnum(Dablooms *self, PyObject *args, PyObject *kwds)
+{
+ return Py_BuildValue("K", scaling_bloom_mem_seqnum(self->filter));
+}
+
+static PyObject *disk_seqnum(Dablooms *self, PyObject *args, PyObject *kwds)
+{
+ return Py_BuildValue("K", scaling_bloom_disk_seqnum(self->filter));
+}
+
+static PyMethodDef Dablooms_methods[] = {
+ {"add", (PyCFunction)add, METH_VARARGS | METH_KEYWORDS, "Add an element to the bloom filter."},
+ {"delete", (PyCFunction)delete, METH_VARARGS | METH_KEYWORDS, "Remove an element from the bloom filter."},
+ {"check", (PyCFunction)check, METH_VARARGS | METH_KEYWORDS, "Check if an element is in the bloom filter."},
+ {"flush", (PyCFunction)flush, METH_VARARGS | METH_KEYWORDS, "Flush a bloom filter to file."},
+ {"mem_seqnum", (PyCFunction)mem_seqnum, METH_VARARGS | METH_KEYWORDS, "Get the memory-consistent sequence number."},
+ {"disk_seqnum", (PyCFunction)disk_seqnum, METH_VARARGS | METH_KEYWORDS, "Get the disk-consistent sequence number."},
+ {NULL}, /* Sentinel */
+};
+
+static PyMemberDef Dablooms_members[] = {
+ {NULL} /* Sentinel */
+};
+
+static PySequenceMethods Dablooms_sequence = {
+ NULL, /*sq_length*/
+ NULL, /*sq_concat*/
+ NULL, /*sq_repeat*/
+ NULL, /*sq_item*/
+ NULL, /*sq_slice*/
+ NULL, /*sq_ass_item*/
+ NULL, /*sq_ass_slice*/
+ (objobjproc)contains, /*sq_contains*/
+};
+
+static PyTypeObject DabloomsType = {
+ PyObject_HEAD_INIT(NULL)
+ 0, /*ob_size*/
+ "pydablooms.Dablooms", /*tp_name*/
+ sizeof(Dablooms), /*tp_basicsize*/
+ 0, /*tp_itemsize*/
+ (destructor)Dablooms_dealloc, /*tp_dealloc*/
+ 0, /*tp_print*/
+ 0, /*tp_getattr*/
+ 0, /*tp_setattr*/
+ 0, /*tp_compare*/
+ 0, /*tp_repr*/
+ 0, /*tp_as_number*/
+ &Dablooms_sequence, /*tp_as_sequence*/
+ 0, /*tp_as_mapping*/
+ 0, /*tp_hash*/
+ 0, /*tp_call*/
+ 0, /*tp_str*/
+ 0, /*tp_getattro*/
+ 0, /*tp_setattro*/
+ 0, /*tp_as_buffer*/
+ Py_TPFLAGS_DEFAULT, /*tp_flags*/
+ "Dablooms objects", /*tp_doc*/
+ 0, /*tp_traverse*/
+ 0, /*tp_clear*/
+ 0, /*tp_richcompare*/
+ 0, /*tp_weaklistoffset*/
+ 0, /*tp_iter*/
+ 0, /*tp_iternext*/
+ Dablooms_methods, /*tp_methods*/
+ Dablooms_members, /*tp_members*/
+ 0, /*tp_getset*/
+ 0, /*tp_base*/
+ 0, /*tp_dict*/
+ 0, /*tp_descr_get*/
+ 0, /*tp_descr_set*/
+ 0, /*tp_dictoffset*/
+ (initproc)Dablooms_init, /*tp_init*/
+ 0, /*tp_alloc*/
+ Dablooms_new, /*tp_new*/
+};
+
+static PyObject *load_dabloom(PyTypeObject *type, PyObject *args, PyObject *kwds)
+{
+ Dablooms *self = (Dablooms *)PyObject_New(Dablooms, &DabloomsType);
+ double error_rate;
+ const char *filepath;
+ unsigned int capacity;
+ static char *kwlist[] = {"capacity", "error_rate", "filepath", NULL};
+
+ if (! PyArg_ParseTupleAndKeywords(args, kwds, "|ids", kwlist,
+ &capacity, &error_rate, &filepath)) {
+ return NULL;
+ }
+
+ self->filter = new_scaling_bloom_from_file(capacity, error_rate, filepath);
+ return (PyObject *) self;
+}
+
+static PyMethodDef pydablooms_methods[] = {
+ {"load_dabloom", (PyCFunction)load_dabloom, METH_VARARGS | METH_KEYWORDS, "Add an element to the bloom filter."},
+ {NULL}
+};
+
+#ifndef PyMODINIT_FUNC
+#define PyMODINIT_FUNC void
+#endif
+
+
+PyMODINIT_FUNC initpydablooms(void)
+{
+ PyObject *m;
+ if (PyType_Ready(&DabloomsType) < 0) {
+ return;
+ }
+
+ m = Py_InitModule3("pydablooms", pydablooms_methods, "Dablooms module");
+
+ if (m == NULL) {
+ return;
+ }
+
+ PyModule_AddObject(m, "__version__", PyString_FromString(dablooms_version()));
+
+ Py_INCREF(&DabloomsType);
+ PyModule_AddObject(m, "Dablooms", (PyObject *)&DabloomsType);
+}
diff --git a/bindings/rs-dablooms/dablooms/pydablooms/setup.py b/bindings/rs-dablooms/dablooms/pydablooms/setup.py
new file mode 100644
index 0000000..add7ba5
--- /dev/null
+++ b/bindings/rs-dablooms/dablooms/pydablooms/setup.py
@@ -0,0 +1,35 @@
+from distutils.core import setup, Extension
+import os, sys
+
+def local_path(path):
+ local_dir = os.path.dirname(__file__)
+ return os.path.normpath(os.path.join(local_dir, path))
+
+def parse_version_from_c():
+ cfile = open(local_path('../src/dablooms.c'))
+ result = ''
+ for line in cfile:
+ parts = line.split()
+ if len(parts) == 3 and parts[:2] == ['#define', 'DABLOOMS_VERSION']:
+ result = parts[2].strip('"')
+ break
+ cfile.close()
+ return result
+
+def path_from_env(name, default):
+ return os.environ.get(name, local_path(default))
+
+module1 = Extension('pydablooms',
+ include_dirs = [local_path('../src')],
+ sources = [local_path('pydablooms.c'),
+ local_path('../src/dablooms.c'),
+ local_path('../src/murmur.c'), ],
+ )
+
+setup (name = 'pydablooms',
+ version = parse_version_from_c(),
+ description = 'This is a a python extension of the scaling, counting, bloom filter, dablooms.',
+ author = 'Justin P. Hines',
+ author_email = '[email protected]',
+ url = 'http://github.com/bitly/dablooms.git',
+ ext_modules = [module1])
diff --git a/bindings/rs-dablooms/dablooms/pydablooms/test_pydablooms.py b/bindings/rs-dablooms/dablooms/pydablooms/test_pydablooms.py
new file mode 100644
index 0000000..227433e
--- /dev/null
+++ b/bindings/rs-dablooms/dablooms/pydablooms/test_pydablooms.py
@@ -0,0 +1,104 @@
+import sys, os
+import pydablooms
+
+capacity = 100000
+error_rate = 0.05
+
+print("pydablooms version: %s" % pydablooms.__version__)
+
+if len(sys.argv) != 3:
+ sys.stderr.write("Usage: %s <bloom_file> <words_file>\n" % sys.argv[0])
+ sys.exit(1)
+
+bloom_fname = sys.argv[1]
+words_fname = sys.argv[2]
+
+bloom = pydablooms.Dablooms(capacity=capacity,
+ error_rate=error_rate,
+ filepath=bloom_fname)
+
+words_file = open(words_fname, 'rb')
+i = 0
+for line in words_file:
+ bloom.add(line.rstrip(), i)
+ i += 1
+
+words_file.seek(0)
+i = 0
+for line in words_file:
+ if i % 5 == 0:
+ bloom.delete(line.rstrip(), i)
+ i += 1
+
+bloom.flush()
+del bloom
+
+bloom = pydablooms.load_dabloom(capacity=capacity,
+ error_rate=error_rate,
+ filepath=bloom_fname)
+
+true_positives = 0
+true_negatives = 0
+false_positives = 0
+false_negatives = 0
+
+words_file.seek(0)
+i = 0
+for line in words_file:
+ exists = bloom.check(line.rstrip())
+ contains = line.rstrip() in bloom
+ assert exists == contains, \
+ "ERROR: %r from 'bloom.check(x)', %i from 'x in bloom'" \
+ % (exists, contains)
+
+ if i % 5 == 0:
+ if exists:
+ false_positives += 1
+ else:
+ true_negatives += 1
+ else:
+ if exists:
+ true_positives += 1
+ else:
+ false_negatives += 1
+ sys.stderr.write("ERROR: False negative: '%s'\n" % line.rstrip())
+ i += 1
+
+words_file.close()
+del bloom
+
+false_positive_rate = float(false_positives) / (false_positives + true_negatives)
+
+print('''
+Elements Added: %6d
+Elements Removed: %6d
+
+True Positives: %6d
+True Negatives: %6d
+False Positives: %6d
+False Negatives: %6d
+
+False positive rate: %.4f
+Total Size: %d KiB''' % (
+ i, i/5,
+ true_positives,
+ true_negatives,
+ false_positives,
+ false_negatives,
+ false_positive_rate,
+ os.stat(bloom_fname).st_size / 1024
+ )
+)
+
+if false_negatives > 0:
+ print("TEST FAIL (false negatives exist)")
+elif false_positive_rate > error_rate:
+ print("TEST WARN (false positive rate too high)")
+else:
+ print("TEST PASS")
+print("")
+
+if false_negatives > 0:
+ sys.exit(1)
+else:
+ sys.exit(0)
diff --git a/bindings/rs-dablooms/dablooms/src/dablooms.c b/bindings/rs-dablooms/dablooms/src/dablooms.c
new file mode 100644
index 0000000..dc1ba7a
--- /dev/null
+++ b/bindings/rs-dablooms/dablooms/src/dablooms.c
@@ -0,0 +1,649 @@
+/* Copyright @2012 by Justin Hines at Bitly under a very liberal license. See LICENSE in the source distribution. */
+
+#include <sys/stat.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <math.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <errno.h>
+#include <time.h>
+#include "murmur.h"
+#include "dablooms.h"
+
+#define DABLOOMS_VERSION "0.9.1"
+
+#define ERROR_TIGHTENING_RATIO 0.5
+#define SALT_CONSTANT 0x97c29b3a
+
+const char *dablooms_version(void)
+{
+ return DABLOOMS_VERSION;
+}
+
+void free_bitmap(bitmap_t *bitmap)
+{
+#if 0
+ if ((munmap(bitmap->array, bitmap->bytes)) < 0) {
+ perror("Error, unmapping memory");
+ }
+#else
+ free(bitmap->array);
+#endif
+ free(bitmap);
+}
+
+bitmap_t *bitmap_resize(bitmap_t *bitmap, size_t old_size, size_t new_size)
+{
+
+#if 0
+ /* resize if mmap exists and possible on this os, else new mmap */
+ if (bitmap->array != NULL) {
+#if __linux
+ bitmap->array = mremap(bitmap->array, old_size, new_size, MREMAP_MAYMOVE);
+ if (bitmap->array == MAP_FAILED) {
+ perror("Error resizing mmap");
+ free_bitmap(bitmap);
+ return NULL;
+ }
+#else
+ if (munmap(bitmap->array, bitmap->bytes) < 0) {
+ perror("Error unmapping memory");
+ free_bitmap(bitmap);
+ return NULL;
+ }
+ bitmap->array = NULL;
+#endif
+ }
+ if (bitmap->array == NULL) {
+ bitmap->array = mmap(NULL, new_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
+ if (bitmap->array == MAP_FAILED) {
+ perror("Error init mmap");
+ free_bitmap(bitmap);
+ return NULL;
+ }
+ }
+#else
+ if (bitmap->array != NULL)
+ {
+ bitmap->array = (char *)realloc(bitmap->array, new_size);
+ if (bitmap->array == NULL)
+ {
+ perror("Error resizing memory");
+ free_bitmap(bitmap);
+ return NULL;
+ }
+ memset(bitmap->array + old_size, 0, new_size - old_size);
+ }
+ else
+ {
+ bitmap->array = (char *)malloc(new_size);
+ if (bitmap->array == NULL)
+ {
+ perror("Error init memory");
+ free_bitmap(bitmap);
+ return NULL;
+ }
+ memset(bitmap->array, 0, new_size);
+ }
+#endif
+ bitmap->bytes = new_size;
+ return bitmap;
+}
+
+/* Create a new bitmap, not full featured, simple to give
+ * us a means of interacting with the 4 bit counters */
+bitmap_t *new_bitmap(size_t bytes)
+{
+ bitmap_t *bitmap;
+
+ if ((bitmap = (bitmap_t *)malloc(sizeof(bitmap_t))) == NULL) {
+ return NULL;
+ }
+
+ bitmap->bytes = bytes;
+ bitmap->array = NULL;
+
+ if ((bitmap = bitmap_resize(bitmap, 0, bytes)) == NULL) {
+ return NULL;
+ }
+
+ return bitmap;
+}
+
+int bitmap_increment(bitmap_t *bitmap, unsigned int index, long offset)
+{
+ long access = index / 2 + offset;
+ uint8_t temp;
+ __builtin_prefetch(&(bitmap->array[access]), 0, 1);
+ uint8_t n = bitmap->array[access];
+ if (index % 2 != 0) {
+ temp = (n & 0x0f);
+ n = (n & 0xf0) + ((n & 0x0f) + 0x01);
+ } else {
+ temp = (n & 0xf0) >> 4;
+ n = (n & 0x0f) + ((n & 0xf0) + 0x10);
+ }
+
+ if (temp == 0x0f) {
+ //fprintf(stderr, "Error, 4 bit int Overflow\n");
+ return -1;
+ }
+
+ __builtin_prefetch(&(bitmap->array[access]), 1, 1);
+ bitmap->array[access] = n;
+ return 0;
+}
+
+/* increments the four bit counter */
+int bitmap_decrement(bitmap_t *bitmap, unsigned int index, long offset)
+{
+ long access = index / 2 + offset;
+ uint8_t temp;
+ uint8_t n = bitmap->array[access];
+
+ if (index % 2 != 0) {
+ temp = (n & 0x0f);
+ n = (n & 0xf0) + ((n & 0x0f) - 0x01);
+ } else {
+ temp = (n & 0xf0) >> 4;
+ n = (n & 0x0f) + ((n & 0xf0) - 0x10);
+ }
+
+ if (temp == 0x00) {
+ //fprintf(stderr, "Error, Decrementing zero\n");
+ return -1;
+ }
+
+ bitmap->array[access] = n;
+ return 0;
+}
+
+/* decrements the four bit counter */
+int bitmap_check(bitmap_t *bitmap, unsigned int index, long offset)
+{
+ long access = index / 2 + offset;
+ if (index % 2 != 0 ) {
+ return bitmap->array[access] & 0x0f;
+ } else {
+ return bitmap->array[access] & 0xf0;
+ }
+}
+
+int bitmap_flush(bitmap_t *bitmap)
+{
+#if 0
+ if ((msync(bitmap->array, bitmap->bytes, MS_SYNC) < 0)) {
+ perror("Error, flushing bitmap to disk");
+ return -1;
+ } else {
+ return 0;
+ }
+#else
+ return 0;
+#endif
+}
+
+/*
+ * Perform the actual hashing for `key`
+ *
+ * Only call the hash once to get a pair of initial values (h1 and
+ * h2). Use these values to generate all hashes in a quick loop.
+ *
+ * See paper by Kirsch, Mitzenmacher [2006]
+ * http://www.eecs.harvard.edu/~michaelm/postscripts/rsa2008.pdf
+ */
+void hash_func(counting_bloom_t *bloom, const char *key, size_t key_len, uint32_t *hashes)
+{
+ int i;
+ uint32_t checksum[4];
+
+ MurmurHash3_x64_128(key, key_len, SALT_CONSTANT, checksum);
+ uint32_t h1 = checksum[0];
+ uint32_t h2 = checksum[1];
+
+ for (i = 0; i < bloom->nfuncs; i++) {
+ hashes[i] = (h1 + i * h2) % bloom->counts_per_func;
+ }
+}
+
+int free_counting_bloom(counting_bloom_t *bloom)
+{
+ if (bloom != NULL) {
+ free(bloom->hashes);
+ bloom->hashes = NULL;
+ free_bitmap(bloom->bitmap);
+ free(bloom);
+ bloom = NULL;
+ }
+ return 0;
+}
+
+counting_bloom_t *counting_bloom_init(unsigned int capacity, double error_rate, long offset)
+{
+ counting_bloom_t *bloom;
+
+ if ((bloom = malloc(sizeof(counting_bloom_t))) == NULL) {
+ fprintf(stderr, "Error, could not realloc a new bloom filter\n");
+ return NULL;
+ }
+ bloom->bitmap = NULL;
+ bloom->capacity = capacity;
+ bloom->error_rate = error_rate;
+ bloom->offset = offset + sizeof(counting_bloom_header_t);
+ bloom->nfuncs = (int) ceil(log(1 / error_rate) / log(2));
+ bloom->counts_per_func = (int) ceil(capacity * fabs(log(error_rate)) / (bloom->nfuncs * pow(log(2), 2)));
+ bloom->size = bloom->nfuncs * bloom->counts_per_func;
+ /* rounding-up integer divide by 2 of bloom->size */
+ bloom->num_bytes = ((bloom->size + 1) / 2) + sizeof(counting_bloom_header_t);
+ bloom->hashes = calloc(bloom->nfuncs, sizeof(uint32_t));
+
+ return bloom;
+}
+
+counting_bloom_t *new_counting_bloom(unsigned int capacity, double error_rate)
+{
+ counting_bloom_t *cur_bloom;
+
+ cur_bloom = counting_bloom_init(capacity, error_rate, 0);
+ cur_bloom->bitmap = new_bitmap(cur_bloom->num_bytes);
+ cur_bloom->header = (counting_bloom_header_t *)(cur_bloom->bitmap->array);
+ return cur_bloom;
+}
+
+int counting_bloom_add(counting_bloom_t *bloom, const char *s, size_t len)
+{
+ unsigned int index, i, offset;
+ unsigned int *hashes = bloom->hashes;
+
+ hash_func(bloom, s, len, hashes);
+
+ for (i = 0; i < bloom->nfuncs; i++) {
+ offset = i * bloom->counts_per_func;
+ index = hashes[i] + offset;
+ bitmap_increment(bloom->bitmap, index, bloom->offset);
+ }
+ bloom->header->count++;
+
+ return 0;
+}
+
+int counting_bloom_remove(counting_bloom_t *bloom, const char *s, size_t len)
+{
+ unsigned int index, i, offset;
+ unsigned int *hashes = bloom->hashes;
+
+ hash_func(bloom, s, len, hashes);
+
+ for (i = 0; i < bloom->nfuncs; i++) {
+ offset = i * bloom->counts_per_func;
+ index = hashes[i] + offset;
+ bitmap_decrement(bloom->bitmap, index, bloom->offset);
+ }
+ bloom->header->count--;
+
+ return 0;
+}
+
+int counting_bloom_check(counting_bloom_t *bloom, const char *s, size_t len)
+{
+ unsigned int index, i, offset;
+ unsigned int *hashes = bloom->hashes;
+
+ hash_func(bloom, s, len, hashes);
+
+ for (i = 0; i < bloom->nfuncs; i++) {
+ offset = i * bloom->counts_per_func;
+ index = hashes[i] + offset;
+ if (!(bitmap_check(bloom->bitmap, index, bloom->offset))) {
+ return 0;
+ }
+ }
+ return 1;
+}
+
+int free_scaling_bloom(scaling_bloom_t *bloom)
+{
+ int i;
+ for (i = bloom->num_blooms - 1; i >= 0; i--) {
+ free(bloom->blooms[i]->hashes);
+ bloom->blooms[i]->hashes = NULL;
+ free(bloom->blooms[i]);
+ bloom->blooms[i] = NULL;
+ }
+ free(bloom->blooms);
+ free_bitmap(bloom->bitmap);
+ free(bloom);
+ return 0;
+}
+
+/* creates a new counting bloom filter from a given scaling bloom filter, with count and id */
+counting_bloom_t *new_counting_bloom_from_scale(scaling_bloom_t *bloom)
+{
+ int i;
+ long offset;
+ double error_rate;
+ counting_bloom_t *cur_bloom;
+
+ error_rate = bloom->error_rate * (pow(ERROR_TIGHTENING_RATIO, bloom->num_blooms + 1));
+
+ if ((bloom->blooms = realloc(bloom->blooms, (bloom->num_blooms + 1) * sizeof(counting_bloom_t *))) == NULL) {
+ fprintf(stderr, "Error, could not realloc a new bloom filter\n");
+ return NULL;
+ }
+
+ cur_bloom = counting_bloom_init(bloom->capacity, error_rate, bloom->num_bytes);
+ bloom->blooms[bloom->num_blooms] = cur_bloom;
+
+ bloom->bitmap = bitmap_resize(bloom->bitmap, bloom->num_bytes, bloom->num_bytes + cur_bloom->num_bytes);
+
+ /* reset header pointer, as mmap may have moved */
+ bloom->header = (scaling_bloom_header_t *) bloom->bitmap->array;
+
+ /* Set the pointers for these header structs to the right location since mmap may have moved */
+ bloom->num_blooms++;
+ for (i = 0; i < bloom->num_blooms; i++) {
+ offset = bloom->blooms[i]->offset - sizeof(counting_bloom_header_t);
+ bloom->blooms[i]->header = (counting_bloom_header_t *) (bloom->bitmap->array + offset);
+ }
+
+ bloom->num_bytes += cur_bloom->num_bytes;
+ cur_bloom->bitmap = bloom->bitmap;
+
+ return cur_bloom;
+}
+
+
+uint64_t scaling_bloom_clear_seqnums(scaling_bloom_t *bloom)
+{
+ uint64_t seqnum;
+
+ if (bloom->header->disk_seqnum != 0) {
+ // disk_seqnum cleared on disk before any other changes
+ bloom->header->disk_seqnum = 0;
+ bitmap_flush(bloom->bitmap);
+ }
+ seqnum = bloom->header->mem_seqnum;
+ bloom->header->mem_seqnum = 0;
+ return seqnum;
+}
+
+int scaling_bloom_add(scaling_bloom_t *bloom, const char *s, size_t len, uint64_t id)
+{
+ int i;
+ uint64_t seqnum;
+
+ counting_bloom_t *cur_bloom = NULL;
+ for (i = bloom->num_blooms - 1; i >= 0; i--) {
+ cur_bloom = bloom->blooms[i];
+ if (id >= cur_bloom->header->id) {
+ break;
+ }
+ }
+
+ seqnum = scaling_bloom_clear_seqnums(bloom);
+
+ if ((id > bloom->header->max_id) && (cur_bloom->header->count >= cur_bloom->capacity - 1)) {
+ cur_bloom = new_counting_bloom_from_scale(bloom);
+ cur_bloom->header->count = 0;
+ cur_bloom->header->id = bloom->header->max_id + 1;
+ }
+ if (bloom->header->max_id < id) {
+ bloom->header->max_id = id;
+ }
+ counting_bloom_add(cur_bloom, s, len);
+
+ bloom->header->mem_seqnum = seqnum + 1;
+
+ return 1;
+}
+
+int scaling_bloom_remove(scaling_bloom_t *bloom, const char *s, size_t len, uint64_t id)
+{
+ counting_bloom_t *cur_bloom;
+ int i;
+ uint64_t seqnum;
+
+ for (i = bloom->num_blooms - 1; i >= 0; i--) {
+ cur_bloom = bloom->blooms[i];
+ if (id >= cur_bloom->header->id) {
+ seqnum = scaling_bloom_clear_seqnums(bloom);
+
+ counting_bloom_remove(cur_bloom, s, len);
+
+ bloom->header->mem_seqnum = seqnum + 1;
+ return 1;
+ }
+ }
+ return 0;
+}
+
+int scaling_bloom_check(scaling_bloom_t *bloom, const char *s, size_t len)
+{
+ int i;
+ counting_bloom_t *cur_bloom;
+ for (i = bloom->num_blooms - 1; i >= 0; i--) {
+ cur_bloom = bloom->blooms[i];
+ if (counting_bloom_check(cur_bloom, s, len)) {
+ return 1;
+ }
+ }
+ return 0;
+}
+
+int scaling_bloom_flush(scaling_bloom_t *bloom)
+{
+ if (bitmap_flush(bloom->bitmap) != 0) {
+ return -1;
+ }
+ // all changes written to disk before disk_seqnum set
+ if (bloom->header->disk_seqnum == 0) {
+ bloom->header->disk_seqnum = bloom->header->mem_seqnum;
+ return bitmap_flush(bloom->bitmap);
+ }
+ return 0;
+}
+
+uint64_t scaling_bloom_mem_seqnum(scaling_bloom_t *bloom)
+{
+ return bloom->header->mem_seqnum;
+}
+
+uint64_t scaling_bloom_disk_seqnum(scaling_bloom_t *bloom)
+{
+ return bloom->header->disk_seqnum;
+}
+
+scaling_bloom_t *scaling_bloom_init(unsigned int capacity, double error_rate)
+{
+ scaling_bloom_t *bloom;
+
+ if ((bloom = malloc(sizeof(scaling_bloom_t))) == NULL) {
+ return NULL;
+ }
+ if ((bloom->bitmap = new_bitmap(sizeof(scaling_bloom_header_t))) == NULL) {
+ fprintf(stderr, "Error, Could not create bitmap with file\n");
+ free_scaling_bloom(bloom);
+ return NULL;
+ }
+
+ bloom->header = (scaling_bloom_header_t *) bloom->bitmap->array;
+ bloom->capacity = capacity;
+ bloom->error_rate = error_rate;
+ bloom->num_blooms = 0;
+ bloom->num_bytes = sizeof(scaling_bloom_header_t);
+ bloom->blooms = NULL;
+
+ return bloom;
+}
+
+scaling_bloom_t *new_scaling_bloom(unsigned int capacity, double error_rate)
+{
+
+ scaling_bloom_t *bloom;
+ counting_bloom_t *cur_bloom;
+
+ bloom = scaling_bloom_init(capacity, error_rate);
+
+ if (!(cur_bloom = new_counting_bloom_from_scale(bloom))) {
+ fprintf(stderr, "Error, Could not create counting bloom\n");
+ free_scaling_bloom(bloom);
+ return NULL;
+ }
+ cur_bloom->header->count = 0;
+ cur_bloom->header->id = 0;
+
+ bloom->header->mem_seqnum = 1;
+ return bloom;
+}
+
+
+struct expiry_dablooms_handle{
+ scaling_bloom_t *cur_bloom;
+ scaling_bloom_t *next_bloom;
+ time_t cur_bloom_start;
+ time_t next_bloom_start;
+ time_t last_bloom_check;
+ uint64_t cur_bloom_inc_id;
+ uint64_t next_bloom_inc_id;
+ unsigned int capacity;
+ int expiry_time;
+ time_t cur_time;
+ double error_rate;
+};
+
+char* expiry_dablooms_errno_trans(enum expiry_dablooms_errno _errno){
+ switch(_errno){
+ case EXPIRY_DABLOOMS_ERRNO_BLOOM_NULL:
+ return (char*)"scaling_bloom_null";
+ case EXPIRY_DABLOOMS_ERRNO_NEW_BLOOM_FAIL:
+ return (char*)"new_scaling_bloom_fail";
+ default:
+ return (char*)"unknown";
+ }
+}
+
+void expiry_dablooms_destroy(struct expiry_dablooms_handle *handle){
+ if(handle != NULL){
+ if(handle->cur_bloom != NULL){
+ free_scaling_bloom(handle->cur_bloom);
+ }
+ if(handle->next_bloom != NULL){
+ free_scaling_bloom(handle->next_bloom);
+ }
+ FREE(&handle);
+ }
+}
+
+struct expiry_dablooms_handle* expiry_dablooms_init(unsigned int capacity, double error_rate, time_t cur_time, int expiry_time){
+ struct expiry_dablooms_handle *handle = ALLOC(struct expiry_dablooms_handle, 1);
+ scaling_bloom_t *cur_bloom = new_scaling_bloom(capacity, error_rate);
+ if(cur_bloom == NULL){
+ goto error_out;
+ }
+ handle->cur_bloom = cur_bloom;
+ handle->cur_bloom_inc_id = 0;
+ handle->cur_bloom_start=cur_time;
+ handle->capacity = capacity;
+ handle->error_rate = error_rate;
+ handle->expiry_time = expiry_time;
+ handle->cur_time = cur_time;
+ return handle;
+
+error_out:
+ expiry_dablooms_destroy(handle);
+ return NULL;
+}
+
+int expiry_dablooms_element_count_get(struct expiry_dablooms_handle *handle, uint64_t *count){
+ if(handle == NULL || handle->cur_bloom == NULL){
+ return EXPIRY_DABLOOMS_ERRNO_BLOOM_NULL;
+ }
+ *count = handle->cur_bloom_inc_id;
+ return 0;
+}
+
+static int bloom_expired_check(struct expiry_dablooms_handle *handle, time_t cur_time){
+ if(handle == NULL || handle->cur_bloom == NULL){
+ return EXPIRY_DABLOOMS_ERRNO_BLOOM_NULL;
+ }
+ if(cur_time <= handle->last_bloom_check){
+ return 0;
+ }
+ time_t delta_time = cur_time - handle->cur_bloom_start;
+ handle->cur_time=cur_time;
+ if(delta_time >= handle->expiry_time){
+ free_scaling_bloom(handle->cur_bloom);
+ if(handle->next_bloom != NULL){
+ handle->cur_bloom = handle->next_bloom;
+ handle->cur_bloom_start = handle->next_bloom_start;
+ handle->cur_bloom_inc_id = handle->next_bloom_inc_id;
+ handle->next_bloom = NULL;
+ handle->last_bloom_check=0;
+ }
+ else{
+ scaling_bloom_t *cur_bloom = new_scaling_bloom(handle->capacity, handle->error_rate);
+ if(cur_bloom == NULL){
+ return EXPIRY_DABLOOMS_ERRNO_NEW_BLOOM_FAIL;
+ }
+ handle->cur_bloom = cur_bloom;
+ handle->cur_bloom_inc_id = 0;
+ handle->cur_bloom_start=cur_time;
+ handle->last_bloom_check=0;
+ }
+ }
+ else
+ {
+ handle->last_bloom_check=cur_time;
+ }
+ return 0;
+}
+
+int expiry_dablooms_add(struct expiry_dablooms_handle *handle, const char *key, size_t len, time_t cur_time){
+ if(key==NULL || len ==0 || handle == NULL)
+ {
+ return -1;
+ }
+ int ret = bloom_expired_check(handle, cur_time);
+ if(ret < 0)
+ {
+ return ret;
+ }
+
+ scaling_bloom_add(handle->cur_bloom, key, len, handle->cur_bloom_inc_id);
+ handle->cur_bloom_inc_id++;
+ time_t delta_time = cur_time - handle->cur_bloom_start;
+ handle->cur_time=cur_time;
+ if(delta_time >= handle->expiry_time){
+ if(handle->next_bloom == NULL){
+ scaling_bloom_t *next_bloom = new_scaling_bloom(handle->capacity, handle->error_rate);
+ if(next_bloom == NULL){
+ return EXPIRY_DABLOOMS_ERRNO_NEW_BLOOM_FAIL;
+ }
+ handle->next_bloom = next_bloom;
+ handle->next_bloom_inc_id = 0;
+ handle->next_bloom_start=cur_time;
+ }
+ scaling_bloom_add(handle->next_bloom, key, len, handle->next_bloom_inc_id);
+ handle->next_bloom_inc_id++;
+ }
+ return 0;
+}
+
+int expiry_dablooms_search(struct expiry_dablooms_handle *handle, const char *key, size_t len, time_t cur_time){
+ if(key==NULL || len ==0 || handle == NULL)
+ {
+ return -1;
+ }
+ int ret = bloom_expired_check(handle, cur_time);
+ if(ret < 0)
+ {
+ return ret;
+ }
+ int bloom_hit = scaling_bloom_check(handle->cur_bloom, key, len);
+ return bloom_hit;
+}
diff --git a/bindings/rs-dablooms/dablooms/src/dablooms.h b/bindings/rs-dablooms/dablooms/src/dablooms.h
new file mode 100644
index 0000000..ee4d4e3
--- /dev/null
+++ b/bindings/rs-dablooms/dablooms/src/dablooms.h
@@ -0,0 +1,92 @@
+/* Copyright @2012 by Justin Hines at Bitly under a very liberal license. See LICENSE in the source distribution. */
+
+#ifndef __BLOOM_H__
+#define __BLOOM_H__
+#include <stdint.h>
+#include <stdlib.h>
+
+#define ALLOC(type, number) ((type *)calloc(sizeof(type), number))
+#define FREE(p) {free(*p);*p=NULL;}
+
+const char *dablooms_version(void);
+
+typedef struct {
+ size_t bytes;
+ char *array;
+} bitmap_t;
+
+
+bitmap_t *bitmap_resize(bitmap_t *bitmap, size_t old_size, size_t new_size);
+bitmap_t *new_bitmap(size_t bytes);
+
+int bitmap_increment(bitmap_t *bitmap, unsigned int index, long offset);
+int bitmap_decrement(bitmap_t *bitmap, unsigned int index, long offset);
+int bitmap_check(bitmap_t *bitmap, unsigned int index, long offset);
+int bitmap_flush(bitmap_t *bitmap);
+
+void free_bitmap(bitmap_t *bitmap);
+
+typedef struct {
+ uint64_t id;
+ uint32_t count;
+ uint32_t _pad;
+} counting_bloom_header_t;
+
+
+typedef struct {
+ counting_bloom_header_t *header;
+ unsigned int capacity;
+ long offset;
+ unsigned int counts_per_func;
+ uint32_t *hashes;
+ size_t nfuncs;
+ size_t size;
+ size_t num_bytes;
+ double error_rate;
+ bitmap_t *bitmap;
+} counting_bloom_t;
+
+int free_counting_bloom(counting_bloom_t *bloom);
+counting_bloom_t *new_counting_bloom(unsigned int capacity, double error_rate);
+int counting_bloom_add(counting_bloom_t *bloom, const char *s, size_t len);
+int counting_bloom_remove(counting_bloom_t *bloom, const char *s, size_t len);
+int counting_bloom_check(counting_bloom_t *bloom, const char *s, size_t len);
+
+typedef struct {
+ uint64_t max_id;
+ uint64_t mem_seqnum;
+ uint64_t disk_seqnum;
+} scaling_bloom_header_t;
+
+typedef struct {
+ scaling_bloom_header_t *header;
+ unsigned int capacity;
+ unsigned int num_blooms;
+ size_t num_bytes;
+ double error_rate;
+ counting_bloom_t **blooms;
+ bitmap_t *bitmap;
+} scaling_bloom_t;
+
+scaling_bloom_t *new_scaling_bloom(unsigned int capacity, double error_rate);
+int free_scaling_bloom(scaling_bloom_t *bloom);
+int scaling_bloom_add(scaling_bloom_t *bloom, const char *s, size_t len, uint64_t id);
+int scaling_bloom_remove(scaling_bloom_t *bloom, const char *s, size_t len, uint64_t id);
+int scaling_bloom_check(scaling_bloom_t *bloom, const char *s, size_t len);
+int scaling_bloom_flush(scaling_bloom_t *bloom);
+uint64_t scaling_bloom_mem_seqnum(scaling_bloom_t *bloom);
+uint64_t scaling_bloom_disk_seqnum(scaling_bloom_t *bloom);
+
+struct expiry_dablooms_handle;
+enum expiry_dablooms_errno{
+ EXPIRY_DABLOOMS_ERRNO_BLOOM_NULL = -1,
+ EXPIRY_DABLOOMS_ERRNO_NEW_BLOOM_FAIL = -2,
+};
+char* expiry_dablooms_errno_trans(enum expiry_dablooms_errno _errno);
+void expiry_dablooms_destroy(struct expiry_dablooms_handle *handle);
+struct expiry_dablooms_handle* expiry_dablooms_init(unsigned int capacity, double error_rate, time_t cur_time, int expiry_time);
+int expiry_dablooms_element_count_get(struct expiry_dablooms_handle *handle, uint64_t *count);
+int expiry_dablooms_add(struct expiry_dablooms_handle *handle, const char *key, size_t len, time_t cur_time);
+int expiry_dablooms_search(struct expiry_dablooms_handle *handle, const char *key, size_t len, time_t cur_time);
+
+#endif
diff --git a/bindings/rs-dablooms/dablooms/src/murmur.c b/bindings/rs-dablooms/dablooms/src/murmur.c
new file mode 100644
index 0000000..fcf1dc1
--- /dev/null
+++ b/bindings/rs-dablooms/dablooms/src/murmur.c
@@ -0,0 +1,120 @@
+//-----------------------------------------------------------------------------
+// MurmurHash3 was written by Austin Appleby, and is placed in the public
+// domain. The author hereby disclaims copyright to this source code.
+
+// Note - The x86 and x64 versions do _not_ produce the same results, as the
+// algorithms are optimized for their respective platforms. You can still
+// compile and run any of them on any platform, but your performance with the
+// non-native version will be less than optimal.
+
+#include "murmur.h"
+
+#define FORCE_INLINE inline static
+
+FORCE_INLINE uint64_t rotl64 ( uint64_t x, int8_t r )
+{
+ return (x << r) | (x >> (64 - r));
+}
+
+#define ROTL64(x,y) rotl64(x,y)
+
+#define BIG_CONSTANT(x) (x##LLU)
+
+#define getblock(x, i) (x[i])
+
+//-----------------------------------------------------------------------------
+// Finalization mix - force all bits of a hash block to avalanche
+
+FORCE_INLINE uint64_t fmix64(uint64_t k)
+{
+ k ^= k >> 33;
+ k *= BIG_CONSTANT(0xff51afd7ed558ccd);
+ k ^= k >> 33;
+ k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53);
+ k ^= k >> 33;
+
+ return k;
+}
+
+//-----------------------------------------------------------------------------
+
+void MurmurHash3_x64_128 ( const void * key, const int len,
+ const uint32_t seed, void * out )
+{
+ const uint8_t * data = (const uint8_t*)key;
+ const int nblocks = len / 16;
+
+ uint64_t h1 = seed;
+ uint64_t h2 = seed;
+
+ uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5);
+ uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f);
+
+ int i;
+
+ //----------
+ // body
+
+ const uint64_t * blocks = (const uint64_t *)(data);
+
+ for(i = 0; i < nblocks; i++) {
+ uint64_t k1 = getblock(blocks,i*2+0);
+ uint64_t k2 = getblock(blocks,i*2+1);
+
+ k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1;
+
+ h1 = ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729;
+
+ k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2;
+
+ h2 = ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5;
+ }
+
+ //----------
+ // tail
+
+ const uint8_t * tail = (const uint8_t*)(data + nblocks*16);
+
+ uint64_t k1 = 0;
+ uint64_t k2 = 0;
+
+ switch(len & 15) {
+ case 15: k2 ^= ((uint64_t)tail[14]) << 48;
+ case 14: k2 ^= ((uint64_t)tail[13]) << 40;
+ case 13: k2 ^= ((uint64_t)tail[12]) << 32;
+ case 12: k2 ^= ((uint64_t)tail[11]) << 24;
+ case 11: k2 ^= ((uint64_t)tail[10]) << 16;
+ case 10: k2 ^= ((uint64_t)tail[ 9]) << 8;
+ case 9: k2 ^= ((uint64_t)tail[ 8]) << 0;
+ k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2;
+
+ case 8: k1 ^= ((uint64_t)tail[ 7]) << 56;
+ case 7: k1 ^= ((uint64_t)tail[ 6]) << 48;
+ case 6: k1 ^= ((uint64_t)tail[ 5]) << 40;
+ case 5: k1 ^= ((uint64_t)tail[ 4]) << 32;
+ case 4: k1 ^= ((uint64_t)tail[ 3]) << 24;
+ case 3: k1 ^= ((uint64_t)tail[ 2]) << 16;
+ case 2: k1 ^= ((uint64_t)tail[ 1]) << 8;
+ case 1: k1 ^= ((uint64_t)tail[ 0]) << 0;
+ k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1;
+ }
+
+ //----------
+ // finalization
+
+ h1 ^= len; h2 ^= len;
+
+ h1 += h2;
+ h2 += h1;
+
+ h1 = fmix64(h1);
+ h2 = fmix64(h2);
+
+ h1 += h2;
+ h2 += h1;
+
+ ((uint64_t*)out)[0] = h1;
+ ((uint64_t*)out)[1] = h2;
+}
+
+//-----------------------------------------------------------------------------
diff --git a/bindings/rs-dablooms/dablooms/src/murmur.h b/bindings/rs-dablooms/dablooms/src/murmur.h
new file mode 100644
index 0000000..c7547db
--- /dev/null
+++ b/bindings/rs-dablooms/dablooms/src/murmur.h
@@ -0,0 +1,12 @@
+//-----------------------------------------------------------------------------
+// MurmurHash3 was written by Austin Appleby, and is placed in the public
+// domain. The author hereby disclaims copyright to this source code.
+
+#ifndef _MURMURHASH3_H_
+#define _MURMURHASH3_H_
+
+#include <stdint.h>
+
+void MurmurHash3_x64_128 ( const void * key, int len, uint32_t seed, void * out );
+
+#endif // _MURMURHASH3_H_
diff --git a/bindings/rs-dablooms/dablooms/src/test_dablooms.c b/bindings/rs-dablooms/dablooms/src/test_dablooms.c
new file mode 100644
index 0000000..5dfdf96
--- /dev/null
+++ b/bindings/rs-dablooms/dablooms/src/test_dablooms.c
@@ -0,0 +1,342 @@
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdlib.h>
+
+#include "dablooms.h"
+
+#define CAPACITY 100000
+#define ERROR_RATE .05
+
+enum {
+ TEST_PASS,
+ TEST_WARN,
+ TEST_FAIL,
+};
+
+struct stats {
+ int true_positives;
+ int true_negatives;
+ int false_positives;
+ int false_negatives;
+};
+
+static void chomp_line(char *word)
+{
+ char *p;
+ if ((p = strchr(word, '\r'))) {
+ *p = '\0';
+ }
+ if ((p = strchr(word, '\n'))) {
+ *p = '\0';
+ }
+}
+
+static int print_results(struct stats *stats)
+{
+ float false_positive_rate = (float)stats->false_positives /
+ (stats->false_positives + stats->true_negatives);
+
+ printf("True positives: %7d" "\n"
+ "True negatives: %7d" "\n"
+ "False positives: %7d" "\n"
+ "False negatives: %7d" "\n"
+ "False positive rate: %.4f" "\n",
+ stats->true_positives,
+ stats->true_negatives,
+ stats->false_positives,
+ stats->false_negatives,
+ false_positive_rate );
+
+ if (stats->false_negatives > 0) {
+ printf("TEST FAIL (false negatives exist)\n");
+ return TEST_FAIL;
+ } else if (false_positive_rate > ERROR_RATE) {
+ printf("TEST WARN (false positive rate too high)\n");
+ return TEST_WARN;
+ } else {
+ printf("TEST PASS\n");
+ return TEST_PASS;
+ }
+}
+
+static void bloom_score(int positive, int should_positive, struct stats *stats, const char *key)
+{
+ if (should_positive) {
+ if (positive) {
+ stats->true_positives++;
+ } else {
+ stats->false_negatives++;
+ fprintf(stderr, "ERROR: False negative: '%s'\n", key);
+ }
+ } else {
+ if (positive) {
+ stats->false_positives++;
+ } else {
+ stats->true_negatives++;
+ }
+ }
+}
+
+int test_counting_remove_reopen(const char *bloom_file, const char *words_file)
+{
+ FILE *fp;
+ char word[256];
+ counting_bloom_t *bloom;
+ int i, key_removed;
+ struct stats results = { 0 };
+
+ printf("\n* test counting remove & reopen\n");
+
+ if ((fp = fopen(bloom_file, "r"))) {
+ fclose(fp);
+ remove(bloom_file);
+ }
+
+ if (!(bloom = new_counting_bloom(CAPACITY, ERROR_RATE, bloom_file))) {
+ fprintf(stderr, "ERROR: Could not create bloom filter\n");
+ return TEST_FAIL;
+ }
+ if (!(fp = fopen(words_file, "r"))) {
+ fprintf(stderr, "ERROR: Could not open words file\n");
+ return TEST_FAIL;
+ }
+
+ for (i = 0; fgets(word, sizeof(word), fp) && (i < CAPACITY); i++) {
+ chomp_line(word);
+ counting_bloom_add(bloom, word, strlen(word));
+ }
+
+ fseek(fp, 0, SEEK_SET);
+ for (i = 0; fgets(word, sizeof(word), fp) && (i < CAPACITY); i++) {
+ if (i % 5 == 0) {
+ chomp_line(word);
+ counting_bloom_remove(bloom, word, strlen(word));
+ }
+ }
+
+ free_counting_bloom(bloom);
+ bloom = new_counting_bloom_from_file(CAPACITY, ERROR_RATE, bloom_file);
+
+ fseek(fp, 0, SEEK_SET);
+ for (i = 0; (fgets(word, sizeof(word), fp)) && (i < CAPACITY); i++) {
+ chomp_line(word);
+ key_removed = (i % 5 == 0);
+ bloom_score(counting_bloom_check(bloom, word, strlen(word)), !key_removed, &results, word);
+ }
+ fclose(fp);
+
+ printf("Elements added: %6d" "\n"
+ "Elements removed: %6d" "\n"
+ "Total size: %d KiB" "\n\n",
+ i, i / 5,
+ (int) bloom->num_bytes / 1024);
+
+ free_counting_bloom(bloom);
+
+ return print_results(&results);
+}
+
+int test_scaling_remove_reopen(const char *bloom_file, const char *words_file)
+{
+ FILE *fp;
+ char word[256];
+ scaling_bloom_t *bloom;
+ int i, key_removed;
+ struct stats results = { 0 };
+
+ printf("\n* test scaling remove & reopen\n");
+
+ if ((fp = fopen(bloom_file, "r"))) {
+ fclose(fp);
+ remove(bloom_file);
+ }
+
+ if (!(bloom = new_scaling_bloom(CAPACITY, ERROR_RATE, bloom_file))) {
+ fprintf(stderr, "ERROR: Could not create bloom filter\n");
+ return TEST_FAIL;
+ }
+
+ if (!(fp = fopen(words_file, "r"))) {
+ fprintf(stderr, "ERROR: Could not open words file\n");
+ return TEST_FAIL;
+ }
+
+ for (i = 0; fgets(word, sizeof(word), fp); i++) {
+ chomp_line(word);
+ scaling_bloom_add(bloom, word, strlen(word), i);
+ }
+
+ fseek(fp, 0, SEEK_SET);
+ for (i = 0; fgets(word, sizeof(word), fp); i++) {
+ if (i % 5 == 0) {
+ chomp_line(word);
+ scaling_bloom_remove(bloom, word, strlen(word), i);
+ }
+ }
+
+ bitmap_flush(bloom->bitmap);
+ free_scaling_bloom(bloom);
+
+ bloom = new_scaling_bloom_from_file(CAPACITY, ERROR_RATE, bloom_file);
+
+ fseek(fp, 0, SEEK_SET);
+ for (i = 0; fgets(word, sizeof(word), fp); i++) {
+ chomp_line(word);
+ key_removed = (i % 5 == 0);
+ bloom_score(scaling_bloom_check(bloom, word, strlen(word)), !key_removed, &results, word);
+ }
+ fclose(fp);
+
+ printf("Elements added: %6d" "\n"
+ "Elements removed: %6d" "\n"
+ "Total size: %d KiB" "\n\n",
+ i, i / 5,
+ (int) bloom->num_bytes / 1024);
+
+ free_scaling_bloom(bloom);
+
+ return print_results(&results);
+}
+
+int test_counting_accuracy(const char *bloom_file, const char *words_file)
+{
+ FILE *fp;
+ char word[256];
+ counting_bloom_t *bloom;
+ int i;
+ struct stats results = { 0 };
+
+ printf("\n* test counting accuracy\n");
+
+ if ((fp = fopen(bloom_file, "r"))) {
+ fclose(fp);
+ remove(bloom_file);
+ }
+
+ if (!(bloom = new_counting_bloom(CAPACITY, ERROR_RATE, bloom_file))) {
+ fprintf(stderr, "ERROR: Could not create bloom filter\n");
+ return TEST_FAIL;
+ }
+ if (!(fp = fopen(words_file, "r"))) {
+ fprintf(stderr, "ERROR: Could not open words file\n");
+ return TEST_FAIL;
+ }
+
+ for (i = 0; fgets(word, sizeof(word), fp) && (i < CAPACITY * 2); i++) {
+ if (i % 2 == 0) {
+ chomp_line(word);
+ counting_bloom_add(bloom, word, strlen(word));
+ }
+ }
+
+ fseek(fp, 0, SEEK_SET);
+ for (i = 0; fgets(word, sizeof(word), fp) && (i < CAPACITY * 2); i++) {
+ if (i % 2 == 1) {
+ chomp_line(word);
+ bloom_score(counting_bloom_check(bloom, word, strlen(word)), 0, &results, word);
+ }
+ }
+
+ fclose(fp);
+
+ printf("Elements added: %6d" "\n"
+ "Elements checked: %6d" "\n"
+ "Total size: %d KiB" "\n\n",
+ (i + 1) / 2, i / 2,
+ (int) bloom->num_bytes / 1024);
+
+ free_counting_bloom(bloom);
+
+ return print_results(&results);
+}
+
+int test_scaling_accuracy(const char *bloom_file, const char *words_file)
+{
+ FILE *fp;
+ char word[256];
+ scaling_bloom_t *bloom;
+ int i;
+ struct stats results = { 0 };
+
+ printf("\n* test scaling accuracy\n");
+
+ if ((fp = fopen(bloom_file, "r"))) {
+ fclose(fp);
+ remove(bloom_file);
+ }
+
+ if (!(bloom = new_scaling_bloom(CAPACITY, ERROR_RATE, bloom_file))) {
+ fprintf(stderr, "ERROR: Could not create bloom filter\n");
+ return TEST_FAIL;
+ }
+
+ if (!(fp = fopen(words_file, "r"))) {
+ fprintf(stderr, "ERROR: Could not open words file\n");
+ return TEST_FAIL;
+ }
+
+ for (i = 0; fgets(word, sizeof(word), fp); i++) {
+ if (i % 2 == 0) {
+ chomp_line(word);
+ scaling_bloom_add(bloom, word, strlen(word), i);
+ }
+ }
+
+ fseek(fp, 0, SEEK_SET);
+ for (i = 0; fgets(word, sizeof(word), fp); i++) {
+ if (i % 2 == 1) {
+ chomp_line(word);
+ bloom_score(scaling_bloom_check(bloom, word, strlen(word)), 0, &results, word);
+ }
+ }
+
+ fclose(fp);
+
+ printf("Elements added: %6d" "\n"
+ "Elements checked: %6d" "\n"
+ "Total size: %d KiB" "\n\n",
+ (i + 1) / 2, i / 2,
+ (int) bloom->num_bytes / 1024);
+
+ free_scaling_bloom(bloom);
+
+ return print_results(&results);
+}
+
+int main(int argc, char *argv[])
+{
+ printf("** dablooms version: %s\n", dablooms_version());
+ int i;
+ int failures = 0, warnings = 0;
+
+ if (argc != 3) {
+ fprintf(stderr, "Usage: %s <bloom_file> <words_file>\n", argv[0]);
+ return EXIT_FAILURE;
+ }
+
+ int (*tests[])(const char *, const char *) = {
+ test_counting_remove_reopen,
+ test_counting_accuracy,
+ test_scaling_remove_reopen,
+ test_scaling_accuracy,
+ NULL,
+ };
+ for (i = 0; tests[i] != NULL; i++) {
+ int result = (tests[i])(argv[1], argv[2]);
+ if (result == TEST_FAIL) {
+ failures++;
+ } else if (result == TEST_WARN) {
+ warnings++;
+ }
+ }
+
+ printf("\n** %d failures, %d warnings\n", failures, warnings);
+ if (failures) {
+ return EXIT_FAILURE;
+ } else {
+ return EXIT_SUCCESS;
+ }
+}
diff --git a/bindings/rs-dablooms/dablooms/test/test_dabloom b/bindings/rs-dablooms/dablooms/test/test_dabloom
new file mode 100644
index 0000000..c8fe1fb
--- /dev/null
+++ b/bindings/rs-dablooms/dablooms/test/test_dabloom
Binary files differ
diff --git a/bindings/rs-dablooms/dablooms/test/test_dabloom.c b/bindings/rs-dablooms/dablooms/test/test_dabloom.c
new file mode 100644
index 0000000..ac8f82a
--- /dev/null
+++ b/bindings/rs-dablooms/dablooms/test/test_dabloom.c
@@ -0,0 +1,152 @@
+#include <sys/stat.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <math.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <errno.h>
+#include <inttypes.h>
+#include "dablooms.h"
+#include <time.h>
+
+
+//gcc test_dabloom.c -g -Wall -o test_dabloom -I/usr/local/include -L/usr/local/lib -ldablooms -lrt
+void test_dev_zero_ftruncate(){
+ const char *filename = "./test_ftruncate.txt";
+ const char *filename1 = "/dev/zero";
+ int fd = open(filename, O_RDWR | O_CREAT | O_TRUNC, (mode_t)0600);
+ int fd1 = open(filename1, O_RDWR, (mode_t)0600);
+ int new_size = 1024;
+ if (ftruncate(fd, new_size) < 0) {
+ printf("fail at ftruncate, errmsg = %s, fd = %d, filename = %s\n", strerror(errno), fd, filename);
+ }
+ else{
+ printf("succeed at ftruncate, fd = %d, filename = %s\n", fd, filename);
+ }
+ if (ftruncate(fd1, new_size) < 0) {
+ printf("fail at ftruncate, errmsg = %s, fd = %d, filename = %s\n", strerror(errno), fd1, filename1);
+ }
+ else{
+ printf("succeed at ftruncate, fd = %d, filename = %s\n", fd1, filename1);
+ }
+}
+
+void test_mmap_and_ftruncate(){
+ const char *filename = "./test_ftruncate.txt";
+ int fd = open(filename, O_RDWR | O_CREAT | O_TRUNC, (mode_t)0600);
+ int new_size = 10;
+ if (ftruncate(fd, new_size) < 0)
+ printf("fail at ftruncate, errmsg = %s, fd = %d, filename = %s\n", strerror(errno), fd, filename);
+ else
+ printf("secceed ftruncate size = %d\n", new_size);
+ char *p = (char*)mmap(0, new_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
+ *p = 'a';
+ printf("*p = %c\n", *p);
+ //ftruncate size = 0, then memory access error
+ new_size = 0;
+ if (ftruncate(fd, new_size) < 0)
+ printf("fail at ftruncate, errmsg = %s, fd = %d, filename = %s\n", strerror(errno), fd, filename);
+ else
+ printf("secceed ftruncate size = %d\n", new_size);
+ printf("*p = %c\n", *p);
+}
+
+
+// void test_private_anonymous_mmap(){
+// int new_size = 100;
+// char *p = (char*)mmap(NULL, new_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
+// *p = 'a';
+// char *p1 = (char*)mmap(NULL, new_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
+// *p1 = 'b';
+// printf("p = %c, p1 = %c\n", *p, *p1);
+// }
+
+
+void test_scaling_bloom(){
+ int capacity = 100;
+ double error_rate = 0.05;
+
+ scaling_bloom_t *bloom = new_scaling_bloom(capacity, error_rate);
+ scaling_bloom_add(bloom, "aaa", 3, 1);
+ printf("bloom add: key = aaa\n");
+
+ scaling_bloom_t *bloom1 = new_scaling_bloom(capacity, error_rate);
+ scaling_bloom_add(bloom1, "bbb", 3, 1);
+ printf("bloom1 add: key = bbb\n");
+
+ int hit = scaling_bloom_check(bloom, "aaa", 3);
+ printf("bloom check: key = aaa, hit = %d\n", hit);
+
+ hit = scaling_bloom_check(bloom, "bbb", 3);
+ printf("bloom check: key = bbb, hit = %d\n", hit);
+
+ hit = scaling_bloom_check(bloom1, "aaa", 3);
+ printf("bloom1 check: key = aaa, hit = %d\n", hit);
+
+ hit = scaling_bloom_check(bloom1, "bbb", 3);
+ printf("bloom1 check: key = bbb, hit = %d\n", hit);
+}
+
+time_t get_current_time() {
+ struct timespec tp;
+ if (clock_gettime(CLOCK_REALTIME, &tp) == -1) {
+ perror("clock_gettime");
+ return -1;
+ }
+
+ time_t current_time = tp.tv_sec;
+ return current_time;
+}
+
+void test_expiry_dablooms(){
+ unsigned int capacity = 100;
+ double error_rate = 0.001;
+ int expiry_time = 1;
+ // time_t cur_time = get_current_time();
+
+ struct expiry_dablooms_handle *handle = expiry_dablooms_init(capacity, error_rate, get_current_time(), expiry_time);
+ uint64_t count = -1, i = 0;
+ for(i = 0; i < 7; i++){
+ expiry_dablooms_add(handle, "aaa", 3, get_current_time());
+ expiry_dablooms_element_count_get(handle, &count);
+ printf("element_count = %d\n", (int)count);
+ }
+ sleep(4);
+ printf("After 4s: \n");
+ for(i = 0; i < 8; i++){
+ expiry_dablooms_add(handle, "bbb", 3, get_current_time());
+ expiry_dablooms_element_count_get(handle, &count);
+ printf("element_count = %d\n", (int)count);
+ }
+ sleep(3);
+ printf("After 7s: \n");
+ for(i = 0; i < 9; i++){
+ expiry_dablooms_add(handle, "ccc", 3, get_current_time());
+ expiry_dablooms_element_count_get(handle, &count);
+ printf("element_count = %d\n", count);
+ }
+ expiry_dablooms_destroy(handle);
+ /*
+ int hit = expiry_dablooms_search(handle, "aaa", 3);
+ uint64_t count = -1;
+ expiry_dablooms_element_count_get(handle, &count);
+ printf("key = aaa, hit = %d, element_count = %" PRIu64 "\n", hit, count);
+ sleep(6);
+ hit = expiry_dablooms_search(handle, "aaa", 3);
+ count = -1;
+ expiry_dablooms_element_count_get(handle, &count);
+ printf("after 6s, key = aaa, hit = %d, element_count = %" PRIu64 "\n", hit, count);
+ */
+}
+
+
+
+int main(){
+ //test_scaling_bloom();
+ test_expiry_dablooms();
+ return 0;
+} \ No newline at end of file
diff --git a/bindings/rs-dablooms/src/dablooms.rs b/bindings/rs-dablooms/src/dablooms.rs
new file mode 100644
index 0000000..cf74d38
--- /dev/null
+++ b/bindings/rs-dablooms/src/dablooms.rs
@@ -0,0 +1,321 @@
+use libc::time_t;
+
+use crate::dablooms_bind::*;
+use std::ffi::{CStr, CString};
+use std::os::raw::{c_int, c_uint};
+use std::ptr::NonNull;
+use std::time::{SystemTime, UNIX_EPOCH};
+
+// String form rust not contain '\0' in the end, that may cause some problem
+//
+
+pub struct CountingBloomFilter {
+ bloom: NonNull<counting_bloom_t>,
+}
+
+impl CountingBloomFilter {
+ pub fn new(capacity: u32, error_rate: f64) -> Result<Self, &'static str> {
+ let bloom_ptr = unsafe { new_counting_bloom(capacity, error_rate) };
+ let bloom =
+ NonNull::new(bloom_ptr).ok_or("Failed to create CountingBloomFilter,return null")?;
+ Ok(CountingBloomFilter { bloom })
+ }
+
+ // get raw pointer
+ fn get_raw(&self) -> *mut counting_bloom_t {
+ self.bloom.as_ptr()
+ }
+
+ pub fn add<T>(&mut self, key: T) -> Result<(), ()>
+ where
+ T: AsRef<[u8]>,
+ {
+ let key = key.as_ref();
+ let res =
+ unsafe { counting_bloom_add(self.get_raw(), key.as_ptr() as *const i8, key.len()) };
+ if res == 0 {
+ Ok(())
+ } else {
+ Err(())
+ }
+ }
+
+ pub fn remove<T>(&mut self, key: T) -> Result<(), ()>
+ where
+ T: AsRef<[u8]>,
+ {
+ let key = key.as_ref();
+ let res =
+ unsafe { counting_bloom_remove(self.get_raw(), key.as_ptr() as *const i8, key.len()) };
+ if res == 0 {
+ Ok(())
+ } else {
+ Err(())
+ }
+ }
+
+ pub fn check<T>(&self, key: T) -> bool
+ where
+ T: AsRef<[u8]>,
+ {
+ let key = key.as_ref();
+ unsafe { counting_bloom_check(self.get_raw(), key.as_ptr() as *const i8, key.len()) > 0 }
+ }
+}
+
+impl Drop for CountingBloomFilter {
+ fn drop(&mut self) {
+ unsafe { free_counting_bloom(self.get_raw()) };
+ }
+}
+
+pub struct ScalingBloomFilter {
+ bloom: NonNull<scaling_bloom_t>,
+}
+
+impl ScalingBloomFilter {
+ pub fn new(capacity: u32, error_rate: f64) -> Result<Self, &'static str> {
+ let bloom_ptr = unsafe { new_scaling_bloom(capacity as c_uint, error_rate) };
+ let bloom =
+ NonNull::new(bloom_ptr).ok_or("Failed to create ScalingBloomFilter,return null")?;
+ Ok(ScalingBloomFilter { bloom })
+ }
+
+ // get raw pointer
+ fn get_raw(&self) -> *mut scaling_bloom_t {
+ self.bloom.as_ptr()
+ }
+
+ pub fn add<T>(&mut self, key: T, id: u64) -> Result<(), ()>
+ where
+ T: AsRef<[u8]>,
+ {
+ let key = key.as_ref();
+ let res =
+ unsafe { scaling_bloom_add(self.get_raw(), key.as_ptr() as *const i8, key.len(), id) };
+ // scaling_bloom_add returns 1 if the key is ok...
+ // no idea why it's not 0...
+ if res == 1 {
+ Ok(())
+ } else {
+ Err(())
+ }
+ }
+
+ pub fn remove<T>(&mut self, key: T, id: u64) -> Result<(), ()>
+ where
+ T: AsRef<[u8]>,
+ {
+ let key = key.as_ref();
+ let res = unsafe {
+ scaling_bloom_remove(self.get_raw(), key.as_ptr() as *const i8, key.len(), id)
+ };
+ // scaling_bloom_remove returns 1 if remove success...
+ // no idea why it's not 0...
+ if res == 1 {
+ Ok(())
+ } else {
+ Err(())
+ }
+ }
+
+ pub fn check<T>(&self, key: T) -> bool
+ where
+ T: AsRef<[u8]>,
+ {
+ let key = key.as_ref();
+ unsafe { scaling_bloom_check(self.get_raw(), key.as_ptr() as *const i8, key.len()) > 0 }
+ }
+
+ pub fn flush(&mut self) -> Result<(), ()> {
+ let res = unsafe { scaling_bloom_flush(self.get_raw()) };
+ if res == 0 {
+ Ok(())
+ } else {
+ Err(())
+ }
+ }
+
+ pub fn mem_seqnum(&self) -> u64 {
+ unsafe { scaling_bloom_mem_seqnum(self.get_raw()) }
+ }
+
+ pub fn disk_seqnum(&self) -> u64 {
+ unsafe { scaling_bloom_disk_seqnum(self.get_raw()) }
+ }
+}
+
+impl Drop for ScalingBloomFilter {
+ fn drop(&mut self) {
+ unsafe { free_scaling_bloom(self.get_raw()) };
+ }
+}
+
+fn get_current_time() -> Result<time_t, c_int> {
+ let current_time = SystemTime::now()
+ .duration_since(UNIX_EPOCH)
+ .map_err(|_| -1)?
+ .as_secs() as time_t;
+ Ok(current_time)
+}
+
+pub struct ExpiryBloomFilter {
+ bloom: NonNull<expiry_dablooms_handle>,
+}
+
+impl ExpiryBloomFilter {
+ pub fn new(capacity: u32, error_rate: f64, expiry_time: i32) -> Result<Self, &'static str> {
+ let cur_time = get_current_time();
+ let bloom_ptr = unsafe {
+ expiry_dablooms_init(
+ capacity as c_uint,
+ error_rate,
+ cur_time.unwrap(),
+ expiry_time as c_int,
+ )
+ };
+ let bloom =
+ NonNull::new(bloom_ptr).ok_or("Failed to create ExpiryBloomFilter,return null")?;
+ Ok(ExpiryBloomFilter { bloom })
+ }
+
+ // get raw pointer
+ fn get_raw(&self) -> *mut expiry_dablooms_handle {
+ self.bloom.as_ptr()
+ }
+
+ pub fn add<T>(&mut self, key: T) -> Result<(), String>
+ where
+ T: AsRef<[u8]>,
+ {
+ let key = key.as_ref();
+ let cur_time = get_current_time();
+ let res = unsafe {
+ expiry_dablooms_add(
+ self.get_raw(),
+ key.as_ptr() as *const i8,
+ key.len(),
+ cur_time.unwrap(),
+ )
+ };
+ if res == 0 {
+ Ok(())
+ } else {
+ let err_cstr = unsafe { expiry_dablooms_errno_trans(res) };
+ let err_msg = unsafe { CStr::from_ptr(err_cstr).to_string_lossy().into_owned() };
+ Err(err_msg)
+ }
+ }
+
+ pub fn check<T>(&self, key: T) -> bool
+ where
+ T: AsRef<[u8]>,
+ {
+ let key = key.as_ref();
+ let cur_time = get_current_time();
+ unsafe {
+ expiry_dablooms_search(
+ self.get_raw(),
+ key.as_ptr() as *const i8,
+ key.len(),
+ cur_time.unwrap(),
+ ) > 0
+ }
+ }
+
+ pub fn count(&self) -> i32 {
+ let mut count: u64 = 0;
+ unsafe {
+ expiry_dablooms_element_count_get(self.get_raw(), &mut count);
+ }
+ return count as i32;
+ }
+}
+
+impl Drop for ExpiryBloomFilter {
+ fn drop(&mut self) {
+ unsafe { expiry_dablooms_destroy(self.get_raw()) };
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use libc::sleep;
+
+ use crate::dablooms::*;
+
+ #[test]
+ fn test_counting_bloom_filter() {
+ let mut bloom = CountingBloomFilter::new(1000, 0.01).unwrap();
+ let key1 = "hello";
+ let key2 = "world";
+ let key3 = "rust";
+
+ assert!(bloom.check(key1) == false);
+ assert!(bloom.check(key2) == false);
+ assert!(bloom.check(key3) == false);
+ bloom.add(key1).unwrap();
+ bloom.add(key2).unwrap();
+
+ drop(key1);
+ let key = "hello";
+
+ assert!(bloom.check(key) == true);
+ assert!(bloom.check(key2) == true);
+ assert!(bloom.check(key3) == false);
+ bloom.remove(key).unwrap();
+ assert!(bloom.check(key1) == false);
+ assert!(bloom.check(key2) == true);
+ assert!(bloom.check(key3) == false);
+ }
+
+ #[test]
+ fn test_scaling_bloom_filter() {
+ let mut bloom = ScalingBloomFilter::new(100, 0.05).unwrap();
+ let key1 = "aaa";
+ let key2 = "bbb";
+ let id1 = 1;
+ let id2 = 2;
+ assert!(bloom.check(key1) == false);
+ assert!(bloom.check(key2) == false);
+
+ bloom.add(key1, id1).unwrap();
+ bloom.add(key2, id2).unwrap();
+ assert!(bloom.check(key1) == true);
+ assert!(bloom.check(key2) == true);
+ bloom.remove(key1, id1).unwrap();
+ assert!(bloom.check(key1) == false);
+ assert!(bloom.check(key2) == true);
+ }
+
+ #[test]
+ fn test_expiry_dablooms_filter() {
+ let capacity: u32 = 100;
+ let error_rate: f64 = 0.05;
+ let expiry_secs: i32 = 3; // expiry time 1s
+ let key1 = "aaa";
+ let key2 = "bbb";
+
+ let mut bloom = ExpiryBloomFilter::new(capacity, error_rate, expiry_secs).unwrap();
+
+ for _i in 1..7 {
+ bloom.add(key1).unwrap();
+ assert!(bloom.count() == _i);
+ }
+ assert!(bloom.check(key1));
+
+ unsafe {
+ sleep(2); // sleep 2 sec | all key1's value not expired
+ }
+ assert!(bloom.check(key1));
+ unsafe {
+ sleep(2); // sleep 4 sec | all key1's value expired
+ }
+ assert!(!bloom.check(key1));
+
+ for _i in 1..7 {
+ bloom.add(key2).unwrap();
+ assert!(bloom.count() == _i);
+ }
+ }
+}
diff --git a/bindings/rs-dablooms/src/dablooms_bind.rs b/bindings/rs-dablooms/src/dablooms_bind.rs
new file mode 100644
index 0000000..3600880
--- /dev/null
+++ b/bindings/rs-dablooms/src/dablooms_bind.rs
@@ -0,0 +1,156 @@
+#![allow(non_upper_case_globals)] // allow snake case just in this file
+use libc::{c_char, c_int, c_long, c_uint, time_t};
+
+extern "C" {
+ pub fn dablooms_version() -> *const c_char;
+}
+
+#[repr(C)]
+#[derive(Debug, Copy, Clone)]
+pub struct bitmap_t {
+ pub bytes: usize,
+ pub array: *mut c_char,
+}
+
+//#[link(name = "dablooms")]
+extern "C" {
+ pub fn bitmap_resize(bitmap: *mut bitmap_t, old_size: usize, new_size: usize) -> *mut bitmap_t;
+ pub fn new_bitmap(bytes: usize) -> *mut bitmap_t;
+ //
+ pub fn bitmap_increment(bitmap: *mut bitmap_t, index: c_uint, offset: c_long) -> c_int;
+ pub fn bitmap_decrement(bitmap: *mut bitmap_t, index: c_uint, offset: c_long) -> c_int;
+ pub fn bitmap_check(bitmap: *mut bitmap_t, index: c_uint, offset: c_long) -> c_int;
+ pub fn bitmap_flush(bitmap: *mut bitmap_t) -> c_int;
+ pub fn free_bitmap(bitmap: *mut bitmap_t);
+}
+
+#[repr(C)]
+#[derive(Debug, Copy, Clone)]
+pub struct counting_bloom_header_t {
+ pub id: u64,
+ pub count: u32,
+ pub _pad: u32,
+}
+
+#[repr(C)]
+#[derive(Debug, Copy, Clone)]
+pub struct counting_bloom_t {
+ pub header: *mut counting_bloom_header_t,
+ pub capacity: c_uint,
+ pub offset: c_long,
+ pub counts_per_func: c_uint,
+ pub hashes: *mut u32,
+ pub nfuncs: usize,
+ pub size: usize,
+ pub num_bytes: usize,
+ pub error_rate: f64,
+ pub bitmap: *mut bitmap_t,
+}
+
+//#[link(name = "dablooms")]
+extern "C" {
+ pub fn free_counting_bloom(bloom: *mut counting_bloom_t) -> c_int;
+ pub fn new_counting_bloom(capacity: c_uint, error_rate: f64) -> *mut counting_bloom_t;
+ //
+ pub fn counting_bloom_add(bloom: *mut counting_bloom_t, s: *const c_char, len: usize) -> c_int;
+ pub fn counting_bloom_remove(
+ bloom: *mut counting_bloom_t,
+ s: *const c_char,
+ len: usize,
+ ) -> c_int;
+ pub fn counting_bloom_check(
+ bloom: *mut counting_bloom_t,
+ s: *const c_char,
+ len: usize,
+ ) -> c_int;
+}
+
+#[repr(C)]
+#[derive(Debug, Copy, Clone)]
+pub struct scaling_bloom_header_t {
+ pub max_id: u64,
+ pub mem_seqnum: u64,
+ pub disk_seqnum: u64,
+}
+
+#[repr(C)]
+#[derive(Debug, Copy, Clone)]
+pub struct scaling_bloom_t {
+ pub header: *mut scaling_bloom_header_t,
+ pub capacity: c_uint,
+ pub num_blooms: c_uint,
+ pub num_bytes: usize,
+ pub error_rate: f64,
+ pub blooms: *mut *mut counting_bloom_t,
+ pub bitmap: *mut bitmap_t,
+}
+
+//#[link(name = "dablooms")]
+extern "C" {
+ pub fn new_scaling_bloom(capacity: c_uint, error_rate: f64) -> *mut scaling_bloom_t;
+ pub fn free_scaling_bloom(bloom: *mut scaling_bloom_t) -> c_int;
+
+ pub fn scaling_bloom_add(
+ bloom: *mut scaling_bloom_t,
+ s: *const c_char,
+ len: usize,
+ id: u64,
+ ) -> c_int;
+ pub fn scaling_bloom_remove(
+ bloom: *mut scaling_bloom_t,
+ s: *const c_char,
+ len: usize,
+ id: u64,
+ ) -> c_int;
+ pub fn scaling_bloom_check(bloom: *mut scaling_bloom_t, s: *const c_char, len: usize) -> c_int;
+ pub fn scaling_bloom_flush(bloom: *mut scaling_bloom_t) -> c_int;
+ pub fn scaling_bloom_mem_seqnum(bloom: *mut scaling_bloom_t) -> u64;
+ pub fn scaling_bloom_disk_seqnum(bloom: *mut scaling_bloom_t) -> u64;
+}
+
+#[repr(C)]
+#[derive(Debug, Copy, Clone)]
+pub struct expiry_dablooms_handle {
+ _unused: [u8; 0],
+}
+
+// 存疑
+// pub const expiry_dablooms_errno_EXPIRY_DABLOOMS_ERRNO_BLOOM_NULL: expiry_dablooms_errno = -1;
+// pub const expiry_dablooms_errno_EXPIRY_DABLOOMS_ERRNO_NEW_BLOOM_FAIL: expiry_dablooms_errno = -2;
+// pub type expiry_dablooms_errno = c_int;
+
+// #[repr(C)]
+// #[derive(Debug, Copy, Clone)]
+// pub enum expiry_dablooms_errno {
+// EXPIRY_DABLOOMS_ERRNO_BLOOM_NULL = -1,
+// EXPIRY_DABLOOMS_ERRNO_NEW_BLOOM_FAIL = -2,
+// }
+
+//#[link(name = "dablooms")]
+extern "C" {
+ // _errno 是 expiry_dablooms_errno 的枚举类型, c 中和 c_int 通用;
+ pub fn expiry_dablooms_errno_trans(_errno: c_int) -> *mut c_char;
+ pub fn expiry_dablooms_destroy(handle: *mut expiry_dablooms_handle);
+ pub fn expiry_dablooms_init(
+ capacity: c_uint,
+ error_rate: f64,
+ cur_time: time_t,
+ expiry_time: c_int,
+ ) -> *mut expiry_dablooms_handle;
+ pub fn expiry_dablooms_element_count_get(
+ handle: *mut expiry_dablooms_handle,
+ count: *mut u64,
+ ) -> c_int;
+ pub fn expiry_dablooms_add(
+ handle: *mut expiry_dablooms_handle,
+ key: *const c_char,
+ len: usize,
+ cur_time: time_t,
+ ) -> c_int;
+ pub fn expiry_dablooms_search(
+ handle: *mut expiry_dablooms_handle,
+ key: *const c_char,
+ len: usize,
+ cur_time: time_t,
+ ) -> c_int;
+}
diff --git a/bindings/rs-dablooms/src/dablooms_bind_test.rs b/bindings/rs-dablooms/src/dablooms_bind_test.rs
new file mode 100644
index 0000000..4fc31eb
--- /dev/null
+++ b/bindings/rs-dablooms/src/dablooms_bind_test.rs
@@ -0,0 +1,458 @@
+use std::{
+ mem::{align_of, size_of, MaybeUninit},
+ ptr::addr_of,
+};
+
+use libc::{c_char, c_int, c_long, c_uint, time_t};
+
+use crate::dablooms_bind::{
+ bitmap_t, counting_bloom_header_t, counting_bloom_t, scaling_bloom_header_t, scaling_bloom_t,
+};
+
+#[test]
+fn bindgen_test_layout_bitmap_t() {
+ const UNINIT: MaybeUninit<bitmap_t> = MaybeUninit::uninit();
+ let ptr = UNINIT.as_ptr();
+ assert_eq!(
+ size_of::<bitmap_t>(),
+ 16usize,
+ concat!("Size of: ", stringify!(bitmap_t))
+ );
+ assert_eq!(
+ align_of::<bitmap_t>(),
+ 8usize,
+ concat!("Alignment of ", stringify!(bitmap_t))
+ );
+ assert_eq!(
+ unsafe { addr_of!((*ptr).bytes) as usize - ptr as usize },
+ 0usize,
+ concat!(
+ "Offset of field: ",
+ stringify!(bitmap_t),
+ "::",
+ stringify!(bytes)
+ )
+ );
+ assert_eq!(
+ unsafe { addr_of!((*ptr).array) as usize - ptr as usize },
+ 8usize,
+ concat!(
+ "Offset of field: ",
+ stringify!(bitmap_t),
+ "::",
+ stringify!(array)
+ )
+ );
+}
+
+#[test]
+fn bindgen_test_layout_counting_bloom_header_t() {
+ const UNINIT: MaybeUninit<counting_bloom_header_t> = MaybeUninit::uninit();
+ let ptr = UNINIT.as_ptr();
+ assert_eq!(
+ size_of::<counting_bloom_header_t>(),
+ 16usize,
+ concat!("Size of: ", stringify!(counting_bloom_header_t))
+ );
+ assert_eq!(
+ align_of::<counting_bloom_header_t>(),
+ 8usize,
+ concat!("Alignment of ", stringify!(counting_bloom_header_t))
+ );
+ assert_eq!(
+ unsafe { addr_of!((*ptr).id) as usize - ptr as usize },
+ 0usize,
+ concat!(
+ "Offset of field: ",
+ stringify!(counting_bloom_header_t),
+ "::",
+ stringify!(id)
+ )
+ );
+ assert_eq!(
+ unsafe { addr_of!((*ptr).count) as usize - ptr as usize },
+ 8usize,
+ concat!(
+ "Offset of field: ",
+ stringify!(counting_bloom_header_t),
+ "::",
+ stringify!(count)
+ )
+ );
+ assert_eq!(
+ unsafe { addr_of!((*ptr)._pad) as usize - ptr as usize },
+ 12usize,
+ concat!(
+ "Offset of field: ",
+ stringify!(counting_bloom_header_t),
+ "::",
+ stringify!(_pad)
+ )
+ );
+}
+
+#[test]
+fn bindgen_test_layout_counting_bloom_t() {
+ const UNINIT: MaybeUninit<counting_bloom_t> = MaybeUninit::uninit();
+ let ptr = UNINIT.as_ptr();
+ assert_eq!(
+ size_of::<counting_bloom_t>(),
+ 80usize,
+ concat!("Size of: ", stringify!(counting_bloom_t))
+ );
+ assert_eq!(
+ align_of::<counting_bloom_t>(),
+ 8usize,
+ concat!("Alignment of ", stringify!(counting_bloom_t))
+ );
+ assert_eq!(
+ unsafe { addr_of!((*ptr).header) as usize - ptr as usize },
+ 0usize,
+ concat!(
+ "Offset of field: ",
+ stringify!(counting_bloom_t),
+ "::",
+ stringify!(header)
+ )
+ );
+ assert_eq!(
+ unsafe { addr_of!((*ptr).capacity) as usize - ptr as usize },
+ 8usize,
+ concat!(
+ "Offset of field: ",
+ stringify!(counting_bloom_t),
+ "::",
+ stringify!(capacity)
+ )
+ );
+ assert_eq!(
+ unsafe { addr_of!((*ptr).offset) as usize - ptr as usize },
+ 16usize,
+ concat!(
+ "Offset of field: ",
+ stringify!(counting_bloom_t),
+ "::",
+ stringify!(offset)
+ )
+ );
+ assert_eq!(
+ unsafe { addr_of!((*ptr).counts_per_func) as usize - ptr as usize },
+ 24usize,
+ concat!(
+ "Offset of field: ",
+ stringify!(counting_bloom_t),
+ "::",
+ stringify!(counts_per_func)
+ )
+ );
+ assert_eq!(
+ unsafe { addr_of!((*ptr).hashes) as usize - ptr as usize },
+ 32usize,
+ concat!(
+ "Offset of field: ",
+ stringify!(counting_bloom_t),
+ "::",
+ stringify!(hashes)
+ )
+ );
+ assert_eq!(
+ unsafe { addr_of!((*ptr).nfuncs) as usize - ptr as usize },
+ 40usize,
+ concat!(
+ "Offset of field: ",
+ stringify!(counting_bloom_t),
+ "::",
+ stringify!(nfuncs)
+ )
+ );
+ assert_eq!(
+ unsafe { addr_of!((*ptr).size) as usize - ptr as usize },
+ 48usize,
+ concat!(
+ "Offset of field: ",
+ stringify!(counting_bloom_t),
+ "::",
+ stringify!(size)
+ )
+ );
+ assert_eq!(
+ unsafe { addr_of!((*ptr).num_bytes) as usize - ptr as usize },
+ 56usize,
+ concat!(
+ "Offset of field: ",
+ stringify!(counting_bloom_t),
+ "::",
+ stringify!(num_bytes)
+ )
+ );
+ assert_eq!(
+ unsafe { addr_of!((*ptr).error_rate) as usize - ptr as usize },
+ 64usize,
+ concat!(
+ "Offset of field: ",
+ stringify!(counting_bloom_t),
+ "::",
+ stringify!(error_rate)
+ )
+ );
+ assert_eq!(
+ unsafe { addr_of!((*ptr).bitmap) as usize - ptr as usize },
+ 72usize,
+ concat!(
+ "Offset of field: ",
+ stringify!(counting_bloom_t),
+ "::",
+ stringify!(bitmap)
+ )
+ );
+}
+
+#[test]
+fn bindgen_test_layout_scaling_bloom_header_t() {
+ const UNINIT: MaybeUninit<scaling_bloom_header_t> = MaybeUninit::uninit();
+ let ptr = UNINIT.as_ptr();
+ assert_eq!(
+ size_of::<scaling_bloom_header_t>(),
+ 24usize,
+ concat!("Size of: ", stringify!(scaling_bloom_header_t))
+ );
+ assert_eq!(
+ align_of::<scaling_bloom_header_t>(),
+ 8usize,
+ concat!("Alignment of ", stringify!(scaling_bloom_header_t))
+ );
+ assert_eq!(
+ unsafe { addr_of!((*ptr).max_id) as usize - ptr as usize },
+ 0usize,
+ concat!(
+ "Offset of field: ",
+ stringify!(scaling_bloom_header_t),
+ "::",
+ stringify!(max_id)
+ )
+ );
+ assert_eq!(
+ unsafe { addr_of!((*ptr).mem_seqnum) as usize - ptr as usize },
+ 8usize,
+ concat!(
+ "Offset of field: ",
+ stringify!(scaling_bloom_header_t),
+ "::",
+ stringify!(mem_seqnum)
+ )
+ );
+ assert_eq!(
+ unsafe { addr_of!((*ptr).disk_seqnum) as usize - ptr as usize },
+ 16usize,
+ concat!(
+ "Offset of field: ",
+ stringify!(scaling_bloom_header_t),
+ "::",
+ stringify!(disk_seqnum)
+ )
+ );
+}
+
+#[test]
+fn bindgen_test_layout_scaling_bloom_t() {
+ const UNINIT: MaybeUninit<scaling_bloom_t> = MaybeUninit::uninit();
+ let ptr = UNINIT.as_ptr();
+ assert_eq!(
+ size_of::<scaling_bloom_t>(),
+ 48usize,
+ concat!("Size of: ", stringify!(scaling_bloom_t))
+ );
+ assert_eq!(
+ align_of::<scaling_bloom_t>(),
+ 8usize,
+ concat!("Alignment of ", stringify!(scaling_bloom_t))
+ );
+ assert_eq!(
+ unsafe { addr_of!((*ptr).header) as usize - ptr as usize },
+ 0usize,
+ concat!(
+ "Offset of field: ",
+ stringify!(scaling_bloom_t),
+ "::",
+ stringify!(header)
+ )
+ );
+ assert_eq!(
+ unsafe { addr_of!((*ptr).capacity) as usize - ptr as usize },
+ 8usize,
+ concat!(
+ "Offset of field: ",
+ stringify!(scaling_bloom_t),
+ "::",
+ stringify!(capacity)
+ )
+ );
+ assert_eq!(
+ unsafe { addr_of!((*ptr).num_blooms) as usize - ptr as usize },
+ 12usize,
+ concat!(
+ "Offset of field: ",
+ stringify!(scaling_bloom_t),
+ "::",
+ stringify!(num_blooms)
+ )
+ );
+ assert_eq!(
+ unsafe { addr_of!((*ptr).num_bytes) as usize - ptr as usize },
+ 16usize,
+ concat!(
+ "Offset of field: ",
+ stringify!(scaling_bloom_t),
+ "::",
+ stringify!(num_bytes)
+ )
+ );
+ assert_eq!(
+ unsafe { addr_of!((*ptr).error_rate) as usize - ptr as usize },
+ 24usize,
+ concat!(
+ "Offset of field: ",
+ stringify!(scaling_bloom_t),
+ "::",
+ stringify!(error_rate)
+ )
+ );
+ assert_eq!(
+ unsafe { addr_of!((*ptr).blooms) as usize - ptr as usize },
+ 32usize,
+ concat!(
+ "Offset of field: ",
+ stringify!(scaling_bloom_t),
+ "::",
+ stringify!(blooms)
+ )
+ );
+ assert_eq!(
+ unsafe { addr_of!((*ptr).bitmap) as usize - ptr as usize },
+ 40usize,
+ concat!(
+ "Offset of field: ",
+ stringify!(scaling_bloom_t),
+ "::",
+ stringify!(bitmap)
+ )
+ );
+}
+
+// test_dabloom.c
+#[cfg(test)]
+mod tests {
+ use core::*;
+ use libc::{c_uint, sleep, time_t};
+ use std::ffi::CString;
+ use std::os::raw::{c_int, c_long};
+ use std::time::{SystemTime, UNIX_EPOCH};
+
+ use crate::dablooms_bind::{
+ expiry_dablooms_add, expiry_dablooms_destroy, expiry_dablooms_element_count_get,
+ expiry_dablooms_init, new_scaling_bloom, scaling_bloom_add, scaling_bloom_check,
+ };
+
+ #[test]
+ fn test_scaling_bloom() {
+ let capacity = 100;
+ let error_rate = 0.05;
+
+ unsafe {
+ let bloom = new_scaling_bloom(capacity as c_uint, error_rate);
+ let a = scaling_bloom_add(bloom, CString::new("aaa").unwrap().as_ptr(), 3, 1);
+
+ println!("bloom add: key = aaa\n");
+
+ let bloom1 = new_scaling_bloom(capacity as c_uint, error_rate);
+ scaling_bloom_add(bloom1, CString::new("bbb").unwrap().as_ptr(), 3, 1);
+ println!("bloom1 add: key = bbb\n");
+
+ let hit = scaling_bloom_check(bloom, CString::new("aaa").unwrap().as_ptr(), 3);
+ println!("bloom check: key = aaa, hit = {}\n", hit);
+
+ let hit = scaling_bloom_check(bloom, CString::new("bbb").unwrap().as_ptr(), 3);
+ println!("bloom check: key = bbb, hit = {}\n", hit);
+
+ let hit = scaling_bloom_check(bloom1, CString::new("aaa").unwrap().as_ptr(), 3);
+ println!("bloom1 check: key = aaa, hit = {}\n", hit);
+
+ let hit = scaling_bloom_check(bloom1, CString::new("bbb").unwrap().as_ptr(), 3);
+ println!("bloom1 check: key = bbb, hit = {}\n", hit);
+ }
+ }
+
+ fn get_current_time() -> Result<time_t, c_int> {
+ let current_time = SystemTime::now()
+ .duration_since(UNIX_EPOCH)
+ .map_err(|_| -1)?
+ .as_secs() as time_t;
+ Ok(current_time)
+ }
+
+ #[test]
+ fn test_expiry_dablooms() {
+ let capacity: usize = 100;
+ let error_rate: f64 = 0.05;
+ let expiry_secs: i32 = 1;
+
+ let handle = unsafe {
+ expiry_dablooms_init(
+ capacity as c_uint,
+ error_rate,
+ get_current_time().unwrap(),
+ expiry_secs as c_int,
+ )
+ };
+
+ let mut count: u64 = 0;
+ for i in 0..7 {
+ unsafe {
+ expiry_dablooms_add(
+ handle,
+ CString::new("aaa").unwrap().as_ptr(),
+ 3,
+ get_current_time().unwrap(),
+ );
+ expiry_dablooms_element_count_get(handle, &mut count);
+ }
+ println!("element_count = {}", count);
+ }
+ unsafe {
+ sleep(4);
+ }
+ println!("After 4s: \n");
+
+ for i in 0..8 {
+ unsafe {
+ expiry_dablooms_add(
+ handle,
+ CString::new("bbb").unwrap().as_ptr(),
+ 3,
+ get_current_time().unwrap(),
+ );
+ expiry_dablooms_element_count_get(handle, &mut count);
+ };
+ println!("element_count = {}", count);
+ }
+ unsafe {
+ sleep(7);
+ }
+ println!("After 7s: \n");
+
+ for i in 0..9 {
+ unsafe {
+ expiry_dablooms_add(
+ handle,
+ CString::new("ccc").unwrap().as_ptr(),
+ 3,
+ get_current_time().unwrap(),
+ );
+ expiry_dablooms_element_count_get(handle, &mut count);
+ };
+ println!("element_count = {}", count);
+ }
+
+ unsafe { expiry_dablooms_destroy(handle) };
+ }
+}
diff --git a/bindings/rs-dablooms/src/lib.rs b/bindings/rs-dablooms/src/lib.rs
new file mode 100644
index 0000000..c201a99
--- /dev/null
+++ b/bindings/rs-dablooms/src/lib.rs
@@ -0,0 +1,4 @@
+mod dablooms_bind;
+mod dablooms_bind_test;
+
+pub mod dablooms;