Skip to content

Commit 0a77f24

Browse files
authored
Implemented V2 (#1)
New filter implemented: - new hashing function (WyHash) - new implementation (for speed) with fixed-size small filters - filters can be defined with some optimization levels (space, speed, best) - poppy bench in CLI - Python bindings
1 parent 4992825 commit 0a77f24

File tree

29 files changed

+493126
-852
lines changed

29 files changed

+493126
-852
lines changed

.github/workflows/rust.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,4 +19,4 @@ jobs:
1919
- name: Build
2020
run: cargo build --verbose
2121
- name: Run tests
22-
run: cargo test --verbose
22+
run: cargo test --release --verbose

Cargo.toml

Lines changed: 3 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,3 @@
1-
[package]
2-
name = "poppy"
3-
version = "0.1.0"
4-
edition = "2021"
5-
authors = ["[email protected]"]
6-
7-
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
8-
9-
[dependencies]
10-
anyhow = "1.0.79"
11-
clap = { version = "4.5.0", features = ["derive"] }
12-
jemallocator = "0.5.4"
13-
thiserror = "1.0.57"
14-
15-
[dev-dependencies]
16-
rand = "0.8.5"
17-
18-
[[bin]]
19-
name = "poppy"
20-
path = "src/bin/poppy.rs"
1+
[workspace]
2+
resolver = "2"
3+
members = ["poppy", "poppy-py"]

poppy-py/.gitignore

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
/target
2+
3+
# Byte-compiled / optimized / DLL files
4+
__pycache__/
5+
.pytest_cache/
6+
*.py[cod]
7+
8+
# C extensions
9+
*.so
10+
11+
# Distribution / packaging
12+
.Python
13+
.venv/
14+
env/
15+
bin/
16+
build/
17+
develop-eggs/
18+
dist/
19+
eggs/
20+
lib/
21+
lib64/
22+
parts/
23+
sdist/
24+
var/
25+
include/
26+
man/
27+
venv/
28+
*.egg-info/
29+
.installed.cfg
30+
*.egg
31+
32+
# Installer logs
33+
pip-log.txt
34+
pip-delete-this-directory.txt
35+
pip-selfcheck.json
36+
37+
# Unit test / coverage reports
38+
htmlcov/
39+
.tox/
40+
.coverage
41+
.cache
42+
nosetests.xml
43+
coverage.xml
44+
45+
# Translations
46+
*.mo
47+
48+
# Mr Developer
49+
.mr.developer.cfg
50+
.project
51+
.pydevproject
52+
53+
# Rope
54+
.ropeproject
55+
56+
# Django stuff:
57+
*.log
58+
*.pot
59+
60+
.DS_Store
61+
62+
# Sphinx documentation
63+
docs/_build/
64+
65+
# PyCharm
66+
.idea/
67+
68+
# VSCode
69+
.vscode/
70+
71+
# Pyenv
72+
.python-version

poppy-py/Cargo.toml

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
[package]
2+
name = "poppy-py"
3+
version = "0.1.0"
4+
edition = "2021"
5+
6+
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
7+
[lib]
8+
name = "poppy"
9+
crate-type = ["cdylib"]
10+
11+
[dependencies]
12+
poppy = { path = "../poppy" }
13+
pyo3 = "0.20.0"

poppy-py/pyproject.toml

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
[build-system]
2+
requires = ["maturin>=1.5,<2.0"]
3+
build-backend = "maturin"
4+
5+
[project]
6+
name = "poppy-py"
7+
requires-python = ">=3.8"
8+
classifiers = [
9+
"Programming Language :: Rust",
10+
"Programming Language :: Python :: Implementation :: CPython",
11+
"Programming Language :: Python :: Implementation :: PyPy",
12+
]
13+
dynamic = ["version"]
14+
[tool.maturin]
15+
features = ["pyo3/extension-module"]

poppy-py/src/lib.rs

Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
use std::{
2+
fs,
3+
io::{self, Read},
4+
path::PathBuf,
5+
};
6+
7+
use pyo3::{exceptions::PyValueError, prelude::*, types::PyBytes};
8+
9+
#[pyclass]
10+
/// Python-facing wrapper around `poppy::BloomFilter`, exposed as a Python class via `#[pyclass]`.
pub struct BloomFilter(poppy::BloomFilter);
11+
12+
/// Local newtype over `poppy::Error`. Both `poppy::Error` and `PyErr` are
/// foreign types, so the conversion into a Python exception has to go through
/// a type owned by this crate.
struct Error(poppy::Error);
13+
14+
impl From<poppy::Error> for Error {
15+
fn from(value: poppy::Error) -> Self {
16+
Self(value)
17+
}
18+
}
19+
20+
impl From<Error> for PyErr {
21+
fn from(value: Error) -> Self {
22+
PyValueError::new_err(value.0.to_string())
23+
}
24+
}
25+
26+
#[pyfunction]
27+
/// Loads a filter from bytes
28+
pub fn loads(bytes: Vec<u8>) -> PyResult<BloomFilter> {
29+
let br = io::Cursor::new(bytes);
30+
Ok(BloomFilter(
31+
poppy::BloomFilter::from_reader(br).map_err(Error::from)?,
32+
))
33+
}
34+
35+
#[pyfunction]
36+
/// Loads a filter from a given path
37+
pub fn load(path: PathBuf) -> PyResult<BloomFilter> {
38+
Ok(BloomFilter(
39+
poppy::BloomFilter::from_reader(fs::File::open(path)?).map_err(Error::from)?,
40+
))
41+
}
42+
43+
#[pymethods]
44+
impl BloomFilter {
45+
#[new]
46+
/// Creates a new filter with the given capacity and false positive probability
47+
fn new(capacity: usize, fpp: f64) -> Self {
48+
Self(poppy::BloomFilter::with_capacity(capacity, fpp))
49+
}
50+
51+
#[staticmethod]
52+
/// Creates a new filter with a given version. Pass version=1 if you
53+
/// want the filter being compatible with DCSO bloom filter tools.
54+
fn with_version(version: u8, capacity: usize, fpp: f64) -> PyResult<Self> {
55+
Ok(Self(
56+
poppy::BloomFilter::with_version_capacity(version, capacity, fpp)
57+
.map_err(Error::from)?,
58+
))
59+
}
60+
61+
/// Insert a bytes into the filter
62+
pub fn insert_bytes(&mut self, data: &[u8]) -> PyResult<bool> {
63+
Ok(self.0.insert_bytes(data).map_err(Error::from)?)
64+
}
65+
66+
/// Insert a str into the filter
67+
pub fn insert_str(&mut self, s: &str) -> PyResult<bool> {
68+
Ok(self.0.insert_bytes(s).map_err(Error::from)?)
69+
}
70+
71+
/// Check if argument is contained in the filter
72+
pub fn contains_bytes(&mut self, data: &[u8]) -> bool {
73+
self.0.contains_bytes(data)
74+
}
75+
76+
/// Check if argument is contained in the filter
77+
pub fn contains_str(&mut self, s: &str) -> bool {
78+
self.0.contains_bytes(s)
79+
}
80+
81+
/// Merge two filters, doing the union of them this methods does an
82+
/// in-place merging into the current filter
83+
pub fn union_merge(&mut self, o: &Self) -> PyResult<()> {
84+
Ok(self.0.union_merge(&o.0).map_err(Error::from)?)
85+
}
86+
87+
/// Estimate the number of common entries between two filters
88+
pub fn count_common_entries(&self, o: &Self) -> PyResult<usize> {
89+
Ok(self.0.count_common_entries(&o.0).map_err(Error::from)?)
90+
}
91+
92+
/// Dumps bloom filter into a binary form
93+
pub fn dumps<'py>(&self, py: Python<'py>) -> PyResult<&'py PyBytes> {
94+
let mut cursor = io::Cursor::new(vec![]);
95+
self.0.write(&mut cursor).map_err(Error::from)?;
96+
cursor.set_position(0);
97+
let b = PyBytes::new(py, cursor.bytes().flatten().collect::<Vec<u8>>().as_slice());
98+
Ok(b)
99+
}
100+
101+
/// Save filter into a file
102+
pub fn save(&self, path: PathBuf) -> PyResult<()> {
103+
let mut f = fs::File::create(path)?;
104+
Ok(self.0.write(&mut f).map_err(Error::from)?)
105+
}
106+
107+
// gather all the getters here
108+
109+
#[getter]
110+
pub fn version(&self) -> u8 {
111+
self.0.version()
112+
}
113+
114+
#[getter]
115+
pub fn capacity(&self) -> usize {
116+
self.0.capacity()
117+
}
118+
119+
#[getter]
120+
pub fn fpp(&self) -> f64 {
121+
self.0.fpp()
122+
}
123+
124+
#[getter]
125+
pub fn count_estimate(&self) -> usize {
126+
self.0.count_estimate() as usize
127+
}
128+
129+
#[getter]
130+
pub fn data(&self) -> Vec<u8> {
131+
self.0.data().to_vec()
132+
}
133+
}
134+
135+
/// A Python module implemented in Rust.
136+
#[pymodule]
137+
#[pyo3(name = "poppy")]
138+
fn poppy_py(_py: Python, m: &PyModule) -> PyResult<()> {
139+
m.add_class::<BloomFilter>()?;
140+
m.add_function(wrap_pyfunction!(load, m)?)?;
141+
m.add_function(wrap_pyfunction!(loads, m)?)?;
142+
Ok(())
143+
}

0 commit comments

Comments
 (0)