Skip to content

Commit

Permalink
Implemented V2 (#1)
Browse files Browse the repository at this point in the history
New filter implemented:
- new hashing function (WyHash)
- new implementation (for speed) with fixed-size small filters
- filters can be defined with some optimization levels (space, speed, best)
- poppy bench in CLI
- python bindings
  • Loading branch information
qjerome authored Mar 18, 2024
1 parent 4992825 commit 0a77f24
Show file tree
Hide file tree
Showing 29 changed files with 493,126 additions and 852 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,4 @@ jobs:
- name: Build
run: cargo build --verbose
- name: Run tests
run: cargo test --verbose
run: cargo test --release --verbose
23 changes: 3 additions & 20 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,20 +1,3 @@
[package]
name = "poppy"
version = "0.1.0"
edition = "2021"
authors = ["[email protected]"]

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
anyhow = "1.0.79"
clap = { version = "4.5.0", features = ["derive"] }
jemallocator = "0.5.4"
thiserror = "1.0.57"

[dev-dependencies]
rand = "0.8.5"

[[bin]]
name = "poppy"
path = "src/bin/poppy.rs"
[workspace]
resolver = "2"
members = ["poppy", "poppy-py"]
72 changes: 72 additions & 0 deletions poppy-py/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
/target

# Byte-compiled / optimized / DLL files
__pycache__/
.pytest_cache/
*.py[cod]

# C extensions
*.so

# Distribution / packaging
.Python
.venv/
env/
bin/
build/
develop-eggs/
dist/
eggs/
lib/
lib64/
parts/
sdist/
var/
include/
man/
venv/
*.egg-info/
.installed.cfg
*.egg

# Installer logs
pip-log.txt
pip-delete-this-directory.txt
pip-selfcheck.json

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.cache
nosetests.xml
coverage.xml

# Translations
*.mo

# Mr Developer
.mr.developer.cfg
.project
.pydevproject

# Rope
.ropeproject

# Django stuff:
*.log
*.pot

.DS_Store

# Sphinx documentation
docs/_build/

# PyCharm
.idea/

# VSCode
.vscode/

# Pyenv
.python-version
13 changes: 13 additions & 0 deletions poppy-py/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
[package]
name = "poppy-py"
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[lib]
name = "poppy"
crate-type = ["cdylib"]

[dependencies]
poppy = { path = "../poppy" }
pyo3 = "0.20.0"
15 changes: 15 additions & 0 deletions poppy-py/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
[build-system]
requires = ["maturin>=1.5,<2.0"]
build-backend = "maturin"

[project]
name = "poppy-py"
requires-python = ">=3.8"
classifiers = [
"Programming Language :: Rust",
"Programming Language :: Python :: Implementation :: CPython",
"Programming Language :: Python :: Implementation :: PyPy",
]
dynamic = ["version"]
[tool.maturin]
features = ["pyo3/extension-module"]
143 changes: 143 additions & 0 deletions poppy-py/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
use std::{
fs,
io::{self, Read},
path::PathBuf,
};

use pyo3::{exceptions::PyValueError, prelude::*, types::PyBytes};

/// Python-visible Bloom filter class.
///
/// Thin newtype around [`poppy::BloomFilter`]; every exposed method and
/// getter forwards to the wrapped filter.
#[pyclass]
pub struct BloomFilter(poppy::BloomFilter);

/// Crate-local newtype around [`poppy::Error`].
///
/// It exists so this crate can provide its own `From<Error> for PyErr`
/// conversion, which lets `?` turn filter errors into Python exceptions.
struct Error(poppy::Error);

impl From<poppy::Error> for Error {
fn from(value: poppy::Error) -> Self {
Self(value)
}
}

impl From<Error> for PyErr {
fn from(value: Error) -> Self {
PyValueError::new_err(value.0.to_string())
}
}

#[pyfunction]
/// Loads a filter from bytes
pub fn loads(bytes: Vec<u8>) -> PyResult<BloomFilter> {
let br = io::Cursor::new(bytes);
Ok(BloomFilter(
poppy::BloomFilter::from_reader(br).map_err(Error::from)?,
))
}

#[pyfunction]
/// Loads a filter from a given path
pub fn load(path: PathBuf) -> PyResult<BloomFilter> {
Ok(BloomFilter(
poppy::BloomFilter::from_reader(fs::File::open(path)?).map_err(Error::from)?,
))
}

#[pymethods]
impl BloomFilter {
#[new]
/// Creates a new filter with the given capacity and false positive probability
fn new(capacity: usize, fpp: f64) -> Self {
Self(poppy::BloomFilter::with_capacity(capacity, fpp))
}

#[staticmethod]
/// Creates a new filter with a given version. Pass version=1 if you
/// want the filter being compatible with DCSO bloom filter tools.
fn with_version(version: u8, capacity: usize, fpp: f64) -> PyResult<Self> {
Ok(Self(
poppy::BloomFilter::with_version_capacity(version, capacity, fpp)
.map_err(Error::from)?,
))
}

/// Insert a bytes into the filter
pub fn insert_bytes(&mut self, data: &[u8]) -> PyResult<bool> {
Ok(self.0.insert_bytes(data).map_err(Error::from)?)
}

/// Insert a str into the filter
pub fn insert_str(&mut self, s: &str) -> PyResult<bool> {
Ok(self.0.insert_bytes(s).map_err(Error::from)?)
}

/// Check if argument is contained in the filter
pub fn contains_bytes(&mut self, data: &[u8]) -> bool {
self.0.contains_bytes(data)
}

/// Check if argument is contained in the filter
pub fn contains_str(&mut self, s: &str) -> bool {
self.0.contains_bytes(s)
}

/// Merge two filters, doing the union of them this methods does an
/// in-place merging into the current filter
pub fn union_merge(&mut self, o: &Self) -> PyResult<()> {
Ok(self.0.union_merge(&o.0).map_err(Error::from)?)
}

/// Estimate the number of common entries between two filters
pub fn count_common_entries(&self, o: &Self) -> PyResult<usize> {
Ok(self.0.count_common_entries(&o.0).map_err(Error::from)?)
}

/// Dumps bloom filter into a binary form
pub fn dumps<'py>(&self, py: Python<'py>) -> PyResult<&'py PyBytes> {
let mut cursor = io::Cursor::new(vec![]);
self.0.write(&mut cursor).map_err(Error::from)?;
cursor.set_position(0);
let b = PyBytes::new(py, cursor.bytes().flatten().collect::<Vec<u8>>().as_slice());
Ok(b)
}

/// Save filter into a file
pub fn save(&self, path: PathBuf) -> PyResult<()> {
let mut f = fs::File::create(path)?;
Ok(self.0.write(&mut f).map_err(Error::from)?)
}

// gather all the getters here

#[getter]
pub fn version(&self) -> u8 {
self.0.version()
}

#[getter]
pub fn capacity(&self) -> usize {
self.0.capacity()
}

#[getter]
pub fn fpp(&self) -> f64 {
self.0.fpp()
}

#[getter]
pub fn count_estimate(&self) -> usize {
self.0.count_estimate() as usize
}

#[getter]
pub fn data(&self) -> Vec<u8> {
self.0.data().to_vec()
}
}

/// Initializer of the `poppy` Python extension module.
///
/// Registers the `BloomFilter` class and the module-level `load` and
/// `loads` functions.
#[pymodule]
#[pyo3(name = "poppy")]
fn poppy_py(_py: Python, m: &PyModule) -> PyResult<()> {
    m.add_class::<BloomFilter>()?;

    let load_fn = wrap_pyfunction!(load, m)?;
    m.add_function(load_fn)?;

    let loads_fn = wrap_pyfunction!(loads, m)?;
    m.add_function(loads_fn)?;

    Ok(())
}
Loading

0 comments on commit 0a77f24

Please sign in to comment.