-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
New filter implemented: - new hashing function (WyHash) - new implementation (for speed) with fixed sized small filters - filters can be defined with some optimization levels (space, speed, best) - poppy bench in CLI - python bindings
- Loading branch information
Showing
29 changed files
with
493,126 additions
and
852 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,20 +1,3 @@ | ||
[package] | ||
name = "poppy" | ||
version = "0.1.0" | ||
edition = "2021" | ||
authors = ["[email protected]"] | ||
|
||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html | ||
|
||
[dependencies] | ||
anyhow = "1.0.79" | ||
clap = { version = "4.5.0", features = ["derive"] } | ||
jemallocator = "0.5.4" | ||
thiserror = "1.0.57" | ||
|
||
[dev-dependencies] | ||
rand = "0.8.5" | ||
|
||
[[bin]] | ||
name = "poppy" | ||
path = "src/bin/poppy.rs" | ||
[workspace] | ||
resolver = "2" | ||
members = ["poppy", "poppy-py"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
/target | ||
|
||
# Byte-compiled / optimized / DLL files | ||
__pycache__/ | ||
.pytest_cache/ | ||
*.py[cod] | ||
|
||
# C extensions | ||
*.so | ||
|
||
# Distribution / packaging | ||
.Python | ||
.venv/ | ||
env/ | ||
bin/ | ||
build/ | ||
develop-eggs/ | ||
dist/ | ||
eggs/ | ||
lib/ | ||
lib64/ | ||
parts/ | ||
sdist/ | ||
var/ | ||
include/ | ||
man/ | ||
venv/ | ||
*.egg-info/ | ||
.installed.cfg | ||
*.egg | ||
|
||
# Installer logs | ||
pip-log.txt | ||
pip-delete-this-directory.txt | ||
pip-selfcheck.json | ||
|
||
# Unit test / coverage reports | ||
htmlcov/ | ||
.tox/ | ||
.coverage | ||
.cache | ||
nosetests.xml | ||
coverage.xml | ||
|
||
# Translations | ||
*.mo | ||
|
||
# Mr Developer | ||
.mr.developer.cfg | ||
.project | ||
.pydevproject | ||
|
||
# Rope | ||
.ropeproject | ||
|
||
# Django stuff: | ||
*.log | ||
*.pot | ||
|
||
.DS_Store | ||
|
||
# Sphinx documentation | ||
docs/_build/ | ||
|
||
# PyCharm | ||
.idea/ | ||
|
||
# VSCode | ||
.vscode/ | ||
|
||
# Pyenv | ||
.python-version |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
[package] | ||
name = "poppy-py" | ||
version = "0.1.0" | ||
edition = "2021" | ||
|
||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html | ||
[lib] | ||
name = "poppy" | ||
crate-type = ["cdylib"] | ||
|
||
[dependencies] | ||
poppy = { path = "../poppy" } | ||
pyo3 = "0.20.0" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
[build-system] | ||
requires = ["maturin>=1.5,<2.0"] | ||
build-backend = "maturin" | ||
|
||
[project] | ||
name = "poppy-py" | ||
requires-python = ">=3.8" | ||
classifiers = [ | ||
"Programming Language :: Rust", | ||
"Programming Language :: Python :: Implementation :: CPython", | ||
"Programming Language :: Python :: Implementation :: PyPy", | ||
] | ||
dynamic = ["version"] | ||
[tool.maturin] | ||
features = ["pyo3/extension-module"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,143 @@ | ||
use std::{ | ||
fs, | ||
io::{self, Read}, | ||
path::PathBuf, | ||
}; | ||
|
||
use pyo3::{exceptions::PyValueError, prelude::*, types::PyBytes}; | ||
|
||
#[pyclass] | ||
pub struct BloomFilter(poppy::BloomFilter); | ||
|
||
struct Error(poppy::Error); | ||
|
||
impl From<poppy::Error> for Error { | ||
fn from(value: poppy::Error) -> Self { | ||
Self(value) | ||
} | ||
} | ||
|
||
impl From<Error> for PyErr { | ||
fn from(value: Error) -> Self { | ||
PyValueError::new_err(value.0.to_string()) | ||
} | ||
} | ||
|
||
#[pyfunction] | ||
/// Loads a filter from bytes | ||
pub fn loads(bytes: Vec<u8>) -> PyResult<BloomFilter> { | ||
let br = io::Cursor::new(bytes); | ||
Ok(BloomFilter( | ||
poppy::BloomFilter::from_reader(br).map_err(Error::from)?, | ||
)) | ||
} | ||
|
||
#[pyfunction] | ||
/// Loads a filter from a given path | ||
pub fn load(path: PathBuf) -> PyResult<BloomFilter> { | ||
Ok(BloomFilter( | ||
poppy::BloomFilter::from_reader(fs::File::open(path)?).map_err(Error::from)?, | ||
)) | ||
} | ||
|
||
#[pymethods] | ||
impl BloomFilter { | ||
#[new] | ||
/// Creates a new filter with the given capacity and false positive probability | ||
fn new(capacity: usize, fpp: f64) -> Self { | ||
Self(poppy::BloomFilter::with_capacity(capacity, fpp)) | ||
} | ||
|
||
#[staticmethod] | ||
/// Creates a new filter with a given version. Pass version=1 if you | ||
/// want the filter being compatible with DCSO bloom filter tools. | ||
fn with_version(version: u8, capacity: usize, fpp: f64) -> PyResult<Self> { | ||
Ok(Self( | ||
poppy::BloomFilter::with_version_capacity(version, capacity, fpp) | ||
.map_err(Error::from)?, | ||
)) | ||
} | ||
|
||
/// Insert a bytes into the filter | ||
pub fn insert_bytes(&mut self, data: &[u8]) -> PyResult<bool> { | ||
Ok(self.0.insert_bytes(data).map_err(Error::from)?) | ||
} | ||
|
||
/// Insert a str into the filter | ||
pub fn insert_str(&mut self, s: &str) -> PyResult<bool> { | ||
Ok(self.0.insert_bytes(s).map_err(Error::from)?) | ||
} | ||
|
||
/// Check if argument is contained in the filter | ||
pub fn contains_bytes(&mut self, data: &[u8]) -> bool { | ||
self.0.contains_bytes(data) | ||
} | ||
|
||
/// Check if argument is contained in the filter | ||
pub fn contains_str(&mut self, s: &str) -> bool { | ||
self.0.contains_bytes(s) | ||
} | ||
|
||
/// Merge two filters, doing the union of them this methods does an | ||
/// in-place merging into the current filter | ||
pub fn union_merge(&mut self, o: &Self) -> PyResult<()> { | ||
Ok(self.0.union_merge(&o.0).map_err(Error::from)?) | ||
} | ||
|
||
/// Estimate the number of common entries between two filters | ||
pub fn count_common_entries(&self, o: &Self) -> PyResult<usize> { | ||
Ok(self.0.count_common_entries(&o.0).map_err(Error::from)?) | ||
} | ||
|
||
/// Dumps bloom filter into a binary form | ||
pub fn dumps<'py>(&self, py: Python<'py>) -> PyResult<&'py PyBytes> { | ||
let mut cursor = io::Cursor::new(vec![]); | ||
self.0.write(&mut cursor).map_err(Error::from)?; | ||
cursor.set_position(0); | ||
let b = PyBytes::new(py, cursor.bytes().flatten().collect::<Vec<u8>>().as_slice()); | ||
Ok(b) | ||
} | ||
|
||
/// Save filter into a file | ||
pub fn save(&self, path: PathBuf) -> PyResult<()> { | ||
let mut f = fs::File::create(path)?; | ||
Ok(self.0.write(&mut f).map_err(Error::from)?) | ||
} | ||
|
||
// gather all the getters here | ||
|
||
#[getter] | ||
pub fn version(&self) -> u8 { | ||
self.0.version() | ||
} | ||
|
||
#[getter] | ||
pub fn capacity(&self) -> usize { | ||
self.0.capacity() | ||
} | ||
|
||
#[getter] | ||
pub fn fpp(&self) -> f64 { | ||
self.0.fpp() | ||
} | ||
|
||
#[getter] | ||
pub fn count_estimate(&self) -> usize { | ||
self.0.count_estimate() as usize | ||
} | ||
|
||
#[getter] | ||
pub fn data(&self) -> Vec<u8> { | ||
self.0.data().to_vec() | ||
} | ||
} | ||
|
||
/// A Python module implemented in Rust. | ||
#[pymodule] | ||
#[pyo3(name = "poppy")] | ||
fn poppy_py(_py: Python, m: &PyModule) -> PyResult<()> { | ||
m.add_class::<BloomFilter>()?; | ||
m.add_function(wrap_pyfunction!(load, m)?)?; | ||
m.add_function(wrap_pyfunction!(loads, m)?)?; | ||
Ok(()) | ||
} |
Oops, something went wrong.