@@ -25,6 +25,9 @@ class ExpandingBloomFilter(object):
25
25
At this point, the expanding Bloom Filter does not support \
26
26
`export` or `import` '''
27
27
28
+ __slots__ = ['_blooms' , '__fpr' , '__est_elements' , '__hash_func' ,
29
+ '__added_elements' ]
30
+
28
31
def __init__ (self , est_elements = None , false_positive_rate = None ,
29
32
hash_function = None ):
30
33
''' initialize '''
@@ -62,16 +65,6 @@ def elements_added(self):
62
65
''' int: The total number of elements added '''
63
66
return self .__added_elements
64
67
65
- def __add_bloom_filter (self ):
66
- ''' build a new bloom and add it on! '''
67
- blm = BloomFilter (self .__est_elements , self .__fpr , self .__hash_func )
68
- self ._blooms .append (blm )
69
-
70
- def __check_for_growth (self ):
71
- ''' detereming if the bloom filter should automatically grow '''
72
- if self ._blooms [- 1 ].elements_added >= self .__est_elements :
73
- self .__add_bloom_filter ()
74
-
75
68
def check (self , key ):
76
69
''' Check to see if the key is in the Bloom Filter
77
70
@@ -103,8 +96,8 @@ def add(self, key, force=False):
103
96
Args:
104
97
key (str): The element to be inserted
105
98
force (bool): `True` will force it to be inserted, even if it \
106
- likely has been inserted before \
107
- `False` will only insert if not found in the Bloom Filter '''
99
+ likely has been inserted before `False` will \
100
+ only insert if not found in the Bloom Filter '''
108
101
hashes = self ._blooms [0 ].hashes (key )
109
102
self .add_alt (hashes , force )
110
103
@@ -115,8 +108,101 @@ def add_alt(self, hashes, force=False):
115
108
hashes (list): A list of integers representing the key to insert
116
109
force (bool): `True` will force it to be inserted, even if \
117
110
it likely has been inserted before \
118
- `False` will only insert if not found in the Bloom Filter '''
111
+ `False` will only insert if not found in the \
112
+ Bloom Filter '''
119
113
self .__added_elements += 1
120
114
if force or not self .check_alt (hashes ):
121
115
self .__check_for_growth ()
122
116
self ._blooms [- 1 ].add_alt (hashes )
117
+
118
def __add_bloom_filter(self):
    ''' Append a newly constructed Bloom Filter to the end of the list of
        filters; private helper of ExpandingBloomFilter

        The new filter is built from the estimated elements, false
        positive rate and hash function stored on this instance '''
    self._blooms.append(BloomFilter(est_elements=self.__est_elements,
                                    false_positive_rate=self.__fpr,
                                    hash_function=self.__hash_func))
124
+
125
def __check_for_growth(self):
    ''' Determine if the Bloom Filter should automatically grow; private
        helper of ExpandingBloomFilter

        A new Bloom Filter is appended once the newest one has had at
        least the estimated number of elements added to it '''
    newest = self._blooms[-1]
    if newest.elements_added < self.__est_elements:
        # the newest filter still has room; nothing to do
        return
    self.__add_bloom_filter()
129
+
130
+
131
class RotatingBloomFilter(ExpandingBloomFilter):
    ''' Simple Rotating Bloom Filter implementation that allows for the "older"
        elements added to be removed, in chunks. As the queue fills up, those
        elements inserted earlier will be bulk removed. This also provides the
        user with the opportunity to force the removal instead of waiting
        for the queue to fill up.

        Args:
            est_elements (int): The number of estimated elements to be \
            added to each Bloom Filter in the queue
            false_positive_rate (float): The desired false positive rate
            max_queue_size (int): The maximum number of Bloom Filters kept \
            in the queue. Total elements added is based on \
            `max_queue_size * est_elements`
            hash_function (function): Hashing strategy function to use \
            `hf(key, number)`
    '''
    # Double-underscore names are mangled per class, so these are this
    # class's own copies (`_RotatingBloomFilter__fpr`, ...). `_blooms` is
    # already a slot on the parent class and must not be declared again.
    __slots__ = ['__fpr', '__est_elements', '__hash_func',
                 '__added_elements', '_queue_size']

    def __init__(self, est_elements=None, false_positive_rate=None,
                 max_queue_size=10, hash_function=None):
        ''' initialize '''
        super(RotatingBloomFilter,
              self).__init__(est_elements=est_elements,
                             false_positive_rate=false_positive_rate,
                             hash_function=hash_function)
        # The parent's private attributes are not reachable under their
        # short names from this class, so keep local copies for the
        # helpers defined below.
        self.__fpr = false_positive_rate
        self.__est_elements = est_elements
        self.__hash_func = hash_function
        self._queue_size = max_queue_size
        self.__added_elements = 0

    @property
    def elements_added(self):
        ''' int: The total number of elements added '''
        # Overridden because `add_alt` below updates this class's counter;
        # the inherited property reads the parent's separate private
        # counter, which this class never increments.
        return self.__added_elements

    @property
    def max_queue_size(self):
        ''' int: The maximum size for the queue '''
        return self._queue_size

    @property
    def current_queue_size(self):
        ''' int: The current size of the queue '''
        return len(self._blooms)

    def add_alt(self, hashes, force=False):
        ''' Add the element represented by hashes into the Bloom Filter

            Args:
                hashes (list): A list of integers representing the key to \
                insert
                force (bool): `True` will force it to be inserted, even if \
                it likely has been inserted before \
                `False` will only insert if not found in the \
                Bloom Filter '''
        self.__added_elements += 1
        if force or not self.check_alt(hashes):
            # rotate first so the insert always lands in a filter with room
            self.__rotate_bloom_filter()
            self._blooms[-1].add_alt(hashes)

    def pop(self):
        ''' Pop the oldest Bloom Filter off of the queue; a fresh, empty
            Bloom Filter is appended so the queue is never left empty '''
        self.__rotate_bloom_filter(force=True)

    def __rotate_bloom_filter(self, force=False):
        ''' Start a new Bloom Filter once the newest one is full (or when
            forced), removing the oldest filter as needed

            Args:
                force (bool): `True` removes the oldest Bloom Filter and \
                starts a new one even if the newest is not yet full '''
        newest = self._blooms[-1]
        is_full = newest.elements_added >= newest.estimated_elements
        if not (force or is_full):
            return
        at_capacity = self.current_queue_size >= self._queue_size
        if force or at_capacity:
            # drop the oldest filter: always when forced, otherwise only
            # when appending would exceed the maximum queue size
            self._blooms.pop(0)
        self.__add_bloom_filter()

    def __add_bloom_filter(self):
        ''' build a new bloom and add it on! '''
        blm = BloomFilter(est_elements=self.__est_elements,
                          false_positive_rate=self.__fpr,
                          hash_function=self.__hash_func)
        self._blooms.append(blm)
0 commit comments