Skip to content

Commit 8bdc402

Browse files
authored
Merge pull request #265 from splitio/BloomFilter
BloomFilter implementation
2 parents bab7850 + fd24fd5 commit 8bdc402

File tree

4 files changed

+136
-0
lines changed

4 files changed

+136
-0
lines changed

setup.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
'pyyaml>=5.4',
2020
'docopt>=0.6.2',
2121
'enum34;python_version<"3.4"',
22+
'bloom-filter2>=2.0.0',
2223
]
2324

2425
with open(path.join(path.abspath(path.dirname(__file__)), 'splitio', 'version.py')) as f:

splitio/engine/filters/__init__.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
import abc
2+
3+
class BaseFilter(metaclass=abc.ABCMeta):
    """
    Impressions Filter interface.

    Abstract base class: it cannot be instantiated directly, and a concrete
    filter must override ``add``, ``contains`` and ``clear``.
    """

    @abc.abstractmethod
    def add(self, data):
        """
        Add an element to the filter.

        :param data: element to be added

        :return: a boolean flag
        :rtype: bool
        """

    @abc.abstractmethod
    def contains(self, data):
        """
        Check whether an element is present in the filter.

        :param data: element to be checked

        :return: a boolean flag
        :rtype: bool
        """

    @abc.abstractmethod
    def clear(self):
        """
        Reset the filter.

        No return value.
        """
splitio/engine/filters/bloom_filter.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
from splitio.engine.filters import BaseFilter
2+
from bloom_filter2 import BloomFilter as BloomFilter2
3+
4+
class BloomFilter(BaseFilter):
    """
    Optimized mode strategy.

    Implements the ``BaseFilter`` interface by delegating to a
    ``bloom_filter2.BloomFilter`` instance.
    """

    def __init__(self, max_elements=5000, error_rate=0.01):
        """
        Construct a bloom filter instance.

        :param max_elements: maximum elements in the filter
        :type max_elements: int

        :param error_rate: error rate for the false positives; reducing it will consume more memory
        :type error_rate: float
        """
        self._max_elements = max_elements
        self._error_rate = error_rate
        self._imps_bloom_filter = self._build_filter()

    def _build_filter(self):
        """Create an empty underlying filter using the configured sizing."""
        return BloomFilter2(max_elements=self._max_elements, error_rate=self._error_rate)

    def add(self, data):
        """
        Add an item to the bloom filter instance.

        :param data: element to be added
        :type data: str

        :return: True if successful
        :rtype: bool
        """
        self._imps_bloom_filter.add(data)
        # Success is reported by looking the element up right after inserting it.
        return data in self._imps_bloom_filter

    def contains(self, data):
        """
        Check if an item exists in the bloom filter instance.

        :param data: element to be checked
        :type data: str

        :return: True if it exists
        :rtype: bool
        """
        return data in self._imps_bloom_filter

    def clear(self):
        """
        Destroy the current filter instance and create a new one.

        The previous underlying filter is closed before being replaced by an
        empty one built with the same ``max_elements`` and ``error_rate``.
        """
        self._imps_bloom_filter.close()
        self._imps_bloom_filter = self._build_filter()

tests/engine/test_bloom_filter.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
"""BloomFilter unit tests."""
2+
3+
from random import random
4+
import uuid
5+
from splitio.engine.filters.bloom_filter import BloomFilter
6+
7+
class BloomFilterTests(object):
    """BloomFilter test cases."""

    def test_bloom_filter_methods(self, mocker):
        """Exercise add, contains and clear on a filter built with default settings."""
        bloom_filter = BloomFilter()
        key1 = str(uuid.uuid4())
        key2 = str(uuid.uuid4())
        bloom_filter.add(key1)

        assert bloom_filter.contains(key1)
        assert not bloom_filter.contains(key2)

        # After clearing, previously added keys must no longer be reported.
        bloom_filter.clear()
        assert not bloom_filter.contains(key1)

        # The filter stays usable after a clear.
        bloom_filter.add(key1)
        bloom_filter.add(key2)
        assert bloom_filter.contains(key1)
        assert bloom_filter.contains(key2)

    def test_bloom_filter_error_percentage(self, mocker):
        """Check that observed false positives stay within the configured error rate."""
        total_sample = 20000
        error_rate = 0.01
        bloom_filter = BloomFilter(total_sample, error_rate)

        # range(1, total_sample) stores total_sample - 1 keys (indices 0..total_sample - 2).
        arr_storage = []
        for _ in range(1, total_sample):
            myuuid = str(uuid.uuid4())
            bloom_filter.add(myuuid)
            arr_storage.append(myuuid)
        # Set copy for constant-time membership checks; the list is kept for
        # index-based sampling below.
        stored_keys = set(arr_storage)

        false_positive_count = 0
        for _ in range(1, total_sample):
            y = int(random() * total_sample * 5)
            if y > total_sample - 2:
                # Index past the stored keys: probe with a freshly generated key.
                myuuid = str(uuid.uuid4())
                if myuuid in stored_keys:
                    # Key happens to be stored: a miss would be a false negative.
                    assert bloom_filter.contains(myuuid)
                elif bloom_filter.contains(myuuid):
                    # Key was never stored but is reported: false positive.
                    false_positive_count += 1
            else:
                # Stored key: a miss would be a false negative.
                myuuid = arr_storage[y]
                assert bloom_filter.contains(myuuid)

        # The ratio is taken over total_sample, not only over the never-stored probes.
        assert false_positive_count / total_sample <= error_rate

0 commit comments

Comments
 (0)