Skip to content

Commit 8fd337f

Browse files
committed
BloomFilter implementation
1 parent 802a2f2 commit 8fd337f

File tree

5 files changed

+139
-1
lines changed

5 files changed

+139
-1
lines changed

setup.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
'flake8',
99
'pytest==7.0.1',
1010
'pytest-mock>=3.5.1',
11-
'coverage==6.2',
11+
'coverage',
1212
'pytest-cov',
1313
'importlib-metadata==4.2',
1414
'tomli==1.2.3',
@@ -19,6 +19,7 @@
1919
'pyyaml>=5.4',
2020
'docopt>=0.6.2',
2121
'enum34;python_version<"3.4"',
22+
'bloom-filter2>=2.0.0',
2223
]
2324

2425
with open(path.join(path.abspath(path.dirname(__file__)), 'splitio', 'version.py')) as f:

splitio/engine/filters/__init__.py

Whitespace-only changes.
splitio/engine/filters/base_filter.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
import abc
2+
3+
class BaseFilter(metaclass=abc.ABCMeta):
    """
    Impressions Filter interface.

    Concrete filters must implement ``add``, ``contains`` and ``clear``;
    the class cannot be instantiated until all three are overridden.
    """

    @abc.abstractmethod
    def add(self, data):
        """
        Add an element to the filter.

        :param data: element to be added

        :return: a boolean flag
        :rtype: bool
        """

    @abc.abstractmethod
    def contains(self, data):
        """
        Check whether an element is present in the filter.

        :param data: element to be checked

        :return: a boolean flag
        :rtype: bool
        """

    @abc.abstractmethod
    def clear(self):
        """
        Reset the filter.

        No return value.
        """
splitio/engine/filters/bloom_filter.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
from splitio.engine.filters.base_filter import BaseFilter
2+
from splitio import util
3+
from bloom_filter2 import BloomFilter
4+
5+
class ImpressionsBloomFilter(BaseFilter):
    """Impressions filter backed by a bloom filter (optimized mode strategy)."""

    def __init__(self, max_elements=5000, error_rate=0.01):
        """
        Construct a bloom filter instance.

        :param max_elements: maximum number of elements the filter is sized for
        :type max_elements: int

        :param error_rate: error rate for false positives; reducing it consumes more memory
        :type error_rate: float
        """
        self._max_elements = max_elements
        self._error_rate = error_rate
        self._imps_bloom_filter = self._new_filter()

    def _new_filter(self):
        """Build an empty BloomFilter from the configured size and error rate."""
        return BloomFilter(max_elements=self._max_elements, error_rate=self._error_rate)

    def add(self, data):
        """
        Add an item to the bloom filter instance.

        :param data: element to be added
        :type data: str

        :return: True if the element is reported as present after insertion
        :rtype: bool
        """
        self._imps_bloom_filter.add(data)
        # Confirm the insert by querying the filter rather than assuming success.
        return data in self._imps_bloom_filter

    def contains(self, data):
        """
        Check if an item exists in the bloom filter instance.

        :param data: element to be checked
        :type data: str

        :return: True if the filter reports the element as present
        :rtype: bool
        """
        return data in self._imps_bloom_filter

    def clear(self):
        """
        Destroy the current filter instance and create a new, empty one.

        The old filter is closed before being replaced; the configured
        ``max_elements`` and ``error_rate`` are reused for the new one.
        """
        self._imps_bloom_filter.close()
        self._imps_bloom_filter = self._new_filter()

tests/engine/test_bloom_filter.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
"""BloomFilter unit tests."""
2+
3+
from random import random
4+
import uuid
5+
import time
6+
from splitio.engine.filters.bloom_filter import ImpressionsBloomFilter
7+
8+
class BloomFilterTests(object):
    """ImpressionsBloomFilter test cases."""

    def test_bloom_filter_methods(self, mocker):
        """Exercise add / contains / clear on a default-sized filter."""
        bloom_filter = ImpressionsBloomFilter()
        key1 = str(uuid.uuid4())
        key2 = str(uuid.uuid4())
        bloom_filter.add(key1)

        assert bloom_filter.contains(key1)
        assert not bloom_filter.contains(key2)

        # After clear() the previously added key must no longer be reported.
        bloom_filter.clear()
        assert not bloom_filter.contains(key1)

        bloom_filter.add(key1)
        bloom_filter.add(key2)
        assert bloom_filter.contains(key1)
        assert bloom_filter.contains(key2)

    def test_bloom_filter_error_percentage(self, mocker):
        """Check there are no false negatives and false positives stay within bounds."""
        total_sample = 20000
        error_rate = 0.01
        bloom_filter = ImpressionsBloomFilter(total_sample, error_rate)

        # Fill the filter with total_sample - 1 random keys, remembering each one.
        arr_storage = []
        for _ in range(1, total_sample):
            myuuid = str(uuid.uuid4())
            bloom_filter.add(myuuid)
            arr_storage.append(myuuid)
        # Set copy for O(1) membership checks; the list is kept for indexing.
        stored_keys = set(arr_storage)

        false_positive_count = 0
        for _ in range(1, total_sample):
            # y is drawn from [0, 5 * total_sample), so most lookups use fresh keys.
            y = int(random() * total_sample * 5)
            if y > total_sample - 2:
                myuuid = str(uuid.uuid4())
                if myuuid in stored_keys:
                    # Key was actually stored: a miss here would be a false negative.
                    assert bloom_filter.contains(myuuid)
                elif bloom_filter.contains(myuuid):
                    # Key was never stored but is reported present: false positive.
                    false_positive_count += 1
            else:
                # y indexes a stored key: a miss here would be a false negative.
                assert bloom_filter.contains(arr_storage[y])

        # NOTE(review): the denominator is total_sample, not the number of
        # lookups made with fresh keys, so this bound is looser than the
        # measured false-positive rate -- confirm this slack is intended.
        assert false_positive_count / total_sample <= error_rate

0 commit comments

Comments
 (0)