Skip to content

Commit f2beb3c

Browse files
authored
Merge pull request #180 from splitio/feature/hash128
add murmur128
2 parents 69892f6 + 274606b commit f2beb3c

File tree

5 files changed

+100153
-3
lines changed

5 files changed

+100153
-3
lines changed

setup.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,7 @@
2323
'futures>=3.0.5;python_version<"3"'
2424
]
2525

26-
with open(path.join(path.abspath(path.dirname(__file__)),
27-
'splitio', 'version.py')) as f:
26+
with open(path.join(path.abspath(path.dirname(__file__)), 'splitio', 'version.py')) as f:
2827
exec(f.read()) # pylint: disable=exec-used
2928

3029
setup(
@@ -38,11 +37,13 @@
3837
license='Apache License 2.0',
3938
install_requires=INSTALL_REQUIRES,
4039
tests_require=TESTS_REQUIRES,
40+
# dependency_links=['https://github.com/splitio/mmh3cffi/tarball/feature/development#egg=mmh3cffi-0.2.0'],
4141
extras_require={
4242
'test': TESTS_REQUIRES,
4343
'redis': ['redis>=2.10.5'],
4444
'uwsgi': ['uwsgi>=2.0.0'],
45-
'cpphash': ['mmh3cffi>=0.1.5']
45+
# 'cpphash': ['mmh3cffi==0.2.0']
46+
'cpphash': ['mmh3cffi@git+https://github.com/splitio/mmh3cffi@development#egg=mmh3cffi']
4647
},
4748
setup_requires=['pytest-runner'],
4849
classifiers=[

splitio/engine/hashfns/__init__.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,17 +17,23 @@
1717

1818
def _murmur_hash(key, seed):
1919
return mmh3cffi.hash_str(key, seed)
20+
21+
def _murmur_hash128(key, seed):
22+
return mmh3cffi.hash_str_128(key, seed)[0]
23+
2024
except ImportError:
2125
# Fallback to the interpreted Python hash algorithm (slower)
2226
from splitio.engine.hashfns import murmur3py #pylint: disable=ungrouped-imports
2327
_murmur_hash = murmur3py.murmur32_py #pylint: disable=invalid-name
28+
_murmur_hash128 = lambda k, s: murmur3py.hash128_x64(k, s)[0] #pylint: disable=invalid-name
2429

2530

2631
_HASH_ALGORITHMS = {
2732
HashAlgorithm.LEGACY: legacy.legacy_hash,
2833
HashAlgorithm.MURMUR: _murmur_hash
2934
}
3035

36+
murmur_128 = _murmur_hash128 #pylint: disable=invalid-name
3137

3238
def get_hash_fn(algo):
3339
"""

splitio/engine/hashfns/murmur3py.py

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,3 +74,132 @@ def fmix(current_hash):
7474

7575
unsigned_val = fmix(hash1 ^ length)
7676
return unsigned_val
77+
78+
def hash128_x64(key, seed):
    """
    Pure python implementation of murmurhash3-128 (x64 variant).

    borrowed from: https://github.com/wc-duck/pymmh3/blob/master/pymmh3.py

    :param key: Value to hash. Text is encoded as UTF-8 before hashing;
        ``bytes`` / ``bytearray`` values are hashed as-is.
    :type key: str|bytes|bytearray
    :param seed: Initial value for both 64-bit halves of the state.
        NOTE(review): the seed is used as given (it is not reduced to an
        unsigned 32-bit value), so callers should pass a non-negative int.
    :type seed: int

    :return: Two-element list ``[h1, h2]`` with the two unsigned 64-bit
        halves of the 128-bit hash.
    :rtype: list[int]
    """
    mask64 = 0xFFFFFFFFFFFFFFFF
    c1 = 0x87c37b91114253d5
    c2 = 0x4cf5ad432745937f

    # Raw byte strings cannot be passed to bytearray() together with an
    # encoding, so they are copied directly; only text gets encoded.
    if isinstance(key, (bytes, bytearray)):
        data = bytearray(key)
    else:
        data = bytearray(key, 'utf-8')

    def rotl64(value, bits):
        """Rotate `value` left by `bits` within a 64-bit word."""
        return ((value << bits) | (value >> (64 - bits))) & mask64

    def read_le(start, count):
        """Read `count` bytes starting at `start` as a little-endian integer."""
        value = 0
        for position in range(count):
            value |= data[start + position] << (8 * position)
        return value

    def mix_k1(k):
        """Scramble a word that is about to be folded into h1."""
        k = (k * c1) & mask64
        k = rotl64(k, 31)
        return (k * c2) & mask64

    def mix_k2(k):
        """Scramble a word that is about to be folded into h2."""
        k = (k * c2) & mask64
        k = rotl64(k, 33)
        return (k * c1) & mask64

    def fmix(k):
        """Final avalanche step applied to each 64-bit half."""
        k ^= k >> 33
        k = (k * 0xff51afd7ed558ccd) & mask64
        k ^= k >> 33
        k = (k * 0xc4ceb9fe1a85ec53) & mask64
        k ^= k >> 33
        return k

    length = len(data)
    # Offset of the first byte that is not part of a full 16-byte block.
    tail_index = (length // 16) * 16

    h1 = seed
    h2 = seed

    # body: each 16-byte block is consumed as two little-endian 64-bit words
    for offset in range(0, tail_index, 16):
        h1 ^= mix_k1(read_le(offset, 8))
        h1 = rotl64(h1, 27)
        h1 = (h1 + h2) & mask64
        h1 = (h1 * 5 + 0x52dce729) & mask64

        h2 ^= mix_k2(read_le(offset + 8, 8))
        h2 = rotl64(h2, 31)
        h2 = (h1 + h2) & mask64
        h2 = (h2 * 5 + 0x38495ab5) & mask64

    # tail: up to 15 leftover bytes; the first 8 feed h1, the rest feed h2
    tail_size = length - tail_index
    if tail_size > 8:
        h2 ^= mix_k2(read_le(tail_index + 8, tail_size - 8))
    if tail_size > 0:
        h1 ^= mix_k1(read_le(tail_index, min(tail_size, 8)))

    # finalization
    h1 ^= length
    h2 ^= length

    h1 = (h1 + h2) & mask64
    h2 = (h1 + h2) & mask64

    h1 = fmix(h1)
    h2 = fmix(h2)

    h1 = (h1 + h2) & mask64
    h2 = (h1 + h2) & mask64

    return [h1, h2]

0 commit comments

Comments
 (0)