Skip to content

Commit 7791bbf

Browse files
authored
Custom FNV-1 hash implementation for use with unique checks (#4)
Just to make sure there are no copyright issues with vendoring code into the repository, this change replaces the third-party FNV-1 hash implementation with one that I've written from scratch based on my version over in Ruby. Other niceties that go along with the change are: * I found a small bug wherein River Python had been using FNV-1a hashes instead of FNV-1 hashes like the main Go codebase and Python, which would have made the unique checks incompatible. * We get type annotations on the API so more of the project is type checked. * We bring in a test suite to check the correctness of the hashes.
1 parent 051f474 commit 7791bbf

File tree

3 files changed

+667
-86
lines changed

3 files changed

+667
-86
lines changed

src/riverqueue/client.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
from typing import Optional, Tuple, List, Callable
33

44
from .driver import Driver
5-
from .fnv import fnv1a_64, fnv1a_32
5+
from .fnv import fnv1_hash
66
from .models import (
77
InsertOpts,
88
Args,
@@ -102,10 +102,10 @@ def check_unique_job(
102102

103103
with self.driver.transaction():
104104
if self.advisory_lock_prefix is None:
105-
lock_key = fnv1a_64(lock_str.encode("utf-8"))
105+
lock_key = fnv1_hash(lock_str.encode("utf-8"), 64)
106106
else:
107107
prefix = self.advisory_lock_prefix
108-
lock_key = (prefix << 32) | fnv1a_32(lock_str.encode("utf-8"))
108+
lock_key = (prefix << 32) | fnv1_hash(lock_str.encode("utf-8"), 32)
109109

110110
lock_key = self.uint64_to_int64(lock_key)
111111
self.driver.advisory_lock(lock_key)

src/riverqueue/fnv.py

Lines changed: 26 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -1,99 +1,42 @@
11
"""
2-
Copyright (c) 2015 Lorenz Schori
2+
FNV is the Fowler–Noll–Vo hash function, a simple hash that's very easy to
3+
implement, and has the perfect characteristics for use with the 64 bits of
4+
available space in a PG advisory lock.
35
4-
Permission is hereby granted, free of charge, to any person obtaining a copy
5-
of this software and associated documentation files (the "Software"), to deal
6-
in the Software without restriction, including without limitation the rights
7-
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8-
copies of the Software, and to permit persons to whom the Software is
9-
furnished to do so, subject to the following conditions:
10-
11-
The above copyright notice and this permission notice shall be included in all
12-
copies or substantial portions of the Software.
13-
14-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15-
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16-
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17-
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18-
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19-
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20-
SOFTWARE.
6+
I've implemented it myself so that the River package can stay dependency free
7+
(and because it's quite easy to do).
218
"""
229

23-
FNV_32_PRIME = 0x01000193
24-
FNV_64_PRIME = 0x100000001B3
10+
from typing import Dict, Literal
2511

26-
FNV0_32_INIT = 0
27-
FNV0_64_INIT = 0
28-
FNV1_32_INIT = 0x811C9DC5
29-
FNV1_32A_INIT = FNV1_32_INIT
30-
FNV1_64_INIT = 0xCBF29CE484222325
31-
FNV1_64A_INIT = FNV1_64_INIT
3212

33-
34-
def fnv(data, hval_init, fnv_prime, fnv_size):
13+
def fnv1_hash(data: bytes, size: Literal[32] | Literal[64]) -> int:
3514
"""
36-
Core FNV hash algorithm used in FNV0 and FNV1.
15+
Hashes data as a 32-bit or 64-bit FNV hash and returns the result. Data
16+
should be bytes rather than a string, so encode a string with something like
17+
`input_str.encode("utf-8")` or `b"string as bytes"`.
3718
"""
38-
assert isinstance(data, bytes)
3919

40-
hval = hval_init
41-
for byte in data:
42-
hval = (hval * fnv_prime) % fnv_size
43-
hval = hval ^ byte
44-
return hval
45-
46-
47-
def fnva(data, hval_init, fnv_prime, fnv_size):
48-
"""
49-
Alternative FNV hash algorithm used in FNV-1a.
50-
"""
5120
assert isinstance(data, bytes)
5221

53-
hval = hval_init
54-
for byte in data:
55-
hval = hval ^ byte
56-
hval = (hval * fnv_prime) % fnv_size
57-
return hval
58-
59-
60-
def fnv0_32(data, hval_init=FNV0_32_INIT):
61-
"""
62-
Returns the 32 bit FNV-0 hash value for the given data.
63-
"""
64-
return fnv(data, hval_init, FNV_32_PRIME, 2**32)
65-
22+
hash = __OFFSET_BASIS[size]
23+
mask = 2**size - 1 # creates a mask of 1s of `size` bits long like 0xffffffff
24+
prime = __PRIME[size]
6625

67-
def fnv1_32(data, hval_init=FNV1_32_INIT):
68-
"""
69-
Returns the 32 bit FNV-1 hash value for the given data.
70-
"""
71-
return fnv(data, hval_init, FNV_32_PRIME, 2**32)
72-
73-
74-
def fnv1a_32(data, hval_init=FNV1_32_INIT):
75-
"""
76-
Returns the 32 bit FNV-1a hash value for the given data.
77-
"""
78-
return fnva(data, hval_init, FNV_32_PRIME, 2**32)
79-
80-
81-
def fnv0_64(data, hval_init=FNV0_64_INIT):
82-
"""
83-
Returns the 64 bit FNV-0 hash value for the given data.
84-
"""
85-
return fnv(data, hval_init, FNV_64_PRIME, 2**64)
26+
for byte in data:
27+
hash *= prime
28+
hash &= mask # take lower N bits of multiplication product
29+
hash ^= byte
8630

31+
return hash
8732

88-
def fnv1_64(data, hval_init=FNV1_64_INIT):
89-
"""
90-
Returns the 64 bit FNV-1 hash value for the given data.
91-
"""
92-
return fnv(data, hval_init, FNV_64_PRIME, 2**64)
9333

34+
__OFFSET_BASIS: Dict[Literal[32] | Literal[64], int] = {
35+
32: 0x811C9DC5,
36+
64: 0xCBF29CE484222325,
37+
}
9438

95-
def fnv1a_64(data, hval_init=FNV1_64_INIT):
96-
"""
97-
Returns the 64 bit FNV-1a hash value for the given data.
98-
"""
99-
return fnva(data, hval_init, FNV_64_PRIME, 2**64)
39+
__PRIME: Dict[Literal[32] | Literal[64], int] = {
40+
32: 0x01000193,
41+
64: 0x00000100000001B3,
42+
}

0 commit comments

Comments
 (0)