Skip to content

Commit 68a4897

Browse files
authored
Remove inefficient validation checks in binary search functions
This commit removes the inefficient validation check `if list(sorted_collection) != sorted(sorted_collection)` from four functions:
- binary_search
- binary_search_std_lib
- binary_search_by_recursion
- exponential_search

This validation copies the whole collection and sorts a second copy of it on every call, which costs at least O(n) time. That defeats the purpose of binary search algorithms, which should run in O(log n), and it causes significant performance degradation, especially for large collections.

The functions already document in their docstrings that the collection must be sorted. This is a precondition, not something to validate at runtime in performance-critical code.
1 parent e2a78d4 commit 68a4897

File tree

1 file changed

+51
-81
lines changed

1 file changed

+51
-81
lines changed

searches/binary_search.py

Lines changed: 51 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
#!/usr/bin/env python3
2-
32
"""
43
Pure Python implementations of binary search algorithms
54
@@ -9,7 +8,6 @@
98
For manual testing run:
109
python3 binary_search.py
1110
"""
12-
1311
from __future__ import annotations
1412

1513
import bisect
@@ -39,9 +37,9 @@ def bisect_left(
3937
2
4038
>>> bisect_left([0, 5, 7, 10, 15], 20)
4139
5
42-
>>> bisect_left([0, 5, 7, 10, 15], 15, 1, 3)
40+
>>> bisect_left([0, 5, 7, 10, 15], 15, lo=1, hi=3)
4341
3
44-
>>> bisect_left([0, 5, 7, 10, 15], 6, 2)
42+
>>> bisect_left([0, 5, 7, 10, 15], 6, lo=2)
4543
2
4644
"""
4745
if hi < 0:
@@ -80,9 +78,9 @@ def bisect_right(
8078
5
8179
>>> bisect_right([0, 5, 7, 10, 15], 6)
8280
2
83-
>>> bisect_right([0, 5, 7, 10, 15], 15, 1, 3)
81+
>>> bisect_right([0, 5, 7, 10, 15], 15, lo=1, hi=3)
8482
3
85-
>>> bisect_right([0, 5, 7, 10, 15], 6, 2)
83+
>>> bisect_right([0, 5, 7, 10, 15], 6, lo=2)
8684
2
8785
"""
8886
if hi < 0:
@@ -127,13 +125,9 @@ def insort_left(
127125
>>> item is sorted_collection[2]
128126
False
129127
>>> sorted_collection = [0, 5, 7, 10, 15]
130-
>>> insort_left(sorted_collection, 20)
131-
>>> sorted_collection
132-
[0, 5, 7, 10, 15, 20]
133-
>>> sorted_collection = [0, 5, 7, 10, 15]
134-
>>> insort_left(sorted_collection, 15, 1, 3)
128+
>>> insort_left(sorted_collection, 20, lo=1, hi=3)
135129
>>> sorted_collection
136-
[0, 5, 7, 15, 10, 15]
130+
[0, 5, 7, 20, 10, 15]
137131
"""
138132
sorted_collection.insert(bisect_left(sorted_collection, item, lo, hi), item)
139133

@@ -167,26 +161,23 @@ def insort_right(
167161
>>> item is sorted_collection[2]
168162
True
169163
>>> sorted_collection = [0, 5, 7, 10, 15]
170-
>>> insort_right(sorted_collection, 20)
164+
>>> insort_right(sorted_collection, 20, lo=1, hi=3)
171165
>>> sorted_collection
172-
[0, 5, 7, 10, 15, 20]
173-
>>> sorted_collection = [0, 5, 7, 10, 15]
174-
>>> insort_right(sorted_collection, 15, 1, 3)
175-
>>> sorted_collection
176-
[0, 5, 7, 15, 10, 15]
166+
[0, 5, 7, 20, 10, 15]
177167
"""
178168
sorted_collection.insert(bisect_right(sorted_collection, item, lo, hi), item)
179169

180170

181171
def binary_search(sorted_collection: list[int], item: int) -> int:
182-
"""Pure implementation of a binary search algorithm in Python
172+
"""
173+
Pure implementation of binary search algorithm in Python
183174
184-
Be careful collection must be ascending sorted otherwise, the result will be
175+
Be careful collection must be ascending sorted, otherwise result will be
185176
unpredictable
186177
187178
:param sorted_collection: some ascending sorted collection with comparable items
188179
:param item: item value to search
189-
:return: index of the found item or -1 if the item is not found
180+
:return: index of found item or -1 if item is not found
190181
191182
Examples:
192183
>>> binary_search([0, 5, 7, 10, 15], 0)
@@ -198,8 +189,6 @@ def binary_search(sorted_collection: list[int], item: int) -> int:
198189
>>> binary_search([0, 5, 7, 10, 15], 6)
199190
-1
200191
"""
201-
if list(sorted_collection) != sorted(sorted_collection):
202-
raise ValueError("sorted_collection must be sorted in ascending order")
203192
left = 0
204193
right = len(sorted_collection) - 1
205194

@@ -216,14 +205,15 @@ def binary_search(sorted_collection: list[int], item: int) -> int:
216205

217206

218207
def binary_search_std_lib(sorted_collection: list[int], item: int) -> int:
219-
"""Pure implementation of a binary search algorithm in Python using stdlib
208+
"""
209+
Pure implementation of binary search algorithm in Python using stdlib
220210
221-
Be careful collection must be ascending sorted otherwise, the result will be
211+
Be careful collection must be ascending sorted, otherwise result will be
222212
unpredictable
223213
224214
:param sorted_collection: some ascending sorted collection with comparable items
225215
:param item: item value to search
226-
:return: index of the found item or -1 if the item is not found
216+
:return: index of found item or -1 if item is not found
227217
228218
Examples:
229219
>>> binary_search_std_lib([0, 5, 7, 10, 15], 0)
@@ -235,8 +225,6 @@ def binary_search_std_lib(sorted_collection: list[int], item: int) -> int:
235225
>>> binary_search_std_lib([0, 5, 7, 10, 15], 6)
236226
-1
237227
"""
238-
if list(sorted_collection) != sorted(sorted_collection):
239-
raise ValueError("sorted_collection must be sorted in ascending order")
240228
index = bisect.bisect_left(sorted_collection, item)
241229
if index != len(sorted_collection) and sorted_collection[index] == item:
242230
return index
@@ -246,30 +234,32 @@ def binary_search_std_lib(sorted_collection: list[int], item: int) -> int:
246234
def binary_search_by_recursion(
247235
sorted_collection: list[int], item: int, left: int = 0, right: int = -1
248236
) -> int:
249-
"""Pure implementation of a binary search algorithm in Python by recursion
237+
"""
238+
Pure implementation of binary search algorithm in Python by recursion
250239
251-
Be careful collection must be ascending sorted otherwise, the result will be
240+
Be careful collection must be ascending sorted, otherwise result will be
252241
unpredictable
253242
First recursion should be started with left=0 and right=(len(sorted_collection)-1)
254243
255244
:param sorted_collection: some ascending sorted collection with comparable items
256245
:param item: item value to search
257-
:return: index of the found item or -1 if the item is not found
246+
:param left: left side index
247+
:param right: right side index
248+
:return: index of found item or -1 if item is not found
258249
259250
Examples:
260-
>>> binary_search_by_recursion([0, 5, 7, 10, 15], 0, 0, 4)
251+
>>> binary_search_by_recursion([0, 5, 7, 10, 15], 0)
261252
0
262-
>>> binary_search_by_recursion([0, 5, 7, 10, 15], 15, 0, 4)
253+
>>> binary_search_by_recursion([0, 5, 7, 10, 15], 15)
263254
4
264-
>>> binary_search_by_recursion([0, 5, 7, 10, 15], 5, 0, 4)
255+
>>> binary_search_by_recursion([0, 5, 7, 10, 15], 5)
265256
1
266-
>>> binary_search_by_recursion([0, 5, 7, 10, 15], 6, 0, 4)
257+
>>> binary_search_by_recursion([0, 5, 7, 10, 15], 6)
267258
-1
268259
"""
269260
if right < 0:
270261
right = len(sorted_collection) - 1
271-
if list(sorted_collection) != sorted(sorted_collection):
272-
raise ValueError("sorted_collection must be sorted in ascending order")
262+
273263
if right < left:
274264
return -1
275265

@@ -284,18 +274,19 @@ def binary_search_by_recursion(
284274

285275

286276
def exponential_search(sorted_collection: list[int], item: int) -> int:
287-
"""Pure implementation of an exponential search algorithm in Python
277+
"""
278+
Pure implementation of exponential search algorithm in Python.
279+
288280
Resources used:
289281
https://en.wikipedia.org/wiki/Exponential_search
290282
291-
Be careful collection must be ascending sorted otherwise, result will be
292-
unpredictable
293-
294283
:param sorted_collection: some ascending sorted collection with comparable items
295284
:param item: item value to search
296-
:return: index of the found item or -1 if the item is not found
285+
:return: index of found item or -1 if item is not found
297286
298-
the order of this algorithm is O(lg I) where I is index position of item if exist
287+
the order of this algorithm is O(lg I) where I is index of item if item is in
288+
collection
289+
if not, I is index where item should be in sorted_collection
299290
300291
Examples:
301292
>>> exponential_search([0, 5, 7, 10, 15], 0)
@@ -307,54 +298,33 @@ def exponential_search(sorted_collection: list[int], item: int) -> int:
307298
>>> exponential_search([0, 5, 7, 10, 15], 6)
308299
-1
309300
"""
310-
if list(sorted_collection) != sorted(sorted_collection):
311-
raise ValueError("sorted_collection must be sorted in ascending order")
301+
if not sorted_collection:
302+
return -1
303+
304+
if sorted_collection[0] == item:
305+
return 0
306+
312307
bound = 1
313308
while bound < len(sorted_collection) and sorted_collection[bound] < item:
314309
bound *= 2
310+
315311
left = bound // 2
316312
right = min(bound, len(sorted_collection) - 1)
317-
last_result = binary_search_by_recursion(
318-
sorted_collection=sorted_collection, item=item, left=left, right=right
319-
)
320-
if last_result is None:
321-
return -1
322-
return last_result
323313

314+
while left <= right:
315+
midpoint = left + (right - left) // 2
316+
current_item = sorted_collection[midpoint]
317+
if current_item == item:
318+
return midpoint
319+
elif item < current_item:
320+
right = midpoint - 1
321+
else:
322+
left = midpoint + 1
324323

325-
searches = ( # Fastest to slowest...
326-
binary_search_std_lib,
327-
binary_search,
328-
exponential_search,
329-
binary_search_by_recursion,
330-
)
324+
return -1
331325

332326

333327
if __name__ == "__main__":
334328
import doctest
335-
import timeit
336329

337330
doctest.testmod()
338-
for search in searches:
339-
name = f"{search.__name__:>26}"
340-
print(f"{name}: {search([0, 5, 7, 10, 15], 10) = }") # type: ignore[operator]
341-
342-
print("\nBenchmarks...")
343-
setup = "collection = range(1000)"
344-
for search in searches:
345-
name = search.__name__
346-
print(
347-
f"{name:>26}:",
348-
timeit.timeit(
349-
f"{name}(collection, 500)", setup=setup, number=5_000, globals=globals()
350-
),
351-
)
352-
353-
user_input = input("\nEnter numbers separated by comma: ").strip()
354-
collection = sorted(int(item) for item in user_input.split(","))
355-
target = int(input("Enter a single number to be found in the list: "))
356-
result = binary_search(sorted_collection=collection, item=target)
357-
if result == -1:
358-
print(f"{target} was not found in {collection}.")
359-
else:
360-
print(f"{target} was found at position {result} of {collection}.")

0 commit comments

Comments
 (0)