From e3a5b394a760b660428d1a07757286dc9f353084 Mon Sep 17 00:00:00 2001 From: pgnikolov Date: Sun, 26 Oct 2025 01:46:36 +0200 Subject: [PATCH 1/8] Improve the existing TimSort implementation --- sorts/tim_sort.py | 149 ++++++++++++++++++++++++++++------------------ 1 file changed, 91 insertions(+), 58 deletions(-) diff --git a/sorts/tim_sort.py b/sorts/tim_sort.py index 41ab4a10a87b..9644cf0ce7ec 100644 --- a/sorts/tim_sort.py +++ b/sorts/tim_sort.py @@ -1,82 +1,115 @@ -def binary_search(lst, item, start, end): - if start == end: - return start if lst[start] > item else start + 1 - if start > end: - return start +from typing import List, TypeVar - mid = (start + end) // 2 - if lst[mid] < item: - return binary_search(lst, item, mid + 1, end) - elif lst[mid] > item: - return binary_search(lst, item, start, mid - 1) - else: - return mid +T = TypeVar("T") -def insertion_sort(lst): - length = len(lst) - - for index in range(1, length): - value = lst[index] - pos = binary_search(lst, value, 0, index - 1) - lst = [*lst[:pos], value, *lst[pos:index], *lst[index + 1 :]] +def binary_search(arr: List[T], item: T, left: int, right: int) -> int: + """ + Return the index where `item` should be inserted in `arr[left:right+1]` + to keep it sorted. + + >>> binary_search([1, 3, 5, 7], 6, 0, 3) + 3 + >>> binary_search([1, 3, 5, 7], 0, 0, 3) + 0 + >>> binary_search([1, 3, 5, 7], 8, 0, 3) + 4 + """ + while left <= right: + mid = (left + right) // 2 + if arr[mid] == item: + return mid + elif arr[mid] < item: + left = mid + 1 + else: + right = mid - 1 + return left - return lst +def insertion_sort(arr: List[T]) -> List[T]: + """ + Sort the list in-place using binary insertion sort. -def merge(left, right): - if not left: - return right + >>> insertion_sort([3, 1, 2, 4]) + [1, 2, 3, 4] + """ + for i in range(1, len(arr)): + key = arr[i] + j = binary_search(arr, key, 0, i - 1) + arr[:] = arr[:j] + [key] + arr[j:i] + arr[i + 1 :] + return arr - if not right: - return left - if left[0] < right[0]: - return [left[0], *merge(left[1:], right)] +def merge(left: List[T], right: List[T]) -> List[T]: + """ + Merge two sorted lists into one sorted list. - return [right[0], *merge(left, right[1:])] + >>> merge([1, 3, 5], [2, 4, 6]) + [1, 2, 3, 4, 5, 6] + """ + merged = [] + i = j = 0 + while i < len(left) and j < len(right): + if left[i] <= right[j]: + merged.append(left[i]) + i += 1 + else: + merged.append(right[j]) + j += 1 + merged.extend(left[i:]) + merged.extend(right[j:]) + return merged -def tim_sort(lst): +def tim_sort(arr: List[T]) -> List[T]: """ + Simplified version of TimSort for educational purposes. + + TimSort is a hybrid stable sorting algorithm that combines merge sort + and insertion sort. It was originally designed by Tim Peters for Python (2002). + + Source: https://en.wikipedia.org/wiki/Timsort + >>> tim_sort("Python") ['P', 'h', 'n', 'o', 't', 'y'] - >>> tim_sort((1.1, 1, 0, -1, -1.1)) - [-1.1, -1, 0, 1, 1.1] - >>> tim_sort(list(reversed(list(range(7))))) - [0, 1, 2, 3, 4, 5, 6] - >>> tim_sort([3, 2, 1]) == insertion_sort([3, 2, 1]) - True + >>> tim_sort([5, 4, 3, 2, 1]) + [1, 2, 3, 4, 5] >>> tim_sort([3, 2, 1]) == sorted([3, 2, 1]) True + >>> tim_sort([]) # empty input + [] """ - length = len(lst) - runs, sorted_runs = [], [] - new_run = [lst[0]] - sorted_array = [] - i = 1 - while i < length: - if lst[i] < lst[i - 1]: - runs.append(new_run) - new_run = [lst[i]] - else: - new_run.append(lst[i]) - i += 1 - runs.append(new_run) + if not isinstance(arr, list): + arr = list(arr) + + if not arr: + return [] - for run in runs: - sorted_runs.append(insertion_sort(run)) - for run in sorted_runs: - sorted_array = merge(sorted_array, run) + min_run = 32 + n = len(arr) - return sorted_array + if n == 1: + return arr.copy() + runs = [] + for start in range(0, n, min_run): + end = min(start + min_run, n) + run = insertion_sort(arr[start:end]) + runs.append(run) -def main(): - lst = [5, 9, 10, 3, -4, 5, 178, 92, 46, -18, 0, 7] - sorted_lst = tim_sort(lst) - print(sorted_lst) + while len(runs) > 1: + new_runs = [] + for i in range(0, len(runs), 2): + if i + 1 < len(runs): + new_runs.append(merge(runs[i], runs[i + 1])) + else: + new_runs.append(runs[i]) + runs = new_runs + + return runs[0] if runs else [] if __name__ == "__main__": - main() + import doctest + + doctest.testmod() From d76b3853f9e4cfbd24ad1ac58d93bdccad1d2704 Mon Sep 17 00:00:00 2001 From: pgnikolov Date: Sun, 26 Oct 2025 01:56:40 +0200 Subject: [PATCH 2/8] Improve the existing TimSort implementation --- sorts/tim_sort.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/sorts/tim_sort.py b/sorts/tim_sort.py index 9644cf0ce7ec..a22bcb7470d4 100644 --- a/sorts/tim_sort.py +++ b/sorts/tim_sort.py @@ -1,9 +1,9 @@ -from typing import List, TypeVar +from typing import TypeVar T = TypeVar("T") -def binary_search(arr: List[T], item: T, left: int, right: int) -> int: +def binary_search(arr: list[T], item: T, left: int, right: int) -> int: """ Return the index where `item` should be inserted in `arr[left:right+1]` to keep it sorted. @@ -26,7 +26,7 @@ def binary_search(arr: List[T], item: T, left: int, right: int) -> int: return left -def insertion_sort(arr: List[T]) -> List[T]: +def insertion_sort(arr: list[T]) -> list[T]: """ Sort the list in-place using binary insertion sort. @@ -36,18 +36,18 @@ def insertion_sort(arr: List[T]) -> List[T]: for i in range(1, len(arr)): key = arr[i] j = binary_search(arr, key, 0, i - 1) - arr[:] = arr[:j] + [key] + arr[j:i] + arr[i + 1 :] + arr[:] = [*arr[:j], key, *arr[j:i], *arr[i + 1 :]] return arr -def merge(left: List[T], right: List[T]) -> List[T]: +def merge(left: list[T], right: list[T]) -> list[T]: """ Merge two sorted lists into one sorted list. >>> merge([1, 3, 5], [2, 4, 6]) [1, 2, 3, 4, 5, 6] """ - merged = [] + merged: list[T] = [] i = j = 0 while i < len(left) and j < len(right): if left[i] <= right[j]: @@ -61,7 +61,7 @@ def merge(left: List[T], right: List[T]) -> List[T]: return merged -def tim_sort(arr: List[T]) -> List[T]: +def tim_sort(arr: list[T]) -> list[T]: """ Simplified version of TimSort for educational purposes. @@ -91,14 +91,14 @@ def tim_sort(arr: List[T]) -> List[T]: if n == 1: return arr.copy() - runs = [] + runs: list[list[T]] = [] for start in range(0, n, min_run): end = min(start + min_run, n) run = insertion_sort(arr[start:end]) runs.append(run) while len(runs) > 1: - new_runs = [] + new_runs: list[list[T]] = [] for i in range(0, len(runs), 2): if i + 1 < len(runs): new_runs.append(merge(runs[i], runs[i + 1])) From e0e84c6626ca19db9bac00005126acf166109081 Mon Sep 17 00:00:00 2001 From: pgnikolov Date: Sun, 26 Oct 2025 01:59:55 +0200 Subject: [PATCH 3/8] Improve the existing TimSort implementation --- sorts/tim_sort.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/sorts/tim_sort.py b/sorts/tim_sort.py index a22bcb7470d4..1c4b3c4ded51 100644 --- a/sorts/tim_sort.py +++ b/sorts/tim_sort.py @@ -33,6 +33,10 @@ def insertion_sort(arr: list[T]) -> list[T]: >>> insertion_sort([3, 1, 2, 4]) [1, 2, 3, 4] """ + from typing import TypeVar + + T = TypeVar("T") + for i in range(1, len(arr)): key = arr[i] j = binary_search(arr, key, 0, i - 1) @@ -79,9 +83,12 @@ def tim_sort(arr: list[T]) -> list[T]: >>> tim_sort([]) # empty input [] """ + from typing import TypeVar + + T = TypeVar("T") + if not isinstance(arr, list): arr = list(arr) - if not arr: return [] @@ -112,4 +119,4 @@ def tim_sort(arr: list[T]) -> list[T]: if __name__ == "__main__": import doctest - doctest.testmod() + doctest.testmod() \ No newline at end of file From 49749ec3115f6afcd8ba1ea9ad602c28236511e4 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 26 Oct 2025 00:00:20 +0000 Subject: [PATCH 4/8] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- sorts/tim_sort.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sorts/tim_sort.py b/sorts/tim_sort.py index 1c4b3c4ded51..96b58d81a663 100644 --- a/sorts/tim_sort.py +++ b/sorts/tim_sort.py @@ -119,4 +119,4 @@ def tim_sort(arr: list[T]) -> list[T]: if __name__ == "__main__": import doctest - doctest.testmod() \ No newline at end of file + doctest.testmod() From 4914ac25ce8a3be3a97f276b58adc13a7150f3fb Mon Sep 17 00:00:00 2001 From: pgnikolov Date: Sun, 26 Oct 2025 02:10:59 +0200 Subject: [PATCH 5/8] Modernize and fix TimSort implementation type-safe, PEP 695 compliant --- sorts/tim_sort.py | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/sorts/tim_sort.py b/sorts/tim_sort.py index 1c4b3c4ded51..e812a3118696 100644 --- a/sorts/tim_sort.py +++ b/sorts/tim_sort.py @@ -1,6 +1,12 @@ -from typing import TypeVar +from typing import Protocol, TypeVar -T = TypeVar("T") + +class Comparable(Protocol): + def __lt__(self, other: object) -> bool: ... + def __le__(self, other: object) -> bool: ... + + +T = TypeVar("T", bound=Comparable) def binary_search(arr: list[T], item: T, left: int, right: int) -> int: @@ -26,17 +32,13 @@ def binary_search(arr: list[T], item: T, left: int, right: int) -> int: return left -def insertion_sort(arr: list[T]) -> list[T]: +def insertion_sort[T_contra](arr: list[T_contra]) -> list[T_contra]: # type: ignore[valid-type] """ Sort the list in-place using binary insertion sort. >>> insertion_sort([3, 1, 2, 4]) [1, 2, 3, 4] """ - from typing import TypeVar - - T = TypeVar("T") - for i in range(1, len(arr)): key = arr[i] j = binary_search(arr, key, 0, i - 1) @@ -65,7 +67,7 @@ def merge(left: list[T], right: list[T]) -> list[T]: return merged -def tim_sort(arr: list[T]) -> list[T]: +def tim_sort[T_contra](arr: list[T_contra]) -> list[T_contra]: # type: ignore[valid-type] """ Simplified version of TimSort for educational purposes. @@ -83,10 +85,6 @@ def tim_sort(arr: list[T]) -> list[T]: >>> tim_sort([]) # empty input [] """ - from typing import TypeVar - - T = TypeVar("T") - if not isinstance(arr, list): arr = list(arr) if not arr: @@ -98,14 +96,14 @@ def tim_sort(arr: list[T]) -> list[T]: if n == 1: return arr.copy() - runs: list[list[T]] = [] + runs: list[list[T_contra]] = [] for start in range(0, n, min_run): end = min(start + min_run, n) run = insertion_sort(arr[start:end]) runs.append(run) while len(runs) > 1: - new_runs: list[list[T]] = [] + new_runs: list[list[T_contra]] = [] for i in range(0, len(runs), 2): if i + 1 < len(runs): new_runs.append(merge(runs[i], runs[i + 1])) @@ -119,4 +117,4 @@ def tim_sort(arr: list[T]) -> list[T]: if __name__ == "__main__": import doctest - doctest.testmod() \ No newline at end of file + doctest.testmod() From cbf39ae970668b99e740610388c15f6bbdf66cae Mon Sep 17 00:00:00 2001 From: pgnikolov Date: Sun, 26 Oct 2025 02:20:49 +0200 Subject: [PATCH 6/8] fully type-safe TimSort using PEP 695 generics mypy + ruff compliant --- sorts/tim_sort.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/sorts/tim_sort.py b/sorts/tim_sort.py index e812a3118696..a3d8c34ce808 100644 --- a/sorts/tim_sort.py +++ b/sorts/tim_sort.py @@ -1,4 +1,4 @@ -from typing import Protocol, TypeVar +from typing import Protocol class Comparable(Protocol): @@ -6,10 +6,7 @@ def __lt__(self, other: object) -> bool: ... def __le__(self, other: object) -> bool: ... -T = TypeVar("T", bound=Comparable) - - -def binary_search(arr: list[T], item: T, left: int, right: int) -> int: +def binary_search[T: Comparable](arr: list[T], item: T, left: int, right: int) -> int: """ Return the index where `item` should be inserted in `arr[left:right+1]` to keep it sorted. @@ -32,7 +29,7 @@ def binary_search(arr: list[T], item: T, left: int, right: int) -> int: return left -def insertion_sort[T_contra](arr: list[T_contra]) -> list[T_contra]: # type: ignore[valid-type] +def insertion_sort[T: Comparable](arr: list[T]) -> list[T]: """ Sort the list in-place using binary insertion sort. @@ -46,7 +43,7 @@ def insertion_sort[T_contra](arr: list[T_contra]) -> list[T_contra]: # type: ig return arr -def merge(left: list[T], right: list[T]) -> list[T]: +def merge[T: Comparable](left: list[T], right: list[T]) -> list[T]: """ Merge two sorted lists into one sorted list. @@ -67,7 +64,7 @@ def merge(left: list[T], right: list[T]) -> list[T]: return merged -def tim_sort[T_contra](arr: list[T_contra]) -> list[T_contra]: # type: ignore[valid-type] +def tim_sort[T: Comparable](arr: list[T]) -> list[T]: """ Simplified version of TimSort for educational purposes. @@ -96,14 +93,14 @@ def tim_sort[T_contra](arr: list[T_contra]) -> list[T_contra]: # type: ignore[v if n == 1: return arr.copy() - runs: list[list[T_contra]] = [] + runs: list[list[T]] = [] for start in range(0, n, min_run): end = min(start + min_run, n) run = insertion_sort(arr[start:end]) runs.append(run) while len(runs) > 1: - new_runs: list[list[T_contra]] = [] + new_runs: list[list[T]] = [] for i in range(0, len(runs), 2): if i + 1 < len(runs): new_runs.append(merge(runs[i], runs[i + 1])) From 45df3281d8881554ccef6c09e189dc722f141474 Mon Sep 17 00:00:00 2001 From: pgnikolov Date: Sun, 26 Oct 2025 04:35:28 +0100 Subject: [PATCH 7/8] Refactor: improve merge_sort implementation for performance and type safety --- sorts/merge_sort.py | 93 ++++++++++++++++++++++++++------------------- 1 file changed, 54 insertions(+), 39 deletions(-) diff --git a/sorts/merge_sort.py b/sorts/merge_sort.py index 11c202788035..c91f4d97041e 100644 --- a/sorts/merge_sort.py +++ b/sorts/merge_sort.py @@ -1,64 +1,79 @@ """ -This is a pure Python implementation of the merge sort algorithm. - -For doctests run following command: -python -m doctest -v merge_sort.py -or -python3 -m doctest -v merge_sort.py -For manual testing run: -python merge_sort.py +Optimized pure Python implementation of the merge sort algorithm. + +Merge Sort is a divide-and-conquer algorithm that splits the input list into halves, +recursively sorts them, and merges the sorted halves. + +Source: https://en.wikipedia.org/wiki/Merge_sort """ +from __future__ import annotations -def merge_sort(collection: list) -> list: - """ - Sorts a list using the merge sort algorithm. +from collections.abc import Sequence +from typing import Any, Protocol, TypeVar + + +class Comparable(Protocol): + """Defines minimal comparison operations required for sorting.""" + def __lt__(self, other: Any) -> bool: ... + def __le__(self, other: Any) -> bool: ... - :param collection: A mutable ordered collection with comparable items. - :return: The same collection ordered in ascending order. - Time Complexity: O(n log n) - Space Complexity: O(n) +T = TypeVar("T", bound=Comparable) + + +def merge_sort(arr: Sequence[T]) -> list[T]: # noqa: UP047 + """ + Sort a sequence in ascending order using merge sort. + + :param arr: Any sequence of comparable items. + :return: A new sorted list. - Examples: >>> merge_sort([0, 5, 3, 2, 2]) [0, 2, 2, 3, 5] >>> merge_sort([]) [] >>> merge_sort([-2, -5, -45]) [-45, -5, -2] + >>> merge_sort(["b", "a", "c"]) + ['a', 'b', 'c'] + >>> merge_sort((3, 1, 2)) + [1, 2, 3] """ + n = len(arr) + if n <= 1: + return list(arr) - def merge(left: list, right: list) -> list: - """ - Merge two sorted lists into a single sorted list. + mid = n // 2 + left = merge_sort(arr[:mid]) + right = merge_sort(arr[mid:]) + return _merge(left, right) - :param left: Left collection - :param right: Right collection - :return: Merged result - """ - result = [] - while left and right: - result.append(left.pop(0) if left[0] <= right[0] else right.pop(0)) - result.extend(left) - result.extend(right) - return result - if len(collection) <= 1: - return collection - mid_index = len(collection) // 2 - return merge(merge_sort(collection[:mid_index]), merge_sort(collection[mid_index:])) +def _merge(left: list[T], right: list[T]) -> list[T]: + """Merge two sorted lists efficiently using index pointers.""" + merged: list[T] = [] + i = j = 0 + while i < len(left) and j < len(right): + if left[i] <= right[j]: + merged.append(left[i]) + i += 1 + else: + merged.append(right[j]) + j += 1 + merged.extend(left[i:]) + merged.extend(right[j:]) + return merged if __name__ == "__main__": import doctest - doctest.testmod() + doctest.testmod(verbose=True) try: - user_input = input("Enter numbers separated by a comma:\n").strip() - unsorted = [int(item) for item in user_input.split(",")] - sorted_list = merge_sort(unsorted) - print(*sorted_list, sep=",") + user_input = input("Enter numbers separated by commas:\n").strip() + numbers = [int(x) for x in user_input.split(",") if x.strip()] + print("Sorted:", merge_sort(numbers)) except ValueError: - print("Invalid input. Please enter valid integers separated by commas.") + print("Invalid input. Please enter only comma-separated integers.") From 51c327c629ff2a8e566710e3a8acc2599330a57e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 26 Oct 2025 03:36:19 +0000 Subject: [PATCH 8/8] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- sorts/merge_sort.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sorts/merge_sort.py b/sorts/merge_sort.py index c91f4d97041e..52e7aaf85dbe 100644 --- a/sorts/merge_sort.py +++ b/sorts/merge_sort.py @@ -15,6 +15,7 @@ class Comparable(Protocol): """Defines minimal comparison operations required for sorting.""" + def __lt__(self, other: Any) -> bool: ... def __le__(self, other: Any) -> bool: ...