"""
Pure Python implementations of merge sort and a simplified TimSort.

Merge sort is a divide-and-conquer algorithm that splits the input into halves,
recursively sorts them, and merges the sorted halves.

TimSort is a hybrid stable sorting algorithm that combines merge sort and
insertion sort. The version here is simplified for educational purposes: it
sorts fixed-size chunks with binary insertion sort and then merges the sorted
chunks pairwise.

Sources:
    https://en.wikipedia.org/wiki/Merge_sort
    https://en.wikipedia.org/wiki/Timsort
"""

from collections.abc import Iterable, Sequence
from typing import Any, Protocol, TypeVar


class Comparable(Protocol):
    """Defines minimal comparison operations required for sorting."""

    # ``Any`` rather than ``object``: parameter types are contravariant, so a
    # protocol demanding ``object`` would reject ``int.__lt__(self, int)``.
    def __lt__(self, other: Any) -> bool: ...
    def __le__(self, other: Any) -> bool: ...


T = TypeVar("T", bound=Comparable)


def merge(left: list[T], right: list[T]) -> list[T]:  # noqa: UP047
    """
    Merge two sorted lists into one new sorted list.

    The merge is stable: when elements compare equal, the one from ``left``
    is emitted first. Neither input list is modified.

    :param left: A list sorted in ascending order.
    :param right: A list sorted in ascending order.
    :return: A new list holding every element of both inputs, in order.

    >>> merge([1, 3, 5], [2, 4, 6])
    [1, 2, 3, 4, 5, 6]
    >>> merge([], [1, 2])
    [1, 2]
    >>> merge([1, 2], [])
    [1, 2]
    """
    merged: list[T] = []
    i = j = 0
    len_left, len_right = len(left), len(right)
    while i < len_left and j < len_right:
        # ``<=`` (not ``<``) keeps equal elements in left-then-right order.
        if left[i] <= right[j]:
            merged.append(left[i])
            i += 1
        else:
            merged.append(right[j])
            j += 1
    # At most one of these tails is non-empty.
    merged.extend(left[i:])
    merged.extend(right[j:])
    return merged


def _merge(left: list[T], right: list[T]) -> list[T]:  # noqa: UP047
    """Merge two sorted lists; private entry point used by ``merge_sort``."""
    return merge(left, right)


def merge_sort(arr: Sequence[T]) -> list[T]:  # noqa: UP047
    """
    Sort a sequence in ascending order using merge sort.

    :param arr: Any sliceable sequence of comparable items.
    :return: A new sorted list; ``arr`` itself is not modified.

    >>> merge_sort([0, 5, 3, 2, 2])
    [0, 2, 2, 3, 5]
    >>> merge_sort([])
    []
    >>> merge_sort([-2, -5, -45])
    [-45, -5, -2]
    >>> merge_sort(["b", "a", "c"])
    ['a', 'b', 'c']
    >>> merge_sort((3, 1, 2))
    [1, 2, 3]
    """
    n = len(arr)
    if n <= 1:
        # Base case: copy so the caller always receives a fresh list.
        return list(arr)

    mid = n // 2
    return _merge(merge_sort(arr[:mid]), merge_sort(arr[mid:]))


def binary_search(  # noqa: UP047
    arr: Sequence[T], item: T, left: int, right: int
) -> int:
    """
    Return the index where ``item`` should be inserted in ``arr[left:right+1]``
    to keep it sorted.

    When elements equal to ``item`` are already present, the returned index is
    just past the last of them, so inserting there preserves the relative
    order of equal elements (this is what makes ``insertion_sort`` stable).
    Only ``<`` is used for comparisons.

    :param arr: Sequence whose slice ``arr[left:right+1]`` is sorted ascending.
    :param item: The value to locate an insertion point for.
    :param left: First index of the searched range (inclusive).
    :param right: Last index of the searched range (inclusive).
    :return: An insertion index in ``left .. right + 1``.

    >>> binary_search([1, 3, 5, 7], 6, 0, 3)
    3
    >>> binary_search([1, 3, 5, 7], 0, 0, 3)
    0
    >>> binary_search([1, 3, 5, 7], 8, 0, 3)
    4
    >>> binary_search([1, 3, 3, 5], 3, 0, 3)
    3
    """
    while left <= right:
        mid = (left + right) // 2
        if item < arr[mid]:
            right = mid - 1
        else:
            # Equal elements are skipped over so the result lands after them.
            left = mid + 1
    return left


def insertion_sort(arr: list[T]) -> list[T]:  # noqa: UP047
    """
    Sort the list in-place using binary insertion sort.

    :param arr: The list to sort; it is mutated.
    :return: The same list object, now sorted in ascending order.

    >>> insertion_sort([3, 1, 2, 4])
    [1, 2, 3, 4]
    >>> insertion_sort([])
    []
    >>> insertion_sort([1, 1.0, True])  # equal items keep their input order
    [1, 1.0, True]
    """
    for i in range(1, len(arr)):
        # arr[:i] is already sorted; find where arr[i] belongs within it.
        j = binary_search(arr, arr[i], 0, i - 1)
        if j != i:
            # Move the element instead of rebuilding the whole list.
            arr.insert(j, arr.pop(i))
    return arr


def tim_sort(arr: Iterable[T], *, min_run: int = 32) -> list[T]:  # noqa: UP047
    """
    Simplified version of TimSort for educational purposes.

    The input is cut into consecutive chunks of ``min_run`` elements, each
    chunk is sorted with binary insertion sort, and neighbouring chunks are
    merged pairwise until a single sorted list remains. The sort is stable.

    :param arr: Any iterable of comparable items; it is not modified.
    :param min_run: Chunk size handed to insertion sort (must be >= 1).
    :return: A new sorted list.
    :raises ValueError: If ``min_run`` is smaller than 1.

    >>> tim_sort("Python")
    ['P', 'h', 'n', 'o', 't', 'y']
    >>> tim_sort((1.1, 1, 0, -1, -1.1))
    [-1.1, -1, 0, 1, 1.1]
    >>> tim_sort([5, 4, 3, 2, 1])
    [1, 2, 3, 4, 5]
    >>> tim_sort([3, 2, 1]) == sorted([3, 2, 1])
    True
    >>> tim_sort([])  # empty input
    []
    >>> tim_sort(list(range(100, 0, -1))) == list(range(1, 101))
    True
    >>> tim_sort([4, 3, 2, 1, 0], min_run=2)
    [0, 1, 2, 3, 4]
    """
    if min_run < 1:
        raise ValueError("min_run must be a positive integer")

    # Materialise once: accepts strings, tuples and generators, and guarantees
    # the caller's object is never mutated.
    items = list(arr)
    if not items:
        return []

    # Slicing clamps at the end of the list, so the last chunk may be shorter.
    runs: list[list[T]] = [
        insertion_sort(items[start : start + min_run])
        for start in range(0, len(items), min_run)
    ]

    while len(runs) > 1:
        # Merge neighbours in order; an unpaired last run is carried over
        # unchanged so that stability is preserved.
        paired = [merge(runs[k], runs[k + 1]) for k in range(0, len(runs) - 1, 2)]
        if len(runs) % 2:
            paired.append(runs[-1])
        runs = paired

    return runs[0]


if __name__ == "__main__":
    import doctest

    doctest.testmod(verbose=True)

    user_input = input("Enter numbers separated by commas:\n").strip()
    try:
        # Empty fields (e.g. a trailing comma) are skipped rather than rejected.
        numbers = [int(x) for x in user_input.split(",") if x.strip()]
    except ValueError:
        print("Invalid input. Please enter only comma-separated integers.")
    else:
        print("Sorted:", merge_sort(numbers))