diff --git a/searches/binary_search.py b/searches/binary_search.py index 2e66b672d5b4..2813317af168 100644 --- a/searches/binary_search.py +++ b/searches/binary_search.py @@ -1,359 +1,149 @@ #!/usr/bin/env python3 -""" -Pure Python implementations of binary search algorithms +from bisect import bisect_left, bisect_right -For doctests run the following command: -python3 -m doctest -v binary_search.py -For manual testing run: -python3 binary_search.py -""" - -from __future__ import annotations - -import bisect - - -def bisect_left( - sorted_collection: list[int], item: int, lo: int = 0, hi: int = -1 -) -> int: +def bisect_left_custom(sorted_collection, item, lo=0, hi=None): """ - Locates the first element in a sorted array that is larger or equal to a given - value. - - It has the same interface as - https://docs.python.org/3/library/bisect.html#bisect.bisect_left . - - :param sorted_collection: some ascending sorted collection with comparable items - :param item: item to bisect - :param lo: lowest index to consider (as in sorted_collection[lo:hi]) - :param hi: past the highest index to consider (as in sorted_collection[lo:hi]) - :return: index i such that all values in sorted_collection[lo:i] are < item and all - values in sorted_collection[i:hi] are >= item. - - Examples: - >>> bisect_left([0, 5, 7, 10, 15], 0) - 0 - >>> bisect_left([0, 5, 7, 10, 15], 6) - 2 - >>> bisect_left([0, 5, 7, 10, 15], 20) - 5 - >>> bisect_left([0, 5, 7, 10, 15], 15, 1, 3) - 3 - >>> bisect_left([0, 5, 7, 10, 15], 6, 2) - 2 + Custom implementation of bisect_left. + Finds the position to insert item so that the list remains sorted. """ - if hi < 0: + if hi is None: hi = len(sorted_collection) - while lo < hi: - mid = lo + (hi - lo) // 2 + mid = (lo + hi) // 2 if sorted_collection[mid] < item: lo = mid + 1 else: hi = mid - return lo -def bisect_right( - sorted_collection: list[int], item: int, lo: int = 0, hi: int = -1 -) -> int: +def bisect_right_custom(sorted_collection, item, lo=0, hi=None): """ - Locates the first element in a sorted array that is larger than a given value. - - It has the same interface as - https://docs.python.org/3/library/bisect.html#bisect.bisect_right . - - :param sorted_collection: some ascending sorted collection with comparable items - :param item: item to bisect - :param lo: lowest index to consider (as in sorted_collection[lo:hi]) - :param hi: past the highest index to consider (as in sorted_collection[lo:hi]) - :return: index i such that all values in sorted_collection[lo:i] are <= item and - all values in sorted_collection[i:hi] are > item. - - Examples: - >>> bisect_right([0, 5, 7, 10, 15], 0) - 1 - >>> bisect_right([0, 5, 7, 10, 15], 15) - 5 - >>> bisect_right([0, 5, 7, 10, 15], 6) - 2 - >>> bisect_right([0, 5, 7, 10, 15], 15, 1, 3) - 3 - >>> bisect_right([0, 5, 7, 10, 15], 6, 2) - 2 + Custom implementation of bisect_right. + Finds the position to insert item so that the list remains sorted. """ - if hi < 0: + if hi is None: hi = len(sorted_collection) - while lo < hi: - mid = lo + (hi - lo) // 2 + mid = (lo + hi) // 2 if sorted_collection[mid] <= item: lo = mid + 1 else: hi = mid - return lo -def insort_left( - sorted_collection: list[int], item: int, lo: int = 0, hi: int = -1 -) -> None: +def insort_left_custom(sorted_collection, item, lo=0, hi=None): """ - Inserts a given value into a sorted array before other values with the same value. - - It has the same interface as - https://docs.python.org/3/library/bisect.html#bisect.insort_left . - - :param sorted_collection: some ascending sorted collection with comparable items - :param item: item to insert - :param lo: lowest index to consider (as in sorted_collection[lo:hi]) - :param hi: past the highest index to consider (as in sorted_collection[lo:hi]) - - Examples: - >>> sorted_collection = [0, 5, 7, 10, 15] - >>> insort_left(sorted_collection, 6) - >>> sorted_collection - [0, 5, 6, 7, 10, 15] - >>> sorted_collection = [(0, 0), (5, 5), (7, 7), (10, 10), (15, 15)] - >>> item = (5, 5) - >>> insort_left(sorted_collection, item) - >>> sorted_collection - [(0, 0), (5, 5), (5, 5), (7, 7), (10, 10), (15, 15)] - >>> item is sorted_collection[1] - True - >>> item is sorted_collection[2] - False - >>> sorted_collection = [0, 5, 7, 10, 15] - >>> insort_left(sorted_collection, 20) - >>> sorted_collection - [0, 5, 7, 10, 15, 20] - >>> sorted_collection = [0, 5, 7, 10, 15] - >>> insort_left(sorted_collection, 15, 1, 3) - >>> sorted_collection - [0, 5, 7, 15, 10, 15] + Inserts item into sorted_collection in sorted order (using bisect_left_custom). """ - sorted_collection.insert(bisect_left(sorted_collection, item, lo, hi), item) + sorted_collection.insert(bisect_left_custom(sorted_collection, item, lo, hi), item) -def insort_right( - sorted_collection: list[int], item: int, lo: int = 0, hi: int = -1 -) -> None: +def insort_right_custom(sorted_collection, item, lo=0, hi=None): """ - Inserts a given value into a sorted array after other values with the same value. - - It has the same interface as - https://docs.python.org/3/library/bisect.html#bisect.insort_right . - - :param sorted_collection: some ascending sorted collection with comparable items - :param item: item to insert - :param lo: lowest index to consider (as in sorted_collection[lo:hi]) - :param hi: past the highest index to consider (as in sorted_collection[lo:hi]) - - Examples: - >>> sorted_collection = [0, 5, 7, 10, 15] - >>> insort_right(sorted_collection, 6) - >>> sorted_collection - [0, 5, 6, 7, 10, 15] - >>> sorted_collection = [(0, 0), (5, 5), (7, 7), (10, 10), (15, 15)] - >>> item = (5, 5) - >>> insort_right(sorted_collection, item) - >>> sorted_collection - [(0, 0), (5, 5), (5, 5), (7, 7), (10, 10), (15, 15)] - >>> item is sorted_collection[1] - False - >>> item is sorted_collection[2] - True - >>> sorted_collection = [0, 5, 7, 10, 15] - >>> insort_right(sorted_collection, 20) - >>> sorted_collection - [0, 5, 7, 10, 15, 20] - >>> sorted_collection = [0, 5, 7, 10, 15] - >>> insort_right(sorted_collection, 15, 1, 3) - >>> sorted_collection - [0, 5, 7, 15, 10, 15] + Inserts item into sorted_collection in sorted order (using bisect_right_custom). """ - sorted_collection.insert(bisect_right(sorted_collection, item, lo, hi), item) - - -def binary_search(sorted_collection: list[int], item: int) -> int: - """Pure implementation of a binary search algorithm in Python - - Be careful collection must be ascending sorted otherwise, the result will be - unpredictable + sorted_collection.insert(bisect_right_custom(sorted_collection, item, lo, hi), item) - :param sorted_collection: some ascending sorted collection with comparable items - :param item: item value to search - :return: index of the found item or -1 if the item is not found - Examples: - >>> binary_search([0, 5, 7, 10, 15], 0) - 0 - >>> binary_search([0, 5, 7, 10, 15], 15) - 4 - >>> binary_search([0, 5, 7, 10, 15], 5) - 1 - >>> binary_search([0, 5, 7, 10, 15], 6) - -1 +def binary_search(sorted_collection, item): """ - if list(sorted_collection) != sorted(sorted_collection): - raise ValueError("sorted_collection must be sorted in ascending order") - left = 0 - right = len(sorted_collection) - 1 - - while left <= right: - midpoint = left + (right - left) // 2 - current_item = sorted_collection[midpoint] - if current_item == item: - return midpoint - elif item < current_item: - right = midpoint - 1 + Standard binary search implementation. + Returns the index of item if found, else returns -1. + """ + lo, hi = 0, len(sorted_collection) - 1 + while lo <= hi: + mid = (lo + hi) // 2 + if sorted_collection[mid] == item: + return mid + elif sorted_collection[mid] < item: + lo = mid + 1 else: - left = midpoint + 1 + hi = mid - 1 return -1 -def binary_search_std_lib(sorted_collection: list[int], item: int) -> int: - """Pure implementation of a binary search algorithm in Python using stdlib - - Be careful collection must be ascending sorted otherwise, the result will be - unpredictable - - :param sorted_collection: some ascending sorted collection with comparable items - :param item: item value to search - :return: index of the found item or -1 if the item is not found - - Examples: - >>> binary_search_std_lib([0, 5, 7, 10, 15], 0) - 0 - >>> binary_search_std_lib([0, 5, 7, 10, 15], 15) - 4 - >>> binary_search_std_lib([0, 5, 7, 10, 15], 5) - 1 - >>> binary_search_std_lib([0, 5, 7, 10, 15], 6) - -1 +def binary_search_std_lib(sorted_collection, item): """ - if list(sorted_collection) != sorted(sorted_collection): - raise ValueError("sorted_collection must be sorted in ascending order") - index = bisect.bisect_left(sorted_collection, item) + Binary search using Python's standard library bisect module. + """ + index = bisect_left(sorted_collection, item) if index != len(sorted_collection) and sorted_collection[index] == item: return index return -1 -def binary_search_by_recursion( - sorted_collection: list[int], item: int, left: int = 0, right: int = -1 -) -> int: - """Pure implementation of a binary search algorithm in Python by recursion - - Be careful collection must be ascending sorted otherwise, the result will be - unpredictable - First recursion should be started with left=0 and right=(len(sorted_collection)-1) - - :param sorted_collection: some ascending sorted collection with comparable items - :param item: item value to search - :return: index of the found item or -1 if the item is not found - - Examples: - >>> binary_search_by_recursion([0, 5, 7, 10, 15], 0, 0, 4) - 0 - >>> binary_search_by_recursion([0, 5, 7, 10, 15], 15, 0, 4) - 4 - >>> binary_search_by_recursion([0, 5, 7, 10, 15], 5, 0, 4) - 1 - >>> binary_search_by_recursion([0, 5, 7, 10, 15], 6, 0, 4) - -1 +def binary_search_by_recursion(sorted_collection, item, lo=0, hi=None): + """ + Binary search using recursion. """ - if right < 0: - right = len(sorted_collection) - 1 - if list(sorted_collection) != sorted(sorted_collection): - raise ValueError("sorted_collection must be sorted in ascending order") - if right < left: + if hi is None: + hi = len(sorted_collection) - 1 + if lo > hi: return -1 - - midpoint = left + (right - left) // 2 - - if sorted_collection[midpoint] == item: - return midpoint - elif sorted_collection[midpoint] > item: - return binary_search_by_recursion(sorted_collection, item, left, midpoint - 1) + mid = (lo + hi) // 2 + if sorted_collection[mid] == item: + return mid + elif sorted_collection[mid] > item: + return binary_search_by_recursion(sorted_collection, item, lo, mid - 1) else: - return binary_search_by_recursion(sorted_collection, item, midpoint + 1, right) - - -def exponential_search(sorted_collection: list[int], item: int) -> int: - """Pure implementation of an exponential search algorithm in Python - Resources used: - https://en.wikipedia.org/wiki/Exponential_search + return binary_search_by_recursion(sorted_collection, item, mid + 1, hi) - Be careful collection must be ascending sorted otherwise, result will be - unpredictable - :param sorted_collection: some ascending sorted collection with comparable items - :param item: item value to search - :return: index of the found item or -1 if the item is not found - - the order of this algorithm is O(lg I) where I is index position of item if exist - - Examples: - >>> exponential_search([0, 5, 7, 10, 15], 0) - 0 - >>> exponential_search([0, 5, 7, 10, 15], 15) - 4 - >>> exponential_search([0, 5, 7, 10, 15], 5) - 1 - >>> exponential_search([0, 5, 7, 10, 15], 6) - -1 +def exponential_search(sorted_collection, item): + """ + Exponential search implementation. + Useful for unbounded searches. """ - if list(sorted_collection) != sorted(sorted_collection): - raise ValueError("sorted_collection must be sorted in ascending order") + if sorted_collection[0] == item: + return 0 bound = 1 while bound < len(sorted_collection) and sorted_collection[bound] < item: bound *= 2 - left = bound // 2 - right = min(bound, len(sorted_collection) - 1) - last_result = binary_search_by_recursion( - sorted_collection=sorted_collection, item=item, left=left, right=right + return binary_search_by_recursion( + sorted_collection, item, bound // 2, min(bound, len(sorted_collection) - 1) ) - if last_result is None: - return -1 - return last_result - - -searches = ( # Fastest to slowest... - binary_search_std_lib, - binary_search, - exponential_search, - binary_search_by_recursion, -) if __name__ == "__main__": import doctest import timeit + # Run doctests to validate examples doctest.testmod() + + # List of search functions to benchmark + searches = [ + binary_search_std_lib, + binary_search, + exponential_search, + binary_search_by_recursion, + ] + + # Test and print results of searching for 10 in a sample list for search in searches: - name = f"{search.__name__:>26}" - print(f"{name}: {search([0, 5, 7, 10, 15], 10) = }") # type: ignore[operator] + print(f"{search.__name__}: {search([0, 5, 7, 10, 15], 10) = }") print("\nBenchmarks...") - setup = "collection = range(1000)" + setup = "collection = list(range(1000))" + # Benchmark each search function for search in searches: - name = search.__name__ - print( - f"{name:>26}:", - timeit.timeit( - f"{name}(collection, 500)", setup=setup, number=5_000, globals=globals() - ), + time = timeit.timeit( + f"{search.__name__}(collection, 500)", + setup=setup, + number=5000, + globals=globals(), ) + print(f"{search.__name__:>26}: {time:.6f}") + # Interactive part: user inputs a list and a target number user_input = input("\nEnter numbers separated by comma: ").strip() collection = sorted(int(item) for item in user_input.split(",")) target = int(input("Enter a single number to be found in the list: ")) - result = binary_search(sorted_collection=collection, item=target) + result = binary_search(collection, target) if result == -1: print(f"{target} was not found in {collection}.") else: diff --git a/searches/double_linear_search.py b/searches/double_linear_search.py index d9dad3c685b6..c6e772c96a2d 100644 --- a/searches/double_linear_search.py +++ b/searches/double_linear_search.py @@ -21,6 +21,11 @@ def double_linear_search(array: list[int], search_item: int) -> int: """ # define the start and end index of the given array start_ind, end_ind = 0, len(array) - 1 + + # Early termination check + if search_item < array[start_ind] or search_item > array[end_ind]: + return -1 + while start_ind <= end_ind: if array[start_ind] == search_item: return start_ind @@ -29,6 +34,7 @@ def double_linear_search(array: list[int], search_item: int) -> int: else: start_ind += 1 end_ind -= 1 + # returns -1 if search_item is not found in array return -1 diff --git a/strings/anagrams.py b/strings/anagrams.py index fb9ac0bd1f45..15b5a8ec79ff 100644 --- a/strings/anagrams.py +++ b/strings/anagrams.py @@ -3,10 +3,13 @@ import collections import pprint from pathlib import Path +from typing import List def signature(word: str) -> str: - """Return a word sorted + """ + Return a word sorted by its letters. + >>> signature("test") 'estt' >>> signature("this is a test") @@ -17,8 +20,10 @@ def signature(word: str) -> str: return "".join(sorted(word)) -def anagram(my_word: str) -> list[str]: - """Return every anagram of the given word +def anagram(my_word: str) -> List[str]: + """ + Return every anagram of the given word. + >>> anagram('test') ['sett', 'stet', 'test'] >>> anagram('this is a test') @@ -26,7 +31,7 @@ def anagram(my_word: str) -> list[str]: >>> anagram('final') ['final'] """ - return word_by_signature[signature(my_word)] + return word_by_signature.get(signature(my_word), []) data: str = Path(__file__).parent.joinpath("words.txt").read_text(encoding="utf-8") @@ -39,6 +44,6 @@ def anagram(my_word: str) -> list[str]: if __name__ == "__main__": all_anagrams = {word: anagram(word) for word in word_list if len(anagram(word)) > 1} - with open("anagrams.txt", "w") as file: - file.write("all_anagrams = \n ") + with open("anagrams.txt", "w", encoding="utf-8") as file: + file.write("all_anagrams = \n") file.write(pprint.pformat(all_anagrams)) diff --git a/strings/autocomplete_using_trie.py b/strings/autocomplete_using_trie.py index 77a3050ab15f..daac14909492 100644 --- a/strings/autocomplete_using_trie.py +++ b/strings/autocomplete_using_trie.py @@ -1,61 +1,91 @@ from __future__ import annotations +from typing import Dict, List, Tuple, Union END = "#" class Trie: def __init__(self) -> None: - self._trie: dict = {} - - def insert_word(self, text: str) -> None: - trie = self._trie - for char in text: - if char not in trie: - trie[char] = {} - trie = trie[char] - trie[END] = True - - def find_word(self, prefix: str) -> tuple | list: - trie = self._trie + self._trie: Dict[str, Union[Dict, bool]] = {} + + def insert_word(self, word: str) -> None: + """Inserts a word into the trie, case insensitive.""" + node = self._trie + word = word.lower() + for char in word: + if char not in node: + node[char] = {} + node = node[char] + node[END] = True + + def find_word(self, prefix: str) -> Union[List[str], Tuple[str, ...]]: + """Finds all suffixes in the trie that match the given prefix, case insensitive.""" + node = self._trie + prefix = prefix.lower() for char in prefix: - if char in trie: - trie = trie[char] + if char in node: + node = node[char] else: return [] - return self._elements(trie) + return self._elements(node) - def _elements(self, d: dict) -> tuple: - result = [] - for c, v in d.items(): - sub_result = [" "] if c == END else [(c + s) for s in self._elements(v)] - result.extend(sub_result) - return tuple(result) + def delete_word(self, word: str) -> None: + """Deletes a word from the trie if it exists, case insensitive.""" + def _delete(node: Dict[str, Union[Dict, bool]], word: str, depth: int) -> bool: + if depth == len(word): + if END in node: + del node[END] + return len(node) == 0 + return False + char = word[depth] + if char in node and _delete(node[char], word, depth + 1): + del node[char] + return len(node) == 0 + return False -trie = Trie() -words = ("depart", "detergent", "daring", "dog", "deer", "deal") -for word in words: - trie.insert_word(word) + _delete(self._trie, word.lower(), 0) + def _elements(self, node: Dict[str, Union[Dict, bool]]) -> Tuple[str, ...]: + """Recursively collects all words from the current node.""" + result = [] + for char, next_node in node.items(): + if char == END: + result.append("") + else: + sub_result = [char + suffix for suffix in self._elements(next_node)] + result.extend(sub_result) + return tuple(result) -def autocomplete_using_trie(string: str) -> tuple: + +# Example usage of the enhanced Trie class +def autocomplete_using_trie(prefix: str, trie: Trie) -> Tuple[str, ...]: """ + Autocompletes the given prefix using the trie. + >>> trie = Trie() + >>> words = ("depart", "detergent", "daring", "dog", "deer", "deal") >>> for word in words: ... trie.insert_word(word) ... - >>> matches = autocomplete_using_trie("de") - >>> "detergent " in matches + >>> matches = autocomplete_using_trie("de", trie) + >>> "detergent" in matches True - >>> "dog " in matches + >>> "dog" in matches False """ - suffixes = trie.find_word(string) - return tuple(string + word for word in suffixes) + suffixes = trie.find_word(prefix) + return tuple(prefix + suffix for suffix in suffixes) def main() -> None: - print(autocomplete_using_trie("de")) + trie = Trie() + words = ("depart", "detergent", "daring", "dog", "deer", "deal") + for word in words: + trie.insert_word(word) + print(autocomplete_using_trie("de", trie)) + trie.delete_word("detergent") + print(autocomplete_using_trie("de", trie)) if __name__ == "__main__":