From 51bb5d5d1a0db1d3f6b533a4032306936188ca65 Mon Sep 17 00:00:00 2001 From: Param Parekh <129577837+Param1304@users.noreply.github.com> Date: Sun, 27 Oct 2024 12:53:33 +0530 Subject: [PATCH 01/11] Create adaptive_merge_sort.py --- sorts/adaptive_merge_sort.py | 93 ++++++++++++++++++++++++++++++++++++ 1 file changed, 93 insertions(+) create mode 100644 sorts/adaptive_merge_sort.py diff --git a/sorts/adaptive_merge_sort.py b/sorts/adaptive_merge_sort.py new file mode 100644 index 000000000000..16ecab2f61e8 --- /dev/null +++ b/sorts/adaptive_merge_sort.py @@ -0,0 +1,93 @@ +""" +This is a pure Python implementation of an adaptive merge sort algorithm. +This implementation detects and merges presorted runs for better performance on partially sorted data. + +For doctests run following command: +python -m doctest -v adaptive_merge_sort.py +or +python3 -m doctest -v adaptive_merge_sort.py +For manual testing run: +python adaptive_merge_sort.py +""" + +def adaptive_merge_sort(collection: list) -> list: + """ + Sorts a list using an adaptive merge sort algorithm. + + :param collection: A mutable ordered collection with comparable items. + :return: The same collection ordered in ascending order. + + Time Complexity: O(n log n) in the average case, + O(n) for nearly sorted input. + + Examples: + >>> adaptive_merge_sort([0, 5, 3, 2, 2]) + [0, 2, 2, 3, 5] + >>> adaptive_merge_sort([]) + [] + >>> adaptive_merge_sort([-2, -5, -45]) + [-45, -5, -2] + """ + + def find_run(collection: list, start: int) -> int: + """ + Detects and returns the length of a naturally occurring run starting from 'start'. + + :param collection: The list to detect runs in. + :param start: The starting index for finding the run. + :return: Length of the detected run. + """ + run_length = 1 + while start + run_length < len(collection) and collection[start + run_length - 1] <= collection[start + run_length]: + run_length += 1 + return run_length + + def merge(left: list, right: list) -> list: + """ + Merge two sorted lists into a single sorted list. + + :param left: Left collection + :param right: Right collection + :return: Merged result + """ + result = [] + while left and right: + result.append(left.pop(0) if left[0] <= right[0] else right.pop(0)) + result.extend(left) + result.extend(right) + return result + + if len(collection) <= 1: + return collection + + runs = [] + i = 0 + # Step 1: Identify naturally occurring runs and store them in 'runs' + while i < len(collection): + run_length = find_run(collection, i) + runs.append(collection[i:i + run_length]) + i += run_length + + # Step 2: Iteratively merge runs until one sorted collection remains + while len(runs) > 1: + merged_runs = [] + for j in range(0, len(runs), 2): + if j + 1 < len(runs): + merged_runs.append(merge(runs[j], runs[j + 1])) + else: + merged_runs.append(runs[j]) + runs = merged_runs + + return runs[0] # The single, fully sorted list + + +if __name__ == "__main__": + import doctest + doctest.testmod() + try: + user_input = input("Enter numbers separated by a comma:\n").strip() + unsorted = [int(item) for item in user_input.split(",")] + sorted_list = adaptive_merge_sort(unsorted) + print(*sorted_list, sep=",") + except ValueError: + print("Invalid input. Please enter valid integers separated by commas.") From a18c14c71405cce0b6523d2d94fbee193a07cb0a Mon Sep 17 00:00:00 2001 From: Param Parekh <129577837+Param1304@users.noreply.github.com> Date: Sun, 27 Oct 2024 13:02:58 +0530 Subject: [PATCH 02/11] Create stalin_merge_sort.py --- sorts/stalin_merge_sort.py | 50 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 sorts/stalin_merge_sort.py diff --git a/sorts/stalin_merge_sort.py b/sorts/stalin_merge_sort.py new file mode 100644 index 000000000000..4f5e483b2892 --- /dev/null +++ b/sorts/stalin_merge_sort.py @@ -0,0 +1,50 @@ +""" +This is a pure Python implementation of the Stalin Sort algorithm. +Stalin Sort removes any elements that are out of ascending order, +leaving only a sorted subsequence of the original list. + +For doctests run following command: +python -m doctest -v stalin_sort.py +or +python3 -m doctest -v stalin_sort.py +For manual testing run: +python stalin_sort.py +""" + +def stalin_sort(collection: list) -> list: + """ + Sorts a list by removing elements that are out of order, leaving a sorted subsequence. + + :param collection: A list of comparable items. + :return: A list containing only elements that maintain ascending order. + + Examples: + >>> stalin_sort([4, 5, 3, 6, 7, 2, 8]) + [4, 5, 6, 7, 8] + >>> stalin_sort([1, 2, 3, 4, 5]) + [1, 2, 3, 4, 5] + >>> stalin_sort([5, 4, 3, 2, 1]) + [5] + >>> stalin_sort([]) + [] + """ + if not collection: + return [] + + sorted_list = [collection[0]] + for element in collection[1:]: + if element >= sorted_list[-1]: + sorted_list.append(element) + return sorted_list + + +if __name__ == "__main__": + import doctest + doctest.testmod() + try: + user_input = input("Enter numbers separated by a comma:\n").strip() + unsorted = [int(item) for item in user_input.split(",")] + sorted_list = stalin_sort(unsorted) + print("Stalin-sorted list:", *sorted_list, sep=", ") + except ValueError: + print("Invalid input. Please enter valid integers separated by commas.") From cc6eb700489c0fbd8c56dc938481846ab10b4d3c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 27 Oct 2024 07:51:09 +0000 Subject: [PATCH 03/11] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- sorts/adaptive_merge_sort.py | 11 ++++++++--- sorts/stalin_merge_sort.py | 4 +++- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/sorts/adaptive_merge_sort.py b/sorts/adaptive_merge_sort.py index 16ecab2f61e8..f9d7c099277e 100644 --- a/sorts/adaptive_merge_sort.py +++ b/sorts/adaptive_merge_sort.py @@ -10,6 +10,7 @@ python adaptive_merge_sort.py """ + def adaptive_merge_sort(collection: list) -> list: """ Sorts a list using an adaptive merge sort algorithm. @@ -32,13 +33,16 @@ def adaptive_merge_sort(collection: list) -> list: def find_run(collection: list, start: int) -> int: """ Detects and returns the length of a naturally occurring run starting from 'start'. - + :param collection: The list to detect runs in. :param start: The starting index for finding the run. :return: Length of the detected run. """ run_length = 1 - while start + run_length < len(collection) and collection[start + run_length - 1] <= collection[start + run_length]: + while ( + start + run_length < len(collection) + and collection[start + run_length - 1] <= collection[start + run_length] + ): run_length += 1 return run_length @@ -65,7 +69,7 @@ def merge(left: list, right: list) -> list: # Step 1: Identify naturally occurring runs and store them in 'runs' while i < len(collection): run_length = find_run(collection, i) - runs.append(collection[i:i + run_length]) + runs.append(collection[i : i + run_length]) i += run_length # Step 2: Iteratively merge runs until one sorted collection remains @@ -83,6 +87,7 @@ def merge(left: list, right: list) -> list: if __name__ == "__main__": import doctest + doctest.testmod() try: user_input = input("Enter numbers separated by a comma:\n").strip() diff --git a/sorts/stalin_merge_sort.py b/sorts/stalin_merge_sort.py index 4f5e483b2892..e9a13fca7689 100644 --- a/sorts/stalin_merge_sort.py +++ b/sorts/stalin_merge_sort.py @@ -1,6 +1,6 @@ """ This is a pure Python implementation of the Stalin Sort algorithm. -Stalin Sort removes any elements that are out of ascending order, +Stalin Sort removes any elements that are out of ascending order, leaving only a sorted subsequence of the original list. For doctests run following command: @@ -11,6 +11,7 @@ python stalin_sort.py """ + def stalin_sort(collection: list) -> list: """ Sorts a list by removing elements that are out of order, leaving a sorted subsequence. @@ -40,6 +41,7 @@ def stalin_sort(collection: list) -> list: if __name__ == "__main__": import doctest + doctest.testmod() try: user_input = input("Enter numbers separated by a comma:\n").strip() From e8c258dde38f78b6a947f7ec9b289faf9c931338 Mon Sep 17 00:00:00 2001 From: Param Parekh <129577837+Param1304@users.noreply.github.com> Date: Sun, 27 Oct 2024 16:06:12 +0530 Subject: [PATCH 04/11] Update adaptive_merge_sort.py --- sorts/adaptive_merge_sort.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/sorts/adaptive_merge_sort.py b/sorts/adaptive_merge_sort.py index f9d7c099277e..ab32b7372725 100644 --- a/sorts/adaptive_merge_sort.py +++ b/sorts/adaptive_merge_sort.py @@ -1,7 +1,7 @@ """ This is a pure Python implementation of an adaptive merge sort algorithm. -This implementation detects and merges presorted runs for better performance on partially sorted data. - +This implementation detects and merges presorted runs for better +performance on partially sorted data. For doctests run following command: python -m doctest -v adaptive_merge_sort.py or @@ -14,13 +14,12 @@ def adaptive_merge_sort(collection: list) -> list: """ Sorts a list using an adaptive merge sort algorithm. - - :param collection: A mutable ordered collection with comparable items. - :return: The same collection ordered in ascending order. - + :param collection: A mutable ordered collection + with comparable items. + :return: The same collection ordered in + ascending order. Time Complexity: O(n log n) in the average case, O(n) for nearly sorted input. - Examples: >>> adaptive_merge_sort([0, 5, 3, 2, 2]) [0, 2, 2, 3, 5] @@ -32,8 +31,8 @@ def adaptive_merge_sort(collection: list) -> list: def find_run(collection: list, start: int) -> int: """ - Detects and returns the length of a naturally occurring run starting from 'start'. - + Detects and returns the length of a naturally occurring + run starting from 'start'. :param collection: The list to detect runs in. :param start: The starting index for finding the run. :return: Length of the detected run. From b789baa0030c1ffbe1f290db4ae96ffd4c38e366 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 27 Oct 2024 10:36:34 +0000 Subject: [PATCH 05/11] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- sorts/adaptive_merge_sort.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sorts/adaptive_merge_sort.py b/sorts/adaptive_merge_sort.py index ab32b7372725..a538c54061f0 100644 --- a/sorts/adaptive_merge_sort.py +++ b/sorts/adaptive_merge_sort.py @@ -1,6 +1,6 @@ """ This is a pure Python implementation of an adaptive merge sort algorithm. -This implementation detects and merges presorted runs for better +This implementation detects and merges presorted runs for better performance on partially sorted data. For doctests run following command: python -m doctest -v adaptive_merge_sort.py @@ -14,9 +14,9 @@ def adaptive_merge_sort(collection: list) -> list: """ Sorts a list using an adaptive merge sort algorithm. - :param collection: A mutable ordered collection + :param collection: A mutable ordered collection with comparable items. - :return: The same collection ordered in + :return: The same collection ordered in ascending order. Time Complexity: O(n log n) in the average case, O(n) for nearly sorted input. From 5dd0e514fb4deb5a17a9ffe994d685cddaf11526 Mon Sep 17 00:00:00 2001 From: Param Parekh <129577837+Param1304@users.noreply.github.com> Date: Sun, 27 Oct 2024 16:07:56 +0530 Subject: [PATCH 06/11] Update stalin_merge_sort.py --- sorts/stalin_merge_sort.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/sorts/stalin_merge_sort.py b/sorts/stalin_merge_sort.py index e9a13fca7689..20a61322b4b9 100644 --- a/sorts/stalin_merge_sort.py +++ b/sorts/stalin_merge_sort.py @@ -2,7 +2,6 @@ This is a pure Python implementation of the Stalin Sort algorithm. Stalin Sort removes any elements that are out of ascending order, leaving only a sorted subsequence of the original list. - For doctests run following command: python -m doctest -v stalin_sort.py or @@ -10,15 +9,12 @@ For manual testing run: python stalin_sort.py """ - - def stalin_sort(collection: list) -> list: """ - Sorts a list by removing elements that are out of order, leaving a sorted subsequence. - + Sorts a list by removing elements that are out of order, + leaving a sorted subsequence. :param collection: A list of comparable items. :return: A list containing only elements that maintain ascending order. - Examples: >>> stalin_sort([4, 5, 3, 6, 7, 2, 8]) [4, 5, 6, 7, 8] @@ -38,7 +34,6 @@ def stalin_sort(collection: list) -> list: sorted_list.append(element) return sorted_list - if __name__ == "__main__": import doctest From 801af0cfc3c3f6146f6c7a997b1f3121003e70dc Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 27 Oct 2024 10:38:18 +0000 Subject: [PATCH 07/11] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- sorts/stalin_merge_sort.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sorts/stalin_merge_sort.py b/sorts/stalin_merge_sort.py index 20a61322b4b9..54172a9ecd24 100644 --- a/sorts/stalin_merge_sort.py +++ b/sorts/stalin_merge_sort.py @@ -9,9 +9,11 @@ For manual testing run: python stalin_sort.py """ + + def stalin_sort(collection: list) -> list: """ - Sorts a list by removing elements that are out of order, + Sorts a list by removing elements that are out of order, leaving a sorted subsequence. :param collection: A list of comparable items. :return: A list containing only elements that maintain ascending order. @@ -34,6 +36,7 @@ def stalin_sort(collection: list) -> list: sorted_list.append(element) return sorted_list + if __name__ == "__main__": import doctest From a7747fe1335c9053b9dd7f9e15736c06f2f882d3 Mon Sep 17 00:00:00 2001 From: Param Parekh <129577837+Param1304@users.noreply.github.com> Date: Sun, 27 Oct 2024 16:39:58 +0530 Subject: [PATCH 08/11] Create gabows_algorithm.py Implemented Gabow's algorithm to find all the strongly connected components in a directed graph with a time complexity of O(V+E) as mentioned in the issue #12297 --- graphs/gabows_algorithm.py | 115 +++++++++++++++++++++++++++++++++++++ 1 file changed, 115 insertions(+) create mode 100644 graphs/gabows_algorithm.py diff --git a/graphs/gabows_algorithm.py b/graphs/gabows_algorithm.py new file mode 100644 index 000000000000..fe5d059d2e40 --- /dev/null +++ b/graphs/gabows_algorithm.py @@ -0,0 +1,115 @@ +""" +This is a pure Python implementation +of Gabow's algorithm for finding +strongly connected components (SCCs) +in a directed graph. +For doctests run: + python -m doctest -v gabow_algorithm.py +or + python3 -m doctest -v gabow_algorithm.py +For manual testing run: + python gabow_algorithm.py +""" +from collections import defaultdict +from typing import List, Dict +from __future__ import annotations +class Graph: + """ + Graph data structure to represent + a directed graph and find SCCs + using Gabow's algorithm. + + Attributes: + vertices (int): Number of + vertices in the graph. + graph (Dict[int, List[int]]): + Adjacency list of the graph. + + Methods: + add_edge(u, v): Adds an edge + from vertex u to vertex v. + find_sccs(): Finds and returns + all SCCs in the graph. + + Examples: + >>> g = Graph(5) + >>> g.add_edge(0, 2) + >>> g.add_edge(2, 1) + >>> g.add_edge(1, 0) + >>> g.add_edge(0, 3) + >>> g.add_edge(3, 4) + >>> sorted(g.find_sccs()) + [[0, 1, 2], [3], [4]] + """ + def __init__(self, vertices: int) -> None: + self.vertices = vertices + self.graph: Dict[int, List[int]] = defaultdict(list) + self.index = 0 + self.stack_s = [] # Stack S + self.stack_p = [] # Stack P + self.visited = [False] * vertices + self.result = [] + + def add_edge(self, u: int, v: int) -> None: + """ + Adds a directed edge from vertex u to vertex v. + + :param u: Starting vertex of the edge. + :param v: Ending vertex of the edge. + """ + self.graph[u].append(v) + def _dfs(self, v: int) -> None: + """ + Depth-first search helper function to + process each vertex and identify SCCs. + + :param v: The current vertex to process in DFS. + """ + self.visited[v] = True + self.stack_s.append(v) + self.stack_p.append(v) + + for neighbor in self.graph[v]: + if not self.visited[neighbor]: + self._dfs(neighbor) + elif neighbor in self.stack_p: + while self.stack_p and self.stack_p[-1] != neighbor: + self.stack_p.pop() + + if self.stack_p and self.stack_p[-1] == v: + scc = [] + while True: + node = self.stack_s.pop() + scc.append(node) + if node == v: + break + self.stack_p.pop() + self.result.append(scc) + + def find_sccs(self) -> List[List[int]]: + """ + Finds all strongly connected components + in the directed graph. + :return: List of SCCs, where each SCC is + represented as a list of vertices. + """ + for v in range(self.vertices): + if not self.visited[v]: + self._dfs(v) + return self.result +if __name__ == "__main__": + import doctest + doctest.testmod() + # Example usage for manual testing + try: + vertex_count = int(input("Enter the number of vertices: ")) + g = Graph(vertex_count) + edge_count = int(input("Enter the number of edges: ")) + print("Enter each edge as a pair of vertices (u v):") + for _ in range(edge_count): + u, v = map(int, input().split()) + g.add_edge(u, v) + sccs = g.find_sccs() + print("Strongly Connected Components:", sccs) + except ValueError: + print("Invalid input. Please enter valid integers.") From 489772f9dccd8fefe0449f3357ba6787b3a12110 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 27 Oct 2024 11:12:36 +0000 Subject: [PATCH 09/11] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- graphs/gabows_algorithm.py | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/graphs/gabows_algorithm.py b/graphs/gabows_algorithm.py index fe5d059d2e40..88e920f3da32 100644 --- a/graphs/gabows_algorithm.py +++ b/graphs/gabows_algorithm.py @@ -1,7 +1,7 @@ """ -This is a pure Python implementation +This is a pure Python implementation of Gabow's algorithm for finding -strongly connected components (SCCs) +strongly connected components (SCCs) in a directed graph. For doctests run: python -m doctest -v gabow_algorithm.py @@ -10,25 +10,28 @@ For manual testing run: python gabow_algorithm.py """ + from collections import defaultdict from typing import List, Dict from __future__ import annotations + + class Graph: """ - Graph data structure to represent + Graph data structure to represent a directed graph and find SCCs using Gabow's algorithm. Attributes: - vertices (int): Number of + vertices (int): Number of vertices in the graph. - graph (Dict[int, List[int]]): + graph (Dict[int, List[int]]): Adjacency list of the graph. Methods: - add_edge(u, v): Adds an edge + add_edge(u, v): Adds an edge from vertex u to vertex v. - find_sccs(): Finds and returns + find_sccs(): Finds and returns all SCCs in the graph. Examples: @@ -41,6 +44,7 @@ class Graph: >>> sorted(g.find_sccs()) [[0, 1, 2], [3], [4]] """ + def __init__(self, vertices: int) -> None: self.vertices = vertices self.graph: Dict[int, List[int]] = defaultdict(list) @@ -58,9 +62,10 @@ def add_edge(self, u: int, v: int) -> None: :param v: Ending vertex of the edge. """ self.graph[u].append(v) + def _dfs(self, v: int) -> None: """ - Depth-first search helper function to + Depth-first search helper function to process each vertex and identify SCCs. :param v: The current vertex to process in DFS. @@ -88,17 +93,20 @@ def _dfs(self, v: int) -> None: def find_sccs(self) -> List[List[int]]: """ - Finds all strongly connected components + Finds all strongly connected components in the directed graph. - :return: List of SCCs, where each SCC is + :return: List of SCCs, where each SCC is represented as a list of vertices. """ for v in range(self.vertices): if not self.visited[v]: self._dfs(v) return self.result + + if __name__ == "__main__": import doctest + doctest.testmod() # Example usage for manual testing try: From 72e62025a5da35be6422d654d4fe5c4e16ee483d Mon Sep 17 00:00:00 2001 From: Param Parekh <129577837+Param1304@users.noreply.github.com> Date: Sun, 27 Oct 2024 17:37:50 +0530 Subject: [PATCH 10/11] Create commentz_walter.py --- strings/commentz_walter.py | 119 +++++++++++++++++++++++++++++++++++++ 1 file changed, 119 insertions(+) create mode 100644 strings/commentz_walter.py diff --git a/strings/commentz_walter.py b/strings/commentz_walter.py new file mode 100644 index 000000000000..e42b4c443654 --- /dev/null +++ b/strings/commentz_walter.py @@ -0,0 +1,119 @@ +""" +This is a pure Python implementation +of the Commentz-Walter algorithm +for searching multiple patterns in a single text. + +The algorithm combines Boyer-Moore's and +Aho-Corasick's techniques for +efficiently searching multiple patterns +by using pattern shifts and suffix automata. + +For doctests run: + python -m doctest -v commentz_walter.py +or + python3 -m doctest -v commentz_walter.py +For manual testing run: + python commentz_walter.py +""" + +from typing import List, Dict, Set, Tuple +from collections import defaultdict +class CommentzWalter: + """ + Class to represent the Commentz-Walter algorithm + for multi-pattern string searching. + + Attributes: + patterns (List[str]): List of patterns to search for. + alphabet (Set[str]): Unique characters in the patterns. + shift_table (Dict[str, int]): Table to store + the shift values for characters. + automaton (Dict[int, Dict[str, int]]): + Automaton used for state transitions. + + Methods: + preprocess(): Builds the shift table + and automaton for pattern matching. + search(text: str) -> List[Tuple[int, str]]: + Searches patterns in the given text. + + Examples: + >>> cw = CommentzWalter(["he", "she", "his", "hers"]) + >>> cw.search("ahishers") + [(1, 'his'), (4, 'she'), (5, 'hers')] + """ + def __init__(self, patterns: List[str]) -> None: + self.patterns = patterns + self.alphabet: Set[str] = set("".join(patterns)) + self.shift_table: Dict[str, int] = {} + self.automaton: Dict[int, Dict[str, int]] = {} + self.preprocess() + def preprocess(self) -> None: + """ + Builds the shift table and automaton required + for the Commentz-Walter algorithm. + """ + # Build the shift table for the rightmost occurrence of characters in patterns + max_len = max(len(pattern) for pattern in self.patterns) + for char in self.alphabet: + self.shift_table[char] = max_len + + for pattern in self.patterns: + for i, char in enumerate(pattern): + self.shift_table[char] = max(1, max_len - i - 1) + # Build the Aho-Corasick automaton for the set of patterns + state = 0 + self.automaton[0] = {} + for pattern in self.patterns: + current_state = 0 + for char in pattern: + if char not in self.automaton[current_state]: + state += 1 + self.automaton[state] = {} + self.automaton[current_state][char] = state + current_state = self.automaton[current_state][char] + + def search(self, text: str) -> List[Tuple[int, str]]: + """ + Searches for patterns in the given text using + the Commentz-Walter algorithm. + :param text: The text to search in. + :return: List of tuples with starting index and matched pattern. + Examples: + >>> cw = CommentzWalter(["abc", "bcd", "cde"]) + >>> cw.search("abcdef") + [(0, 'abc'), (1, 'bcd'), (2, 'cde')] + """ + results = [] + n = len(text) + m = max(len(p) for p in self.patterns) + i = 0 + while i <= n - m: + j = m - 1 + while j >= 0 and text[i + j] in self.shift_table: + j -= 1 + if j < 0: + # We have a potential match; use the automaton to verify + state = 0 + for k in range(m): + if text[i + k] in self.automaton[state]: + state = self.automaton[state][text[i + k]] + else: + break + else: + for pattern in self.patterns: + if text[i:i + len(pattern)] == pattern: + results.append((i, pattern)) + i += self.shift_table.get(text[i + m - 1], m) + else: + i += self.shift_table.get(text[i + j], m) + return results +if __name__ == "__main__": + import doctest + doctest.testmod() + # Example usage for manual testing + patterns = ["abc", "bcd", "cde"] + cw = CommentzWalter(patterns) + text = "abcdef" + matches = cw.search(text) + print("Matches found:", matches) From adfe946f88fc4afd6091da65e32b5a54be70a99c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 27 Oct 2024 12:09:18 +0000 Subject: [PATCH 11/11] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- strings/commentz_walter.py | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/strings/commentz_walter.py b/strings/commentz_walter.py index e42b4c443654..320a76db7ab7 100644 --- a/strings/commentz_walter.py +++ b/strings/commentz_walter.py @@ -1,11 +1,11 @@ """ -This is a pure Python implementation +This is a pure Python implementation of the Commentz-Walter algorithm for searching multiple patterns in a single text. -The algorithm combines Boyer-Moore's and +The algorithm combines Boyer-Moore's and Aho-Corasick's techniques for -efficiently searching multiple patterns +efficiently searching multiple patterns by using pattern shifts and suffix automata. For doctests run: @@ -18,23 +18,25 @@ from typing import List, Dict, Set, Tuple from collections import defaultdict + + class CommentzWalter: """ - Class to represent the Commentz-Walter algorithm + Class to represent the Commentz-Walter algorithm for multi-pattern string searching. Attributes: patterns (List[str]): List of patterns to search for. alphabet (Set[str]): Unique characters in the patterns. - shift_table (Dict[str, int]): Table to store + shift_table (Dict[str, int]): Table to store the shift values for characters. - automaton (Dict[int, Dict[str, int]]): + automaton (Dict[int, Dict[str, int]]): Automaton used for state transitions. Methods: - preprocess(): Builds the shift table + preprocess(): Builds the shift table and automaton for pattern matching. - search(text: str) -> List[Tuple[int, str]]: + search(text: str) -> List[Tuple[int, str]]: Searches patterns in the given text. Examples: @@ -42,15 +44,17 @@ class CommentzWalter: >>> cw.search("ahishers") [(1, 'his'), (4, 'she'), (5, 'hers')] """ + def __init__(self, patterns: List[str]) -> None: self.patterns = patterns self.alphabet: Set[str] = set("".join(patterns)) self.shift_table: Dict[str, int] = {} self.automaton: Dict[int, Dict[str, int]] = {} self.preprocess() + def preprocess(self) -> None: """ - Builds the shift table and automaton required + Builds the shift table and automaton required for the Commentz-Walter algorithm. """ # Build the shift table for the rightmost occurrence of characters in patterns @@ -102,14 +106,17 @@ def search(self, text: str) -> List[Tuple[int, str]]: break else: for pattern in self.patterns: - if text[i:i + len(pattern)] == pattern: + if text[i : i + len(pattern)] == pattern: results.append((i, pattern)) i += self.shift_table.get(text[i + m - 1], m) else: i += self.shift_table.get(text[i + j], m) return results + + if __name__ == "__main__": import doctest + doctest.testmod() # Example usage for manual testing patterns = ["abc", "bcd", "cde"]