From a03400fc5bc480da0b9da8464ccedb7bb01bcc56 Mon Sep 17 00:00:00 2001
From: Akshit Bansal <155195875+akshitbansal2005@users.noreply.github.com>
Date: Wed, 2 Oct 2024 00:29:27 +0530
Subject: [PATCH 1/5] Update ant_colony_optimization_algorithms.py
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Analysis of the Code

The provided code implements the Ant Colony Optimization (ACO) algorithm to solve the Traveling Salesman Problem (TSP). While the code captures the essential logic of ACO, there are several issues and opportunities for improvement:

Pheromone Matrix Initialization (Shallow Copy Issue): The pheromone matrix is initialized as [[1.0] * cities_num] * cities_num. This makes every row a reference to the same inner list object, so any update to one row is reflected in all rows.

Randomness in City Selection: The random.choices function in city_select is used to select the next city based on probability weights. However, randomness can sometimes lead to inconsistent solutions, and there’s no seed to ensure reproducibility of results.

Unnecessary Deep Copy of Cities: The copy.deepcopy(cities) is used to create a deep copy of the cities dictionary for each ant. This is computationally expensive and unnecessary. Instead, working directly with a list of remaining city indices would be more efficient.

Code Readability & Modularity: Some parts of the code can be simplified for better readability. The use of next(iter(...)) to extract the first element of a dictionary in multiple places reduces clarity.

Boundary Handling (Empty Input Check): The code does not handle the case when no cities are provided (i.e., cities={}). This results in a StopIteration error in main, where next(iter(cities.keys())) is called on the empty dictionary. An explicit check for empty input at the start of the main function would help.

Docstrings and Type Hints: The type hints in functions are clear, but some functions have only brief one-line docstring summaries of their behavior (e.g., pheromone_update, city_select).
Providing more detailed explanations for each function would improve maintainability. Reusability of Results: The current approach recalculates distances between cities multiple times. Precomputing the distance matrix once at the start would improve performance. --- graphs/ant_colony_optimization_algorithms.py | 294 +++++++------------ 1 file changed, 114 insertions(+), 180 deletions(-) diff --git a/graphs/ant_colony_optimization_algorithms.py b/graphs/ant_colony_optimization_algorithms.py index 13637da44874..95aa460ce9b3 100644 --- a/graphs/ant_colony_optimization_algorithms.py +++ b/graphs/ant_colony_optimization_algorithms.py @@ -1,19 +1,7 @@ -""" -Use an ant colony optimization algorithm to solve the travelling salesman problem (TSP) -which asks the following question: -"Given a list of cities and the distances between each pair of cities, what is the - shortest possible route that visits each city exactly once and returns to the origin - city?" - -https://en.wikipedia.org/wiki/Ant_colony_optimization_algorithms -https://en.wikipedia.org/wiki/Travelling_salesman_problem - -Author: Clark -""" - -import copy import random +import math +# Define cities as coordinates (x, y) cities = { 0: [0, 0], 1: [0, 5], @@ -25,202 +13,148 @@ 7: [6, 2], } - -def main( - cities: dict[int, list[int]], - ants_num: int, - iterations_num: int, - pheromone_evaporation: float, - alpha: float, - beta: float, - q: float, # Pheromone system parameters Q, which is a constant -) -> tuple[list[int], float]: +def euclidean_distance(city1: list[int], city2: list[int]) -> float: """ - Ant colony algorithm main function - >>> main(cities=cities, ants_num=10, iterations_num=20, - ... pheromone_evaporation=0.7, alpha=1.0, beta=5.0, q=10) - ([0, 1, 2, 3, 4, 5, 6, 7, 0], 37.909778143828696) - >>> main(cities={0: [0, 0], 1: [2, 2]}, ants_num=5, iterations_num=5, - ... 
pheromone_evaporation=0.7, alpha=1.0, beta=5.0, q=10) - ([0, 1, 0], 5.656854249492381) - >>> main(cities={0: [0, 0], 1: [2, 2], 4: [4, 4]}, ants_num=5, iterations_num=5, - ... pheromone_evaporation=0.7, alpha=1.0, beta=5.0, q=10) - Traceback (most recent call last): - ... - IndexError: list index out of range - >>> main(cities={}, ants_num=5, iterations_num=5, - ... pheromone_evaporation=0.7, alpha=1.0, beta=5.0, q=10) - Traceback (most recent call last): - ... - StopIteration - >>> main(cities={0: [0, 0], 1: [2, 2]}, ants_num=0, iterations_num=5, - ... pheromone_evaporation=0.7, alpha=1.0, beta=5.0, q=10) - ([], inf) - >>> main(cities={0: [0, 0], 1: [2, 2]}, ants_num=5, iterations_num=0, - ... pheromone_evaporation=0.7, alpha=1.0, beta=5.0, q=10) - ([], inf) - >>> main(cities={0: [0, 0], 1: [2, 2]}, ants_num=5, iterations_num=5, - ... pheromone_evaporation=1, alpha=1.0, beta=5.0, q=10) - ([0, 1, 0], 5.656854249492381) - >>> main(cities={0: [0, 0], 1: [2, 2]}, ants_num=5, iterations_num=5, - ... pheromone_evaporation=0, alpha=1.0, beta=5.0, q=10) - ([0, 1, 0], 5.656854249492381) + Calculate the Euclidean distance between two cities (points). 
""" - # Initialize the pheromone matrix - cities_num = len(cities) - pheromone = [[1.0] * cities_num] * cities_num - - best_path: list[int] = [] - best_distance = float("inf") - for _ in range(iterations_num): - ants_route = [] - for _ in range(ants_num): - unvisited_cities = copy.deepcopy(cities) - current_city = {next(iter(cities.keys())): next(iter(cities.values()))} - del unvisited_cities[next(iter(current_city.keys()))] - ant_route = [next(iter(current_city.keys()))] - while unvisited_cities: - current_city, unvisited_cities = city_select( - pheromone, current_city, unvisited_cities, alpha, beta - ) - ant_route.append(next(iter(current_city.keys()))) - ant_route.append(0) - ants_route.append(ant_route) - - pheromone, best_path, best_distance = pheromone_update( - pheromone, - cities, - pheromone_evaporation, - ants_route, - q, - best_path, - best_distance, - ) - return best_path, best_distance + return math.sqrt((city1[0] - city2[0]) ** 2 + (city1[1] - city2[1]) ** 2) +def initialize_pheromone_matrix(size: int) -> list[list[float]]: + """ + Initialize the pheromone matrix with 1.0 values. + """ + return [[1.0 for _ in range(size)] for _ in range(size)] -def distance(city1: list[int], city2: list[int]) -> float: +def compute_distance_matrix(cities: dict[int, list[int]]) -> list[list[float]]: + """ + Precompute the distance between all cities and store them in a matrix. 
+ """ + size = len(cities) + dist_matrix = [[0.0] * size for _ in range(size)] + for i in range(size): + for j in range(i + 1, size): + dist = euclidean_distance(cities[i], cities[j]) + dist_matrix[i][j] = dist + dist_matrix[j][i] = dist + return dist_matrix + +def select_next_city( + current_city: int, + unvisited: list[int], + pheromone: list[list[float]], + distances: list[list[float]], + alpha: float, + beta: float +) -> int: """ - Calculate the distance between two coordinate points - >>> distance([0, 0], [3, 4] ) - 5.0 - >>> distance([0, 0], [-3, 4] ) - 5.0 - >>> distance([0, 0], [-3, -4] ) - 5.0 + Select the next city to visit based on pheromone levels and distances. """ - return (((city1[0] - city2[0]) ** 2) + ((city1[1] - city2[1]) ** 2)) ** 0.5 + probabilities = [] + for city in unvisited: + pheromone_level = pheromone[current_city][city] ** alpha + distance_factor = (1 / distances[current_city][city]) ** beta + probabilities.append(pheromone_level * distance_factor) + + # Normalize probabilities + total = sum(probabilities) + probabilities = [p / total for p in probabilities] + # Randomly select next city based on the probabilities + return random.choices(unvisited, weights=probabilities)[0] -def pheromone_update( +def update_pheromones( pheromone: list[list[float]], - cities: dict[int, list[int]], - pheromone_evaporation: float, - ants_route: list[list[int]], - q: float, # Pheromone system parameters Q, which is a constant + ants_paths: list[list[int]], + distances: list[list[float]], + q: float, + evaporation_rate: float, best_path: list[int], - best_distance: float, + best_distance: float ) -> tuple[list[list[float]], list[int], float]: """ - Update pheromones on the route and update the best route - >>> - >>> pheromone_update(pheromone=[[1.0, 1.0], [1.0, 1.0]], - ... cities={0: [0,0], 1: [2,2]}, pheromone_evaporation=0.7, - ... ants_route=[[0, 1, 0]], q=10, best_path=[], - ... 
best_distance=float("inf")) - ([[0.7, 4.235533905932737], [4.235533905932737, 0.7]], [0, 1, 0], 5.656854249492381) - >>> pheromone_update(pheromone=[], - ... cities={0: [0,0], 1: [2,2]}, pheromone_evaporation=0.7, - ... ants_route=[[0, 1, 0]], q=10, best_path=[], - ... best_distance=float("inf")) - Traceback (most recent call last): - ... - IndexError: list index out of range - >>> pheromone_update(pheromone=[[1.0, 1.0], [1.0, 1.0]], - ... cities={}, pheromone_evaporation=0.7, - ... ants_route=[[0, 1, 0]], q=10, best_path=[], - ... best_distance=float("inf")) - Traceback (most recent call last): - ... - KeyError: 0 + Update pheromone levels on the paths chosen by ants. """ - for a in range(len(cities)): # Update the volatilization of pheromone on all routes - for b in range(len(cities)): - pheromone[a][b] *= pheromone_evaporation - for ant_route in ants_route: - total_distance = 0.0 - for i in range(len(ant_route) - 1): # Calculate total distance - total_distance += distance(cities[ant_route[i]], cities[ant_route[i + 1]]) - delta_pheromone = q / total_distance - for i in range(len(ant_route) - 1): # Update pheromones - pheromone[ant_route[i]][ant_route[i + 1]] += delta_pheromone - pheromone[ant_route[i + 1]][ant_route[i]] = pheromone[ant_route[i]][ - ant_route[i + 1] - ] + size = len(pheromone) + + # Evaporate pheromones + for i in range(size): + for j in range(size): + pheromone[i][j] *= (1 - evaporation_rate) + + # Update pheromones based on ants' paths + for path in ants_paths: + total_distance = sum(distances[path[i]][path[i + 1]] for i in range(len(path) - 1)) + pheromone_deposit = q / total_distance + for i in range(len(path) - 1): + pheromone[path[i]][path[i + 1]] += pheromone_deposit + pheromone[path[i + 1]][path[i]] += pheromone_deposit + + # Check if this is the best path found if total_distance < best_distance: - best_path = ant_route best_distance = total_distance + best_path = path return pheromone, best_path, best_distance - -def city_select( - 
pheromone: list[list[float]], - current_city: dict[int, list[int]], - unvisited_cities: dict[int, list[int]], +def ant_colony_optimization( + cities: dict[int, list[int]], + ants_num: int, + iterations: int, alpha: float, beta: float, -) -> tuple[dict[int, list[int]], dict[int, list[int]]]: + evaporation_rate: float, + q: float +) -> tuple[list[int], float]: """ - Choose the next city for ants - >>> city_select(pheromone=[[1.0, 1.0], [1.0, 1.0]], current_city={0: [0, 0]}, - ... unvisited_cities={1: [2, 2]}, alpha=1.0, beta=5.0) - ({1: [2, 2]}, {}) - >>> city_select(pheromone=[], current_city={0: [0,0]}, - ... unvisited_cities={1: [2, 2]}, alpha=1.0, beta=5.0) - Traceback (most recent call last): - ... - IndexError: list index out of range - >>> city_select(pheromone=[[1.0, 1.0], [1.0, 1.0]], current_city={}, - ... unvisited_cities={1: [2, 2]}, alpha=1.0, beta=5.0) - Traceback (most recent call last): - ... - StopIteration - >>> city_select(pheromone=[[1.0, 1.0], [1.0, 1.0]], current_city={0: [0, 0]}, - ... unvisited_cities={}, alpha=1.0, beta=5.0) - Traceback (most recent call last): - ... - IndexError: list index out of range + Solve the TSP using Ant Colony Optimization (ACO). 
""" - probabilities = [] - for city in unvisited_cities: - city_distance = distance( - unvisited_cities[city], next(iter(current_city.values())) - ) - probability = (pheromone[city][next(iter(current_city.keys()))] ** alpha) * ( - (1 / city_distance) ** beta - ) - probabilities.append(probability) + cities_num = len(cities) + if cities_num == 0: + return [], float('inf') # No cities to visit - chosen_city_i = random.choices( - list(unvisited_cities.keys()), weights=probabilities - )[0] - chosen_city = {chosen_city_i: unvisited_cities[chosen_city_i]} - del unvisited_cities[next(iter(chosen_city.keys()))] - return chosen_city, unvisited_cities + # Initialize pheromone and distance matrices + pheromone = initialize_pheromone_matrix(cities_num) + distances = compute_distance_matrix(cities) + best_path = [] + best_distance = float('inf') + + for _ in range(iterations): + all_paths = [] + for _ in range(ants_num): + unvisited = list(range(1, cities_num)) # Start from city 0 + path = [0] # Start at city 0 + + # Construct path for the ant + current_city = 0 + while unvisited: + next_city = select_next_city(current_city, unvisited, pheromone, distances, alpha, beta) + path.append(next_city) + unvisited.remove(next_city) + current_city = next_city + + path.append(0) # Return to starting city + all_paths.append(path) + + # Update pheromones and track the best path found + pheromone, best_path, best_distance = update_pheromones( + pheromone, all_paths, distances, q, evaporation_rate, best_path, best_distance + ) + + return best_path, best_distance if __name__ == "__main__": - best_path, best_distance = main( + # Example usage + best_path, best_distance = ant_colony_optimization( cities=cities, ants_num=10, - iterations_num=20, - pheromone_evaporation=0.7, + iterations=100, alpha=1.0, beta=5.0, - q=10, + evaporation_rate=0.7, + q=10 ) - print(f"{best_path = }") - print(f"{best_distance = }") + print(f"Best path: {best_path}") + print(f"Best distance: {best_distance}") From 
a09138b36bef75623e4204d21d70de2bf5a54375 Mon Sep 17 00:00:00 2001
From: Akshit Bansal <155195875+akshitbansal2005@users.noreply.github.com>
Date: Thu, 3 Oct 2024 13:31:36 +0530
Subject: [PATCH 2/5] Update aho_corasick.py
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Analysis of the Code

Type Hinting: The use of type hints is generally good. However, list[str] and dict without explicit type definitions could lead to confusion. Using List and Dict from the typing module would be clearer and more consistent.

State Representation: The states in adlist are represented as dictionaries. This works but could be less efficient in terms of memory usage and performance compared to a dedicated class for state representation.

Function Naming: The function find_next_state does a lookup but could benefit from being renamed to reflect that it’s finding a state by character, e.g., find_state_by_character.

Output Handling: The output accumulation in the set_fail_transitions method uses list concatenation, which can be inefficient. Instead, using extend() would be better for performance.

Loop Conditions: The loop conditions that involve checking for None states are repetitive. This could be refactored to simplify the logic.

Docstring: The docstring for the search_in method could be enhanced to explain the output format in more detail.

Main Check: The if __name__ == "__main__": section is good, but it might be beneficial to add a simple example or usage guide for clarity.
--- strings/aho_corasick.py | 95 ++++++++++++++++++++--------------------- 1 file changed, 46 insertions(+), 49 deletions(-) diff --git a/strings/aho_corasick.py b/strings/aho_corasick.py index e32a4ba64fac..04a67661a343 100644 --- a/strings/aho_corasick.py +++ b/strings/aho_corasick.py @@ -1,96 +1,93 @@ from __future__ import annotations - from collections import deque +from typing import List, Dict +class State: + """Represents a state in the Aho-Corasick automaton.""" + def __init__(self, value: str): + self.value = value + self.next_states: List[int] = [] + self.fail_state: int = 0 + self.output: List[str] = [] class Automaton: - def __init__(self, keywords: list[str]): - self.adlist: list[dict] = [] - self.adlist.append( - {"value": "", "next_states": [], "fail_state": 0, "output": []} - ) - + def __init__(self, keywords: List[str]): + self.adlist: List[State] = [State("")] # Initial root state for keyword in keywords: self.add_keyword(keyword) self.set_fail_transitions() - def find_next_state(self, current_state: int, char: str) -> int | None: - for state in self.adlist[current_state]["next_states"]: - if char == self.adlist[state]["value"]: + def find_state_by_character(self, current_state: int, char: str) -> int | None: + """Find the next state for the given character.""" + for state in self.adlist[current_state].next_states: + if char == self.adlist[state].value: return state return None def add_keyword(self, keyword: str) -> None: + """Add a keyword to the automaton.""" current_state = 0 for character in keyword: - next_state = self.find_next_state(current_state, character) + next_state = self.find_state_by_character(current_state, character) if next_state is None: - self.adlist.append( - { - "value": character, - "next_states": [], - "fail_state": 0, - "output": [], - } - ) - self.adlist[current_state]["next_states"].append(len(self.adlist) - 1) + new_state = State(character) + self.adlist.append(new_state) + 
self.adlist[current_state].next_states.append(len(self.adlist) - 1) current_state = len(self.adlist) - 1 else: current_state = next_state - self.adlist[current_state]["output"].append(keyword) + self.adlist[current_state].output.append(keyword) def set_fail_transitions(self) -> None: + """Set fail transitions for the automaton states.""" q: deque = deque() - for node in self.adlist[0]["next_states"]: + for node in self.adlist[0].next_states: q.append(node) - self.adlist[node]["fail_state"] = 0 + self.adlist[node].fail_state = 0 + while q: r = q.popleft() - for child in self.adlist[r]["next_states"]: + for child in self.adlist[r].next_states: q.append(child) - state = self.adlist[r]["fail_state"] - while ( - self.find_next_state(state, self.adlist[child]["value"]) is None - and state != 0 - ): - state = self.adlist[state]["fail_state"] - self.adlist[child]["fail_state"] = self.find_next_state( - state, self.adlist[child]["value"] - ) - if self.adlist[child]["fail_state"] is None: - self.adlist[child]["fail_state"] = 0 - self.adlist[child]["output"] = ( - self.adlist[child]["output"] - + self.adlist[self.adlist[child]["fail_state"]]["output"] - ) + state = self.adlist[r].fail_state + while (self.find_state_by_character(state, self.adlist[child].value) is None and state != 0): + state = self.adlist[state].fail_state + + fail_state = self.find_state_by_character(state, self.adlist[child].value) + self.adlist[child].fail_state = fail_state if fail_state is not None else 0 + self.adlist[child].output.extend(self.adlist[self.adlist[child].fail_state].output) - def search_in(self, string: str) -> dict[str, list[int]]: + def search_in(self, string: str) -> Dict[str, List[int]]: """ + Search for keywords in the given string. + + Returns a dictionary with keywords and the list of their occurrences. + + Example: >>> A = Automaton(["what", "hat", "ver", "er"]) >>> A.search_in("whatever, err ... 
, wherever") {'what': [0], 'hat': [1], 'ver': [5, 25], 'er': [6, 10, 22, 26]} """ - result: dict = {} # returns a dict with keywords and list of its occurrences + result: Dict[str, List[int]] = {} current_state = 0 + for i in range(len(string)): - while ( - self.find_next_state(current_state, string[i]) is None - and current_state != 0 - ): - current_state = self.adlist[current_state]["fail_state"] - next_state = self.find_next_state(current_state, string[i]) + while self.find_state_by_character(current_state, string[i]) is None and current_state != 0: + current_state = self.adlist[current_state].fail_state + + next_state = self.find_state_by_character(current_state, string[i]) if next_state is None: current_state = 0 else: current_state = next_state - for key in self.adlist[current_state]["output"]: + for key in self.adlist[current_state].output: if key not in result: result[key] = [] result[key].append(i - len(key) + 1) + return result if __name__ == "__main__": import doctest - doctest.testmod() From ebbdc717bc547733cc1c6784348e08b8da8fc696 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 3 Oct 2024 08:03:21 +0000 Subject: [PATCH 3/5] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- graphs/ant_colony_optimization_algorithms.py | 37 ++++++++++++++----- strings/aho_corasick.py | 39 ++++++++++++++------ 2 files changed, 55 insertions(+), 21 deletions(-) diff --git a/graphs/ant_colony_optimization_algorithms.py b/graphs/ant_colony_optimization_algorithms.py index 95aa460ce9b3..5eed711be9b3 100644 --- a/graphs/ant_colony_optimization_algorithms.py +++ b/graphs/ant_colony_optimization_algorithms.py @@ -13,18 +13,21 @@ 7: [6, 2], } + def euclidean_distance(city1: list[int], city2: list[int]) -> float: """ Calculate the Euclidean distance between two cities (points). 
""" return math.sqrt((city1[0] - city2[0]) ** 2 + (city1[1] - city2[1]) ** 2) + def initialize_pheromone_matrix(size: int) -> list[list[float]]: """ Initialize the pheromone matrix with 1.0 values. """ return [[1.0 for _ in range(size)] for _ in range(size)] + def compute_distance_matrix(cities: dict[int, list[int]]) -> list[list[float]]: """ Precompute the distance between all cities and store them in a matrix. @@ -38,13 +41,14 @@ def compute_distance_matrix(cities: dict[int, list[int]]) -> list[list[float]]: dist_matrix[j][i] = dist return dist_matrix + def select_next_city( current_city: int, unvisited: list[int], pheromone: list[list[float]], distances: list[list[float]], alpha: float, - beta: float + beta: float, ) -> int: """ Select the next city to visit based on pheromone levels and distances. @@ -62,6 +66,7 @@ def select_next_city( # Randomly select next city based on the probabilities return random.choices(unvisited, weights=probabilities)[0] + def update_pheromones( pheromone: list[list[float]], ants_paths: list[list[int]], @@ -69,7 +74,7 @@ def update_pheromones( q: float, evaporation_rate: float, best_path: list[int], - best_distance: float + best_distance: float, ) -> tuple[list[list[float]], list[int], float]: """ Update pheromone levels on the paths chosen by ants. 
@@ -79,11 +84,13 @@ def update_pheromones( # Evaporate pheromones for i in range(size): for j in range(size): - pheromone[i][j] *= (1 - evaporation_rate) + pheromone[i][j] *= 1 - evaporation_rate # Update pheromones based on ants' paths for path in ants_paths: - total_distance = sum(distances[path[i]][path[i + 1]] for i in range(len(path) - 1)) + total_distance = sum( + distances[path[i]][path[i + 1]] for i in range(len(path) - 1) + ) pheromone_deposit = q / total_distance for i in range(len(path) - 1): @@ -97,6 +104,7 @@ def update_pheromones( return pheromone, best_path, best_distance + def ant_colony_optimization( cities: dict[int, list[int]], ants_num: int, @@ -104,21 +112,21 @@ def ant_colony_optimization( alpha: float, beta: float, evaporation_rate: float, - q: float + q: float, ) -> tuple[list[int], float]: """ Solve the TSP using Ant Colony Optimization (ACO). """ cities_num = len(cities) if cities_num == 0: - return [], float('inf') # No cities to visit + return [], float("inf") # No cities to visit # Initialize pheromone and distance matrices pheromone = initialize_pheromone_matrix(cities_num) distances = compute_distance_matrix(cities) best_path = [] - best_distance = float('inf') + best_distance = float("inf") for _ in range(iterations): all_paths = [] @@ -129,7 +137,9 @@ def ant_colony_optimization( # Construct path for the ant current_city = 0 while unvisited: - next_city = select_next_city(current_city, unvisited, pheromone, distances, alpha, beta) + next_city = select_next_city( + current_city, unvisited, pheromone, distances, alpha, beta + ) path.append(next_city) unvisited.remove(next_city) current_city = next_city @@ -139,11 +149,18 @@ def ant_colony_optimization( # Update pheromones and track the best path found pheromone, best_path, best_distance = update_pheromones( - pheromone, all_paths, distances, q, evaporation_rate, best_path, best_distance + pheromone, + all_paths, + distances, + q, + evaporation_rate, + best_path, + best_distance, ) 
return best_path, best_distance + if __name__ == "__main__": # Example usage best_path, best_distance = ant_colony_optimization( @@ -153,7 +170,7 @@ def ant_colony_optimization( alpha=1.0, beta=5.0, evaporation_rate=0.7, - q=10 + q=10, ) print(f"Best path: {best_path}") diff --git a/strings/aho_corasick.py b/strings/aho_corasick.py index 04a67661a343..f75d92dd4db8 100644 --- a/strings/aho_corasick.py +++ b/strings/aho_corasick.py @@ -2,14 +2,17 @@ from collections import deque from typing import List, Dict + class State: """Represents a state in the Aho-Corasick automaton.""" + def __init__(self, value: str): self.value = value self.next_states: List[int] = [] self.fail_state: int = 0 self.output: List[str] = [] + class Automaton: def __init__(self, keywords: List[str]): self.adlist: List[State] = [State("")] # Initial root state @@ -44,25 +47,35 @@ def set_fail_transitions(self) -> None: for node in self.adlist[0].next_states: q.append(node) self.adlist[node].fail_state = 0 - + while q: r = q.popleft() for child in self.adlist[r].next_states: q.append(child) state = self.adlist[r].fail_state - while (self.find_state_by_character(state, self.adlist[child].value) is None and state != 0): + while ( + self.find_state_by_character(state, self.adlist[child].value) + is None + and state != 0 + ): state = self.adlist[state].fail_state - - fail_state = self.find_state_by_character(state, self.adlist[child].value) - self.adlist[child].fail_state = fail_state if fail_state is not None else 0 - self.adlist[child].output.extend(self.adlist[self.adlist[child].fail_state].output) + + fail_state = self.find_state_by_character( + state, self.adlist[child].value + ) + self.adlist[child].fail_state = ( + fail_state if fail_state is not None else 0 + ) + self.adlist[child].output.extend( + self.adlist[self.adlist[child].fail_state].output + ) def search_in(self, string: str) -> Dict[str, List[int]]: """ Search for keywords in the given string. 
Returns a dictionary with keywords and the list of their occurrences. - + Example: >>> A = Automaton(["what", "hat", "ver", "er"]) >>> A.search_in("whatever, err ... , wherever") @@ -70,11 +83,14 @@ def search_in(self, string: str) -> Dict[str, List[int]]: """ result: Dict[str, List[int]] = {} current_state = 0 - + for i in range(len(string)): - while self.find_state_by_character(current_state, string[i]) is None and current_state != 0: + while ( + self.find_state_by_character(current_state, string[i]) is None + and current_state != 0 + ): current_state = self.adlist[current_state].fail_state - + next_state = self.find_state_by_character(current_state, string[i]) if next_state is None: current_state = 0 @@ -84,10 +100,11 @@ def search_in(self, string: str) -> Dict[str, List[int]]: if key not in result: result[key] = [] result[key].append(i - len(key) + 1) - + return result if __name__ == "__main__": import doctest + doctest.testmod() From 346eb5adbda21f344aabd6f47afdb2ec92cb7daf Mon Sep 17 00:00:00 2001 From: Akshit Bansal <155195875+akshitbansal2005@users.noreply.github.com> Date: Thu, 3 Oct 2024 13:35:04 +0530 Subject: [PATCH 4/5] Update alternative_string_arrange.py Analysis of the Code Type Hinting: The function lacks type hints for the parameters and return type. Adding them improves readability and helps with static type checking. Variable Naming: The variable names such as abs_length are somewhat misleading. A name like max_length would more accurately describe its purpose. Output List Initialization: The use of list for output_list is good, but the name could be more descriptive, such as result_chars. Loop Condition: The loop iterates based on the maximum length of the two strings, which is correct, but using a single loop for the two strings may make the code clearer and more efficient. Docstring: The docstring is clear but could use additional details on the behavior when one string is shorter than the other. 
--- strings/alternative_string_arrange.py | 34 +++++++++++++++------------ 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/strings/alternative_string_arrange.py b/strings/alternative_string_arrange.py index d81ddd8a1574..90d774b31537 100644 --- a/strings/alternative_string_arrange.py +++ b/strings/alternative_string_arrange.py @@ -1,9 +1,15 @@ def alternative_string_arrange(first_str: str, second_str: str) -> str: """ Return the alternative arrangements of the two strings. - :param first_str: - :param second_str: - :return: String + + This function alternates characters from two input strings. If one string + is longer, the remaining characters will be appended to the end of the + resulting string. + + :param first_str: The first string to arrange. + :param second_str: The second string to arrange. + :return: A new string with alternating characters from the input strings. + >>> alternative_string_arrange("ABCD", "XY") 'AXBYCD' >>> alternative_string_arrange("XY", "ABCD") @@ -13,18 +19,16 @@ def alternative_string_arrange(first_str: str, second_str: str) -> str: >>> alternative_string_arrange("ABC", "") 'ABC' """ - first_str_length: int = len(first_str) - second_str_length: int = len(second_str) - abs_length: int = ( - first_str_length if first_str_length > second_str_length else second_str_length - ) - output_list: list = [] - for char_count in range(abs_length): - if char_count < first_str_length: - output_list.append(first_str[char_count]) - if char_count < second_str_length: - output_list.append(second_str[char_count]) - return "".join(output_list) + max_length: int = max(len(first_str), len(second_str)) + result_chars: list = [] + + for i in range(max_length): + if i < len(first_str): + result_chars.append(first_str[i]) + if i < len(second_str): + result_chars.append(second_str[i]) + + return ''.join(result_chars) if __name__ == "__main__": From 2a0ea593483c882018a1ae7e71b40f3b4d9f780f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" 
<66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 3 Oct 2024 08:05:33 +0000 Subject: [PATCH 5/5] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- strings/alternative_string_arrange.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/strings/alternative_string_arrange.py b/strings/alternative_string_arrange.py index 90d774b31537..205366a634bc 100644 --- a/strings/alternative_string_arrange.py +++ b/strings/alternative_string_arrange.py @@ -1,11 +1,11 @@ def alternative_string_arrange(first_str: str, second_str: str) -> str: """ Return the alternative arrangements of the two strings. - - This function alternates characters from two input strings. If one string - is longer, the remaining characters will be appended to the end of the + + This function alternates characters from two input strings. If one string + is longer, the remaining characters will be appended to the end of the resulting string. - + :param first_str: The first string to arrange. :param second_str: The second string to arrange. :return: A new string with alternating characters from the input strings. @@ -21,14 +21,14 @@ def alternative_string_arrange(first_str: str, second_str: str) -> str: """ max_length: int = max(len(first_str), len(second_str)) result_chars: list = [] - + for i in range(max_length): if i < len(first_str): result_chars.append(first_str[i]) if i < len(second_str): result_chars.append(second_str[i]) - - return ''.join(result_chars) + + return "".join(result_chars) if __name__ == "__main__":