Skip to content

Added adaptive merge sort and stalin sort addition in sorting algorithms #12296

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 13 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
123 changes: 123 additions & 0 deletions graphs/gabows_algorithm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
"""
This is a pure Python implementation
of Gabow's algorithm for finding
strongly connected components (SCCs)
in a directed graph.
For doctests run:
python -m doctest -v gabows_algorithm.py
or
python3 -m doctest -v gabows_algorithm.py
For manual testing run:
python gabows_algorithm.py
"""

from collections import defaultdict
from typing import List, Dict

Check failure on line 15 in graphs/gabows_algorithm.py

View workflow job for this annotation

GitHub Actions / ruff

Ruff (UP035)

graphs/gabows_algorithm.py:15:1: UP035 `typing.List` is deprecated, use `list` instead

Check failure on line 15 in graphs/gabows_algorithm.py

View workflow job for this annotation

GitHub Actions / ruff

Ruff (UP035)

graphs/gabows_algorithm.py:15:1: UP035 `typing.Dict` is deprecated, use `dict` instead
from __future__ import annotations

Check failure on line 16 in graphs/gabows_algorithm.py

View workflow job for this annotation

GitHub Actions / ruff

Ruff (F404)

graphs/gabows_algorithm.py:16:1: F404 `from __future__` imports must occur at the beginning of the file


class Graph:
    """
    Directed graph on the vertices ``0 .. vertices - 1`` whose strongly
    connected components (SCCs) are found with Gabow's path-based algorithm.

    Attributes:
        vertices (int): Number of vertices in the graph.
        graph (dict[int, list[int]]): Adjacency list of the graph.
        index (int): Next preorder number handed out by the search.
        stack_s (list[int]): Stack S - visited vertices that have not been
            assigned to an SCC yet.
        stack_p (list[int]): Stack P - one representative per candidate
            component on the current search path.
        visited (list[bool]): Whether each vertex has been reached.
        result (list[list[int]]): SCCs found by the latest search.

    Methods:
        add_edge(u, v): Adds an edge from vertex u to vertex v.
        find_sccs(): Finds and returns all SCCs in the graph.

    Examples:
    >>> g = Graph(5)
    >>> g.add_edge(0, 2)
    >>> g.add_edge(2, 1)
    >>> g.add_edge(1, 0)
    >>> g.add_edge(0, 3)
    >>> g.add_edge(3, 4)
    >>> sorted(sorted(scc) for scc in g.find_sccs())
    [[0, 1, 2], [3], [4]]

    An edge back to a vertex that is no longer on stack P still merges the
    components (vertex 3 reaches 1 after 1 was already popped from P):

    >>> g = Graph(4)
    >>> for u, v in [(0, 1), (1, 2), (2, 0), (2, 3), (3, 1)]:
    ...     g.add_edge(u, v)
    >>> sorted(sorted(scc) for scc in g.find_sccs())
    [[0, 1, 2, 3]]
    """

    def __init__(self, vertices: int) -> None:
        self.vertices = vertices
        self.graph: dict[int, list[int]] = defaultdict(list)
        self._reset()

    def _reset(self) -> None:
        """Clear all per-search bookkeeping so a search starts from scratch."""
        self.index = 0
        self.stack_s: list[int] = []  # Stack S
        self.stack_p: list[int] = []  # Stack P
        self.visited = [False] * self.vertices
        self.result: list[list[int]] = []
        # Preorder number of every visited vertex (-1 while unvisited).
        self._preorder = [-1] * self.vertices
        # True once a vertex has been emitted as part of an SCC.
        self._assigned = [False] * self.vertices

    def add_edge(self, u: int, v: int) -> None:
        """
        Adds a directed edge from vertex u to vertex v.

        :param u: Starting vertex of the edge.
        :param v: Ending vertex of the edge.
        :raises ValueError: If u or v is not in ``range(self.vertices)``.

        >>> Graph(2).add_edge(0, 5)
        Traceback (most recent call last):
            ...
        ValueError: vertex 5 is out of range for a graph with 2 vertices
        """
        for vertex in (u, v):
            if not 0 <= vertex < self.vertices:
                raise ValueError(
                    f"vertex {vertex} is out of range "
                    f"for a graph with {self.vertices} vertices"
                )
        self.graph[u].append(v)

    def _enter(self, v: int) -> None:
        """Mark v as visited, number it and push it on both stacks."""
        self.visited[v] = True
        self._preorder[v] = self.index
        self.index += 1
        self.stack_s.append(v)
        self.stack_p.append(v)

    def _dfs(self, v: int) -> None:
        """
        Depth-first search from v that appends every SCC it completes
        to ``self.result``.

        The traversal keeps its own work stack instead of recursing, so long
        paths do not hit Python's recursion limit.

        :param v: The vertex to start the search from.
        """
        self._enter(v)
        work = [(v, iter(self.graph.get(v, ())))]
        while work:
            node, neighbors = work[-1]
            for neighbor in neighbors:
                if not self.visited[neighbor]:
                    # Descend; the parent's iterator remembers where to resume.
                    self._enter(neighbor)
                    work.append((neighbor, iter(self.graph.get(neighbor, ()))))
                    break
                if not self._assigned[neighbor]:
                    # The edge closes a cycle: everything on P that was
                    # numbered after `neighbor` belongs to its component.
                    target = self._preorder[neighbor]
                    while self._preorder[self.stack_p[-1]] > target:
                        self.stack_p.pop()
            else:
                # All neighbours handled: leave `node`.
                work.pop()
                if self.stack_p[-1] == node:
                    # `node` is the root of a component; it owns everything
                    # above it on S.
                    self.stack_p.pop()
                    scc = []
                    while True:
                        member = self.stack_s.pop()
                        self._assigned[member] = True
                        scc.append(member)
                        if member == node:
                            break
                    self.result.append(scc)

    def find_sccs(self) -> list[list[int]]:
        """
        Finds all strongly connected components in the directed graph.

        The search state is rebuilt on every call, so edges added after an
        earlier call are taken into account.

        :return: List of SCCs, where each SCC is represented as a list of
            vertices (in the order they are popped from stack S).

        >>> Graph(0).find_sccs()
        []
        """
        self._reset()
        for v in range(self.vertices):
            if not self.visited[v]:
                self._dfs(v)
        return self.result


if __name__ == "__main__":
    import doctest

    doctest.testmod()
    # Interactive demo: read a graph from standard input and print its SCCs.
    try:
        demo_graph = Graph(int(input("Enter the number of vertices: ")))
        remaining_edges = int(input("Enter the number of edges: "))
        print("Enter each edge as a pair of vertices (u v):")
        while remaining_edges > 0:
            # Unpacking fails with ValueError unless exactly two integers are given.
            source, target = (int(token) for token in input().split())
            demo_graph.add_edge(source, target)
            remaining_edges -= 1
        print("Strongly Connected Components:", demo_graph.find_sccs())
    except ValueError:
        print("Invalid input. Please enter valid integers.")
97 changes: 97 additions & 0 deletions sorts/adaptive_merge_sort.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
"""
This is a pure Python implementation of an adaptive merge sort algorithm.
This implementation detects and merges presorted runs for better
performance on partially sorted data.
For doctests run following command:
python -m doctest -v adaptive_merge_sort.py
or
python3 -m doctest -v adaptive_merge_sort.py
For manual testing run:
python adaptive_merge_sort.py
"""


def adaptive_merge_sort(collection: list) -> list:
    """
    Sorts a list using an adaptive (natural) merge sort algorithm.

    The input is first split into maximal non-decreasing runs, then
    neighbouring runs are merged pairwise until a single run is left.
    The sort is stable and does not modify ``collection``.

    :param collection: An ordered collection with comparable items.
    :return: A list with the items in ascending order. A collection with
        fewer than two items is returned as is; otherwise a new list
        is returned.
    Time Complexity: O(n log r) where r is the number of ascending runs,
        i.e. O(n) for already sorted input and O(n log n) in general.
    Examples:
    >>> adaptive_merge_sort([0, 5, 3, 2, 2])
    [0, 2, 2, 3, 5]
    >>> adaptive_merge_sort([])
    []
    >>> adaptive_merge_sort([-2, -5, -45])
    [-45, -5, -2]
    >>> adaptive_merge_sort([1, 2, 1.0])
    [1, 1.0, 2]
    """

    def find_run(collection: list, start: int) -> int:
        """
        Detects and returns the length of the non-decreasing run
        starting at index 'start'.

        :param collection: The list to detect runs in.
        :param start: The starting index for finding the run.
        :return: Length of the detected run (at least 1).
        """
        size = len(collection)
        end = start + 1
        while end < size and collection[end - 1] <= collection[end]:
            end += 1
        return end - start

    def merge(left: list, right: list) -> list:
        """
        Merge two sorted lists into a single sorted list.

        Walks both inputs with indices instead of ``list.pop(0)`` so the
        merge is linear in the combined length; the inputs are not modified.

        :param left: Left collection
        :param right: Right collection
        :return: Merged result
        """
        result = []
        i = j = 0
        left_size, right_size = len(left), len(right)
        while i < left_size and j < right_size:
            # "<=" takes from the left run on ties, which keeps the sort stable.
            if left[i] <= right[j]:
                result.append(left[i])
                i += 1
            else:
                result.append(right[j])
                j += 1
        result.extend(left[i:])
        result.extend(right[j:])
        return result

    if len(collection) <= 1:
        return collection

    # Step 1: Identify naturally occurring runs and store them in 'runs'
    runs = []
    i = 0
    while i < len(collection):
        run_length = find_run(collection, i)
        runs.append(collection[i : i + run_length])
        i += run_length

    # Step 2: Iteratively merge neighbouring runs until one sorted run remains
    while len(runs) > 1:
        merged_runs = [
            merge(runs[j], runs[j + 1]) for j in range(0, len(runs) - 1, 2)
        ]
        if len(runs) % 2:
            # An unpaired trailing run is carried over to the next round.
            merged_runs.append(runs[-1])
        runs = merged_runs

    return runs[0]  # The single, fully sorted list


if __name__ == "__main__":
    import doctest

    doctest.testmod()
    # Interactive demo: sort a comma-separated list of integers from stdin.
    try:
        raw_line = input("Enter numbers separated by a comma:\n").strip()
        numbers = list(map(int, raw_line.split(",")))
        print(*adaptive_merge_sort(numbers), sep=",")
    except ValueError:
        print("Invalid input. Please enter valid integers separated by commas.")
50 changes: 50 additions & 0 deletions sorts/stalin_merge_sort.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
"""
This is a pure Python implementation of the Stalin Sort algorithm.
Stalin Sort removes any elements that are out of ascending order,
leaving only a sorted subsequence of the original list.
For doctests run the following command:
python -m doctest -v stalin_merge_sort.py
or
python3 -m doctest -v stalin_merge_sort.py
For manual testing run:
python stalin_merge_sort.py
"""


def stalin_sort(collection: list) -> list:
    """
    Sorts a list by removing elements that are out of order,
    leaving a sorted subsequence.

    An element is kept when it is greater than or equal to the last element
    kept so far; the first element is always kept. The input is read in a
    single pass and is not modified, so any iterable is accepted.

    :param collection: A list (or other iterable) of comparable items.
    :return: A new list containing only elements that maintain ascending order.
    Examples:
    >>> stalin_sort([4, 5, 3, 6, 7, 2, 8])
    [4, 5, 6, 7, 8]
    >>> stalin_sort([1, 2, 3, 4, 5])
    [1, 2, 3, 4, 5]
    >>> stalin_sort([5, 4, 3, 2, 1])
    [5]
    >>> stalin_sort([])
    []
    >>> stalin_sort(iter([3, 1, 4, 1, 5]))
    [3, 4, 5]
    """
    if not collection:
        return []

    survivors: list = []
    for element in collection:
        # "not survivors" admits the first element unconditionally.
        if not survivors or element >= survivors[-1]:
            survivors.append(element)
    return survivors


if __name__ == "__main__":
    import doctest

    doctest.testmod()
    # Interactive demo: Stalin-sort a comma-separated list of integers from stdin.
    try:
        user_input = input("Enter numbers separated by a comma:\n").strip()
        unsorted = [int(item) for item in user_input.split(",")]
    except ValueError:
        print("Invalid input. Please enter valid integers separated by commas.")
    else:
        sorted_list = stalin_sort(unsorted)
        # Join the numbers explicitly: passing sep=", " to print() would also
        # put a comma between the label and the first number.
        print("Stalin-sorted list:", ", ".join(map(str, sorted_list)))
126 changes: 126 additions & 0 deletions strings/commentz_walter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
"""
This is a pure Python implementation
of the Commentz-Walter algorithm
for searching multiple patterns in a single text.

The algorithm combines Boyer-Moore's and
Aho-Corasick's techniques for
efficiently searching multiple patterns
by using pattern shifts and suffix automata.

For doctests run:
python -m doctest -v commentz_walter.py
or
python3 -m doctest -v commentz_walter.py
For manual testing run:
python commentz_walter.py
"""

from typing import List, Dict, Set, Tuple

Check failure on line 19 in strings/commentz_walter.py

View workflow job for this annotation

GitHub Actions / ruff

Ruff (UP035)

strings/commentz_walter.py:19:1: UP035 `typing.List` is deprecated, use `list` instead

Check failure on line 19 in strings/commentz_walter.py

View workflow job for this annotation

GitHub Actions / ruff

Ruff (UP035)

strings/commentz_walter.py:19:1: UP035 `typing.Dict` is deprecated, use `dict` instead
from collections import defaultdict


class CommentzWalter:
    """
    Multi-pattern string searching in the style of Commentz-Walter.

    The patterns are stored reversed in a trie. The text is scanned with a
    window that is as long as the shortest pattern; at every window position
    the text is read backwards through the trie, which finds every pattern
    ending there. The window then jumps ahead by a Horspool-style
    bad-character shift that can never skip an occurrence.

    Attributes:
        patterns (list[str]): List of patterns to search for.
        alphabet (set[str]): Unique characters in the patterns.
        shift_table (dict[str, int]): Safe window shift for each character
            of the alphabet (any other character shifts by ``min_len``).
        automaton (dict[int, dict[str, int]]): Trie of the reversed
            patterns; state 0 is the root.
        output (dict[int, str]): Pattern recognised at each accepting state.
        min_len (int): Length of the shortest pattern (0 without patterns).

    Methods:
        preprocess(): Builds the shift table
            and automaton for pattern matching.
        search(text: str) -> list[tuple[int, str]]:
            Searches patterns in the given text.

    Examples:
    >>> cw = CommentzWalter(["he", "she", "his", "hers"])
    >>> cw.search("ahishers")
    [(1, 'his'), (3, 'she'), (4, 'he'), (4, 'hers')]
    >>> CommentzWalter([]).search("anything")
    []
    """

    def __init__(self, patterns: list[str]) -> None:
        self.patterns = patterns
        self.alphabet: set[str] = set()
        self.shift_table: dict[str, int] = {}
        self.automaton: dict[int, dict[str, int]] = {}
        self.output: dict[int, str] = {}
        self.min_len = 0
        self.preprocess()

    def preprocess(self) -> None:
        """
        Builds the shift table and automaton required for searching.

        All tables are rebuilt from ``self.patterns``, so the method can be
        called again after the pattern list has been changed.

        :raises ValueError: If one of the patterns is an empty string.

        >>> CommentzWalter(["abc", ""])
        Traceback (most recent call last):
            ...
        ValueError: patterns must be non-empty strings
        """
        if any(not pattern for pattern in self.patterns):
            # An empty pattern would make the window (and every shift) zero
            # characters wide, so the scan could never advance.
            raise ValueError("patterns must be non-empty strings")

        self.alphabet = set("".join(self.patterns))
        self.min_len = min((len(pattern) for pattern in self.patterns), default=0)

        # A character may move the window by its distance to the end of a
        # pattern, minimised over all patterns and never more than min_len.
        # The last character of a pattern is skipped: its distance is 0.
        self.shift_table = dict.fromkeys(self.alphabet, self.min_len)
        for pattern in self.patterns:
            last = len(pattern) - 1
            for i, char in enumerate(pattern[:-1]):
                self.shift_table[char] = min(self.shift_table[char], last - i)

        # Trie of the reversed patterns, so that matching can run backwards
        # from the end of the window.
        self.automaton = {0: {}}
        self.output = {}
        for pattern in self.patterns:
            state = 0
            for char in reversed(pattern):
                transitions = self.automaton[state]
                if char not in transitions:
                    new_state = len(self.automaton)
                    self.automaton[new_state] = {}
                    transitions[char] = new_state
                state = transitions[char]
            self.output[state] = pattern

    def search(self, text: str) -> list[tuple[int, str]]:
        """
        Searches for all occurrences of the patterns in the given text.

        :param text: The text to search in.
        :return: List of tuples with starting index and matched pattern,
            sorted by starting index and then by pattern. Overlapping
            occurrences are all reported; a pattern that is listed more than
            once is reported once per occurrence.
        Examples:
        >>> cw = CommentzWalter(["abc", "bcd", "cde"])
        >>> cw.search("abcdef")
        [(0, 'abc'), (1, 'bcd'), (2, 'cde')]
        >>> CommentzWalter(["abcd", "d"]).search("xd")
        [(1, 'd')]
        """
        results: list[tuple[int, str]] = []
        if not self.min_len:
            return results

        n = len(text)
        end = self.min_len - 1  # index of the last character of the window
        while end < n:
            # Read the text backwards from `end`; every accepting state is a
            # pattern that ends at `end` and starts at `pos`.
            state = 0
            for pos in range(end, -1, -1):
                next_state = self.automaton[state].get(text[pos])
                if next_state is None:
                    break
                state = next_state
                if state in self.output:
                    results.append((pos, self.output[state]))
            end += self.shift_table.get(text[end], self.min_len)

        results.sort()
        return results


if __name__ == "__main__":
    import doctest

    doctest.testmod()
    # Small fixed demo for manual runs.
    demo_matcher = CommentzWalter(["abc", "bcd", "cde"])
    print("Matches found:", demo_matcher.search("abcdef"))
Loading