Skip to content

Added Python Implementation of Suffix Arrays and LCP Arrays #12171

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 20 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 18 commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
6132d40
Suffix Array and LCP implementation.py
putul03 Oct 19, 2024
06a7be7
Delete divide_and_conquer/Suffix Array and LCP implementation.py
putul03 Oct 19, 2024
1e8f767
Added Suffix Array and LCP implementation
putul03 Oct 19, 2024
0094577
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 19, 2024
123e6f0
Suffix Array and LCP implementation.py
putul03 Oct 19, 2024
848a358
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 19, 2024
d950f57
Delete divide_and_conquer/Suffix Array and LCP implementation.py
putul03 Oct 19, 2024
dae072c
Suffix Array and LCP Array Implementation
putul03 Oct 19, 2024
70c3869
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 19, 2024
c7f137e
suffix_array_lcp.py
putul03 Oct 19, 2024
8038826
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 19, 2024
8b0e74e
suffix_array_lcp.py
putul03 Oct 19, 2024
1b37c1c
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 19, 2024
a4073ca
suffix_array_lcp.py
putul03 Oct 19, 2024
8dcffa3
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 19, 2024
81c09d1
Longest Palindromic Subsequence
putul03 Oct 19, 2024
b01fbff
Delete dynamic_programming/longest_palindromic_subsequence.py
putul03 Oct 19, 2024
ada767d
Add files via upload
putul03 Oct 19, 2024
0018a8e
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 19, 2024
6380f89
Delete data_structures/persistent_segment_tree.py
putul03 Oct 19, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 80 additions & 0 deletions data_structures/persistent_segment_tree.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
class Node:
    """A single node of a segment tree.

    Attributes are plain instance variables:

    * ``value`` - the integer stored in this node (``0`` by default).
    * ``left`` / ``right`` - child nodes, ``None`` until a child is attached.

    >>> node = Node(5)
    >>> node.value
    5
    >>> node.left is None and node.right is None
    True
    >>> Node()
    Node(value=0)
    """

    def __init__(self, value: int = 0) -> None:
        self.value = value
        # Annotated explicitly: without it type checkers infer the type
        # ``None`` and reject any later assignment of a real child node.
        self.left: Node | None = None
        self.right: Node | None = None

    def __repr__(self) -> str:
        return f"Node(value={self.value!r})"


class PersistentSegmentTree:
    """Range-sum segment tree that keeps every version created by an update.

    ``roots[v]`` is the root of version ``v``; version ``0`` is the tree built
    from the initial array. An update never modifies existing nodes: it
    allocates new nodes along the path to the changed leaf and reuses every
    untouched subtree, so earlier versions stay queryable.

    >>> pst = PersistentSegmentTree([1, 2, 3])
    >>> version_1 = pst.update(0, 1, 5)
    >>> pst.query(0, 0, 2), pst.query(version_1, 0, 2)
    (6, 9)
    >>> PersistentSegmentTree([]).query(0, 0, 0)
    0
    """

    def __init__(self, arr: list[int]) -> None:
        self.n = len(arr)
        self.roots: list[Node] = []
        self.roots.append(self._build(arr, 0, self.n - 1))

    def _build(self, arr: list[int], start: int, end: int) -> Node:
        """
        Builds a segment tree over ``arr[start..end]`` (both ends inclusive).

        An empty range (``start > end``) yields a single node with value 0.

        >>> pst = PersistentSegmentTree([1, 2, 3])
        >>> root = pst._build([1, 2, 3], 0, 2)
        >>> root.value  # Sum of the whole array
        6
        >>> pst._build([], 0, -1).value
        0
        """
        if start > end:
            # Empty range: without this guard ``mid`` never moves and the
            # recursion below would not terminate for an empty array.
            return Node()
        if start == end:
            return Node(arr[start])
        mid = (start + end) // 2
        left_child = self._build(arr, start, mid)
        right_child = self._build(arr, mid + 1, end)
        node = Node(left_child.value + right_child.value)
        node.left = left_child
        node.right = right_child
        return node

    def update(self, version: int, index: int, value: int) -> int:
        """
        Creates a new version in which position ``index`` holds ``value``.

        The new version is derived from ``version``, which itself is left
        unchanged. ``version`` is used to index the list of roots directly.

        Returns:
            The number of the newly created version.

        Raises:
            IndexError: if ``index`` is not in ``range(n)``, or if ``version``
                is not a valid index into the stored roots.

        >>> pst = PersistentSegmentTree([1, 2, 3])
        >>> version_1 = pst.update(0, 1, 5)
        >>> pst.query(version_1, 0, 2)  # Query sum from index 0 to 2
        9
        >>> pst.query(0, 0, 2)  # The original version is untouched
        6
        >>> pst.update(0, 3, 10)
        Traceback (most recent call last):
            ...
        IndexError: index 3 out of range for a tree over 3 elements
        """
        if not 0 <= index < self.n:
            # The descent below would otherwise end at the first or last leaf
            # and silently overwrite the wrong element.
            raise IndexError(
                f"index {index} out of range for a tree over {self.n} elements"
            )
        new_root = self._update(self.roots[version], 0, self.n - 1, index, value)
        self.roots.append(new_root)
        return len(self.roots) - 1  # return the index of the new version

    def _update(
        self, node: Node, start: int, end: int, index: int, value: int
    ) -> Node:
        """
        Returns a copy of the subtree ``node`` (covering ``start..end``) with
        position ``index`` set to ``value``.

        Only nodes on the path to the leaf are newly allocated; the sibling
        subtree at every level is shared with ``node``.
        """
        if start == end:
            return Node(value)
        mid = (start + end) // 2
        if index <= mid:
            left_child = self._update(node.left, start, mid, index, value)
            right_child = node.right  # shared with the old version
        else:
            left_child = node.left  # shared with the old version
            right_child = self._update(node.right, mid + 1, end, index, value)
        new_node = Node(left_child.value + right_child.value)
        new_node.left = left_child
        new_node.right = right_child
        return new_node

    def query(self, version: int, left: int, right: int) -> int:
        """
        Queries the sum over positions ``left..right`` (inclusive) in the
        specified version.

        Parts of the range that fall outside the array contribute 0.

        >>> pst = PersistentSegmentTree([1, 2, 3])
        >>> version_1 = pst.update(0, 1, 5)
        >>> pst.query(version_1, 0, 1)  # Query sum from index 0 to 1
        6
        >>> pst.query(version_1, 0, 2)  # Query sum from index 0 to 2
        9
        >>> pst.query(version_1, 5, 9)  # Entirely outside the array
        0
        """
        return self._query(self.roots[version], 0, self.n - 1, left, right)

    def _query(self, node: Node, start: int, end: int, left: int, right: int) -> int:
        """
        Returns the sum of the part of ``left..right`` that lies inside the
        segment ``start..end`` covered by ``node``.
        """
        if left > end or right < start:
            return 0  # no overlap
        if left <= start and right >= end:
            return node.value  # segment fully inside the query range
        mid = (start + end) // 2
        return self._query(node.left, start, mid, left, right) + self._query(
            node.right, mid + 1, end, left, right
        )
88 changes: 88 additions & 0 deletions divide_and_conquer/suffix_array_lcp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
#!/usr/bin/env python3

import doctest


def build_suffix_array(input_string: str) -> list[int]:
    """
    Return the suffix array of ``input_string``.

    The suffix array lists the starting index of every suffix, ordered so
    that the suffixes themselves are in ascending lexicographic order.

    Parameters:
    input_string (str): The string whose suffixes are sorted.

    Returns:
    list[int]: Starting indices of the suffixes in sorted order.

    Examples:
    >>> build_suffix_array("banana")
    [5, 3, 1, 0, 4, 2]
    >>> build_suffix_array("")
    []
    """
    # Order the start positions by the suffix each one begins. No two
    # suffixes are equal (their lengths differ), so the order is unique.
    start_positions = range(len(input_string))
    return sorted(start_positions, key=lambda start: input_string[start:])


def build_lcp_array(input_string: str, suffix_array: list[int]) -> list[int]:
    """
    Build the LCP (longest common prefix) array for the given string and
    suffix array.

    ``lcp[i]`` is the length of the longest common prefix of the suffixes at
    positions ``i - 1`` and ``i`` of ``suffix_array``; ``lcp[0]`` is always 0
    because the first suffix has no predecessor.

    Parameters:
    input_string (str): The input string.
    suffix_array (list[int]): The suffix array of ``input_string``.

    Returns:
    list[int]: The LCP array, one entry per suffix.

    Raises:
    ValueError: If ``suffix_array`` is not a permutation of
        ``range(len(input_string))``.

    Examples:
    >>> suffix_array = build_suffix_array("banana")
    >>> build_lcp_array("banana", suffix_array)
    [0, 1, 3, 0, 0, 2]
    >>> build_lcp_array("", [])
    []
    >>> build_lcp_array("banana", [5, 3])
    Traceback (most recent call last):
        ...
    ValueError: suffix_array must be a permutation of range(len(input_string))
    """
    n = len(input_string)
    if len(suffix_array) != n:
        raise ValueError(
            "suffix_array must be a permutation of range(len(input_string))"
        )

    # rank[start] is the position of the suffix beginning at ``start`` inside
    # suffix_array. -1 marks "not seen yet", which lets the same pass reject
    # duplicates and out-of-range entries.
    rank = [-1] * n
    for position, start in enumerate(suffix_array):
        if not 0 <= start < n or rank[start] != -1:
            raise ValueError(
                "suffix_array must be a permutation of range(len(input_string))"
            )
        rank[start] = position

    lcp = [0] * n

    # Walk the suffixes in text order. The number of characters matched for
    # the suffix at i, minus one, is carried over as the starting point for
    # the suffix at i + 1, so ``h`` is never recomputed from scratch.
    h = 0
    for i in range(n):
        position = rank[i]
        if position == 0:
            # Smallest suffix: nothing precedes it in sorted order.
            h = 0
            continue
        j = suffix_array[position - 1]  # start of the preceding suffix
        while (
            i + h < n
            and j + h < n
            and input_string[i + h] == input_string[j + h]
        ):
            h += 1
        lcp[position] = h
        if h > 0:
            h -= 1  # Decrease h for the next suffix
    return lcp


# Example usage followed by the module's doctests
if __name__ == "__main__":
    s = "banana"
    suffix_array = build_suffix_array(s)
    lcp_array = build_lcp_array(s, suffix_array)

    print("Suffix Array:")
    for start in suffix_array:
        print(f"{start}: {s[start:]}")

    print("\nLCP Array:")
    # lcp_array[i] belongs to the suffixes at positions i - 1 and i of the
    # suffix array, so pair every suffix with the one sorted just before it.
    neighbours = zip(suffix_array, suffix_array[1:], lcp_array[1:])
    for previous_start, current_start, lcp in neighbours:
        # Both sides are printed as full suffixes (slice to the end).
        print(f"LCP between {s[previous_start:]} and {s[current_start:]}: {lcp}")

    # Run doctests
    doctest.testmod()
44 changes: 0 additions & 44 deletions dynamic_programming/longest_palindromic_subsequence.py

This file was deleted.