-
-
Notifications
You must be signed in to change notification settings - Fork 46.8k
Added Python Implementation of Suffix Arrays and LCP Arrays #12161
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
6132d40
06a7be7
1e8f767
0094577
123e6f0
848a358
d950f57
dae072c
70c3869
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
#!/usr/bin/env python3 | ||
|
||
|
||
def build_suffix_array(s: str) -> list[int]:
    """
    Build the suffix array for the given string.

    Every suffix ``s[i:]`` is compared directly, so this is the simple
    sort-based construction: easy to read, but each comparison can scan up
    to ``len(s)`` characters.

    Parameters:
    s (str): The input string.

    Returns:
    list[int]: The suffix array (a list of starting indices of
    suffixes in sorted order).

    >>> build_suffix_array("banana")
    [5, 3, 1, 0, 4, 2]
    >>> build_suffix_array("mississippi")
    [10, 7, 4, 1, 0, 9, 8, 6, 3, 5, 2]
    >>> build_suffix_array("a")
    [0]
    >>> build_suffix_array("")
    []
    """
    # All suffixes have different lengths, hence are pairwise distinct, so
    # ordering the start indices by their suffix never needs a tie-break.
    return sorted(range(len(s)), key=lambda start: s[start:])
|
||
|
||
def build_lcp_array(s: str, suffix_array: list[int]) -> list[int]:
    """
    Build the LCP array for the given string and suffix array.

    Entry ``k`` (for ``k > 0``) is the length of the longest common prefix
    of the suffixes at sorted positions ``k - 1`` and ``k``; entry ``0`` is
    left at ``0`` because the first suffix has no predecessor.

    Parameters:
    s (str): The input string.
    suffix_array (list[int]): The suffix array.

    Returns:
    list[int]: The LCP array.

    >>> build_lcp_array("banana", [5, 3, 1, 0, 4, 2])
    [0, 1, 3, 0, 0, 2]
    >>> build_lcp_array("aaaa", [3, 2, 1, 0])
    [0, 1, 2, 3]
    >>> build_lcp_array("a", [0])
    [0]
    >>> build_lcp_array("", [])
    []
    """
    length = len(s)

    # rank[start] = position of the suffix s[start:] within suffix_array
    rank = [0] * length
    for position, start in enumerate(suffix_array):
        rank[start] = position

    lcp = [0] * length
    matched = 0  # prefix length carried over from the previous text position
    for start in range(length):
        position = rank[start]
        if position == 0:
            # The smallest suffix has no predecessor to compare against.
            continue

        neighbour = suffix_array[position - 1]
        # Extend the match from where the previous iteration left off
        # instead of re-comparing from the first character.
        while (
            start + matched < length
            and neighbour + matched < length
            and s[start + matched] == s[neighbour + matched]
        ):
            matched += 1
        lcp[position] = matched

        # Dropping the first character of this suffix can shorten the
        # shared prefix with its predecessor by at most one.
        if matched > 0:
            matched -= 1
    return lcp
|
||
|
||
# Example usage
if __name__ == "__main__":
    s = "banana"
    suffix_array = build_suffix_array(s)
    lcp_array = build_lcp_array(s, suffix_array)

    print("Suffix Array:")
    for start in suffix_array:
        print(f"{start}: {s[start:]}")

    print("\nLCP Array:")
    # lcp_array[i] describes the suffixes at sorted positions i - 1 and i, so
    # the loop starts at 1. Both operands are sliced to the end of the string
    # so that whole suffixes (not single characters) are shown.
    for i in range(1, len(lcp_array)):
        previous_suffix = s[suffix_array[i - 1] :]
        current_suffix = s[suffix_array[i] :]
        print(f"LCP between {previous_suffix} and {current_suffix}: {lcp_array[i]}")
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As there is no test file in this pull request nor any test function or class in the file
divide_and_conquer/suffix_array_lcp.py
, please provide a doctest for the function build_suffix_array
Please provide descriptive name for the parameter:
s