From 6132d40a37b5dc156079cfce65deef56edbc0a2c Mon Sep 17 00:00:00 2001 From: Putul Singh <127419636+putul03@users.noreply.github.com> Date: Sat, 19 Oct 2024 12:05:59 +0530 Subject: [PATCH 1/9] Suffix Array and LCP implementation.py This code file provides an implementation of Suffix Arrays and Longest Common Prefix (LCP) Arrays in Python, designed as a contribution to the open-source community during Hacktoberfest 2024. Overview: A suffix array is an essential data structure used in many string-processing algorithms. It provides an efficient way to store and sort all possible suffixes of a given string. This project also includes the construction of the LCP array, which records the lengths of the longest common prefixes between consecutive suffixes in the sorted suffix array. Together, these two arrays form the backbone of many algorithms in text processing and pattern matching. Key Features: Suffix Array Construction: A suffix array is built by sorting all suffixes of the input string in lexicographical order and storing their starting indices. LCP Array Construction: The LCP array is computed using an efficient algorithm that compares consecutive suffixes from the suffix array and records the length of their common prefixes. Optimized Approach: The approach used in this implementation ensures efficient computation of both suffix and LCP arrays with a linear-time construction of the LCP array following the suffix sorting. User-friendly Display: The program clearly displays both the suffix and LCP arrays, allowing users to easily visualize and understand the results for any given input string. Why this Contribution? As part of Hacktoberfest 2024, I wanted to contribute something that could be useful for developers and researchers working with text-processing algorithms. 
This implementation not only helps in better understanding of basic string operations but also serves as a building block for more complex algorithms in fields like bioinformatics, data compression, and natural language processing. Example Output: For the input string "banana", the program generates the following arrays: Suffix Array: [5, 3, 1, 0, 4, 2] (indicating the starting indices of the lexicographically sorted suffixes) LCP Array: [0, 1, 3, 0, 0, 2] (showing the lengths of the longest common prefixes between consecutive suffixes) Why Suffix Arrays and LCP Arrays Matter: Text Searching: Suffix arrays are used in algorithms for fast substring searching, making them invaluable in tasks like searching through large databases or text files. Repetitive Patterns: The LCP array highlights repeated patterns within the text, which can be useful in applications like data compression, where redundancy needs to be minimized. Bioinformatics: These arrays are critical for genome sequencing and alignment algorithms, where comparing large sequences efficiently is necessary. How to Use: This implementation is easy to run with any input string, and users can quickly get a clear visualization of the suffix and LCP arrays. Whether you're new to algorithms or looking to expand your toolkit for more advanced string manipulation tasks, this project provides a solid foundation. --- .../Suffix Array and LCP implementation.py | 63 +++++++++++++++++++ 1 file changed, 63 insertions(+) create mode 100644 divide_and_conquer/Suffix Array and LCP implementation.py diff --git a/divide_and_conquer/Suffix Array and LCP implementation.py b/divide_and_conquer/Suffix Array and LCP implementation.py new file mode 100644 index 000000000000..4e47e2054a59 --- /dev/null +++ b/divide_and_conquer/Suffix Array and LCP implementation.py @@ -0,0 +1,63 @@ +class SuffixArray: + def __init__(self, input_string): + """ + Initialize the SuffixArray with the input string and generate the suffix and LCP arrays. 
+ """ + self.input_string = input_string + self.suffix_array = self._create_suffix_array() + self.lcp_array = self._create_lcp_array() + + def _create_suffix_array(self): + """ + Create the suffix array for the given input string. + Returns the suffix array, which is a list of indices representing the starting positions of sorted suffixes. + """ + length = len(self.input_string) + sorted_suffixes = sorted(range(length), key=lambda i: self.input_string[i:]) + return sorted_suffixes + + def _create_lcp_array(self): + """ + Construct the LCP (Longest Common Prefix) array based on the suffix array. + LCP[i] stores the length of the longest common prefix between suffixes at suffix_array[i] and suffix_array[i-1]. + """ + length = len(self.input_string) + suffix_array = self.suffix_array + rank = [0] * length + lcp = [0] * length + + # Generate the rank array where rank[i] indicates the position of the suffix starting at index i + for index, suffix in enumerate(suffix_array): + rank[suffix] = index + + lcp_length = 0 + for i in range(length): + if rank[i] > 0: + previous_suffix = suffix_array[rank[i] - 1] + while (i + lcp_length < length) and (previous_suffix + lcp_length < length) and \ + self.input_string[i + lcp_length] == self.input_string[previous_suffix + lcp_length]: + lcp_length += 1 + lcp[rank[i]] = lcp_length + if lcp_length > 0: + lcp_length -= 1 + return lcp + + def display_arrays(self): + """ + Print the suffix array and LCP array for the input string. 
+ """ + print("Suffix Array:") + for idx in self.suffix_array: + print(f"{idx}: {self.input_string[idx:]}") + + print("\nLCP Array:") + for i in range(1, len(self.lcp_array)): + print(f"LCP between {self.input_string[self.suffix_array[i-1]:]} and {self.input_string[self.suffix_array[i]:]}: {self.lcp_array[i]}") + +# Example usage: +if __name__ == "__main__": + input_text = "banana" + suffix_array_instance = SuffixArray(input_text) + + # Show the suffix and LCP arrays + suffix_array_instance.display_arrays() From 06a7be7ba0aa2cf3e464584c8973a26d27fa6e9c Mon Sep 17 00:00:00 2001 From: Putul Singh <127419636+putul03@users.noreply.github.com> Date: Sat, 19 Oct 2024 12:10:54 +0530 Subject: [PATCH 2/9] Delete divide_and_conquer/Suffix Array and LCP implementation.py --- .../Suffix Array and LCP implementation.py | 63 ------------------- 1 file changed, 63 deletions(-) delete mode 100644 divide_and_conquer/Suffix Array and LCP implementation.py diff --git a/divide_and_conquer/Suffix Array and LCP implementation.py b/divide_and_conquer/Suffix Array and LCP implementation.py deleted file mode 100644 index 4e47e2054a59..000000000000 --- a/divide_and_conquer/Suffix Array and LCP implementation.py +++ /dev/null @@ -1,63 +0,0 @@ -class SuffixArray: - def __init__(self, input_string): - """ - Initialize the SuffixArray with the input string and generate the suffix and LCP arrays. - """ - self.input_string = input_string - self.suffix_array = self._create_suffix_array() - self.lcp_array = self._create_lcp_array() - - def _create_suffix_array(self): - """ - Create the suffix array for the given input string. - Returns the suffix array, which is a list of indices representing the starting positions of sorted suffixes. - """ - length = len(self.input_string) - sorted_suffixes = sorted(range(length), key=lambda i: self.input_string[i:]) - return sorted_suffixes - - def _create_lcp_array(self): - """ - Construct the LCP (Longest Common Prefix) array based on the suffix array. 
- LCP[i] stores the length of the longest common prefix between suffixes at suffix_array[i] and suffix_array[i-1]. - """ - length = len(self.input_string) - suffix_array = self.suffix_array - rank = [0] * length - lcp = [0] * length - - # Generate the rank array where rank[i] indicates the position of the suffix starting at index i - for index, suffix in enumerate(suffix_array): - rank[suffix] = index - - lcp_length = 0 - for i in range(length): - if rank[i] > 0: - previous_suffix = suffix_array[rank[i] - 1] - while (i + lcp_length < length) and (previous_suffix + lcp_length < length) and \ - self.input_string[i + lcp_length] == self.input_string[previous_suffix + lcp_length]: - lcp_length += 1 - lcp[rank[i]] = lcp_length - if lcp_length > 0: - lcp_length -= 1 - return lcp - - def display_arrays(self): - """ - Print the suffix array and LCP array for the input string. - """ - print("Suffix Array:") - for idx in self.suffix_array: - print(f"{idx}: {self.input_string[idx:]}") - - print("\nLCP Array:") - for i in range(1, len(self.lcp_array)): - print(f"LCP between {self.input_string[self.suffix_array[i-1]:]} and {self.input_string[self.suffix_array[i]:]}: {self.lcp_array[i]}") - -# Example usage: -if __name__ == "__main__": - input_text = "banana" - suffix_array_instance = SuffixArray(input_text) - - # Show the suffix and LCP arrays - suffix_array_instance.display_arrays() From 1e8f767d3c00212f0ca347d4bd4d27d9254ce178 Mon Sep 17 00:00:00 2001 From: Putul Singh <127419636+putul03@users.noreply.github.com> Date: Sat, 19 Oct 2024 12:20:31 +0530 Subject: [PATCH 3/9] Added Suffix Array and LCP implementation --- .../Suffix Array and LCP implementation.py | 93 +++++++++++++++++++ 1 file changed, 93 insertions(+) create mode 100644 divide_and_conquer/Suffix Array and LCP implementation.py diff --git a/divide_and_conquer/Suffix Array and LCP implementation.py b/divide_and_conquer/Suffix Array and LCP implementation.py new file mode 100644 index 000000000000..c97197824858 --- 
/dev/null +++ b/divide_and_conquer/Suffix Array and LCP implementation.py @@ -0,0 +1,93 @@ +from typing import List + + +class SuffixArray: + def __init__(self, text: str) -> None: + """ + Initializes the class with the input text and builds the suffix and LCP arrays. + """ + self.text = text + self.suffix_array = self.build_suffix_array() + self.lcp_array = self.build_lcp_array() + + def build_suffix_array(self) -> List[int]: + """ + Builds the suffix array for the input string. + Returns the suffix array (a list of starting indices of suffixes in sorted order). + + Example: + >>> sa = SuffixArray("banana") + >>> sa.suffix_array + [5, 3, 1, 0, 4, 2] + """ + n = len(self.text) + suffixes = sorted(range(n), key=lambda i: self.text[i:]) + return suffixes + + def build_lcp_array(self) -> List[int]: + """ + Builds the LCP (Longest Common Prefix) array for the suffix array. + LCP[i] gives the length of the longest common prefix of the suffixes starting at suffix_array[i] and suffix_array[i-1]. + + Example: + >>> sa = SuffixArray("banana") + >>> sa.lcp_array + [0, 1, 3, 0, 0, 2] + """ + n = len(self.text) + suffix_array = self.suffix_array + rank = [0] * n + lcp = [0] * n + + # Build the rank array where rank[i] gives the position of the suffix starting at index i + for i, suffix in enumerate(suffix_array): + rank[suffix] = i + + h = 0 + for i in range(n): + if rank[i] > 0: + j = suffix_array[rank[i] - 1] + while (i + h < n) and (j + h < n) and self.text[i + h] == self.text[j + h]: + h += 1 + lcp[rank[i]] = h + if h > 0: + h -= 1 + return lcp + + def display(self) -> None: + """ + Displays the suffix array and LCP array for the input string. 
+ + Example: + >>> sa = SuffixArray("banana") + >>> sa.display() + Suffix Array: + 5: a + 3: ana + 1: anana + 0: banana + 4: na + 2: nana + + LCP Array: + LCP between a and ana: 1 + LCP between ana and anana: 3 + LCP between anana and banana: 0 + LCP between banana and na: 0 + LCP between na and nana: 2 + """ + print("Suffix Array:") + for idx in self.suffix_array: + print(f"{idx}: {self.text[idx:]}") + + print("\nLCP Array:") + for i in range(1, len(self.lcp_array)): + print( + f"LCP between {self.text[self.suffix_array[i - 1]:]} and {self.text[self.suffix_array[i]:]}: {self.lcp_array[i]}") + + +# Example usage: +if __name__ == "__main__": + text = "banana" + sa = SuffixArray(text) + sa.display() From 0094577a48b494bcbaadfaffe450f7cb86ff89da Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 19 Oct 2024 06:54:37 +0000 Subject: [PATCH 4/9] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- divide_and_conquer/Suffix Array and LCP implementation.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/divide_and_conquer/Suffix Array and LCP implementation.py b/divide_and_conquer/Suffix Array and LCP implementation.py index c97197824858..fa4230db5b68 100644 --- a/divide_and_conquer/Suffix Array and LCP implementation.py +++ b/divide_and_conquer/Suffix Array and LCP implementation.py @@ -47,7 +47,9 @@ def build_lcp_array(self) -> List[int]: for i in range(n): if rank[i] > 0: j = suffix_array[rank[i] - 1] - while (i + h < n) and (j + h < n) and self.text[i + h] == self.text[j + h]: + while ( + (i + h < n) and (j + h < n) and self.text[i + h] == self.text[j + h] + ): h += 1 lcp[rank[i]] = h if h > 0: @@ -83,7 +85,8 @@ def display(self) -> None: print("\nLCP Array:") for i in range(1, len(self.lcp_array)): print( - f"LCP between {self.text[self.suffix_array[i - 1]:]} and {self.text[self.suffix_array[i]:]}: {self.lcp_array[i]}") + 
f"LCP between {self.text[self.suffix_array[i - 1]:]} and {self.text[self.suffix_array[i]:]}: {self.lcp_array[i]}" + ) # Example usage: From 123e6f0c33bc11197edbafcefa24fe2730ceddbf Mon Sep 17 00:00:00 2001 From: Putul Singh <127419636+putul03@users.noreply.github.com> Date: Sat, 19 Oct 2024 12:39:41 +0530 Subject: [PATCH 5/9] Suffix Array and LCP implementation.py --- .../Suffix Array and LCP implementation.py | 37 +++++++------------ 1 file changed, 13 insertions(+), 24 deletions(-) diff --git a/divide_and_conquer/Suffix Array and LCP implementation.py b/divide_and_conquer/Suffix Array and LCP implementation.py index fa4230db5b68..2451dca12d6a 100644 --- a/divide_and_conquer/Suffix Array and LCP implementation.py +++ b/divide_and_conquer/Suffix Array and LCP implementation.py @@ -21,13 +21,15 @@ def build_suffix_array(self) -> List[int]: [5, 3, 1, 0, 4, 2] """ n = len(self.text) - suffixes = sorted(range(n), key=lambda i: self.text[i:]) - return suffixes + # Create a list of suffix indices sorted by the suffixes they point to + sorted_suffix_indices = sorted(range(n), key=lambda suffix_index: self.text[suffix_index:]) + return sorted_suffix_indices def build_lcp_array(self) -> List[int]: """ Builds the LCP (Longest Common Prefix) array for the suffix array. - LCP[i] gives the length of the longest common prefix of the suffixes starting at suffix_array[i] and suffix_array[i-1]. + LCP[i] gives the length of the longest common prefix of the suffixes + starting at suffix_array[i] and suffix_array[i-1]. 
Example: >>> sa = SuffixArray("banana") @@ -40,20 +42,18 @@ def build_lcp_array(self) -> List[int]: lcp = [0] * n # Build the rank array where rank[i] gives the position of the suffix starting at index i - for i, suffix in enumerate(suffix_array): - rank[suffix] = i + for rank_index, suffix in enumerate(suffix_array): + rank[suffix] = rank_index h = 0 for i in range(n): if rank[i] > 0: - j = suffix_array[rank[i] - 1] - while ( - (i + h < n) and (j + h < n) and self.text[i + h] == self.text[j + h] - ): + j = suffix_array[rank[i] - 1] # Previous suffix in the sorted order + while (i + h < n) and (j + h < n) and self.text[i + h] == self.text[j + h]: h += 1 lcp[rank[i]] = h if h > 0: - h -= 1 + h -= 1 # Decrease h for the next suffix comparison return lcp def display(self) -> None: @@ -71,26 +71,15 @@ def display(self) -> None: 4: na 2: nana - LCP Array: - LCP between a and ana: 1 - LCP between ana and anana: 3 - LCP between anana and banana: 0 - LCP between banana and na: 0 - LCP between na and nana: 2 """ print("Suffix Array:") - for idx in self.suffix_array: - print(f"{idx}: {self.text[idx:]}") + for suffix_index in self.suffix_array: + print(f"{suffix_index}: {self.text[suffix_index:]}") - print("\nLCP Array:") - for i in range(1, len(self.lcp_array)): - print( - f"LCP between {self.text[self.suffix_array[i - 1]:]} and {self.text[self.suffix_array[i]:]}: {self.lcp_array[i]}" - ) # Example usage: if __name__ == "__main__": text = "banana" sa = SuffixArray(text) - sa.display() + sa.display() # Contribution for Hacktoberfest 2024 From 848a358d80945b27cfa76e50ff0847d01da57034 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 19 Oct 2024 07:13:22 +0000 Subject: [PATCH 6/9] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../Suffix Array and LCP implementation.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git 
a/divide_and_conquer/Suffix Array and LCP implementation.py b/divide_and_conquer/Suffix Array and LCP implementation.py index 2451dca12d6a..0eadf6bb89a8 100644 --- a/divide_and_conquer/Suffix Array and LCP implementation.py +++ b/divide_and_conquer/Suffix Array and LCP implementation.py @@ -22,7 +22,9 @@ def build_suffix_array(self) -> List[int]: """ n = len(self.text) # Create a list of suffix indices sorted by the suffixes they point to - sorted_suffix_indices = sorted(range(n), key=lambda suffix_index: self.text[suffix_index:]) + sorted_suffix_indices = sorted( + range(n), key=lambda suffix_index: self.text[suffix_index:] + ) return sorted_suffix_indices def build_lcp_array(self) -> List[int]: @@ -49,7 +51,9 @@ def build_lcp_array(self) -> List[int]: for i in range(n): if rank[i] > 0: j = suffix_array[rank[i] - 1] # Previous suffix in the sorted order - while (i + h < n) and (j + h < n) and self.text[i + h] == self.text[j + h]: + while ( + (i + h < n) and (j + h < n) and self.text[i + h] == self.text[j + h] + ): h += 1 lcp[rank[i]] = h if h > 0: @@ -77,7 +81,6 @@ def display(self) -> None: print(f"{suffix_index}: {self.text[suffix_index:]}") - # Example usage: if __name__ == "__main__": text = "banana" From d950f5740bbb2675199ad240e58fd7d3e57a8ee4 Mon Sep 17 00:00:00 2001 From: Putul Singh <127419636+putul03@users.noreply.github.com> Date: Sat, 19 Oct 2024 13:22:59 +0530 Subject: [PATCH 7/9] Delete divide_and_conquer/Suffix Array and LCP implementation.py --- .../Suffix Array and LCP implementation.py | 88 ------------------- 1 file changed, 88 deletions(-) delete mode 100644 divide_and_conquer/Suffix Array and LCP implementation.py diff --git a/divide_and_conquer/Suffix Array and LCP implementation.py b/divide_and_conquer/Suffix Array and LCP implementation.py deleted file mode 100644 index 0eadf6bb89a8..000000000000 --- a/divide_and_conquer/Suffix Array and LCP implementation.py +++ /dev/null @@ -1,88 +0,0 @@ -from typing import List - - -class SuffixArray: - 
def __init__(self, text: str) -> None: - """ - Initializes the class with the input text and builds the suffix and LCP arrays. - """ - self.text = text - self.suffix_array = self.build_suffix_array() - self.lcp_array = self.build_lcp_array() - - def build_suffix_array(self) -> List[int]: - """ - Builds the suffix array for the input string. - Returns the suffix array (a list of starting indices of suffixes in sorted order). - - Example: - >>> sa = SuffixArray("banana") - >>> sa.suffix_array - [5, 3, 1, 0, 4, 2] - """ - n = len(self.text) - # Create a list of suffix indices sorted by the suffixes they point to - sorted_suffix_indices = sorted( - range(n), key=lambda suffix_index: self.text[suffix_index:] - ) - return sorted_suffix_indices - - def build_lcp_array(self) -> List[int]: - """ - Builds the LCP (Longest Common Prefix) array for the suffix array. - LCP[i] gives the length of the longest common prefix of the suffixes - starting at suffix_array[i] and suffix_array[i-1]. - - Example: - >>> sa = SuffixArray("banana") - >>> sa.lcp_array - [0, 1, 3, 0, 0, 2] - """ - n = len(self.text) - suffix_array = self.suffix_array - rank = [0] * n - lcp = [0] * n - - # Build the rank array where rank[i] gives the position of the suffix starting at index i - for rank_index, suffix in enumerate(suffix_array): - rank[suffix] = rank_index - - h = 0 - for i in range(n): - if rank[i] > 0: - j = suffix_array[rank[i] - 1] # Previous suffix in the sorted order - while ( - (i + h < n) and (j + h < n) and self.text[i + h] == self.text[j + h] - ): - h += 1 - lcp[rank[i]] = h - if h > 0: - h -= 1 # Decrease h for the next suffix comparison - return lcp - - def display(self) -> None: - """ - Displays the suffix array and LCP array for the input string. 
- - Example: - >>> sa = SuffixArray("banana") - >>> sa.display() - Suffix Array: - 5: a - 3: ana - 1: anana - 0: banana - 4: na - 2: nana - - """ - print("Suffix Array:") - for suffix_index in self.suffix_array: - print(f"{suffix_index}: {self.text[suffix_index:]}") - - -# Example usage: -if __name__ == "__main__": - text = "banana" - sa = SuffixArray(text) - sa.display() # Contribution for Hacktoberfest 2024 From dae072c0c78cf555961a250a41f4b38a8f9cbad3 Mon Sep 17 00:00:00 2001 From: Putul Singh <127419636+putul03@users.noreply.github.com> Date: Sat, 19 Oct 2024 13:23:23 +0530 Subject: [PATCH 8/9] Suffix Array and LCP Array Implementation --- divide_and_conquer/suffix_array_lcp.py | 62 ++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 divide_and_conquer/suffix_array_lcp.py diff --git a/divide_and_conquer/suffix_array_lcp.py b/divide_and_conquer/suffix_array_lcp.py new file mode 100644 index 000000000000..c78acf49cdac --- /dev/null +++ b/divide_and_conquer/suffix_array_lcp.py @@ -0,0 +1,62 @@ +#!/usr/bin/env python3 + +def build_suffix_array(s: str) -> list[int]: + """ + Build the suffix array for the given string. + + Parameters: + s (str): The input string. + + Returns: + list[int]: The suffix array (a list of starting indices of + suffixes in sorted order). + """ + suffixes = [(s[i:], i) for i in range(len(s))] + suffixes.sort() # Sort the suffixes lexicographically + suffix_array = [suffix[1] for suffix in suffixes] + return suffix_array + +def build_lcp_array(s: str, suffix_array: list[int]) -> list[int]: + """ + Build the LCP array for the given string and suffix array. + + Parameters: + s (str): The input string. + suffix_array (list[int]): The suffix array. + + Returns: + list[int]: The LCP array. 
+ """ + n = len(s) + rank = [0] * n + lcp = [0] * n + + # Compute the rank of each suffix + for i, suffix_index in enumerate(suffix_array): + rank[suffix_index] = i + + # Compute the LCP array + h = 0 + for i in range(n): + if rank[i] > 0: + j = suffix_array[rank[i] - 1] + while (i + h < n) and (j + h < n) and (s[i + h] == s[j + h]): + h += 1 + lcp[rank[i]] = h + if h > 0: + h -= 1 # Decrease h for the next suffix + return lcp + +# Example usage +if __name__ == "__main__": + s = "banana" + suffix_array = build_suffix_array(s) + lcp_array = build_lcp_array(s, suffix_array) + + print("Suffix Array:") + for i in range(len(suffix_array)): + print(f"{suffix_array[i]}: {s[suffix_array[i]:]}") + + print("\nLCP Array:") + for i in range(1, len(lcp_array)): + print(f"LCP between {s[suffix_array[i - 1]:]} and {s[suffix_array[i]]}: {lcp_array[i]}") From 70c3869f4270cc7996b68a3a6ed04858d7596a1d Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 19 Oct 2024 07:53:45 +0000 Subject: [PATCH 9/9] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- divide_and_conquer/suffix_array_lcp.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/divide_and_conquer/suffix_array_lcp.py b/divide_and_conquer/suffix_array_lcp.py index c78acf49cdac..d12dc1ec71e0 100644 --- a/divide_and_conquer/suffix_array_lcp.py +++ b/divide_and_conquer/suffix_array_lcp.py @@ -1,5 +1,6 @@ #!/usr/bin/env python3 + def build_suffix_array(s: str) -> list[int]: """ Build the suffix array for the given string. @@ -16,6 +17,7 @@ def build_suffix_array(s: str) -> list[int]: suffix_array = [suffix[1] for suffix in suffixes] return suffix_array + def build_lcp_array(s: str, suffix_array: list[int]) -> list[int]: """ Build the LCP array for the given string and suffix array. 
@@ -47,6 +49,7 @@ def build_lcp_array(s: str, suffix_array: list[int]) -> list[int]: h -= 1 # Decrease h for the next suffix return lcp + + # Example usage if __name__ == "__main__": s = "banana" @@ -59,4 +62,6 @@ def build_lcp_array(s: str, suffix_array: list[int]) -> list[int]: print("\nLCP Array:") for i in range(1, len(lcp_array)): - print(f"LCP between {s[suffix_array[i - 1]:]} and {s[suffix_array[i]]}: {lcp_array[i]}") + print( + f"LCP between {s[suffix_array[i - 1]:]} and {s[suffix_array[i]:]}: {lcp_array[i]}" + )