From 6132d40a37b5dc156079cfce65deef56edbc0a2c Mon Sep 17 00:00:00 2001 From: Putul Singh <127419636+putul03@users.noreply.github.com> Date: Sat, 19 Oct 2024 12:05:59 +0530 Subject: [PATCH 01/20] Suffix Array and LCP implementation.py This code file provides an implementation of Suffix Arrays and Longest Common Prefix (LCP) Arrays in Python, designed as a contribution to the open-source community during Hacktoberfest 2024. Overview: A suffix array is an essential data structure used in many string-processing algorithms. It provides an efficient way to store and sort all possible suffixes of a given string. This project also includes the construction of the LCP array, which records the lengths of the longest common prefixes between consecutive suffixes in the sorted suffix array. Together, these two arrays form the backbone of many algorithms in text processing and pattern matching. Key Features: Suffix Array Construction: A suffix array is built by sorting all suffixes of the input string in lexicographical order and storing their starting indices. LCP Array Construction: The LCP array is computed using an efficient algorithm that compares consecutive suffixes from the suffix array and records the length of their common prefixes. Optimized Approach: The suffix array is built with a straightforward comparison sort of the suffixes (simple, but not linear-time), after which the LCP array is constructed in linear time using Kasai's algorithm. User-friendly Display: The program clearly displays both the suffix and LCP arrays, allowing users to easily visualize and understand the results for any given input string. Why this Contribution? As part of Hacktoberfest 2024, I wanted to contribute something that could be useful for developers and researchers working with text-processing algorithms. 
This implementation not only helps in better understanding of basic string operations but also serves as a building block for more complex algorithms in fields like bioinformatics, data compression, and natural language processing. Example Output: For the input string "banana", the program generates the following arrays: Suffix Array: [5, 3, 1, 0, 4, 2] (indicating the starting indices of the lexicographically sorted suffixes) LCP Array: [0, 1, 3, 0, 0, 2] (showing the lengths of the longest common prefixes between consecutive suffixes) Why Suffix Arrays and LCP Arrays Matter: Text Searching: Suffix arrays are used in algorithms for fast substring searching, making them invaluable in tasks like searching through large databases or text files. Repetitive Patterns: The LCP array highlights repeated patterns within the text, which can be useful in applications like data compression, where redundancy needs to be minimized. Bioinformatics: These arrays are critical for genome sequencing and alignment algorithms, where comparing large sequences efficiently is necessary. How to Use: This implementation is easy to run with any input string, and users can quickly get a clear visualization of the suffix and LCP arrays. Whether you're new to algorithms or looking to expand your toolkit for more advanced string manipulation tasks, this project provides a solid foundation. --- .../Suffix Array and LCP implementation.py | 63 +++++++++++++++++++ 1 file changed, 63 insertions(+) create mode 100644 divide_and_conquer/Suffix Array and LCP implementation.py diff --git a/divide_and_conquer/Suffix Array and LCP implementation.py b/divide_and_conquer/Suffix Array and LCP implementation.py new file mode 100644 index 000000000000..4e47e2054a59 --- /dev/null +++ b/divide_and_conquer/Suffix Array and LCP implementation.py @@ -0,0 +1,63 @@ +class SuffixArray: + def __init__(self, input_string): + """ + Initialize the SuffixArray with the input string and generate the suffix and LCP arrays. 
+ """ + self.input_string = input_string + self.suffix_array = self._create_suffix_array() + self.lcp_array = self._create_lcp_array() + + def _create_suffix_array(self): + """ + Create the suffix array for the given input string. + Returns the suffix array, which is a list of indices representing the starting positions of sorted suffixes. + """ + length = len(self.input_string) + sorted_suffixes = sorted(range(length), key=lambda i: self.input_string[i:]) + return sorted_suffixes + + def _create_lcp_array(self): + """ + Construct the LCP (Longest Common Prefix) array based on the suffix array. + LCP[i] stores the length of the longest common prefix between suffixes at suffix_array[i] and suffix_array[i-1]. + """ + length = len(self.input_string) + suffix_array = self.suffix_array + rank = [0] * length + lcp = [0] * length + + # Generate the rank array where rank[i] indicates the position of the suffix starting at index i + for index, suffix in enumerate(suffix_array): + rank[suffix] = index + + lcp_length = 0 + for i in range(length): + if rank[i] > 0: + previous_suffix = suffix_array[rank[i] - 1] + while (i + lcp_length < length) and (previous_suffix + lcp_length < length) and \ + self.input_string[i + lcp_length] == self.input_string[previous_suffix + lcp_length]: + lcp_length += 1 + lcp[rank[i]] = lcp_length + if lcp_length > 0: + lcp_length -= 1 + return lcp + + def display_arrays(self): + """ + Print the suffix array and LCP array for the input string. 
+ """ + print("Suffix Array:") + for idx in self.suffix_array: + print(f"{idx}: {self.input_string[idx:]}") + + print("\nLCP Array:") + for i in range(1, len(self.lcp_array)): + print(f"LCP between {self.input_string[self.suffix_array[i-1]:]} and {self.input_string[self.suffix_array[i]:]}: {self.lcp_array[i]}") + +# Example usage: +if __name__ == "__main__": + input_text = "banana" + suffix_array_instance = SuffixArray(input_text) + + # Show the suffix and LCP arrays + suffix_array_instance.display_arrays() From 06a7be7ba0aa2cf3e464584c8973a26d27fa6e9c Mon Sep 17 00:00:00 2001 From: Putul Singh <127419636+putul03@users.noreply.github.com> Date: Sat, 19 Oct 2024 12:10:54 +0530 Subject: [PATCH 02/20] Delete divide_and_conquer/Suffix Array and LCP implementation.py --- .../Suffix Array and LCP implementation.py | 63 ------------------- 1 file changed, 63 deletions(-) delete mode 100644 divide_and_conquer/Suffix Array and LCP implementation.py diff --git a/divide_and_conquer/Suffix Array and LCP implementation.py b/divide_and_conquer/Suffix Array and LCP implementation.py deleted file mode 100644 index 4e47e2054a59..000000000000 --- a/divide_and_conquer/Suffix Array and LCP implementation.py +++ /dev/null @@ -1,63 +0,0 @@ -class SuffixArray: - def __init__(self, input_string): - """ - Initialize the SuffixArray with the input string and generate the suffix and LCP arrays. - """ - self.input_string = input_string - self.suffix_array = self._create_suffix_array() - self.lcp_array = self._create_lcp_array() - - def _create_suffix_array(self): - """ - Create the suffix array for the given input string. - Returns the suffix array, which is a list of indices representing the starting positions of sorted suffixes. - """ - length = len(self.input_string) - sorted_suffixes = sorted(range(length), key=lambda i: self.input_string[i:]) - return sorted_suffixes - - def _create_lcp_array(self): - """ - Construct the LCP (Longest Common Prefix) array based on the suffix array. 
- LCP[i] stores the length of the longest common prefix between suffixes at suffix_array[i] and suffix_array[i-1]. - """ - length = len(self.input_string) - suffix_array = self.suffix_array - rank = [0] * length - lcp = [0] * length - - # Generate the rank array where rank[i] indicates the position of the suffix starting at index i - for index, suffix in enumerate(suffix_array): - rank[suffix] = index - - lcp_length = 0 - for i in range(length): - if rank[i] > 0: - previous_suffix = suffix_array[rank[i] - 1] - while (i + lcp_length < length) and (previous_suffix + lcp_length < length) and \ - self.input_string[i + lcp_length] == self.input_string[previous_suffix + lcp_length]: - lcp_length += 1 - lcp[rank[i]] = lcp_length - if lcp_length > 0: - lcp_length -= 1 - return lcp - - def display_arrays(self): - """ - Print the suffix array and LCP array for the input string. - """ - print("Suffix Array:") - for idx in self.suffix_array: - print(f"{idx}: {self.input_string[idx:]}") - - print("\nLCP Array:") - for i in range(1, len(self.lcp_array)): - print(f"LCP between {self.input_string[self.suffix_array[i-1]:]} and {self.input_string[self.suffix_array[i]:]}: {self.lcp_array[i]}") - -# Example usage: -if __name__ == "__main__": - input_text = "banana" - suffix_array_instance = SuffixArray(input_text) - - # Show the suffix and LCP arrays - suffix_array_instance.display_arrays() From 1e8f767d3c00212f0ca347d4bd4d27d9254ce178 Mon Sep 17 00:00:00 2001 From: Putul Singh <127419636+putul03@users.noreply.github.com> Date: Sat, 19 Oct 2024 12:20:31 +0530 Subject: [PATCH 03/20] Added Suffix Array and LCP implementation --- .../Suffix Array and LCP implementation.py | 93 +++++++++++++++++++ 1 file changed, 93 insertions(+) create mode 100644 divide_and_conquer/Suffix Array and LCP implementation.py diff --git a/divide_and_conquer/Suffix Array and LCP implementation.py b/divide_and_conquer/Suffix Array and LCP implementation.py new file mode 100644 index 000000000000..c97197824858 
--- /dev/null +++ b/divide_and_conquer/Suffix Array and LCP implementation.py @@ -0,0 +1,93 @@ +from typing import List + + +class SuffixArray: + def __init__(self, text: str) -> None: + """ + Initializes the class with the input text and builds the suffix and LCP arrays. + """ + self.text = text + self.suffix_array = self.build_suffix_array() + self.lcp_array = self.build_lcp_array() + + def build_suffix_array(self) -> List[int]: + """ + Builds the suffix array for the input string. + Returns the suffix array (a list of starting indices of suffixes in sorted order). + + Example: + >>> sa = SuffixArray("banana") + >>> sa.suffix_array + [5, 3, 1, 0, 4, 2] + """ + n = len(self.text) + suffixes = sorted(range(n), key=lambda i: self.text[i:]) + return suffixes + + def build_lcp_array(self) -> List[int]: + """ + Builds the LCP (Longest Common Prefix) array for the suffix array. + LCP[i] gives the length of the longest common prefix of the suffixes starting at suffix_array[i] and suffix_array[i-1]. + + Example: + >>> sa = SuffixArray("banana") + >>> sa.lcp_array + [0, 1, 3, 0, 0, 2] + """ + n = len(self.text) + suffix_array = self.suffix_array + rank = [0] * n + lcp = [0] * n + + # Build the rank array where rank[i] gives the position of the suffix starting at index i + for i, suffix in enumerate(suffix_array): + rank[suffix] = i + + h = 0 + for i in range(n): + if rank[i] > 0: + j = suffix_array[rank[i] - 1] + while (i + h < n) and (j + h < n) and self.text[i + h] == self.text[j + h]: + h += 1 + lcp[rank[i]] = h + if h > 0: + h -= 1 + return lcp + + def display(self) -> None: + """ + Displays the suffix array and LCP array for the input string. 
+ + Example: + >>> sa = SuffixArray("banana") + >>> sa.display() + Suffix Array: + 5: a + 3: ana + 1: anana + 0: banana + 4: na + 2: nana + + LCP Array: + LCP between a and ana: 1 + LCP between ana and anana: 3 + LCP between anana and banana: 0 + LCP between banana and na: 0 + LCP between na and nana: 2 + """ + print("Suffix Array:") + for idx in self.suffix_array: + print(f"{idx}: {self.text[idx:]}") + + print("\nLCP Array:") + for i in range(1, len(self.lcp_array)): + print( + f"LCP between {self.text[self.suffix_array[i - 1]:]} and {self.text[self.suffix_array[i]:]}: {self.lcp_array[i]}") + + +# Example usage: +if __name__ == "__main__": + text = "banana" + sa = SuffixArray(text) + sa.display() From 0094577a48b494bcbaadfaffe450f7cb86ff89da Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 19 Oct 2024 06:54:37 +0000 Subject: [PATCH 04/20] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- divide_and_conquer/Suffix Array and LCP implementation.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/divide_and_conquer/Suffix Array and LCP implementation.py b/divide_and_conquer/Suffix Array and LCP implementation.py index c97197824858..fa4230db5b68 100644 --- a/divide_and_conquer/Suffix Array and LCP implementation.py +++ b/divide_and_conquer/Suffix Array and LCP implementation.py @@ -47,7 +47,9 @@ def build_lcp_array(self) -> List[int]: for i in range(n): if rank[i] > 0: j = suffix_array[rank[i] - 1] - while (i + h < n) and (j + h < n) and self.text[i + h] == self.text[j + h]: + while ( + (i + h < n) and (j + h < n) and self.text[i + h] == self.text[j + h] + ): h += 1 lcp[rank[i]] = h if h > 0: @@ -83,7 +85,8 @@ def display(self) -> None: print("\nLCP Array:") for i in range(1, len(self.lcp_array)): print( - f"LCP between {self.text[self.suffix_array[i - 1]:]} and {self.text[self.suffix_array[i]:]}: {self.lcp_array[i]}") + 
f"LCP between {self.text[self.suffix_array[i - 1]:]} and {self.text[self.suffix_array[i]:]}: {self.lcp_array[i]}" + ) # Example usage: From 123e6f0c33bc11197edbafcefa24fe2730ceddbf Mon Sep 17 00:00:00 2001 From: Putul Singh <127419636+putul03@users.noreply.github.com> Date: Sat, 19 Oct 2024 12:39:41 +0530 Subject: [PATCH 05/20] Suffix Array and LCP implementation.py --- .../Suffix Array and LCP implementation.py | 37 +++++++------------ 1 file changed, 13 insertions(+), 24 deletions(-) diff --git a/divide_and_conquer/Suffix Array and LCP implementation.py b/divide_and_conquer/Suffix Array and LCP implementation.py index fa4230db5b68..2451dca12d6a 100644 --- a/divide_and_conquer/Suffix Array and LCP implementation.py +++ b/divide_and_conquer/Suffix Array and LCP implementation.py @@ -21,13 +21,15 @@ def build_suffix_array(self) -> List[int]: [5, 3, 1, 0, 4, 2] """ n = len(self.text) - suffixes = sorted(range(n), key=lambda i: self.text[i:]) - return suffixes + # Create a list of suffix indices sorted by the suffixes they point to + sorted_suffix_indices = sorted(range(n), key=lambda suffix_index: self.text[suffix_index:]) + return sorted_suffix_indices def build_lcp_array(self) -> List[int]: """ Builds the LCP (Longest Common Prefix) array for the suffix array. - LCP[i] gives the length of the longest common prefix of the suffixes starting at suffix_array[i] and suffix_array[i-1]. + LCP[i] gives the length of the longest common prefix of the suffixes + starting at suffix_array[i] and suffix_array[i-1]. 
Example: >>> sa = SuffixArray("banana") @@ -40,20 +42,18 @@ def build_lcp_array(self) -> List[int]: lcp = [0] * n # Build the rank array where rank[i] gives the position of the suffix starting at index i - for i, suffix in enumerate(suffix_array): - rank[suffix] = i + for rank_index, suffix in enumerate(suffix_array): + rank[suffix] = rank_index h = 0 for i in range(n): if rank[i] > 0: - j = suffix_array[rank[i] - 1] - while ( - (i + h < n) and (j + h < n) and self.text[i + h] == self.text[j + h] - ): + j = suffix_array[rank[i] - 1] # Previous suffix in the sorted order + while (i + h < n) and (j + h < n) and self.text[i + h] == self.text[j + h]: h += 1 lcp[rank[i]] = h if h > 0: - h -= 1 + h -= 1 # Decrease h for the next suffix comparison return lcp def display(self) -> None: @@ -71,26 +71,15 @@ def display(self) -> None: 4: na 2: nana - LCP Array: - LCP between a and ana: 1 - LCP between ana and anana: 3 - LCP between anana and banana: 0 - LCP between banana and na: 0 - LCP between na and nana: 2 """ print("Suffix Array:") - for idx in self.suffix_array: - print(f"{idx}: {self.text[idx:]}") + for suffix_index in self.suffix_array: + print(f"{suffix_index}: {self.text[suffix_index:]}") - print("\nLCP Array:") - for i in range(1, len(self.lcp_array)): - print( - f"LCP between {self.text[self.suffix_array[i - 1]:]} and {self.text[self.suffix_array[i]:]}: {self.lcp_array[i]}" - ) # Example usage: if __name__ == "__main__": text = "banana" sa = SuffixArray(text) - sa.display() + sa.display() # Contribution for Hacktoberfest 2024 From 848a358d80945b27cfa76e50ff0847d01da57034 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 19 Oct 2024 07:13:22 +0000 Subject: [PATCH 06/20] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../Suffix Array and LCP implementation.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git 
a/divide_and_conquer/Suffix Array and LCP implementation.py b/divide_and_conquer/Suffix Array and LCP implementation.py index 2451dca12d6a..0eadf6bb89a8 100644 --- a/divide_and_conquer/Suffix Array and LCP implementation.py +++ b/divide_and_conquer/Suffix Array and LCP implementation.py @@ -22,7 +22,9 @@ def build_suffix_array(self) -> List[int]: """ n = len(self.text) # Create a list of suffix indices sorted by the suffixes they point to - sorted_suffix_indices = sorted(range(n), key=lambda suffix_index: self.text[suffix_index:]) + sorted_suffix_indices = sorted( + range(n), key=lambda suffix_index: self.text[suffix_index:] + ) return sorted_suffix_indices def build_lcp_array(self) -> List[int]: @@ -49,7 +51,9 @@ def build_lcp_array(self) -> List[int]: for i in range(n): if rank[i] > 0: j = suffix_array[rank[i] - 1] # Previous suffix in the sorted order - while (i + h < n) and (j + h < n) and self.text[i + h] == self.text[j + h]: + while ( + (i + h < n) and (j + h < n) and self.text[i + h] == self.text[j + h] + ): h += 1 lcp[rank[i]] = h if h > 0: @@ -77,7 +81,6 @@ def display(self) -> None: print(f"{suffix_index}: {self.text[suffix_index:]}") - # Example usage: if __name__ == "__main__": text = "banana" From d950f5740bbb2675199ad240e58fd7d3e57a8ee4 Mon Sep 17 00:00:00 2001 From: Putul Singh <127419636+putul03@users.noreply.github.com> Date: Sat, 19 Oct 2024 13:22:59 +0530 Subject: [PATCH 07/20] Delete divide_and_conquer/Suffix Array and LCP implementation.py --- .../Suffix Array and LCP implementation.py | 88 ------------------- 1 file changed, 88 deletions(-) delete mode 100644 divide_and_conquer/Suffix Array and LCP implementation.py diff --git a/divide_and_conquer/Suffix Array and LCP implementation.py b/divide_and_conquer/Suffix Array and LCP implementation.py deleted file mode 100644 index 0eadf6bb89a8..000000000000 --- a/divide_and_conquer/Suffix Array and LCP implementation.py +++ /dev/null @@ -1,88 +0,0 @@ -from typing import List - - -class SuffixArray: 
- def __init__(self, text: str) -> None: - """ - Initializes the class with the input text and builds the suffix and LCP arrays. - """ - self.text = text - self.suffix_array = self.build_suffix_array() - self.lcp_array = self.build_lcp_array() - - def build_suffix_array(self) -> List[int]: - """ - Builds the suffix array for the input string. - Returns the suffix array (a list of starting indices of suffixes in sorted order). - - Example: - >>> sa = SuffixArray("banana") - >>> sa.suffix_array - [5, 3, 1, 0, 4, 2] - """ - n = len(self.text) - # Create a list of suffix indices sorted by the suffixes they point to - sorted_suffix_indices = sorted( - range(n), key=lambda suffix_index: self.text[suffix_index:] - ) - return sorted_suffix_indices - - def build_lcp_array(self) -> List[int]: - """ - Builds the LCP (Longest Common Prefix) array for the suffix array. - LCP[i] gives the length of the longest common prefix of the suffixes - starting at suffix_array[i] and suffix_array[i-1]. - - Example: - >>> sa = SuffixArray("banana") - >>> sa.lcp_array - [0, 1, 3, 0, 0, 2] - """ - n = len(self.text) - suffix_array = self.suffix_array - rank = [0] * n - lcp = [0] * n - - # Build the rank array where rank[i] gives the position of the suffix starting at index i - for rank_index, suffix in enumerate(suffix_array): - rank[suffix] = rank_index - - h = 0 - for i in range(n): - if rank[i] > 0: - j = suffix_array[rank[i] - 1] # Previous suffix in the sorted order - while ( - (i + h < n) and (j + h < n) and self.text[i + h] == self.text[j + h] - ): - h += 1 - lcp[rank[i]] = h - if h > 0: - h -= 1 # Decrease h for the next suffix comparison - return lcp - - def display(self) -> None: - """ - Displays the suffix array and LCP array for the input string. 
- - Example: - >>> sa = SuffixArray("banana") - >>> sa.display() - Suffix Array: - 5: a - 3: ana - 1: anana - 0: banana - 4: na - 2: nana - - """ - print("Suffix Array:") - for suffix_index in self.suffix_array: - print(f"{suffix_index}: {self.text[suffix_index:]}") - - -# Example usage: -if __name__ == "__main__": - text = "banana" - sa = SuffixArray(text) - sa.display() # Contribution for Hacktoberfest 2024 From dae072c0c78cf555961a250a41f4b38a8f9cbad3 Mon Sep 17 00:00:00 2001 From: Putul Singh <127419636+putul03@users.noreply.github.com> Date: Sat, 19 Oct 2024 13:23:23 +0530 Subject: [PATCH 08/20] Suffix Array and LCP Array Implementation --- divide_and_conquer/suffix_array_lcp.py | 62 ++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 divide_and_conquer/suffix_array_lcp.py diff --git a/divide_and_conquer/suffix_array_lcp.py b/divide_and_conquer/suffix_array_lcp.py new file mode 100644 index 000000000000..c78acf49cdac --- /dev/null +++ b/divide_and_conquer/suffix_array_lcp.py @@ -0,0 +1,62 @@ +#!/usr/bin/env python3 + +def build_suffix_array(s: str) -> list[int]: + """ + Build the suffix array for the given string. + + Parameters: + s (str): The input string. + + Returns: + list[int]: The suffix array (a list of starting indices of + suffixes in sorted order). + """ + suffixes = [(s[i:], i) for i in range(len(s))] + suffixes.sort() # Sort the suffixes lexicographically + suffix_array = [suffix[1] for suffix in suffixes] + return suffix_array + +def build_lcp_array(s: str, suffix_array: list[int]) -> list[int]: + """ + Build the LCP array for the given string and suffix array. + + Parameters: + s (str): The input string. + suffix_array (list[int]): The suffix array. + + Returns: + list[int]: The LCP array. 
+ """ + n = len(s) + rank = [0] * n + lcp = [0] * n + + # Compute the rank of each suffix + for i, suffix_index in enumerate(suffix_array): + rank[suffix_index] = i + + # Compute the LCP array + h = 0 + for i in range(n): + if rank[i] > 0: + j = suffix_array[rank[i] - 1] + while (i + h < n) and (j + h < n) and (s[i + h] == s[j + h]): + h += 1 + lcp[rank[i]] = h + if h > 0: + h -= 1 # Decrease h for the next suffix + return lcp + +# Example usage +if __name__ == "__main__": + s = "banana" + suffix_array = build_suffix_array(s) + lcp_array = build_lcp_array(s, suffix_array) + + print("Suffix Array:") + for i in range(len(suffix_array)): + print(f"{suffix_array[i]}: {s[suffix_array[i]:]}") + + print("\nLCP Array:") + for i in range(1, len(lcp_array)): + print(f"LCP between {s[suffix_array[i - 1]:]} and {s[suffix_array[i]]}: {lcp_array[i]}") From 70c3869f4270cc7996b68a3a6ed04858d7596a1d Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 19 Oct 2024 07:53:45 +0000 Subject: [PATCH 09/20] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- divide_and_conquer/suffix_array_lcp.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/divide_and_conquer/suffix_array_lcp.py b/divide_and_conquer/suffix_array_lcp.py index c78acf49cdac..d12dc1ec71e0 100644 --- a/divide_and_conquer/suffix_array_lcp.py +++ b/divide_and_conquer/suffix_array_lcp.py @@ -1,5 +1,6 @@ #!/usr/bin/env python3 + def build_suffix_array(s: str) -> list[int]: """ Build the suffix array for the given string. @@ -16,6 +17,7 @@ def build_suffix_array(s: str) -> list[int]: suffix_array = [suffix[1] for suffix in suffixes] return suffix_array + def build_lcp_array(s: str, suffix_array: list[int]) -> list[int]: """ Build the LCP array for the given string and suffix array. 
@@ -47,6 +49,7 @@ def build_lcp_array(s: str, suffix_array: list[int]) -> list[int]: h -= 1 # Decrease h for the next suffix return lcp + # Example usage if __name__ == "__main__": s = "banana" @@ -59,4 +62,6 @@ def build_lcp_array(s: str, suffix_array: list[int]) -> list[int]: print("\nLCP Array:") for i in range(1, len(lcp_array)): - print(f"LCP between {s[suffix_array[i - 1]:]} and {s[suffix_array[i]]}: {lcp_array[i]}") + print( + f"LCP between {s[suffix_array[i - 1]:]} and {s[suffix_array[i]]}: {lcp_array[i]}" + ) From c7f137eefc9d2c3f5c2e4b3122ded28219eba873 Mon Sep 17 00:00:00 2001 From: Putul Singh <127419636+putul03@users.noreply.github.com> Date: Sat, 19 Oct 2024 13:40:04 +0530 Subject: [PATCH 10/20] suffix_array_lcp.py --- divide_and_conquer/suffix_array_lcp.py | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/divide_and_conquer/suffix_array_lcp.py b/divide_and_conquer/suffix_array_lcp.py index d12dc1ec71e0..2bde5aa0edb5 100644 --- a/divide_and_conquer/suffix_array_lcp.py +++ b/divide_and_conquer/suffix_array_lcp.py @@ -1,5 +1,6 @@ #!/usr/bin/env python3 +import doctest def build_suffix_array(s: str) -> list[int]: """ @@ -11,13 +12,16 @@ def build_suffix_array(s: str) -> list[int]: Returns: list[int]: The suffix array (a list of starting indices of suffixes in sorted order). + + Examples: + >>> build_suffix_array("banana") + [5, 3, 1, 0, 4, 2] """ suffixes = [(s[i:], i) for i in range(len(s))] suffixes.sort() # Sort the suffixes lexicographically suffix_array = [suffix[1] for suffix in suffixes] return suffix_array - def build_lcp_array(s: str, suffix_array: list[int]) -> list[int]: """ Build the LCP array for the given string and suffix array. @@ -28,6 +32,11 @@ def build_lcp_array(s: str, suffix_array: list[int]) -> list[int]: Returns: list[int]: The LCP array. 
+ + Examples: + >>> suffix_array = build_suffix_array("banana") + >>> build_lcp_array("banana", suffix_array) + [0, 1, 3, 0, 0, 2] """ n = len(s) rank = [0] * n @@ -49,7 +58,6 @@ def build_lcp_array(s: str, suffix_array: list[int]) -> list[int]: h -= 1 # Decrease h for the next suffix return lcp - # Example usage if __name__ == "__main__": s = "banana" @@ -62,6 +70,10 @@ def build_lcp_array(s: str, suffix_array: list[int]) -> list[int]: print("\nLCP Array:") for i in range(1, len(lcp_array)): - print( - f"LCP between {s[suffix_array[i - 1]:]} and {s[suffix_array[i]]}: {lcp_array[i]}" - ) + lcp_info = (f"LCP between {s[suffix_array[i - 1]:]} and " + f"{s[suffix_array[i]]}: {lcp_array[i]}") + print(lcp_info) + +# Run doctests +if __name__ == "__main__": + doctest.testmod() From 8038826cd9793e3ca0edf9448b35cb1079ce3d4f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 19 Oct 2024 08:11:53 +0000 Subject: [PATCH 11/20] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- divide_and_conquer/suffix_array_lcp.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/divide_and_conquer/suffix_array_lcp.py b/divide_and_conquer/suffix_array_lcp.py index 2bde5aa0edb5..2d90255c6382 100644 --- a/divide_and_conquer/suffix_array_lcp.py +++ b/divide_and_conquer/suffix_array_lcp.py @@ -2,6 +2,7 @@ import doctest + def build_suffix_array(s: str) -> list[int]: """ Build the suffix array for the given string. @@ -22,6 +23,7 @@ def build_suffix_array(s: str) -> list[int]: suffix_array = [suffix[1] for suffix in suffixes] return suffix_array + def build_lcp_array(s: str, suffix_array: list[int]) -> list[int]: """ Build the LCP array for the given string and suffix array. 
@@ -58,6 +60,7 @@ def build_lcp_array(s: str, suffix_array: list[int]) -> list[int]: h -= 1 # Decrease h for the next suffix return lcp + # Example usage if __name__ == "__main__": s = "banana" @@ -70,8 +73,10 @@ def build_lcp_array(s: str, suffix_array: list[int]) -> list[int]: print("\nLCP Array:") for i in range(1, len(lcp_array)): - lcp_info = (f"LCP between {s[suffix_array[i - 1]:]} and " - f"{s[suffix_array[i]]}: {lcp_array[i]}") + lcp_info = ( + f"LCP between {s[suffix_array[i - 1]:]} and " + f"{s[suffix_array[i]]}: {lcp_array[i]}" + ) print(lcp_info) # Run doctests From 8b0e74e81cf8f89c21850c3f29a2d997b7a8018c Mon Sep 17 00:00:00 2001 From: Putul Singh <127419636+putul03@users.noreply.github.com> Date: Sat, 19 Oct 2024 13:46:04 +0530 Subject: [PATCH 12/20] suffix_array_lcp.py --- divide_and_conquer/suffix_array_lcp.py | 43 ++++++++++---------------- 1 file changed, 16 insertions(+), 27 deletions(-) diff --git a/divide_and_conquer/suffix_array_lcp.py b/divide_and_conquer/suffix_array_lcp.py index 2d90255c6382..f9c5cfdbf6d6 100644 --- a/divide_and_conquer/suffix_array_lcp.py +++ b/divide_and_conquer/suffix_array_lcp.py @@ -2,45 +2,43 @@ import doctest - -def build_suffix_array(s: str) -> list[int]: +def build_suffix_array(input_string: str) -> list[int]: """ Build the suffix array for the given string. Parameters: - s (str): The input string. + input_string (str): The input string. Returns: list[int]: The suffix array (a list of starting indices of suffixes in sorted order). 
- Examples: + Example: >>> build_suffix_array("banana") [5, 3, 1, 0, 4, 2] """ - suffixes = [(s[i:], i) for i in range(len(s))] + suffixes = [(input_string[i:], i) for i in range(len(input_string))] suffixes.sort() # Sort the suffixes lexicographically suffix_array = [suffix[1] for suffix in suffixes] return suffix_array - -def build_lcp_array(s: str, suffix_array: list[int]) -> list[int]: +def build_lcp_array(input_string: str, suffix_array: list[int]) -> list[int]: """ Build the LCP array for the given string and suffix array. Parameters: - s (str): The input string. + input_string (str): The input string. suffix_array (list[int]): The suffix array. Returns: list[int]: The LCP array. - Examples: - >>> suffix_array = build_suffix_array("banana") - >>> build_lcp_array("banana", suffix_array) + Example: + >>> suffix_arr = build_suffix_array("banana") + >>> build_lcp_array("banana", suffix_arr) [0, 1, 3, 0, 0, 2] """ - n = len(s) + n = len(input_string) rank = [0] * n lcp = [0] * n @@ -53,32 +51,23 @@ def build_lcp_array(s: str, suffix_array: list[int]) -> list[int]: for i in range(n): if rank[i] > 0: j = suffix_array[rank[i] - 1] - while (i + h < n) and (j + h < n) and (s[i + h] == s[j + h]): + while (i + h < n) and (j + h < n) and (input_string[i + h] == input_string[j + h]): h += 1 lcp[rank[i]] = h if h > 0: h -= 1 # Decrease h for the next suffix return lcp - # Example usage if __name__ == "__main__": - s = "banana" - suffix_array = build_suffix_array(s) - lcp_array = build_lcp_array(s, suffix_array) + test_string = "banana" + suffix_array = build_suffix_array(test_string) + lcp_array = build_lcp_array(test_string, suffix_array) print("Suffix Array:") for i in range(len(suffix_array)): - print(f"{suffix_array[i]}: {s[suffix_array[i]:]}") + print(f"{suffix_array[i]}: {test_string[suffix_array[i]:]}") print("\nLCP Array:") for i in range(1, len(lcp_array)): - lcp_info = ( - f"LCP between {s[suffix_array[i - 1]:]} and " - f"{s[suffix_array[i]]}: {lcp_array[i]}" - ) 
- print(lcp_info) - -# Run doctests -if __name__ == "__main__": - doctest.testmod() + print(f"LCP between {test_string[suffix_array[i - 1]:]} and {test_string[suffix_array[i]]}: {lcp_array[i]}") From 1b37c1c127eb7aef63dc25005a7f8825e861d5c1 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 19 Oct 2024 08:18:09 +0000 Subject: [PATCH 13/20] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- divide_and_conquer/suffix_array_lcp.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/divide_and_conquer/suffix_array_lcp.py b/divide_and_conquer/suffix_array_lcp.py index f9c5cfdbf6d6..f9dda982b934 100644 --- a/divide_and_conquer/suffix_array_lcp.py +++ b/divide_and_conquer/suffix_array_lcp.py @@ -2,6 +2,7 @@ import doctest + def build_suffix_array(input_string: str) -> list[int]: """ Build the suffix array for the given string. @@ -22,6 +23,7 @@ def build_suffix_array(input_string: str) -> list[int]: suffix_array = [suffix[1] for suffix in suffixes] return suffix_array + def build_lcp_array(input_string: str, suffix_array: list[int]) -> list[int]: """ Build the LCP array for the given string and suffix array. 
@@ -51,13 +53,18 @@ def build_lcp_array(input_string: str, suffix_array: list[int]) -> list[int]: for i in range(n): if rank[i] > 0: j = suffix_array[rank[i] - 1] - while (i + h < n) and (j + h < n) and (input_string[i + h] == input_string[j + h]): + while ( + (i + h < n) + and (j + h < n) + and (input_string[i + h] == input_string[j + h]) + ): h += 1 lcp[rank[i]] = h if h > 0: h -= 1 # Decrease h for the next suffix return lcp + # Example usage if __name__ == "__main__": test_string = "banana" @@ -70,4 +77,6 @@ def build_lcp_array(input_string: str, suffix_array: list[int]) -> list[int]: print("\nLCP Array:") for i in range(1, len(lcp_array)): - print(f"LCP between {test_string[suffix_array[i - 1]:]} and {test_string[suffix_array[i]]}: {lcp_array[i]}") + print( + f"LCP between {test_string[suffix_array[i - 1]:]} and {test_string[suffix_array[i]]}: {lcp_array[i]}" + ) From a4073ca1882e5383f481dfa43c95ca2565a69fee Mon Sep 17 00:00:00 2001 From: Putul Singh <127419636+putul03@users.noreply.github.com> Date: Sat, 19 Oct 2024 13:49:17 +0530 Subject: [PATCH 14/20] suffix_array_lcp.py --- divide_and_conquer/suffix_array_lcp.py | 35 ++++++++++++-------------- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/divide_and_conquer/suffix_array_lcp.py b/divide_and_conquer/suffix_array_lcp.py index f9dda982b934..d56a64296d32 100644 --- a/divide_and_conquer/suffix_array_lcp.py +++ b/divide_and_conquer/suffix_array_lcp.py @@ -2,7 +2,6 @@ import doctest - def build_suffix_array(input_string: str) -> list[int]: """ Build the suffix array for the given string. @@ -14,7 +13,7 @@ def build_suffix_array(input_string: str) -> list[int]: list[int]: The suffix array (a list of starting indices of suffixes in sorted order). 
- Example: + Examples: >>> build_suffix_array("banana") [5, 3, 1, 0, 4, 2] """ @@ -23,7 +22,6 @@ def build_suffix_array(input_string: str) -> list[int]: suffix_array = [suffix[1] for suffix in suffixes] return suffix_array - def build_lcp_array(input_string: str, suffix_array: list[int]) -> list[int]: """ Build the LCP array for the given string and suffix array. @@ -35,9 +33,9 @@ def build_lcp_array(input_string: str, suffix_array: list[int]) -> list[int]: Returns: list[int]: The LCP array. - Example: - >>> suffix_arr = build_suffix_array("banana") - >>> build_lcp_array("banana", suffix_arr) + Examples: + >>> suffix_array = build_suffix_array("banana") + >>> build_lcp_array("banana", suffix_array) [0, 1, 3, 0, 0, 2] """ n = len(input_string) @@ -53,30 +51,29 @@ def build_lcp_array(input_string: str, suffix_array: list[int]) -> list[int]: for i in range(n): if rank[i] > 0: j = suffix_array[rank[i] - 1] - while ( - (i + h < n) - and (j + h < n) - and (input_string[i + h] == input_string[j + h]) - ): + while (i + h < n) and (j + h < n) and (input_string[i + h] == input_string[j + h]): h += 1 lcp[rank[i]] = h if h > 0: h -= 1 # Decrease h for the next suffix return lcp - # Example usage if __name__ == "__main__": - test_string = "banana" - suffix_array = build_suffix_array(test_string) - lcp_array = build_lcp_array(test_string, suffix_array) + s = "banana" + suffix_array = build_suffix_array(s) + lcp_array = build_lcp_array(s, suffix_array) print("Suffix Array:") for i in range(len(suffix_array)): - print(f"{suffix_array[i]}: {test_string[suffix_array[i]:]}") + print(f"{suffix_array[i]}: {s[suffix_array[i]:]}") print("\nLCP Array:") for i in range(1, len(lcp_array)): - print( - f"LCP between {test_string[suffix_array[i - 1]:]} and {test_string[suffix_array[i]]}: {lcp_array[i]}" - ) + lcp_info = (f"LCP between {s[suffix_array[i - 1]:]} and " + f"{s[suffix_array[i]]}: {lcp_array[i]}") + print(lcp_info) + +# Run doctests +if __name__ == "__main__": + doctest.testmod() 
From 8dcffa3e71b69f2f9ed0051618cd45b9fbf27f93 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 19 Oct 2024 08:21:24 +0000 Subject: [PATCH 15/20] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- divide_and_conquer/suffix_array_lcp.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/divide_and_conquer/suffix_array_lcp.py b/divide_and_conquer/suffix_array_lcp.py index d56a64296d32..3cac0bdd063a 100644 --- a/divide_and_conquer/suffix_array_lcp.py +++ b/divide_and_conquer/suffix_array_lcp.py @@ -2,6 +2,7 @@ import doctest + def build_suffix_array(input_string: str) -> list[int]: """ Build the suffix array for the given string. @@ -22,6 +23,7 @@ def build_suffix_array(input_string: str) -> list[int]: suffix_array = [suffix[1] for suffix in suffixes] return suffix_array + def build_lcp_array(input_string: str, suffix_array: list[int]) -> list[int]: """ Build the LCP array for the given string and suffix array. 
@@ -51,13 +53,18 @@ def build_lcp_array(input_string: str, suffix_array: list[int]) -> list[int]: for i in range(n): if rank[i] > 0: j = suffix_array[rank[i] - 1] - while (i + h < n) and (j + h < n) and (input_string[i + h] == input_string[j + h]): + while ( + (i + h < n) + and (j + h < n) + and (input_string[i + h] == input_string[j + h]) + ): h += 1 lcp[rank[i]] = h if h > 0: h -= 1 # Decrease h for the next suffix return lcp + # Example usage if __name__ == "__main__": s = "banana" @@ -70,8 +77,10 @@ def build_lcp_array(input_string: str, suffix_array: list[int]) -> list[int]: print("\nLCP Array:") for i in range(1, len(lcp_array)): - lcp_info = (f"LCP between {s[suffix_array[i - 1]:]} and " - f"{s[suffix_array[i]]}: {lcp_array[i]}") + lcp_info = ( + f"LCP between {s[suffix_array[i - 1]:]} and " + f"{s[suffix_array[i]]}: {lcp_array[i]}" + ) print(lcp_info) # Run doctests From 81c09d1a3b0b2a77652bac5c9a61a840c7c8e2d4 Mon Sep 17 00:00:00 2001 From: Putul Singh <127419636+putul03@users.noreply.github.com> Date: Sat, 19 Oct 2024 15:06:54 +0530 Subject: [PATCH 16/20] Longest Palindromic Subsequence --- .../longest_palindromic_subsequence.py | 97 ++++++++++--------- 1 file changed, 53 insertions(+), 44 deletions(-) diff --git a/dynamic_programming/longest_palindromic_subsequence.py b/dynamic_programming/longest_palindromic_subsequence.py index a60d95e460e6..b649b9b3a2d2 100644 --- a/dynamic_programming/longest_palindromic_subsequence.py +++ b/dynamic_programming/longest_palindromic_subsequence.py @@ -1,44 +1,53 @@ -""" -author: Sanket Kittad -Given a string s, find the longest palindromic subsequence's length in s. -Input: s = "bbbab" -Output: 4 -Explanation: One possible longest palindromic subsequence is "bbbb". 
-Leetcode link: https://leetcode.com/problems/longest-palindromic-subsequence/description/ -""" - - -def longest_palindromic_subsequence(input_string: str) -> int: - """ - This function returns the longest palindromic subsequence in a string - >>> longest_palindromic_subsequence("bbbab") - 4 - >>> longest_palindromic_subsequence("bbabcbcab") - 7 - """ - n = len(input_string) - rev = input_string[::-1] - m = len(rev) - dp = [[-1] * (m + 1) for i in range(n + 1)] - for i in range(n + 1): - dp[i][0] = 0 - for i in range(m + 1): - dp[0][i] = 0 - - # create and initialise dp array - for i in range(1, n + 1): - for j in range(1, m + 1): - # If characters at i and j are the same - # include them in the palindromic subsequence - if input_string[i - 1] == rev[j - 1]: - dp[i][j] = 1 + dp[i - 1][j - 1] - else: - dp[i][j] = max(dp[i - 1][j], dp[i][j - 1]) - - return dp[n][m] - - -if __name__ == "__main__": - import doctest - - doctest.testmod() +def longest_palindromic_subsequence(input_string: str) -> int: + """ + Function to find the length of the longest palindromic subsequence + in a given string using dynamic programming. 
+ + :param input_string: Input string + :return: Length of the longest palindromic subsequence + + >>> longest_palindromic_subsequence("bbbab") + 4 + >>> longest_palindromic_subsequence("cbbd") + 2 + >>> longest_palindromic_subsequence("") + 0 + >>> longest_palindromic_subsequence("a") + 1 + >>> longest_palindromic_subsequence("abcd") + 1 + >>> longest_palindromic_subsequence("agbdba") + 5 + """ + n = len(input_string) + + # Base case: if string is empty, return 0 + if n == 0: + return 0 + + # dp[i][j] will represent the length of the longest palindromic subsequence + # within the substring input_string[i...j] + dp = [[0] * n for _ in range(n)] + + # Every single character is a palindrome of length 1 + for i in range(n): + dp[i][i] = 1 + + # Build the DP table for substrings of increasing length + for length in range(2, n + 1): + for i in range(n - length + 1): + j = i + length - 1 + if input_string[i] == input_string[j]: + dp[i][j] = dp[i + 1][j - 1] + 2 + else: + dp[i][j] = max(dp[i + 1][j], dp[i][j - 1]) + + # The longest palindromic subsequence length for the full string is dp[0][n-1] + return dp[0][n - 1] + + +# Example usage: +if __name__ == "__main__": + input_string = "bbbab" + result = longest_palindromic_subsequence(input_string) + print(f"Length of Longest Palindromic Subsequence: {result}") From b01fbfff1e63d3dce2aa90422940212ff9b493be Mon Sep 17 00:00:00 2001 From: Putul Singh <127419636+putul03@users.noreply.github.com> Date: Sat, 19 Oct 2024 15:12:02 +0530 Subject: [PATCH 17/20] Delete dynamic_programming/longest_palindromic_subsequence.py --- .../longest_palindromic_subsequence.py | 53 ------------------- 1 file changed, 53 deletions(-) delete mode 100644 dynamic_programming/longest_palindromic_subsequence.py diff --git a/dynamic_programming/longest_palindromic_subsequence.py b/dynamic_programming/longest_palindromic_subsequence.py deleted file mode 100644 index b649b9b3a2d2..000000000000 --- a/dynamic_programming/longest_palindromic_subsequence.py 
+++ /dev/null @@ -1,53 +0,0 @@ -def longest_palindromic_subsequence(input_string: str) -> int: - """ - Function to find the length of the longest palindromic subsequence - in a given string using dynamic programming. - - :param input_string: Input string - :return: Length of the longest palindromic subsequence - - >>> longest_palindromic_subsequence("bbbab") - 4 - >>> longest_palindromic_subsequence("cbbd") - 2 - >>> longest_palindromic_subsequence("") - 0 - >>> longest_palindromic_subsequence("a") - 1 - >>> longest_palindromic_subsequence("abcd") - 1 - >>> longest_palindromic_subsequence("agbdba") - 5 - """ - n = len(input_string) - - # Base case: if string is empty, return 0 - if n == 0: - return 0 - - # dp[i][j] will represent the length of the longest palindromic subsequence - # within the substring input_string[i...j] - dp = [[0] * n for _ in range(n)] - - # Every single character is a palindrome of length 1 - for i in range(n): - dp[i][i] = 1 - - # Build the DP table for substrings of increasing length - for length in range(2, n + 1): - for i in range(n - length + 1): - j = i + length - 1 - if input_string[i] == input_string[j]: - dp[i][j] = dp[i + 1][j - 1] + 2 - else: - dp[i][j] = max(dp[i + 1][j], dp[i][j - 1]) - - # The longest palindromic subsequence length for the full string is dp[0][n-1] - return dp[0][n - 1] - - -# Example usage: -if __name__ == "__main__": - input_string = "bbbab" - result = longest_palindromic_subsequence(input_string) - print(f"Length of Longest Palindromic Subsequence: {result}") From ada767d31ba434a3d41933cd84a1702bbdb758d3 Mon Sep 17 00:00:00 2001 From: Putul Singh <127419636+putul03@users.noreply.github.com> Date: Sat, 19 Oct 2024 15:42:06 +0530 Subject: [PATCH 18/20] Add files via upload --- data_structures/persistent_segment_tree.py | 80 ++++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 data_structures/persistent_segment_tree.py diff --git a/data_structures/persistent_segment_tree.py 
b/data_structures/persistent_segment_tree.py new file mode 100644 index 000000000000..8963b54a2ba6 --- /dev/null +++ b/data_structures/persistent_segment_tree.py @@ -0,0 +1,80 @@ +class Node: + def __init__(self, value: int = 0) -> None: + self.value = value + self.left = None + self.right = None + + +class PersistentSegmentTree: + def __init__(self, arr: list[int]) -> None: + self.n = len(arr) + self.roots: list[Node] = [] + self.roots.append(self._build(arr, 0, self.n - 1)) + + def _build(self, arr: list[int], start: int, end: int) -> Node: + """ + Builds a segment tree from the provided array. + + >>> pst = PersistentSegmentTree([1, 2, 3]) + >>> root = pst._build([1, 2, 3], 0, 2) + >>> root.value # Sum of the whole array + 6 + """ + if start == end: + return Node(arr[start]) + mid = (start + end) // 2 + node = Node() + node.left = self._build(arr, start, mid) + node.right = self._build(arr, mid + 1, end) + node.value = node.left.value + node.right.value + return node + + def update(self, version: int, index: int, value: int) -> int: + """ + Updates the segment tree with a new value at the specified index. 
+ + >>> pst = PersistentSegmentTree([1, 2, 3]) + >>> version_1 = pst.update(0, 1, 5) + >>> pst.query(version_1, 0, 2) # Query sum from index 0 to 2 + 9 + """ + new_root = self._update(self.roots[version], 0, self.n - 1, index, value) + self.roots.append(new_root) + return len(self.roots) - 1 # return the index of the new version + + def _update(self, node: Node, start: int, end: int, index: int, value: int) -> Node: + if start == end: + new_node = Node(value) + return new_node + mid = (start + end) // 2 + new_node = Node() + if index <= mid: + new_node.left = self._update(node.left, start, mid, index, value) + new_node.right = node.right + else: + new_node.left = node.left + new_node.right = self._update(node.right, mid + 1, end, index, value) + new_node.value = new_node.left.value + new_node.right.value + return new_node + + def query(self, version: int, left: int, right: int) -> int: + """ + Queries the sum in the given range for the specified version. + + >>> pst = PersistentSegmentTree([1, 2, 3]) + >>> version_1 = pst.update(0, 1, 5) + >>> pst.query(version_1, 0, 1) # Query sum from index 0 to 1 + 6 + >>> pst.query(version_1, 0, 2) # Query sum from index 0 to 2 + 9 + """ + return self._query(self.roots[version], 0, self.n - 1, left, right) + + def _query(self, node: Node, start: int, end: int, left: int, right: int) -> int: + if left > end or right < start: + return 0 + if left <= start and right >= end: + return node.value + mid = (start + end) // 2 + return (self._query(node.left, start, mid, left, right) + + self._query(node.right, mid + 1, end, left, right)) From 0018a8ed269fde65b79cab12a90348e30d402547 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 19 Oct 2024 10:13:22 +0000 Subject: [PATCH 19/20] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- data_structures/persistent_segment_tree.py | 5 +++-- 1 file changed, 3 insertions(+), 2 
deletions(-) diff --git a/data_structures/persistent_segment_tree.py b/data_structures/persistent_segment_tree.py index 8963b54a2ba6..7d7851071612 100644 --- a/data_structures/persistent_segment_tree.py +++ b/data_structures/persistent_segment_tree.py @@ -76,5 +76,6 @@ def _query(self, node: Node, start: int, end: int, left: int, right: int) -> int if left <= start and right >= end: return node.value mid = (start + end) // 2 - return (self._query(node.left, start, mid, left, right) + - self._query(node.right, mid + 1, end, left, right)) + return self._query(node.left, start, mid, left, right) + self._query( + node.right, mid + 1, end, left, right + ) From 6380f891dc0a9a60f02e145d5cae44dfbb2f76bb Mon Sep 17 00:00:00 2001 From: Putul Singh <127419636+putul03@users.noreply.github.com> Date: Sat, 19 Oct 2024 15:46:03 +0530 Subject: [PATCH 20/20] Delete data_structures/persistent_segment_tree.py --- data_structures/persistent_segment_tree.py | 81 ---------------------- 1 file changed, 81 deletions(-) delete mode 100644 data_structures/persistent_segment_tree.py diff --git a/data_structures/persistent_segment_tree.py b/data_structures/persistent_segment_tree.py deleted file mode 100644 index 7d7851071612..000000000000 --- a/data_structures/persistent_segment_tree.py +++ /dev/null @@ -1,81 +0,0 @@ -class Node: - def __init__(self, value: int = 0) -> None: - self.value = value - self.left = None - self.right = None - - -class PersistentSegmentTree: - def __init__(self, arr: list[int]) -> None: - self.n = len(arr) - self.roots: list[Node] = [] - self.roots.append(self._build(arr, 0, self.n - 1)) - - def _build(self, arr: list[int], start: int, end: int) -> Node: - """ - Builds a segment tree from the provided array. 
- - >>> pst = PersistentSegmentTree([1, 2, 3]) - >>> root = pst._build([1, 2, 3], 0, 2) - >>> root.value # Sum of the whole array - 6 - """ - if start == end: - return Node(arr[start]) - mid = (start + end) // 2 - node = Node() - node.left = self._build(arr, start, mid) - node.right = self._build(arr, mid + 1, end) - node.value = node.left.value + node.right.value - return node - - def update(self, version: int, index: int, value: int) -> int: - """ - Updates the segment tree with a new value at the specified index. - - >>> pst = PersistentSegmentTree([1, 2, 3]) - >>> version_1 = pst.update(0, 1, 5) - >>> pst.query(version_1, 0, 2) # Query sum from index 0 to 2 - 9 - """ - new_root = self._update(self.roots[version], 0, self.n - 1, index, value) - self.roots.append(new_root) - return len(self.roots) - 1 # return the index of the new version - - def _update(self, node: Node, start: int, end: int, index: int, value: int) -> Node: - if start == end: - new_node = Node(value) - return new_node - mid = (start + end) // 2 - new_node = Node() - if index <= mid: - new_node.left = self._update(node.left, start, mid, index, value) - new_node.right = node.right - else: - new_node.left = node.left - new_node.right = self._update(node.right, mid + 1, end, index, value) - new_node.value = new_node.left.value + new_node.right.value - return new_node - - def query(self, version: int, left: int, right: int) -> int: - """ - Queries the sum in the given range for the specified version. 
- - >>> pst = PersistentSegmentTree([1, 2, 3]) - >>> version_1 = pst.update(0, 1, 5) - >>> pst.query(version_1, 0, 1) # Query sum from index 0 to 1 - 6 - >>> pst.query(version_1, 0, 2) # Query sum from index 0 to 2 - 9 - """ - return self._query(self.roots[version], 0, self.n - 1, left, right) - - def _query(self, node: Node, start: int, end: int, left: int, right: int) -> int: - if left > end or right < start: - return 0 - if left <= start and right >= end: - return node.value - mid = (start + end) // 2 - return self._query(node.left, start, mid, left, right) + self._query( - node.right, mid + 1, end, left, right - )