From 0457860ed491521b02d1559a76874d3e766823c0 Mon Sep 17 00:00:00 2001 From: Putul Singh <127419636+putul03@users.noreply.github.com> Date: Sat, 19 Oct 2024 13:21:16 +0530 Subject: [PATCH 1/6] Suffix Array and LCP Array Implementation --- divide_and_conquer/suffix_array_lcp.py | 62 ++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 divide_and_conquer/suffix_array_lcp.py diff --git a/divide_and_conquer/suffix_array_lcp.py b/divide_and_conquer/suffix_array_lcp.py new file mode 100644 index 000000000000..c78acf49cdac --- /dev/null +++ b/divide_and_conquer/suffix_array_lcp.py @@ -0,0 +1,62 @@ +#!/usr/bin/env python3 + +def build_suffix_array(s: str) -> list[int]: + """ + Build the suffix array for the given string. + + Parameters: + s (str): The input string. + + Returns: + list[int]: The suffix array (a list of starting indices of + suffixes in sorted order). + """ + suffixes = [(s[i:], i) for i in range(len(s))] + suffixes.sort() # Sort the suffixes lexicographically + suffix_array = [suffix[1] for suffix in suffixes] + return suffix_array + +def build_lcp_array(s: str, suffix_array: list[int]) -> list[int]: + """ + Build the LCP array for the given string and suffix array. + + Parameters: + s (str): The input string. + suffix_array (list[int]): The suffix array. + + Returns: + list[int]: The LCP array. + """ + n = len(s) + rank = [0] * n + lcp = [0] * n + + # Compute the rank of each suffix + for i, suffix_index in enumerate(suffix_array): + rank[suffix_index] = i + + # Compute the LCP array + h = 0 + for i in range(n): + if rank[i] > 0: + j = suffix_array[rank[i] - 1] + while (i + h < n) and (j + h < n) and (s[i + h] == s[j + h]): + h += 1 + lcp[rank[i]] = h + if h > 0: + h -= 1 # Decrease h for the next suffix + return lcp + +# Example usage +if __name__ == "__main__": + s = "banana" + suffix_array = build_suffix_array(s) + lcp_array = build_lcp_array(s, suffix_array) + + print("Suffix Array:") + for i in range(len(suffix_array)): + print(f"{suffix_array[i]}: {s[suffix_array[i]:]}") + + print("\nLCP Array:") + for i in range(1, len(lcp_array)): + print(f"LCP between {s[suffix_array[i - 1]:]} and {s[suffix_array[i]]}: {lcp_array[i]}") From 465cea3cbb59fcde7fb54550d44570b37b1bd174 Mon Sep 17 00:00:00 2001 From: Putul Singh <127419636+putul03@users.noreply.github.com> Date: Sat, 19 Oct 2024 15:55:36 +0530 Subject: [PATCH 2/6] Delete divide_and_conquer/suffix_array_lcp.py --- divide_and_conquer/suffix_array_lcp.py | 62 -------------------------- 1 file changed, 62 deletions(-) delete mode 100644 divide_and_conquer/suffix_array_lcp.py diff --git a/divide_and_conquer/suffix_array_lcp.py b/divide_and_conquer/suffix_array_lcp.py deleted file mode 100644 index c78acf49cdac..000000000000 --- a/divide_and_conquer/suffix_array_lcp.py +++ /dev/null @@ -1,62 +0,0 @@ -#!/usr/bin/env python3 - -def build_suffix_array(s: str) -> list[int]: - """ - Build the suffix array for the given string. - - Parameters: - s (str): The input string. - - Returns: - list[int]: The suffix array (a list of starting indices of - suffixes in sorted order). - """ - suffixes = [(s[i:], i) for i in range(len(s))] - suffixes.sort() # Sort the suffixes lexicographically - suffix_array = [suffix[1] for suffix in suffixes] - return suffix_array - -def build_lcp_array(s: str, suffix_array: list[int]) -> list[int]: - """ - Build the LCP array for the given string and suffix array. - - Parameters: - s (str): The input string. - suffix_array (list[int]): The suffix array. - - Returns: - list[int]: The LCP array. - """ - n = len(s) - rank = [0] * n - lcp = [0] * n - - # Compute the rank of each suffix - for i, suffix_index in enumerate(suffix_array): - rank[suffix_index] = i - - # Compute the LCP array - h = 0 - for i in range(n): - if rank[i] > 0: - j = suffix_array[rank[i] - 1] - while (i + h < n) and (j + h < n) and (s[i + h] == s[j + h]): - h += 1 - lcp[rank[i]] = h - if h > 0: - h -= 1 # Decrease h for the next suffix - return lcp - -# Example usage -if __name__ == "__main__": - s = "banana" - suffix_array = build_suffix_array(s) - lcp_array = build_lcp_array(s, suffix_array) - - print("Suffix Array:") - for i in range(len(suffix_array)): - print(f"{suffix_array[i]}: {s[suffix_array[i]:]}") - - print("\nLCP Array:") - for i in range(1, len(lcp_array)): - print(f"LCP between {s[suffix_array[i - 1]:]} and {s[suffix_array[i]]}: {lcp_array[i]}") From 8ab292751c50cf330e9253402d335006304d43e0 Mon Sep 17 00:00:00 2001 From: Putul Singh <127419636+putul03@users.noreply.github.com> Date: Sat, 19 Oct 2024 15:58:29 +0530 Subject: [PATCH 3/6] Add files via upload --- data_structures/persistent_segment_tree.py | 80 ++++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 data_structures/persistent_segment_tree.py diff --git a/data_structures/persistent_segment_tree.py b/data_structures/persistent_segment_tree.py new file mode 100644 index 000000000000..8963b54a2ba6 --- /dev/null +++ b/data_structures/persistent_segment_tree.py @@ -0,0 +1,80 @@ +class Node: + def __init__(self, value: int = 0) -> None: + self.value = value + self.left = None + self.right = None + + +class PersistentSegmentTree: + def __init__(self, arr: list[int]) -> None: + self.n = len(arr) + self.roots: list[Node] = [] + self.roots.append(self._build(arr, 0, self.n - 1)) + + def _build(self, arr: list[int], start: int, end: int) -> Node: + """ + Builds a segment tree from the provided array. + + >>> pst = PersistentSegmentTree([1, 2, 3]) + >>> root = pst._build([1, 2, 3], 0, 2) + >>> root.value # Sum of the whole array + 6 + """ + if start == end: + return Node(arr[start]) + mid = (start + end) // 2 + node = Node() + node.left = self._build(arr, start, mid) + node.right = self._build(arr, mid + 1, end) + node.value = node.left.value + node.right.value + return node + + def update(self, version: int, index: int, value: int) -> int: + """ + Updates the segment tree with a new value at the specified index. + + >>> pst = PersistentSegmentTree([1, 2, 3]) + >>> version_1 = pst.update(0, 1, 5) + >>> pst.query(version_1, 0, 2) # Query sum from index 0 to 2 + 9 + """ + new_root = self._update(self.roots[version], 0, self.n - 1, index, value) + self.roots.append(new_root) + return len(self.roots) - 1 # return the index of the new version + + def _update(self, node: Node, start: int, end: int, index: int, value: int) -> Node: + if start == end: + new_node = Node(value) + return new_node + mid = (start + end) // 2 + new_node = Node() + if index <= mid: + new_node.left = self._update(node.left, start, mid, index, value) + new_node.right = node.right + else: + new_node.left = node.left + new_node.right = self._update(node.right, mid + 1, end, index, value) + new_node.value = new_node.left.value + new_node.right.value + return new_node + + def query(self, version: int, left: int, right: int) -> int: + """ + Queries the sum in the given range for the specified version. + + >>> pst = PersistentSegmentTree([1, 2, 3]) + >>> version_1 = pst.update(0, 1, 5) + >>> pst.query(version_1, 0, 1) # Query sum from index 0 to 1 + 6 + >>> pst.query(version_1, 0, 2) # Query sum from index 0 to 2 + 9 + """ + return self._query(self.roots[version], 0, self.n - 1, left, right) + + def _query(self, node: Node, start: int, end: int, left: int, right: int) -> int: + if left > end or right < start: + return 0 + if left <= start and right >= end: + return node.value + mid = (start + end) // 2 + return (self._query(node.left, start, mid, left, right) + + self._query(node.right, mid + 1, end, left, right)) From f5d380aafb63f7f676ff00dfd651ba3c691daac0 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 19 Oct 2024 10:30:45 +0000 Subject: [PATCH 4/6] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- data_structures/persistent_segment_tree.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/data_structures/persistent_segment_tree.py b/data_structures/persistent_segment_tree.py index 8963b54a2ba6..7d7851071612 100644 --- a/data_structures/persistent_segment_tree.py +++ b/data_structures/persistent_segment_tree.py @@ -76,5 +76,6 @@ def _query(self, node: Node, start: int, end: int, left: int, right: int) -> int if left <= start and right >= end: return node.value mid = (start + end) // 2 - return (self._query(node.left, start, mid, left, right) + - self._query(node.right, mid + 1, end, left, right)) + return self._query(node.left, start, mid, left, right) + self._query( + node.right, mid + 1, end, left, right + ) From 819fdc88360a83d6d451c1ac501db54a9f6208b3 Mon Sep 17 00:00:00 2001 From: Putul Singh <127419636+putul03@users.noreply.github.com> Date: Sat, 19 Oct 2024 16:16:21 +0530 Subject: [PATCH 5/6] Update persistent_segment_tree.py --- data_structures/persistent_segment_tree.py | 48 +++++++++++++--------- 1 file changed, 29 insertions(+), 19 deletions(-) diff --git a/data_structures/persistent_segment_tree.py b/data_structures/persistent_segment_tree.py index 7d7851071612..5bcf1f64d80e 100644 --- a/data_structures/persistent_segment_tree.py +++ b/data_structures/persistent_segment_tree.py @@ -7,19 +7,18 @@ def __init__(self, value: int = 0) -> None: class PersistentSegmentTree: def __init__(self, arr: list[int]) -> None: - self.n = len(arr) - self.roots: list[Node] = [] - self.roots.append(self._build(arr, 0, self.n - 1)) - - def _build(self, arr: list[int], start: int, end: int) -> Node: """ - Builds a segment tree from the provided array. + Initialize the Persistent Segment Tree with the given array. >>> pst = PersistentSegmentTree([1, 2, 3]) - >>> root = pst._build([1, 2, 3], 0, 2) - >>> root.value # Sum of the whole array + >>> pst.query(0, 0, 2) 6 """ + self.n = len(arr) + self.roots: list[Node] = [] + self.roots.append(self._build(arr, 0, self.n - 1)) + + def _build(self, arr: list[int], start: int, end: int) -> Node: if start == end: return Node(arr[start]) mid = (start + end) // 2 @@ -31,41 +30,46 @@ def _build(self, arr: list[int], start: int, end: int) -> Node: def update(self, version: int, index: int, value: int) -> int: """ - Updates the segment tree with a new value at the specified index. + Update the value at the given index and return the new version. >>> pst = PersistentSegmentTree([1, 2, 3]) >>> version_1 = pst.update(0, 1, 5) - >>> pst.query(version_1, 0, 2) # Query sum from index 0 to 2 + >>> pst.query(version_1, 0, 2) 9 """ new_root = self._update(self.roots[version], 0, self.n - 1, index, value) self.roots.append(new_root) - return len(self.roots) - 1 # return the index of the new version + return len(self.roots) - 1 def _update(self, node: Node, start: int, end: int, index: int, value: int) -> Node: if start == end: - new_node = Node(value) - return new_node + return Node(value) + mid = (start + end) // 2 new_node = Node() + if index <= mid: new_node.left = self._update(node.left, start, mid, index, value) new_node.right = node.right else: new_node.left = node.left new_node.right = self._update(node.right, mid + 1, end, index, value) + new_node.value = new_node.left.value + new_node.right.value + return new_node def query(self, version: int, left: int, right: int) -> int: """ - Queries the sum in the given range for the specified version. + Query the sum in the given range for the specified version. >>> pst = PersistentSegmentTree([1, 2, 3]) + >>> pst.query(0, 0, 2) + 6 >>> version_1 = pst.update(0, 1, 5) - >>> pst.query(version_1, 0, 1) # Query sum from index 0 to 1 + >>> pst.query(version_1, 0, 1) 6 - >>> pst.query(version_1, 0, 2) # Query sum from index 0 to 2 + >>> pst.query(version_1, 0, 2) 9 """ return self._query(self.roots[version], 0, self.n - 1, left, right) @@ -76,6 +80,12 @@ def _query(self, node: Node, start: int, end: int, left: int, right: int) -> int if left <= start and right >= end: return node.value mid = (start + end) // 2 - return self._query(node.left, start, mid, left, right) + self._query( - node.right, mid + 1, end, left, right - ) + return (self._query(node.left, start, mid, left, right) + + self._query(node.right, mid + 1, end, left, right)) + + +if __name__ == "__main__": + import doctest + print("Running doctests...") + result = doctest.testmod() + print(f"Ran {result.attempted} tests, {result.failed} failed.") From 5de61840107d734cdc44b3b11602dbc53e41e7b5 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 19 Oct 2024 10:47:29 +0000 Subject: [PATCH 6/6] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- data_structures/persistent_segment_tree.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/data_structures/persistent_segment_tree.py b/data_structures/persistent_segment_tree.py index 5bcf1f64d80e..24a44d90a4b3 100644 --- a/data_structures/persistent_segment_tree.py +++ b/data_structures/persistent_segment_tree.py @@ -80,12 +80,14 @@ def _query(self, node: Node, start: int, end: int, left: int, right: int) -> int if left <= start and right >= end: return node.value mid = (start + end) // 2 - return (self._query(node.left, start, mid, left, right) + - self._query(node.right, mid + 1, end, left, right)) + return self._query(node.left, start, mid, left, right) + self._query( + node.right, mid + 1, end, left, right + ) if __name__ == "__main__": import doctest + print("Running doctests...") result = doctest.testmod() print(f"Ran {result.attempted} tests, {result.failed} failed.")