# Suffix tree package, reconstructed from a whitespace-collapsed patch.
#
# The patch added the following files; their contents are kept together here,
# in dependency order, so that this module is importable on its own:
#   data_structures/suffix_tree/__init__.py                 (empty)
#   data_structures/suffix_tree/example/__init__.py         (empty)
#   data_structures/suffix_tree/example/example_usage.py    -> main()
#   data_structures/suffix_tree/suffix_tree.py              -> SuffixTree
#   data_structures/suffix_tree/suffix_tree_node.py         -> SuffixTreeNode
#   data_structures/suffix_tree/tests/__init__.py           (empty)
#   data_structures/suffix_tree/tests/test_suffix_tree.py   -> TestSuffixTree
#
# The patch also added these entries to DIRECTORY.md, directly after the
# kd_tree example entries:
#   * Suffix Tree
#     * [Suffix Tree Node](data_structures/suffix_tree/suffix_tree_node.py)
#     * [Suffix Tree](data_structures/suffix_tree/suffix_tree.py)
#     * [Example Usage](data_structures/suffix_tree/example/example_usage.py)

from __future__ import annotations

import unittest


class SuffixTreeNode:
    """A single node of the suffix tree; each edge is keyed by one character."""

    def __init__(
        self,
        children: dict[str, SuffixTreeNode] | None = None,
        is_end_of_string: bool = False,
        start: int | None = None,
        end: int | None = None,
        suffix_link: SuffixTreeNode | None = None,
    ) -> None:
        """
        Initialize a suffix tree node.

        Args:
            children: Mapping from a character to the child node reached by
                that character. Defaults to a fresh empty dictionary.
            is_end_of_string: Whether a suffix ends at this node.
            start: Start index (in the text) of the suffix ending here.
            end: End index (in the text) of the suffix ending here.
            suffix_link: Link to another node. It is only stored; nothing in
                the code visible here follows it.
        """
        # Test against None rather than truthiness: ``children or {}`` would
        # silently replace a caller-supplied *empty* dict with a new one, so
        # later insertions would not be visible through the caller's mapping.
        self.children: dict[str, SuffixTreeNode] = (
            children if children is not None else {}
        )
        self.is_end_of_string = is_end_of_string
        self.start = start
        self.end = end
        self.suffix_link = suffix_link


class SuffixTree:
    """
    Substring index over a text, built by inserting every suffix.

    Every character of every suffix gets its own node (an uncompressed suffix
    trie), so construction performs one step per character of each suffix.
    """

    def __init__(self, text: str) -> None:
        """
        Initialize the suffix tree with the given text and build it.

        Args:
            text: The text for which the suffix tree is to be built.
        """
        self.text: str = text
        self.root: SuffixTreeNode = SuffixTreeNode()
        self.build_suffix_tree()

    def build_suffix_tree(self) -> None:
        """Build the tree by adding every suffix of ``self.text``."""
        for index in range(len(self.text)):
            self._add_suffix(self.text[index:], index)

    def _add_suffix(self, suffix: str, index: int) -> None:
        """
        Add one suffix to the tree, creating missing nodes along the way.

        Args:
            suffix: The suffix to add.
            index: The starting index of the suffix in the original text.
        """
        node = self.root
        for char in suffix:
            if char not in node.children:
                node.children[char] = SuffixTreeNode()
            node = node.children[char]
        # Mark the node where this suffix ends and record its span in the text.
        node.is_end_of_string = True
        node.start = index
        node.end = index + len(suffix) - 1

    def search(self, pattern: str) -> bool:
        """
        Report whether ``pattern`` occurs in the text.

        A pattern is a substring exactly when it is a prefix of some suffix,
        i.e. when it can be spelled out along a path starting at the root.
        The empty pattern is always found.

        Args:
            pattern: The pattern to search for.

        Returns:
            True if the pattern is found, False otherwise.
        """
        node = self.root
        for char in pattern:
            if char not in node.children:
                return False
            node = node.children[char]
        return True


def main() -> None:
    """
    Demonstrate the usage of the SuffixTree class.

    - Initializes a SuffixTree with a predefined text.
    - Defines a list of patterns to search for within the suffix tree.
    - Searches for each pattern in the suffix tree and prints the result.

    Patterns tested:
        - "ana" (found) --> True
        - "ban" (found) --> True
        - "na" (found) --> True
        - "xyz" (not found) --> False
        - "mon" (found) --> True
    """
    text = "monkey banana"
    suffix_tree = SuffixTree(text)

    patterns = ["ana", "ban", "na", "xyz", "mon"]
    for pattern in patterns:
        found = suffix_tree.search(pattern)
        print(f"Pattern '{pattern}' found: {found}")


class TestSuffixTree(unittest.TestCase):
    """Unit tests for SuffixTree.search on the text "banana"."""

    def setUp(self) -> None:
        """Set up the initial conditions for each test."""
        self.text = "banana"
        self.suffix_tree = SuffixTree(self.text)

    def test_search_existing_patterns(self) -> None:
        """Test searching for patterns that exist in the suffix tree."""
        patterns = ["ana", "ban", "na"]
        for pattern in patterns:
            with self.subTest(pattern=pattern):
                self.assertTrue(
                    self.suffix_tree.search(pattern),
                    f"Pattern '{pattern}' should be found.",
                )

    def test_search_non_existing_patterns(self) -> None:
        """Test searching for patterns that do not exist in the suffix tree."""
        patterns = ["xyz", "apple", "cat"]
        for pattern in patterns:
            with self.subTest(pattern=pattern):
                self.assertFalse(
                    self.suffix_tree.search(pattern),
                    f"Pattern '{pattern}' should not be found.",
                )

    def test_search_empty_pattern(self) -> None:
        """Test searching for an empty pattern."""
        self.assertTrue(
            self.suffix_tree.search(""),
            "An empty pattern should be found.",
        )

    def test_search_full_text(self) -> None:
        """Test searching for the full text."""
        self.assertTrue(
            self.suffix_tree.search(self.text),
            "The full text should be found in the suffix tree.",
        )

    def test_search_substrings(self) -> None:
        """Test searching for substrings of the full text."""
        substrings = ["ban", "ana", "a", "na"]
        for substring in substrings:
            with self.subTest(substring=substring):
                self.assertTrue(
                    self.suffix_tree.search(substring),
                    f"Substring '{substring}' should be found.",
                )


if __name__ == "__main__":
    # The example script and the test module each had their own entry point;
    # combined here, the demo runs first and the unit tests afterwards.
    main()
    unittest.main()