From 7cd00a14a4fa3823f0e0259173ba1cc194572a74 Mon Sep 17 00:00:00 2001 From: Javon Davis Date: Mon, 21 Aug 2017 11:32:11 -0500 Subject: [PATCH 1/6] Started Trie implementation. * Basic definition of Trie in comments at the top of the file * Defined Trie class and method signatures. --- data_structures/Trie/Trie.py | 21 +++++++++++++++++++++ data_structures/Trie/__init__.py | 0 2 files changed, 21 insertions(+) create mode 100644 data_structures/Trie/Trie.py create mode 100644 data_structures/Trie/__init__.py diff --git a/data_structures/Trie/Trie.py b/data_structures/Trie/Trie.py new file mode 100644 index 000000000000..61d57c0c945c --- /dev/null +++ b/data_structures/Trie/Trie.py @@ -0,0 +1,21 @@ +''' +A Trie/Prefix Tree is a kind of search tree used to provide quick lookup +of words/patterns in a set of words. A basic Trie however has O(n^2) space complexity +making it impractical in practice. It however provides O(max(search_string, length of longest word)) lookup +time making it an optimal approach when space is not an issue. +''' + + +class TrieNode: + def __init__(self): + self.nodes = dict() # Mapping from char to TrieNode + + def add_words(self, words: [str]): + for word in words: + self.add_word(word) + + def add_word(self, word: str): + pass + + def lookup_word(self, word: str) -> bool: + pass \ No newline at end of file diff --git a/data_structures/Trie/__init__.py b/data_structures/Trie/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 From 03b7600eda114f89bc2d9fa8583c7bde50316870 Mon Sep 17 00:00:00 2001 From: Javon Davis Date: Mon, 21 Aug 2017 11:34:49 -0500 Subject: [PATCH 2/6] * Renamed method signatures to match formal definitions --- data_structures/Trie/Trie.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/data_structures/Trie/Trie.py b/data_structures/Trie/Trie.py index 61d57c0c945c..5d726fd2ece5 100644 --- a/data_structures/Trie/Trie.py +++ b/data_structures/Trie/Trie.py @@ -10,12 +10,12 @@ class TrieNode: def __init__(self): self.nodes = dict() # Mapping from char to TrieNode - def add_words(self, words: [str]): + def insert_many(self, words: [str]): for word in words: - self.add_word(word) + self.insert(word) - def add_word(self, word: str): + def insert(self, word: str): pass - def lookup_word(self, word: str) -> bool: + def find(self, word: str) -> bool: pass \ No newline at end of file From 9a521679ea69aeb709bf29cb8a374867bcd0af86 Mon Sep 17 00:00:00 2001 From: Javon Davis Date: Sun, 10 Sep 2017 02:16:10 -0500 Subject: [PATCH 3/6] Finished Simple Trie implementation and added test functions * Finished function to insert a word into Trie * Finished function to find a word in the Trie * Added Test functions with Assertions --- data_structures/Trie/Trie.py | 66 +++++++++++++++++++++++++++++++++--- 1 file changed, 61 insertions(+), 5 deletions(-) diff --git a/data_structures/Trie/Trie.py b/data_structures/Trie/Trie.py index 5d726fd2ece5..1acce956d1c9 100644 --- a/data_structures/Trie/Trie.py +++ b/data_structures/Trie/Trie.py @@ -1,21 +1,77 @@ -''' +""" A Trie/Prefix Tree is a kind of search tree used to provide quick lookup of words/patterns in a set of words. A basic Trie however has O(n^2) space complexity making it impractical in practice. It however provides O(max(search_string, length of longest word)) lookup time making it an optimal approach when space is not an issue. -''' + +This implementation assumes the character $ is not in any of the words. This character is used in the implementation +to mark the end of a word. +""" class TrieNode: def __init__(self): - self.nodes = dict() # Mapping from char to TrieNode + self.nodes = dict() # Mapping from char to TrieNode def insert_many(self, words: [str]): + """ + Inserts a list of words into the Trie + :param words: list of string words + :return: None + """ for word in words: self.insert(word) def insert(self, word: str): - pass + """ + Inserts a word into the Trie + :param word: word to be inserted + :return: None + """ + word += '$' + curr = self + for char in word: + if char not in curr.nodes: + curr.nodes[char] = TrieNode() + curr = curr.nodes[char] def find(self, word: str) -> bool: - pass \ No newline at end of file + """ + Tries to find word in a Trie + :param word: word to look for + :return: Returns True if word is found, False otherwise + """ + word += '$' + curr = self + for char in word: + if char not in curr.nodes: + return False + curr = curr.nodes[char] + return True + + +def print_words(node: TrieNode, word: str): + """ + Prints all the words in a Trie + :param node: root node of Trie + :param word: Word variable should be empty at start + :return: None + """ + if '$' in node.nodes: + print(word, end=' ') + + for key, value in node.nodes.items(): + print_words(value, word + key) + + +def test(): + words = ['banana', 'bananas', 'bandana', 'band', 'apple', 'all', 'beast'] + root = TrieNode() + root.insert_many(words) + # print_words(root, '') + assert root.find('banana') + assert not root.find('bandanas') + assert not root.find('apps') + assert root.find('apple') + +# test() From 8c77a4e04044c4a34984c807151b75355e7ede92 Mon Sep 17 00:00:00 2001 From: Javon Davis Date: Sun, 10 Sep 2017 02:24:20 -0500 Subject: [PATCH 4/6] Updated test function to read from the Dictionary.txt file in repository --- data_structures/Trie/Trie.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/data_structures/Trie/Trie.py b/data_structures/Trie/Trie.py index 1acce956d1c9..76533429b729 100644 --- a/data_structures/Trie/Trie.py +++ b/data_structures/Trie/Trie.py @@ -65,11 +65,15 @@ def print_words(node: TrieNode, word: str): def test(): - words = ['banana', 'bananas', 'bandana', 'band', 'apple', 'all', 'beast'] + words = [] + # Load words from text file into Trie + with open("../../other/Dictionary.txt", "r") as ins: + for line in ins: + words.append(line.strip().lower()) root = TrieNode() root.insert_many(words) # print_words(root, '') - assert root.find('banana') + assert root.find('bananas') assert not root.find('bandanas') assert not root.find('apps') assert root.find('apple') From 2ff9e0d4450f61f58b00bd748ad0462c0760853b Mon Sep 17 00:00:00 2001 From: Javon Davis Date: Sun, 10 Sep 2017 11:54:31 -0500 Subject: [PATCH 5/6] * No longer using $ to mark end of word * No longer reading from file but instead provided simple sample input for easier testing --- data_structures/Trie/Trie.py | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/data_structures/Trie/Trie.py b/data_structures/Trie/Trie.py index 76533429b729..7c886144d1f4 100644 --- a/data_structures/Trie/Trie.py +++ b/data_structures/Trie/Trie.py @@ -4,14 +4,13 @@ making it impractical in practice. It however provides O(max(search_string, length of longest word)) lookup time making it an optimal approach when space is not an issue. -This implementation assumes the character $ is not in any of the words. This character is used in the implementation -to mark the end of a word. """ class TrieNode: def __init__(self): self.nodes = dict() # Mapping from char to TrieNode + self.is_leaf = False def insert_many(self, words: [str]): """ @@ -28,12 +27,12 @@ def insert(self, word: str): :param word: word to be inserted :return: None """ - word += '$' curr = self for char in word: if char not in curr.nodes: curr.nodes[char] = TrieNode() curr = curr.nodes[char] + curr.is_leaf = True def find(self, word: str) -> bool: """ @@ -41,13 +40,12 @@ def find(self, word: str) -> bool: :param word: word to look for :return: Returns True if word is found, False otherwise """ - word += '$' curr = self for char in word: if char not in curr.nodes: return False curr = curr.nodes[char] - return True + return curr.is_leaf def print_words(node: TrieNode, word: str): @@ -57,7 +55,7 @@ def print_words(node: TrieNode, word: str): :param word: Word variable should be empty at start :return: None """ - if '$' in node.nodes: + if node.is_leaf: print(word, end=' ') for key, value in node.nodes.items(): @@ -65,17 +63,13 @@ def print_words(node: TrieNode, word: str): def test(): - words = [] - # Load words from text file into Trie - with open("../../other/Dictionary.txt", "r") as ins: - for line in ins: - words.append(line.strip().lower()) + words = ['banana', 'bananas', 'bandana', 'band', 'apple', 'all', 'beast'] root = TrieNode() root.insert_many(words) # print_words(root, '') - assert root.find('bananas') + assert root.find('banana') assert not root.find('bandanas') assert not root.find('apps') assert root.find('apple') -# test() +test() From 40ffb71682b92378749bc27316affaeedc76b34d Mon Sep 17 00:00:00 2001 From: Javon Davis Date: Sun, 10 Sep 2017 12:21:22 -0500 Subject: [PATCH 6/6] Deleting empty __init__.py file --- data_structures/Trie/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 data_structures/Trie/__init__.py diff --git a/data_structures/Trie/__init__.py b/data_structures/Trie/__init__.py deleted file mode 100644 index e69de29bb2d1..000000000000