class OfficialSolution:
    """
    == Overview ==
    The problem can be solved with 2 important data structures, namely Graph and
    Union-Find. This class implements the graph (path search) approach.
    """

    def calcEquation(
        self, equations: List[List[str]], values: List[float], queries: List[List[str]]
    ) -> List[float]:
        """
        Evaluate division queries from a list of known quotients.

        Args:
            equations: Pairs ``[numerator, denominator]`` of variable names.
            values: ``values[i]`` is the quotient of ``equations[i]``. The values
                are assumed to be non-zero and positive (as in the problem
                statement); -1.0 is used as the "no answer" marker.
            queries: Pairs ``[numerator, denominator]`` to evaluate.

        Returns:
            One float per query, in order; -1.0 where the quotient cannot be
            derived from the equations.

        == Approach 1: Path Search in Graph ==
        == Intuition ==
        Given two equations, namely a/b=2, b/c=3, we could derive the following
        equations:
        - 1) b/a = 1/2, c/b = 1/3
        - 2) a/c = a/b * b/c = 6
        Each division implies the reverse of the division, which is how we derive
        the equations in (1). By chaining up equations, we obtain new equations as
        in (2).

        We can model this as a graph: each variable is a node, and each division is
        a directed, weighted edge. The direction of the edge indicates the order of
        the division, and the weight indicates its result:

             a/b=2     b/c=3
          a ------> b -------> c
          a <------ b <------- c
            b/a=1/2   c/b=1/3

        Evaluating a query (e.g. a/c = ?) is equivalent to two tasks on the graph:
        1. Find if there exists a path between the two nodes.
        2. If so, calculate the cumulative product of the weights along the path.

        In the above example, a path from a to c exists and the cumulative product
        is 2 * 3 = 6, so a/c = 6.

        == Algorithm ==
        Step 1. Build the graph out of the list of input equations.
        - Each equation corresponds to two edges in the graph.
        Step 2. Evaluate the queries one by one with a DFS (Depth-First Search)
        from the numerator to the denominator. Two exceptional cases:
        - Case 1. If either of the nodes does not exist in the graph, i.e. the
          variable did not appear in any of the input equations, no path exists.
        - Case 2. If the origin and the destination are the same node, i.e. a/a,
          we assume that there exists an invisible self-loop path for each node
          and the result is 1.

        Note: one could also apply BFS (Breadth-First Search) on the same graph;
        the essence of the solution, searching for a path, remains the same.

        == Complexity Analysis ==
        Let N be the number of input equations and M be the number of queries.
        Time Complexity: O(MN)
        - Building the graph takes O(1) per equation, O(N) in total.
        - Each query traverses the graph at most once, because a node is never
          explored twice within one query, which takes O(N) in the worst case.
        - Overall: O(N) + O(MN) = O(MN).

        Space Complexity: O(N)
        - The graph holds at most 2N nodes and 2N directed edges.
        - The recursion in the DFS consumes call stack space, up to O(N).
        - In addition, the set of visited nodes used by each query takes O(N).
        """

        # Graph as adjacency maps: graph[x][y] == x / y.
        graph = defaultdict(dict)

        for (numerator, denominator), value in zip(equations, values):
            graph[numerator][denominator] = value
            graph[denominator][numerator] = 1 / value

        def dfs(curr: str, end: str, visited: set, cum_weight: float = 1.0) -> float:
            """Return the product of weights on a path curr -> end, or -1.0."""
            if curr == end:
                return cum_weight

            visited.add(curr)

            for node, value in graph[curr].items():
                if node in visited:
                    continue

                ans = dfs(node, end, visited, cum_weight * value)
                if ans != -1.0:
                    return ans

            # The node is deliberately left in `visited`: a node from which the
            # target was not reached cannot reach it later in the same query, and
            # un-marking it would make the search enumerate every simple path.
            return -1.0

        ans = []
        for start, end in queries:
            # Membership tests do not insert keys into the defaultdict.
            if start not in graph or end not in graph:
                ans.append(-1.0)
            else:
                ans.append(dfs(start, end, set()))

        return ans
class TreeNode:
    """Binary tree node holding a value and optional left/right children."""

    def __init__(self, x):
        self.val = x
        self.left = None
        self.right = None


class Solution:
    def getTargetCopy(
        self, original: TreeNode, cloned: TreeNode, target: TreeNode
    ) -> TreeNode:
        """
        Return the node of ``cloned`` that sits where ``target`` sits in ``original``.

        Both trees are walked in lock-step, so the answer is found by position.
        The target is matched by identity (``is``), not by value, which keeps the
        result correct even if several nodes hold equal values.

        Args:
            original: Root of the original tree.
            cloned: Root of a tree with the same shape as ``original``.
            target: A node object belonging to ``original``.

        Returns:
            The corresponding node in ``cloned``, or None if ``target`` is not a
            node of ``original``.
        """
        # Explicit stack instead of recursion, so deep trees do not hit the
        # interpreter's recursion limit.
        stack = [(original, cloned)]
        while stack:
            original_node, cloned_node = stack.pop()
            if original_node is None:
                continue
            if original_node is target:
                return cloned_node
            # Push right first so that the left subtree is visited first.
            stack.append((original_node.right, cloned_node.right))
            stack.append((original_node.left, cloned_node.left))
        return None
class StreamChecker:
    """
    Tells, after each streamed letter, whether some word ends at that letter.

    The words are stored reversed in a trie, and the stream is kept newest-first,
    so matching a suffix of the stream is a plain walk down from the trie root.
    """

    def __init__(self, words: List[str]):
        """
        Build the reversed-word trie.

        Args:
            words: The words to look for. The marker key "$" flags the end of a
                word, so letters are assumed never to be "$" (the problem uses
                lowercase letters only).
        """
        trie = {"root": {}}
        longest = 0
        for word in words:
            node = trie["root"]
            # Insert back-to-front: the newest stream letter must match the
            # word's last letter.
            for ch in reversed(word):
                node = node.setdefault(ch, {})
            node["$"] = 1
            longest = max(longest, len(word))

        self.trie = trie
        # Letters older than the longest word can never take part in a match, so
        # the deque drops them automatically from the right end.
        self.stream = deque(maxlen=longest)

    def query(self, letter: str) -> bool:
        """
        Append ``letter`` to the stream and check for a word ending at it.

        Returns:
            True if some suffix of the letters seen so far equals one of the
            words, False otherwise.
        """
        self.stream.appendleft(letter)
        node = self.trie["root"]
        for ch in self.stream:
            if ch not in node:
                return False
            node = node[ch]
            if "$" in node:
                return True
        return False


# Your StreamChecker object will be instantiated and called as such:
# obj = StreamChecker(words)
# param_1 = obj.query(letter)
class OfficialSolution:
    """
    Given a list of non-duplicate words, construct all possible word squares.

    A sequence of words forms a valid word square if and only if each string Hk
    that is formed horizontally from the kth row equals the string Vk that is
    formed vertically from the kth column, i.e. Hk == Vk for all k where
    0 <= k < max(numRows, numColumns). Here numRows = numColumns since H0 == V0.

    The resulting letter matrix is therefore symmetrical across its diagonal: if
    we know the upper right part of the word square, we can infer its lower left
    part, and vice versa. This symmetry is the constraint that narrows down the
    valid combinations.

    == Algorithm: Backtracking ==
    We construct the word square row by row from top to bottom. At each row we do
    trial and error: we try one word, and if it does not meet the constraint we
    try another one. This is backtracking, which is often associated with
    recursion and DFS (Depth-First Search).

    Example with the words [ball, able, area, lead, lady]:
    1. Start with the word ball in the first row.
    2. By symmetry, the word in the second row must start with the prefix a.
    3. Two words have the prefix a (able, area); both are candidates.
    4. Fill the second row with able. The third row must then start with the
       prefix ll. No word starts with ll, so we abandon this path and backtrack
       to the previous state (only the first row filled).
    5. Try area in the second row instead. The third row must then start with
       the prefix le, and this time there is a candidate (lead).
    6. Fill the third row with lead, and so on.
    7. Repeating the above with each word as the starting word exhausts all the
       possibilities to construct a valid word square.
    """

    @staticmethod
    def _build_squares(candidates_for) -> List[List[str]]:
        """
        Run the row-by-row backtracking shared by both public methods.

        Args:
            candidates_for: Callable mapping a prefix string to an iterable of the
                words that start with it, in the order they should be tried.

        Returns:
            Every completed word square, in the order it was found.
        """
        squares: List[List[str]] = []
        square: List[str] = []

        def backtrack() -> None:
            row = len(square)
            if square and row == len(square[0]):
                # `square` keeps being modified, so store a copy of it.
                squares.append(square.copy())
                return

            # By symmetry, the next row must start with the letters found in
            # column `row` of the rows placed so far.
            prefix = "".join(placed[row] for placed in square)

            for word in candidates_for(prefix):
                square.append(word)
                backtrack()
                square.pop()

        backtrack()
        return squares

    def wordSquares(self, words: List[str]) -> List[List[str]]:
        """
        Brute-force backtracking: every row scans the whole word list.

        Gets TLEd on the judge; kept as the baseline for wordSquaresApproach1.
        """

        def candidates_for(prefix: str) -> List[str]:
            return [word for word in words if word.startswith(prefix)]

        return self._build_squares(candidates_for)

    def wordSquaresApproach1(self, words: List[str]) -> List[List[str]]:
        """
        Backtracking with a prefix hashtable.

        == Intuition ==
        The bottleneck of the plain backtracking is finding all words with a given
        prefix: each invocation iterates through the entire input list, which is
        of linear time complexity O(N).

        To optimize it we process the words beforehand and build a hashtable with
        all possible prefixes as keys and the words that start with the prefix as
        values. Later, given a prefix, the candidate list is found with a single
        dictionary lookup.

        == Algorithm ==
        - We build upon the backtracking algorithm above and tweak two parts.
        - In the first part, we build a hashtable out of the input words.
        - In the second part, we simply query the hashtable to retrieve all the
          words that possess the given prefix.
        """
        prefix_to_words = defaultdict(list)
        for word in words:
            for end in range(1, len(word) + 1):
                prefix_to_words[word[:end]].append(word)

        def candidates_for(prefix: str):
            if not prefix:
                # Every word matches the empty prefix.
                return words
            # .get() avoids inserting an empty list for every missed prefix.
            return prefix_to_words.get(prefix, ())

        return self._build_squares(candidates_for)
"e"], ["b", "e"]], + values=[4.0, 3.0], + queries=[["a", "b"], ["e", "e"], ["x", "x"]], + ) == [1.3333333333333333, 1.00000, -1.00000] diff --git a/tests/test_find_a_corresponding_node_of_a_binary_tree_in_a_clone_of_that_tree.py b/tests/test_find_a_corresponding_node_of_a_binary_tree_in_a_clone_of_that_tree.py new file mode 100644 index 0000000..d59b907 --- /dev/null +++ b/tests/test_find_a_corresponding_node_of_a_binary_tree_in_a_clone_of_that_tree.py @@ -0,0 +1,3 @@ +class TestSolution: + def test_example_1(self): + pass diff --git a/tests/test_first_missing_positive.py b/tests/test_first_missing_positive.py new file mode 100644 index 0000000..7a7fa69 --- /dev/null +++ b/tests/test_first_missing_positive.py @@ -0,0 +1,14 @@ +import unittest + +from first_missing_positive import OfficialSolution + + +class TestFirstMissingPositive(unittest.TestCase): + def test_example_1(self): + assert OfficialSolution().firstMissingPositive(nums=[1, 2, 0]) == 3 + + def test_example_2(self): + assert OfficialSolution().firstMissingPositive(nums=[3, 4, -1, 1]) == 2 + + def test_example_3(self): + assert OfficialSolution().firstMissingPositive(nums=[7, 8, 9, 11, 12]) == 1 diff --git a/tests/test_stream_of_characters.py b/tests/test_stream_of_characters.py new file mode 100644 index 0000000..d8bb0e6 --- /dev/null +++ b/tests/test_stream_of_characters.py @@ -0,0 +1,34 @@ +import unittest + +from stream_of_characters import StreamChecker + + +class TestStreamOfCharacters(unittest.TestCase): + def test_example_1(self): + streamChecker = StreamChecker(["cd", "f", "kl"]) + assert streamChecker.query("a") is False + assert streamChecker.query("b") is False + assert streamChecker.query("c") is False + assert streamChecker.query("d") is True + assert streamChecker.query("e") is False + assert streamChecker.query("f") is True + assert streamChecker.query("g") is False + assert streamChecker.query("h") is False + assert streamChecker.query("i") is False + assert streamChecker.query("j") is 
class TestWordSquares(unittest.TestCase):
    """Runs both word-square solvers against the same examples."""

    def _check_both_solvers(self, words, expected):
        """Assert that each solver returns ``expected`` for ``words``."""
        solution = OfficialSolution()
        for solver in (solution.wordSquares, solution.wordSquaresApproach1):
            # subTest reports each solver separately, so a failure in the first
            # one does not hide the result of the second.
            with self.subTest(solver=solver.__name__):
                # assertEqual shows a diff on failure and is not stripped when
                # Python runs with -O, unlike a bare assert.
                self.assertEqual(solver(words=words), expected)

    def test_example_1(self):
        self._check_both_solvers(
            words=["area", "lead", "wall", "lady", "ball"],
            expected=[
                ["wall", "area", "lead", "lady"],
                ["ball", "area", "lead", "lady"],
            ],
        )

    def test_example_2(self):
        self._check_both_solvers(
            words=["abat", "baba", "atan", "atal"],
            expected=[
                ["baba", "abat", "baba", "atan"],
                ["baba", "abat", "baba", "atal"],
            ],
        )