# Reconstructed from a patch that adds two modules:
#   data_structures/linked_list/remove_duplicates.py
#   dynamic_programming/distinct_subsequences.py
from __future__ import annotations

from dataclasses import dataclass

# ---------------------------------------------------------------------------
# data_structures/linked_list/remove_duplicates.py
# ---------------------------------------------------------------------------


@dataclass
class Node:
    """A singly linked list node holding an integer payload."""

    data: int
    next_node: Node | None = None  # successor node, or None at the tail


def print_linked_list(head: Node | None) -> None:
    """
    Print the entire linked list iteratively as ``a->b->c``.

    Nothing is printed (not even a newline) when the list is empty.

    >>> head = insert_node(None, 0)
    >>> head = insert_node(head, 2)
    >>> head = insert_node(head, 1)
    >>> print_linked_list(head)
    0->2->1
    >>> head = insert_node(head, 4)
    >>> head = insert_node(head, 5)
    >>> print_linked_list(head)
    0->2->1->4->5
    >>> print_linked_list(None)
    """
    if head is None:
        return

    values = []
    node: Node | None = head
    while node is not None:
        values.append(str(node.data))
        node = node.next_node
    print("->".join(values))


def insert_node(head: Node | None, data: int) -> Node:
    """
    Insert a new node at the end of a linked list
    and return the head of the resulting list.

    When ``head`` is None the new node itself becomes the head.

    >>> head = insert_node(None, 10)
    >>> head = insert_node(head, 9)
    >>> head = insert_node(head, 8)
    >>> print_linked_list(head)
    10->9->8
    """
    new_node = Node(data)
    if head is None:
        return new_node

    # Walk to the current tail and hang the new node off it.
    tail = head
    while tail.next_node is not None:
        tail = tail.next_node
    tail.next_node = new_node
    return head


def remove_duplicates(head: Node | None) -> Node | None:
    """
    Remove nodes with duplicate data, keeping the first occurrence of each
    value. The list is modified in place and its head is returned.

    >>> head=insert_node(None,1)
    >>> head=insert_node(head,1)
    >>> head=insert_node(head,2)
    >>> head=insert_node(head,3)
    >>> head=insert_node(head,3)
    >>> head=insert_node(head,4)
    >>> head=insert_node(head,5)
    >>> head=insert_node(head,5)
    >>> head=insert_node(head,5)
    >>> new_head= remove_duplicates(head)
    >>> print_linked_list(new_head)
    1->2->3->4->5

    Duplicates do not have to be adjacent:

    >>> head = None
    >>> for value in (1, 2, 1, 3, 2):
    ...     head = insert_node(head, value)
    >>> print_linked_list(remove_duplicates(head))
    1->2->3

    >>> remove_duplicates(None) is None
    True
    """
    if head is None:
        return None

    seen = {head.data}
    last_kept = head  # most recent node that stays in the list
    current = head.next_node
    while current is not None:
        if current.data in seen:
            # Unlink the duplicate by routing the last kept node around it.
            last_kept.next_node = current.next_node
        else:
            seen.add(current.data)
            last_kept = current
        current = current.next_node
    return head


# ---------------------------------------------------------------------------
# dynamic_programming/distinct_subsequences.py
# ---------------------------------------------------------------------------


def subsequenceCounting(  # noqa: N802 - name kept for existing callers
    s1: str, s2: str, n: int | None = None, m: int | None = None
) -> int:
    """
    Uses bottom-up dynamic programming/tabulation
    to count the number of distinct
    subsequences of string s2 in string s1.

    ``n`` and ``m`` are the numbers of leading characters of ``s1`` and
    ``s2`` to consider; each defaults to the full length of its string.

    >>> s1 = "babgbag"
    >>> s2 = "bag"
    >>> subsequenceCounting(s1, s2, len(s1), len(s2))
    5
    >>> subsequenceCounting("rabbbit", "rabbit")
    3
    >>> subsequenceCounting("abc", "")
    1
    >>> subsequenceCounting("", "a")
    0
    """
    if n is None:
        n = len(s1)
    if m is None:
        m = len(s2)

    # dp[i][j] = number of ways s2[:j] occurs as a subsequence of s1[:i].
    # Zero-filling already covers dp[0][j] for j > 0: an empty s1 cannot
    # contain a non-empty s2.
    dp = [[0] * (m + 1) for _ in range(n + 1)]

    # Base case: the empty s2 occurs exactly once in every prefix of s1.
    for row in dp:
        row[0] = 1

    for i in range(1, n + 1):
        for j in range(1, m + 1):
            # Skipping s1[i-1] is always possible ...
            dp[i][j] = dp[i - 1][j]
            # ... and when the characters match we may also pair them up.
            if s1[i - 1] == s2[j - 1]:
                dp[i][j] += dp[i - 1][j - 1]

    # The final value in dp[n][m] is the count of distinct subsequences
    return dp[n][m]


if __name__ == "__main__":
    import doctest

    doctest.testmod()

    # Demo for remove_duplicates.
    head = insert_node(None, 1)
    head = insert_node(head, 1)
    head = insert_node(head, 2)
    head = insert_node(head, 3)
    head = insert_node(head, 3)
    head = insert_node(head, 4)
    head = insert_node(head, 5)
    head = insert_node(head, 5)

    new_head = remove_duplicates(head)
    print_linked_list(new_head)

    # Demo for subsequenceCounting.
    s1 = "babgbag"
    s2 = "bag"

    # Find the number of distinct subsequences of string s2 in string s1
    print(
        "The Count of Distinct Subsequences is",
        subsequenceCounting(s1, s2, len(s1), len(s2)),
    )