From d441a4aabe2e11c0f9d6e36e8ddf09cc74bec8a8 Mon Sep 17 00:00:00 2001 From: punithbajaj Date: Sat, 16 Oct 2021 04:06:22 +0530 Subject: [PATCH 01/11] Added regular expression implementation using dp --- dynamic_programming/regular_expression.py | 97 +++++++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 dynamic_programming/regular_expression.py diff --git a/dynamic_programming/regular_expression.py b/dynamic_programming/regular_expression.py new file mode 100644 index 000000000000..8daad69ec54f --- /dev/null +++ b/dynamic_programming/regular_expression.py @@ -0,0 +1,97 @@ +""" +Implementation of regular expression matching with support for '.' and '*'. +'.' Matches any single character. +'*' Matches zero or more of the preceding element. +The matching should cover the entire input string (not partial). + +""" + + +def string_match_pattern(input_string: str, pattern: str) -> bool: + """ + using bottom-up dynamic programming solution for matching the input + string with a given pattern. + + Runtime: O(len(input_string)*len(pattern)) + + Arguments + -------- + input_string: str, any string which should be compared with pattern + pattern: str, the string that has to be used as pattern and should contain + '.' 
for single character match and '*' for zero or more of preceding character + match + + Note + ---- + the pattern can not start with a '*', + because there should be at least one character before * + + Returns + ------- + the bool value denoting whether given string follows the pattern + + Examples + ------- + >>> string_match_pattern("aab", "c*a*b") + 1 + >>> string_match_pattern("aaa", "aa") + 0 + >>> string_match_pattern("aaab", "aa*") + 0 + >>> string_match_pattern("aaab", ".*") + 1 + """ + + len_string = len(input_string) + 1 + len_pattern = len(pattern) + 1 + + # dp is a 2d matrix where dp[i][j] denotes whether prefix string of + # length i of input_string matches with prefix string of length j of + # given pattern + dp = [[0 for i in range(len_pattern)] for j in range(len_string)] + + # since string of zero length match pattern of zero length + dp[0][0] = 1 + + # since pattern of zero length will never match with string of non-zero length + for i in range(1, len_string): + dp[i][0] = 0 + + # since string of zero length will match with pattern where there + # is at least one * alternatively + for j in range(1, len_pattern): + if pattern[j - 1] == "*": + dp[0][j] = dp[0][j - 2] + else: + dp[0][j] = 0 + + # now using bottom-up approach to find for all remaining lengths + for i in range(1, len_string): + for j in range(1, len_pattern): + if input_string[i - 1] == pattern[j - 1] or pattern[j - 1] == ".": + dp[i][j] = dp[i - 1][j - 1] + + elif pattern[j - 1] == "*": + if dp[i][j - 2] == 1: + dp[i][j] = 1 + elif pattern[j - 2] == input_string[i - 1] or pattern[j - 2] == ".": + dp[i][j] = dp[i - 1][j] + else: + dp[i][j] = 0 + else: + dp[i][j] = 0 + + return dp[-1][-1] + + +if __name__ == "__main__": + + # inputing the strings + input_string = input("input a string :") + pattern = input("input a pattern :") + + # using function to check whether given string matches the given pattern + if string_match_pattern(input_string, pattern): + print(f"{input_string} matches the 
given pattern {pattern}") + else: + print(f"{input_string} does not match with the given pattern {pattern}") From 713d919bcfc7a226941e3291b5abd2828b3bf7a8 Mon Sep 17 00:00:00 2001 From: punithbajaj Date: Sat, 16 Oct 2021 12:29:58 +0530 Subject: [PATCH 02/11] replaced input() with example values --- dynamic_programming/regular_expression.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/dynamic_programming/regular_expression.py b/dynamic_programming/regular_expression.py index 8daad69ec54f..c69e74131d28 100644 --- a/dynamic_programming/regular_expression.py +++ b/dynamic_programming/regular_expression.py @@ -87,8 +87,11 @@ def string_match_pattern(input_string: str, pattern: str) -> bool: if __name__ == "__main__": # inputing the strings - input_string = input("input a string :") - pattern = input("input a pattern :") + # input_string = input("input a string :") + # pattern = input("input a pattern :") + + input_string = "aab" + pattern = "c*a*b" # using function to check whether given string matches the given pattern if string_match_pattern(input_string, pattern): From aca329543dc7d33e8a70f1ef5742801ad45a91fd Mon Sep 17 00:00:00 2001 From: P U N I T H <55887644+punithbajaj@users.noreply.github.com> Date: Sat, 16 Oct 2021 15:29:16 +0530 Subject: [PATCH 03/11] Apply suggestions from code review Co-authored-by: Christian Clauss --- dynamic_programming/regular_expression.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/dynamic_programming/regular_expression.py b/dynamic_programming/regular_expression.py index c69e74131d28..29256ad96b01 100644 --- a/dynamic_programming/regular_expression.py +++ b/dynamic_programming/regular_expression.py @@ -60,10 +60,7 @@ def string_match_pattern(input_string: str, pattern: str) -> bool: # since string of zero length will match with pattern where there # is at least one * alternatively for j in range(1, len_pattern): - if pattern[j - 1] == "*": - dp[0][j] = dp[0][j - 2] - else: - 
dp[0][j] = 0 + dp[0][j] = dp[0][j - 2] if pattern[j - 1] == "*" else 0 # now using bottom-up approach to find for all remaining lengths for i in range(1, len_string): @@ -74,7 +71,7 @@ def string_match_pattern(input_string: str, pattern: str) -> bool: elif pattern[j - 1] == "*": if dp[i][j - 2] == 1: dp[i][j] = 1 - elif pattern[j - 2] == input_string[i - 1] or pattern[j - 2] == ".": + elif pattern[j - 2] in (input_string[i - 1], "."): dp[i][j] = dp[i - 1][j] else: dp[i][j] = 0 From f54d11cf925313a55464960365e96417beee6611 Mon Sep 17 00:00:00 2001 From: punithbajaj Date: Sat, 16 Oct 2021 15:38:05 +0530 Subject: [PATCH 04/11] changed returning value to bool and added test cases --- dynamic_programming/regular_expression.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/dynamic_programming/regular_expression.py b/dynamic_programming/regular_expression.py index c69e74131d28..98f899e4598d 100644 --- a/dynamic_programming/regular_expression.py +++ b/dynamic_programming/regular_expression.py @@ -33,13 +33,19 @@ def string_match_pattern(input_string: str, pattern: str) -> bool: Examples ------- >>> string_match_pattern("aab", "c*a*b") - 1 + True >>> string_match_pattern("aaa", "aa") - 0 + False >>> string_match_pattern("aaab", "aa*") - 0 + False >>> string_match_pattern("aaab", ".*") - 1 + True + >>> string_match_pattern("a", "bbbb") + False + >>> string_match_pattern("", "bbbb") + False + >>> string_match_pattern("a", "") + False """ len_string = len(input_string) + 1 @@ -81,7 +87,7 @@ def string_match_pattern(input_string: str, pattern: str) -> bool: else: dp[i][j] = 0 - return dp[-1][-1] + return bool(dp[-1][-1]) if __name__ == "__main__": @@ -89,7 +95,7 @@ def string_match_pattern(input_string: str, pattern: str) -> bool: # inputing the strings # input_string = input("input a string :") # pattern = input("input a pattern :") - + input_string = "aab" pattern = "c*a*b" From 1b298b288bdf99ac26bcfbbc5741f2e447996320 Mon Sep 17 
00:00:00 2001 From: P U N I T H <55887644+punithbajaj@users.noreply.github.com> Date: Tue, 19 Oct 2021 13:01:15 +0530 Subject: [PATCH 05/11] added doctest Co-authored-by: John Law --- dynamic_programming/regular_expression.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dynamic_programming/regular_expression.py b/dynamic_programming/regular_expression.py index 6b2471c0a982..bc93ce241b11 100644 --- a/dynamic_programming/regular_expression.py +++ b/dynamic_programming/regular_expression.py @@ -88,7 +88,9 @@ def string_match_pattern(input_string: str, pattern: str) -> bool: if __name__ == "__main__": + import doctest + doctest.testmod() # inputing the strings # input_string = input("input a string :") # pattern = input("input a pattern :") From 60a7b7813738dea1b72cc19f43ea079da5a5ef19 Mon Sep 17 00:00:00 2001 From: punithbajaj Date: Tue, 19 Oct 2021 13:06:25 +0530 Subject: [PATCH 06/11] added test cases --- dynamic_programming/regular_expression.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dynamic_programming/regular_expression.py b/dynamic_programming/regular_expression.py index bc93ce241b11..608798947054 100644 --- a/dynamic_programming/regular_expression.py +++ b/dynamic_programming/regular_expression.py @@ -36,6 +36,8 @@ def string_match_pattern(input_string: str, pattern: str) -> bool: True >>> string_match_pattern("aaa", "aa") False + >>> string_match_pattern("aaa", "a.a") + True >>> string_match_pattern("aaab", "aa*") False >>> string_match_pattern("aaab", ".*") @@ -46,6 +48,8 @@ def string_match_pattern(input_string: str, pattern: str) -> bool: False >>> string_match_pattern("a", "") False + >>> string_match_pattern("", "") + True """ len_string = len(input_string) + 1 From c4d5971999e607cb261c710bac9e70a52cb42914 Mon Sep 17 00:00:00 2001 From: P U N I T H <55887644+punithbajaj@users.noreply.github.com> Date: Tue, 19 Oct 2021 15:46:09 +0530 Subject: [PATCH 07/11] Apply suggestions from code review Co-authored-by: John Law --- 
dynamic_programming/regular_expression.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/dynamic_programming/regular_expression.py b/dynamic_programming/regular_expression.py index 608798947054..d669f5477c74 100644 --- a/dynamic_programming/regular_expression.py +++ b/dynamic_programming/regular_expression.py @@ -9,26 +9,26 @@ def string_match_pattern(input_string: str, pattern: str) -> bool: """ - using bottom-up dynamic programming solution for matching the input + uses bottom-up dynamic programming solution for matching the input string with a given pattern. Runtime: O(len(input_string)*len(pattern)) Arguments -------- - input_string: str, any string which should be compared with pattern - pattern: str, the string that has to be used as pattern and should contain - '.' for single character match and '*' for zero or more of preceding character - match + input_string: str, any string which should be compared with the pattern + pattern: str, the string that represents a pattern and may contain + '.' for single character matches and '*' for zero or more of preceding character + matches Note ---- - the pattern can not start with a '*', + the pattern cannot start with a '*', because there should be at least one character before * Returns ------- - the bool value denoting whether given string follows the pattern + A Boolean denoting whether the given string follows the pattern Examples ------- @@ -58,6 +58,7 @@ def string_match_pattern(input_string: str, pattern: str) -> bool: # dp is a 2d matrix where dp[i][j] denotes whether prefix string of # length i of input_string matches with prefix string of length j of # given pattern + # "dp" stands for dynamic programming. 
dp = [[0 for i in range(len_pattern)] for j in range(len_string)] # since string of zero length match pattern of zero length From 6e8e8c123c456365c9b1cb5bd10ddd5945874c3f Mon Sep 17 00:00:00 2001 From: punithbajaj Date: Tue, 19 Oct 2021 15:53:58 +0530 Subject: [PATCH 08/11] shifted to strings --- {dynamic_programming => strings}/regular_expression.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) rename {dynamic_programming => strings}/regular_expression.py (97%) diff --git a/dynamic_programming/regular_expression.py b/strings/regular_expression.py similarity index 97% rename from dynamic_programming/regular_expression.py rename to strings/regular_expression.py index d669f5477c74..0206d3e824ca 100644 --- a/dynamic_programming/regular_expression.py +++ b/strings/regular_expression.py @@ -34,6 +34,8 @@ def string_match_pattern(input_string: str, pattern: str) -> bool: ------- >>> string_match_pattern("aab", "c*a*b") True + >>> string_match_pattern("dabc", "*abc") + False >>> string_match_pattern("aaa", "aa") False >>> string_match_pattern("aaa", "a.a") @@ -57,7 +59,7 @@ def string_match_pattern(input_string: str, pattern: str) -> bool: # dp is a 2d matrix where dp[i][j] denotes whether prefix string of # length i of input_string matches with prefix string of length j of - # given pattern + # given pattern. # "dp" stands for dynamic programming. 
dp = [[0 for i in range(len_pattern)] for j in range(len_string)] From 77c0357ba601e74a71db77c0fb2e96976f61f81c Mon Sep 17 00:00:00 2001 From: punithbajaj Date: Tue, 19 Oct 2021 19:06:21 +0530 Subject: [PATCH 09/11] Changed filename --- strings/{regular_expression.py => wildcard_pattern_matching.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename strings/{regular_expression.py => wildcard_pattern_matching.py} (100%) diff --git a/strings/regular_expression.py b/strings/wildcard_pattern_matching.py similarity index 100% rename from strings/regular_expression.py rename to strings/wildcard_pattern_matching.py From e09628812d47eb681267d7d8a5bbb52c9fc4b5cb Mon Sep 17 00:00:00 2001 From: P U N I T H <55887644+punithbajaj@users.noreply.github.com> Date: Wed, 20 Oct 2021 13:33:15 +0530 Subject: [PATCH 10/11] Update function name to match_pattern Co-authored-by: John Law --- strings/wildcard_pattern_matching.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/strings/wildcard_pattern_matching.py b/strings/wildcard_pattern_matching.py index 0206d3e824ca..2cbf41788421 100644 --- a/strings/wildcard_pattern_matching.py +++ b/strings/wildcard_pattern_matching.py @@ -7,7 +7,7 @@ """ -def string_match_pattern(input_string: str, pattern: str) -> bool: +def match_pattern(input_string: str, pattern: str) -> bool: """ uses bottom-up dynamic programming solution for matching the input string with a given pattern. 
From 459c8064edd97982e1fb8a4bdad013b9212da313 Mon Sep 17 00:00:00 2001 From: P U N I T H <55887644+punithbajaj@users.noreply.github.com> Date: Wed, 20 Oct 2021 13:33:15 +0530 Subject: [PATCH 11/11] Update function name to match_pattern Co-authored-by: John Law --- strings/wildcard_pattern_matching.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/strings/wildcard_pattern_matching.py b/strings/wildcard_pattern_matching.py index 0206d3e824ca..83c8d834cca9 100644 --- a/strings/wildcard_pattern_matching.py +++ b/strings/wildcard_pattern_matching.py @@ -7,7 +7,7 @@ """ -def string_match_pattern(input_string: str, pattern: str) -> bool: +def match_pattern(input_string: str, pattern: str) -> bool: """ uses bottom-up dynamic programming solution for matching the input string with a given pattern. @@ -32,25 +32,25 @@ def string_match_pattern(input_string: str, pattern: str) -> bool: Examples ------- - >>> string_match_pattern("aab", "c*a*b") + >>> match_pattern("aab", "c*a*b") True - >>> string_match_pattern("dabc", "*abc") + >>> match_pattern("dabc", "*abc") False - >>> string_match_pattern("aaa", "aa") + >>> match_pattern("aaa", "aa") False - >>> string_match_pattern("aaa", "a.a") + >>> match_pattern("aaa", "a.a") True - >>> string_match_pattern("aaab", "aa*") + >>> match_pattern("aaab", "aa*") False - >>> string_match_pattern("aaab", ".*") + >>> match_pattern("aaab", ".*") True - >>> string_match_pattern("a", "bbbb") + >>> match_pattern("a", "bbbb") False - >>> string_match_pattern("", "bbbb") + >>> match_pattern("", "bbbb") False - >>> string_match_pattern("a", "") + >>> match_pattern("a", "") False - >>> string_match_pattern("", "") + >>> match_pattern("", "") True """ @@ -106,7 +106,7 @@ def string_match_pattern(input_string: str, pattern: str) -> bool: pattern = "c*a*b" # using function to check whether given string matches the given pattern - if string_match_pattern(input_string, pattern): + if 
match_pattern(input_string, pattern): print(f"{input_string} matches the given pattern {pattern}") else: print(f"{input_string} does not match with the given pattern {pattern}")