Skip to content

Add wildcard pattern matching using dynamic programming #5334

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 14 commits into from
Oct 20, 2021
Merged
103 changes: 103 additions & 0 deletions dynamic_programming/regular_expression.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
"""
Implementation of regular expression matching with support for '.' and '*'.
'.' Matches any single character.
'*' Matches zero or more of the preceding element.
The matching should cover the entire input string (not partial).

"""


def string_match_pattern(input_string: str, pattern: str) -> bool:
    """
    Check whether the whole of input_string matches pattern, using a
    bottom-up dynamic programming table.

    Runtime: O(len(input_string)*len(pattern))

    Arguments
    ---------
    input_string: str, any string which should be compared with pattern
    pattern: str, the string that has to be used as pattern; '.' matches any
             single character and '*' matches zero or more of the preceding
             element

    Note
    ----
    '*' in the pattern is always the quantifier, never a literal character,
    even when input_string itself contains '*'.
    The pattern should not start with a '*', because there has to be at least
    one element before it; such a leading '*' never matches anything.

    Returns
    -------
    the bool value denoting whether given string follows the pattern

    Examples
    --------
    >>> string_match_pattern("aab", "c*a*b")
    True
    >>> string_match_pattern("aaa", "aa")
    False
    >>> string_match_pattern("aaab", "aa*")
    False
    >>> string_match_pattern("aaab", ".*")
    True
    >>> string_match_pattern("a", "bbbb")
    False
    >>> string_match_pattern("", "bbbb")
    False
    >>> string_match_pattern("a", "")
    False
    >>> string_match_pattern("", "")
    True
    >>> string_match_pattern("", "a*")
    True
    >>> string_match_pattern("*", ".*")
    True
    >>> string_match_pattern("a*", "a*")
    False
    """

    len_string = len(input_string) + 1
    len_pattern = len(pattern) + 1

    # dp[i][j] is True when the first i characters of input_string match the
    # first j characters of pattern. Everything starts as "no match", which
    # already covers a non-empty string against an empty pattern (column 0).
    dp = [[False] * len_pattern for _ in range(len_string)]

    # the empty string matches the empty pattern
    dp[0][0] = True

    # the empty string matches a non-empty pattern only when every element is
    # cancelled by a following '*' (e.g. "a*b*"); start at j = 2 so that
    # dp[0][j - 2] never wraps around to a negative index
    for j in range(2, len_pattern):
        if pattern[j - 1] == "*":
            dp[0][j] = dp[0][j - 2]

    # now using bottom-up approach to find for all remaining lengths
    for i in range(1, len_string):
        current_char = input_string[i - 1]
        for j in range(1, len_pattern):
            token = pattern[j - 1]

            # The quantifier has to be tested before the literal comparison,
            # otherwise a '*' character in input_string would be matched
            # against the pattern's '*' as if it were a plain character.
            if token == "*":
                # j == 1 means nothing precedes the '*': leave it as no match
                if j >= 2:
                    # zero occurrences: drop "<element>*" from the pattern
                    zero_occurrences = dp[i][j - 2]
                    # one more occurrence: the element consumes current_char
                    # and the same "<element>*" keeps matching the rest
                    one_more = pattern[j - 2] in (current_char, ".") and dp[i - 1][j]
                    dp[i][j] = zero_occurrences or one_more
            elif token in (current_char, "."):
                dp[i][j] = dp[i - 1][j - 1]

    return dp[-1][-1]


if __name__ == "__main__":
    # Demo run with fixed sample values. To try other values interactively,
    # read them from the user instead:
    # input_string = input("input a string :")
    # pattern = input("input a pattern :")
    input_string, pattern = "aab", "c*a*b"

    # report whether the sample string follows the sample pattern
    if not string_match_pattern(input_string, pattern):
        print(f"{input_string} does not match with the given pattern {pattern}")
    else:
        print(f"{input_string} matches the given pattern {pattern}")