Skip to content

Add wildcard pattern matching using dynamic programming #5334

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 14 commits into from
Oct 20, 2021
Merged
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
112 changes: 112 additions & 0 deletions strings/wildcard_pattern_matching.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
"""
Implementation of regular expression matching with support for '.' and '*'.
'.' Matches any single character.
'*' Matches zero or more of the preceding element.
The matching should cover the entire input string (not partial).

"""


def string_match_pattern(input_string: str, pattern: str) -> bool:
    """
    Decide whether the whole of ``input_string`` matches ``pattern`` using a
    bottom-up dynamic programming table.

    Runtime: O(len(input_string) * len(pattern))

    Arguments
    ---------
    input_string: str, any string which should be compared with the pattern
    pattern: str, the string that represents a pattern and may contain
        '.' to match any single character and '*' to match zero or more
        of the preceding element

    Note
    ----
    '*' is always a quantifier, never a literal: a '*' character occurring in
    ``input_string`` can only be consumed by a '.' in the pattern.
    The pattern should not start with a '*', because there must be an
    element before it to repeat; such a leading '*' matches nothing.

    Returns
    -------
    A Boolean denoting whether the given string follows the pattern

    Examples
    --------
    >>> string_match_pattern("aab", "c*a*b")
    True
    >>> string_match_pattern("dabc", "*abc")
    False
    >>> string_match_pattern("aaa", "aa")
    False
    >>> string_match_pattern("aaa", "a.a")
    True
    >>> string_match_pattern("aaab", "aa*")
    False
    >>> string_match_pattern("aaab", ".*")
    True
    >>> string_match_pattern("a", "bbbb")
    False
    >>> string_match_pattern("", "bbbb")
    False
    >>> string_match_pattern("a", "")
    False
    >>> string_match_pattern("", "")
    True
    >>> string_match_pattern("", "a*b*")
    True
    >>> string_match_pattern("a*", "a*")
    False
    >>> string_match_pattern("*", "*")
    False
    >>> string_match_pattern("a*", "a.")
    True
    """

    len_string = len(input_string) + 1
    len_pattern = len(pattern) + 1

    # dp[i][j] is True when the first i characters of input_string match the
    # first j characters of pattern ("dp" stands for dynamic programming).
    # Every cell starts as False, which already covers dp[i][0] for i > 0:
    # an empty pattern never matches a non-empty string.
    dp = [[False] * len_pattern for _ in range(len_string)]

    # The empty string matches the empty pattern.
    dp[0][0] = True

    # The empty string matches a non-empty pattern only when every element
    # can be dropped, i.e. the pattern looks like "x*y*z*".  Start at j = 2 so
    # that a leading '*' is never resolved through a negative index.
    for j in range(2, len_pattern):
        if pattern[j - 1] == "*":
            dp[0][j] = dp[0][j - 2]

    # Fill the remaining prefixes bottom-up.
    for i in range(1, len_string):
        char = input_string[i - 1]
        for j in range(1, len_pattern):
            token = pattern[j - 1]
            if token == "*":
                # Test the quantifier before any literal comparison so that a
                # '*' in the input is not matched against it character-wise.
                if j >= 2:
                    element = pattern[j - 2]
                    # A '*' directly after another '*' has no real element
                    # to repeat, so it can only stand for zero occurrences.
                    repeats = element != "*" and element in (char, ".")
                    # Either use zero occurrences of the element, or let the
                    # element absorb one more character of the input.
                    dp[i][j] = dp[i][j - 2] or (repeats and dp[i - 1][j])
                # j == 1: leading '*' has nothing to repeat; the cell stays False.
            elif token in (char, "."):
                dp[i][j] = dp[i - 1][j - 1]

    return dp[-1][-1]


if __name__ == "__main__":
    import doctest

    # Run the examples embedded in the docstrings first.
    doctest.testmod()

    # Fixed demo values; to read them interactively instead, use:
    # input_string = input("input a string :")
    # pattern = input("input a pattern :")
    input_string = "aab"
    pattern = "c*a*b"

    # Report whether the demo string matches the demo pattern.
    matched = string_match_pattern(input_string, pattern)
    message = (
        f"{input_string} matches the given pattern {pattern}"
        if matched
        else f"{input_string} does not match with the given pattern {pattern}"
    )
    print(message)