Skip to content

Faster Implementation of Wildcard Matching and added test file for wildcard matching #178

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Oct 16, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
97 changes: 35 additions & 62 deletions aws_xray_sdk/core/utils/search_pattern.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,74 +12,47 @@ def wildcard_match(pattern, text, case_insensitive=True):
if pattern is None or text is None:
return False

pattern_len = len(pattern)
text_len = len(text)
if pattern_len == 0:
return text_len == 0
if len(pattern) == 0:
return len(text) == 0

# Check the special case of a single * pattern, as it's common
if pattern == '*':
return True

if case_insensitive:
pattern = pattern.lower()
text = text.lower()
# Two-pointer scan: i indexes text and p indexes pattern. Each if/elif branch below handles one case:
# an exact character match, a case-insensitive match, a '?' wildcard, or a '*' glob.
# pStar and iStar remember the pattern and text positions of the most recent '*' so the scan can
# backtrack to it; iStar == len(text) is the sentinel meaning no '*' has been seen yet.
i = 0
p = 0
iStar = len(text)
pStar = 0
while i < len(text):
if p < len(pattern) and text[i] == pattern[p]:
i = i + 1
p = p + 1

elif p < len(pattern) and case_insensitive and text[i].lower() == pattern[p].lower():
i = i + 1
p = p + 1

elif p < len(pattern) and pattern[p] == '?':
i = i + 1
p = p + 1

elif p < len(pattern) and pattern[p] == '*':
iStar = i
pStar = p
p += 1

elif iStar != len(text):
iStar += 1
i = iStar
p = pStar + 1

# Infix globs are relatively rare, and the below search is expensive.
# Check for infix globs and, in their absence, do the simple thing.
if '*' not in pattern or pattern.index('*') == len(pattern) - 1:
return _simple_wildcard_match(pattern, text)

# The res[i] is used to record if there is a match between
# the first i chars in text and the first j chars in pattern.
# So will return res[textLength+1] in the end
# Loop from the beginning of the pattern
# case not '*': if text[i]==pattern[j] or pattern[j] is '?',
# and res[i] is true, set res[i+1] to true, otherwise false.
# case '*': since '*' can match any globing, as long as there is a true
# in res before i, all the res[i+1], res[i+2],...,res[textLength]
# could be true
res = [None] * (text_len + 1)
res[0] = True
for j in range(0, pattern_len):
p = pattern[j]
if p != '*':
for i in range(text_len - 1, -1, -1):
res[i + 1] = res[i] and (p == '?' or (p == text[i]))
else:
i = 0
while i <= text_len and not res[i]:
i += 1
for m in range(i, text_len + 1):
res[m] = True

res[0] = res[0] and (p == '*')

return res[text_len]


def _simple_wildcard_match(pattern, text):
j = 0
pattern_len = len(pattern)
text_len = len(text)
for i in range(0, pattern_len):
p = pattern[i]
if p == '*':
# Presumption for this method is that globs only occur at end
return True
elif p == '?':
if j == text_len:
# No character to match
return False
j += 1
else:
if j >= text_len:
return False
return False

if(p != text[j]):
return False
j += 1
while p < len(pattern) and pattern[p] == '*':
p = p + 1

# Ate up all the pattern and didn't end at a glob, so a match
# will have consumed all the text
return j == text_len
return p == len(pattern) and i == len(text)
175 changes: 175 additions & 0 deletions tests/test_wildcard_match.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
from aws_xray_sdk.core.utils.search_pattern import wildcard_match


def test_match_exact_positive():
    """A literal pattern matches text that is identical to it."""
    assert wildcard_match('foo', 'foo')


def test_match_exact_negative():
    """A literal pattern does not match a different text of the same length."""
    assert not wildcard_match('foo', 'cat')


def test_single_wildcard_positive():
    """A trailing '?' stands in for the final character of the text."""
    assert wildcard_match('fo?', 'foo')


def test_single_wildcard_negative():
    """A '?' elsewhere in the pattern does not excuse a mismatched literal."""
    assert not wildcard_match('f?o', 'boo')


def test_multiple_wildcard_positive():
    """Several '?' wildcards around a matching literal accept the text."""
    assert wildcard_match('?o?', 'foo')


def test_multiple_wildcard_negative():
    """Several '?' wildcards still reject text whose literal prefix differs."""
    assert not wildcard_match('f??', 'boo')


def test_glob_positive_zero_or_more():
    """A trailing '*' may match zero characters."""
    assert wildcard_match('foo*', 'foo')


def test_glob_negative_zero_or_more():
    """A trailing '*' does not rescue a mismatch in the literal prefix."""
    assert not wildcard_match('foo*', 'fo0')


def test_glob_negative():
    """A glob pattern rejects text whose first character differs from its prefix."""
    assert not wildcard_match('fo*', 'boo')


def test_glob_and_single_positive():
    """A leading '*' combined with a trailing '?' accepts a fitting text."""
    assert wildcard_match('*o?', 'foo')


def test_glob_and_single_negative():
    """'?' followed by '*' still requires the leading literal to match."""
    assert not wildcard_match('f?*', 'boo')


def test_pure_wildcard():
    """A lone '*' pattern matches the text."""
    assert wildcard_match('*', 'foo')


def test_exact_match():
    """An all-digit literal pattern matches the identical digit string."""
    digits = '6573459'
    assert wildcard_match(digits, '6573459')


def test_misc():
    """Spot-check '?' and '*' patterns against assorted animal, vehicle and path texts."""
    # (pattern, text, should_match) -- evaluated in order.
    cases = (
        ('?at', 'bat', True),
        ('?at', 'cat', True),
        ('?o?se', 'horse', True),
        ('?o?se', 'mouse', True),
        ('*s', 'dogs', True),
        ('*s', 'horses', True),
        ('J*', 'Jeep', True),
        ('????', 'ford', True),
        ('????', 'chevy', False),
        ('*', 'cAr', True),
        ('*/foo', '/bar/foo', True),
    )
    for pattern, text, should_match in cases:
        assert bool(wildcard_match(pattern, text)) is should_match


def test_case_insensitivity():
    """Exercise the third (case_insensitive) argument with literal, '*' and '?' patterns."""
    # (pattern, text, case_insensitive, should_match) -- evaluated in order.
    cases = (
        ('Foo', 'Foo', False, True),
        ('Foo', 'Foo', True, True),
        ('Foo', 'FOO', False, False),
        ('Foo', 'FOO', True, True),
        ('Fo*', 'Foo0', False, True),
        ('Fo*', 'Foo0', True, True),
        ('Fo*', 'FOo0', False, False),
        ('Fo*', 'FOo0', True, True),
        ('Fo?', 'Foo', False, True),
        ('Fo?', 'Foo', True, True),
        ('Fo?', 'FOo', False, False),
        ('Fo?', 'FoO', False, True),
        ('Fo?', 'FOO', True, True),
    )
    for pattern, text, ignore_case, should_match in cases:
        assert bool(wildcard_match(pattern, text, ignore_case)) is should_match


def test_no_globs():
    """A glob-free pattern longer than the text does not match it."""
    pattern = 'abcd'
    shorter_text = 'abc'
    assert not wildcard_match(pattern, shorter_text)


def test_edge_case_globs():
    """Cover empty input plus leading, trailing, infix and repeated '*' globs."""
    # (pattern, text, should_match) -- evaluated in order.
    cases = (
        ('', '', True),
        ('a', 'a', True),
        ('*a', 'a', True),
        ('*a', 'ba', True),
        ('a*', 'a', True),
        ('a*', 'ab', True),
        ('a*a', 'aa', True),
        ('a*a', 'aba', True),
        ('a*a', 'aaa', True),
        ('a*a*', 'aa', True),
        ('a*a*', 'aba', True),
        ('a*a*', 'aaa', True),
        ('a*a*', 'aaaaaaaaaaaaaaaaaaaaaaaaaa', True),
        ('a*b*a*b*a*b*a*b*a*',
         'akljd9gsdfbkjhaabajkhbbyiaahkjbjhbuykjakjhabkjhbabjhkaabbabbaaakljdfsjklababkjbsdabab',
         True),
        ('a*na*ha', 'anananahahanahana', False),
    )
    for pattern, text, should_match in cases:
        assert bool(wildcard_match(pattern, text)) is should_match


def test_multi_globs():
    """Cover consecutive '*' globs and mixtures of '*' with '?'."""
    # (pattern, text, should_match) -- evaluated in order.
    cases = (
        # Runs of consecutive globs.
        ('*a', 'a', True),
        ('**a', 'a', True),
        ('***a', 'a', True),
        ('**a*', 'a', True),
        ('**a**', 'a', True),
        ('a**b', 'ab', True),
        ('a**b', 'abb', True),
        # Glob followed by one or more single-character wildcards.
        ('*?', 'a', True),
        ('*?', 'aa', True),
        ('*??', 'aa', True),
        ('*???', 'aa', False),
        ('*?', 'aaa', True),
        # Single-character wildcards on their own.
        ('?', 'a', True),
        ('??', 'a', False),
        # Single-character wildcards on either side of a glob.
        ('?*', 'a', True),
        ('*?', 'a', True),
        ('?*?', 'a', False),
        ('?*?', 'aa', True),
        ('*?*', 'a', True),
        # Globs and wildcards followed by a literal.
        ('*?*a', 'a', False),
        ('*?*a*', 'ba', True),
    )
    for pattern, text, should_match in cases:
        assert bool(wildcard_match(pattern, text)) is should_match