def wildcard_match(pattern, text, case_insensitive=True):
    """
    Match ``text`` against a glob-like ``pattern``.

    Two metacharacters are recognised in ``pattern``:

    * ``*`` matches any run of characters, including the empty run.
    * ``?`` matches exactly one character.

    Every other pattern character must equal the corresponding text
    character. Metacharacters are only special in ``pattern``; a ``*`` or
    ``?`` appearing in ``text`` is ordinary data.

    :param str pattern: the pattern to match against
    :param str text: the string being tested
    :param bool case_insensitive: when True (the default), literal
        characters are also compared after lower-casing each one
    :return: True if the whole of ``text`` matches ``pattern``; False
        otherwise, or when either argument is None
    """
    if pattern is None or text is None:
        return False

    pattern_len = len(pattern)
    text_len = len(text)

    # An empty pattern matches only the empty string.
    if pattern_len == 0:
        return text_len == 0

    # Check the special case of a single * pattern, as it's common
    if pattern == '*':
        return True

    t = 0          # index of the next unmatched character in text
    p = 0          # index of the next unmatched character in pattern
    star_p = -1    # pattern index of the most recent '*', -1 if none yet
    star_t = 0     # text index up to which that '*' currently extends

    while t < text_len:
        token = pattern[p] if p < pattern_len else None

        # The glob must be tested before any literal comparison: otherwise
        # a '*' in the pattern facing a literal '*' in the text would be
        # consumed as a one-character match and could never expand.
        if token == '*':
            star_p = p
            star_t = t
            p += 1

        elif token is not None and (
            token == '?'
            or token == text[t]
            or (case_insensitive and token.lower() == text[t].lower())
        ):
            t += 1
            p += 1

        elif star_p != -1:
            # Mismatch after a glob: let the glob swallow one more
            # character and retry the rest of the pattern from there.
            star_t += 1
            t = star_t
            p = star_p + 1

        else:
            # Mismatch with no glob to fall back on.
            return False

    # The text is exhausted; any remaining pattern must be globs only,
    # since a glob can match the empty string.
    while p < pattern_len and pattern[p] == '*':
        p += 1

    return p == pattern_len
def test_match_exact_positive():
    """A pattern without metacharacters matches the identical text."""
    assert wildcard_match('foo', 'foo')


def test_match_exact_negative():
    """A pattern without metacharacters rejects a different text."""
    assert not wildcard_match('foo', 'cat')


def test_single_wildcard_positive():
    """A trailing '?' stands in for one character."""
    assert wildcard_match('fo?', 'foo')


def test_single_wildcard_negative():
    """A '?' does not rescue a mismatching literal elsewhere."""
    assert not wildcard_match('f?o', 'boo')


def test_multiple_wildcard_positive():
    """Several '?' characters each stand in for one character."""
    assert wildcard_match('?o?', 'foo')


def test_multiple_wildcard_negative():
    """Several '?' characters do not rescue a mismatching literal."""
    assert not wildcard_match('f??', 'boo')


def test_glob_positive_zero_or_more():
    """A trailing '*' may match nothing at all."""
    assert wildcard_match('foo*', 'foo')


def test_glob_negative_zero_or_more():
    """A trailing '*' does not excuse a mismatch in the literal prefix."""
    assert not wildcard_match('foo*', 'fo0')


def test_glob_negative():
    """A glob after a mismatching first literal still fails."""
    assert not wildcard_match('fo*', 'boo')


def test_glob_and_single_positive():
    """'*' and '?' can be combined in one pattern."""
    assert wildcard_match('*o?', 'foo')


def test_glob_and_single_negative():
    """'?' plus '*' still requires the leading literal to match."""
    assert not wildcard_match('f?*', 'boo')


def test_pure_wildcard():
    """A lone '*' matches any text."""
    assert wildcard_match('*', 'foo')


def test_exact_match():
    """Digits are ordinary literal characters."""
    assert wildcard_match('6573459', '6573459')


def test_misc():
    """Assorted patterns mixing '?', '*' and literals."""
    for pattern, candidates in (
        ('?at', ('bat', 'cat')),
        ('?o?se', ('horse', 'mouse')),
        ('*s', ('dogs', 'horses')),
    ):
        for candidate in candidates:
            assert wildcard_match(pattern, candidate)

    assert wildcard_match('J*', 'Jeep')

    four_chars = '????'
    assert wildcard_match(four_chars, 'ford')
    assert not wildcard_match(four_chars, 'chevy')
    assert wildcard_match('*', 'cAr')

    assert wildcard_match('*/foo', '/bar/foo')


def test_case_insensitivity():
    """The third positional argument switches case folding on and off."""
    # Each row: (pattern, text, case_insensitive flag, expected outcome).
    expectations = (
        ('Foo', 'Foo', False, True),
        ('Foo', 'Foo', True, True),

        ('Foo', 'FOO', False, False),
        ('Foo', 'FOO', True, True),

        ('Fo*', 'Foo0', False, True),
        ('Fo*', 'Foo0', True, True),

        ('Fo*', 'FOo0', False, False),
        ('Fo*', 'FOo0', True, True),

        ('Fo?', 'Foo', False, True),
        ('Fo?', 'Foo', True, True),

        ('Fo?', 'FOo', False, False),
        ('Fo?', 'FoO', False, True),
        ('Fo?', 'FOO', True, True),
    )
    for pattern, text, fold_case, expected in expectations:
        assert bool(wildcard_match(pattern, text, fold_case)) is expected


def test_no_globs():
    """Without globs, a text shorter than the pattern does not match."""
    assert not wildcard_match('abcd', 'abc')


def test_edge_case_globs():
    """Empty strings, leading/trailing/infix globs and heavy backtracking."""
    should_match = (
        ('', ''),
        ('a', 'a'),
        ('*a', 'a'),
        ('*a', 'ba'),
        ('a*', 'a'),
        ('a*', 'ab'),
        ('a*a', 'aa'),
        ('a*a', 'aba'),
        ('a*a', 'aaa'),
        ('a*a*', 'aa'),
        ('a*a*', 'aba'),
        ('a*a*', 'aaa'),
        ('a*a*', 'aaaaaaaaaaaaaaaaaaaaaaaaaa'),
        ('a*b*a*b*a*b*a*b*a*',
         'akljd9gsdfbkjhaabajkhbbyiaahkjbjhbuykjakjhabkjhbabjhkaabbabbaaakljdfsjklababkjbsdabab'),
    )
    for pattern, text in should_match:
        assert wildcard_match(pattern, text)

    assert not wildcard_match('a*na*ha', 'anananahahanahana')


def test_multi_globs():
    """Runs of consecutive globs and globs adjacent to '?'."""
    # Each row: (pattern, text, expected outcome), in the original order.
    expectations = (
        ('*a', 'a', True),
        ('**a', 'a', True),
        ('***a', 'a', True),
        ('**a*', 'a', True),
        ('**a**', 'a', True),

        ('a**b', 'ab', True),
        ('a**b', 'abb', True),

        ('*?', 'a', True),
        ('*?', 'aa', True),
        ('*??', 'aa', True),
        ('*???', 'aa', False),
        ('*?', 'aaa', True),

        ('?', 'a', True),
        ('??', 'a', False),

        ('?*', 'a', True),
        ('*?', 'a', True),
        ('?*?', 'a', False),
        ('?*?', 'aa', True),
        ('*?*', 'a', True),

        ('*?*a', 'a', False),
        ('*?*a*', 'ba', True),
    )
    for pattern, text, expected in expectations:
        assert bool(wildcard_match(pattern, text)) is expected