Skip to content

Commit dbdeab6

Browse files
authored
Merge pull request #178 from bhautikpip/master
Faster Implementation of Wildcard Matching and added test file for wildcard matching
2 parents 372efaa + d5d8743 commit dbdeab6

File tree

2 files changed

+210
-62
lines changed

2 files changed

+210
-62
lines changed

aws_xray_sdk/core/utils/search_pattern.py

Lines changed: 35 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -12,74 +12,47 @@ def wildcard_match(pattern, text, case_insensitive=True):
1212
if pattern is None or text is None:
1313
return False
1414

15-
pattern_len = len(pattern)
16-
text_len = len(text)
17-
if pattern_len == 0:
18-
return text_len == 0
15+
if len(pattern) == 0:
16+
return len(text) == 0
1917

2018
# Check the special case of a single * pattern, as it's common
2119
if pattern == '*':
2220
return True
2321

24-
if case_insensitive:
25-
pattern = pattern.lower()
26-
text = text.lower()
22+
# Walk text and pattern together with an if/elif chain: a literal match (case-insensitive when
23+
# requested) or a '?' consumes one character of each, while a '*' records a point to backtrack to.
24+
# i and p index text and pattern; pStar is the pattern index of the last '*' seen and iStar is the text index from which the rest of the pattern is retried (it stays len(text) until a '*' is seen).
25+
i = 0
26+
p = 0
27+
iStar = len(text)
28+
pStar = 0
29+
while i < len(text):
30+
if p < len(pattern) and text[i] == pattern[p]:
31+
i = i + 1
32+
p = p + 1
33+
34+
elif p < len(pattern) and case_insensitive and text[i].lower() == pattern[p].lower():
35+
i = i + 1
36+
p = p + 1
37+
38+
elif p < len(pattern) and pattern[p] == '?':
39+
i = i + 1
40+
p = p + 1
41+
42+
elif p < len(pattern) and pattern[p] == '*':
43+
iStar = i
44+
pStar = p
45+
p += 1
46+
47+
elif iStar != len(text):
48+
iStar += 1
49+
i = iStar
50+
p = pStar + 1
2751

28-
# Infix globs are relatively rare, and the below search is expensive.
29-
# Check for infix globs and, in their absence, do the simple thing.
30-
if '*' not in pattern or pattern.index('*') == len(pattern) - 1:
31-
return _simple_wildcard_match(pattern, text)
32-
33-
# The res[i] is used to record if there is a match between
34-
# the first i chars in text and the first j chars in pattern.
35-
# So will return res[textLength+1] in the end
36-
# Loop from the beginning of the pattern
37-
# case not '*': if text[i]==pattern[j] or pattern[j] is '?',
38-
# and res[i] is true, set res[i+1] to true, otherwise false.
39-
# case '*': since '*' can match any globing, as long as there is a true
40-
# in res before i, all the res[i+1], res[i+2],...,res[textLength]
41-
# could be true
42-
res = [None] * (text_len + 1)
43-
res[0] = True
44-
for j in range(0, pattern_len):
45-
p = pattern[j]
46-
if p != '*':
47-
for i in range(text_len - 1, -1, -1):
48-
res[i + 1] = res[i] and (p == '?' or (p == text[i]))
49-
else:
50-
i = 0
51-
while i <= text_len and not res[i]:
52-
i += 1
53-
for m in range(i, text_len + 1):
54-
res[m] = True
55-
56-
res[0] = res[0] and (p == '*')
57-
58-
return res[text_len]
59-
60-
61-
def _simple_wildcard_match(pattern, text):
62-
j = 0
63-
pattern_len = len(pattern)
64-
text_len = len(text)
65-
for i in range(0, pattern_len):
66-
p = pattern[i]
67-
if p == '*':
68-
# Presumption for this method is that globs only occur at end
69-
return True
70-
elif p == '?':
71-
if j == text_len:
72-
# No character to match
73-
return False
74-
j += 1
7552
else:
76-
if j >= text_len:
77-
return False
53+
return False
7854

79-
if(p != text[j]):
80-
return False
81-
j += 1
55+
while p < len(pattern) and pattern[p] == '*':
56+
p = p + 1
8257

83-
# Ate up all the pattern and didn't end at a glob, so a match
84-
# will have consumed all the text
85-
return j == text_len
58+
return p == len(pattern) and i == len(text)

tests/test_wildcard_match.py

Lines changed: 175 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,175 @@
1+
from aws_xray_sdk.core.utils.search_pattern import wildcard_match
2+
3+
4+
def test_match_exact_positive():
5+
pat = 'foo'
6+
bar = 'foo'
7+
assert wildcard_match(pat, bar)
8+
9+
10+
def test_match_exact_negative():
11+
pat = 'foo'
12+
bar = 'cat'
13+
assert not wildcard_match(pat, bar)
14+
15+
16+
def test_single_wildcard_positive():
17+
pat = 'fo?'
18+
bar = 'foo'
19+
assert wildcard_match(pat, bar)
20+
21+
22+
def test_single_wildcard_negative():
23+
pat = 'f?o'
24+
bar = 'boo'
25+
assert not wildcard_match(pat, bar)
26+
27+
28+
def test_multiple_wildcard_positive():
29+
pat = '?o?'
30+
bar = 'foo'
31+
assert wildcard_match(pat, bar)
32+
33+
34+
def test_multiple_wildcard_negative():
35+
pat = 'f??'
36+
bar = 'boo'
37+
assert not wildcard_match(pat, bar)
38+
39+
40+
def test_glob_positive_zero_or_more():
41+
pat = 'foo*'
42+
bar = 'foo'
43+
assert wildcard_match(pat, bar)
44+
45+
46+
def test_glob_negative_zero_or_more():
47+
pat = 'foo*'
48+
bar = 'fo0'
49+
assert not wildcard_match(pat, bar)
50+
51+
52+
def test_glob_negative():
53+
pat = 'fo*'
54+
bar = 'boo'
55+
assert not wildcard_match(pat, bar)
56+
57+
58+
def test_glob_and_single_positive():
59+
pat = '*o?'
60+
bar = 'foo'
61+
assert wildcard_match(pat, bar)
62+
63+
64+
def test_glob_and_single_negative():
65+
pat = 'f?*'
66+
bar = 'boo'
67+
assert not wildcard_match(pat, bar)
68+
69+
70+
def test_pure_wildcard():
71+
pat = '*'
72+
bar = 'foo'
73+
assert wildcard_match(pat, bar)
74+
75+
76+
def test_exact_match():
77+
pat = '6573459'
78+
bar = '6573459'
79+
assert wildcard_match(pat, bar)
80+
81+
82+
def test_misc():
83+
animal1 = '?at'
84+
animal2 = '?o?se'
85+
animal3 = '*s'
86+
87+
vehicle1 = 'J*'
88+
vehicle2 = '????'
89+
90+
assert wildcard_match(animal1, 'bat')
91+
assert wildcard_match(animal1, 'cat')
92+
assert wildcard_match(animal2, 'horse')
93+
assert wildcard_match(animal2, 'mouse')
94+
assert wildcard_match(animal3, 'dogs')
95+
assert wildcard_match(animal3, 'horses')
96+
97+
assert wildcard_match(vehicle1, 'Jeep')
98+
assert wildcard_match(vehicle2, 'ford')
99+
assert not wildcard_match(vehicle2, 'chevy')
100+
assert wildcard_match('*', 'cAr')
101+
102+
assert wildcard_match('*/foo', '/bar/foo')
103+
104+
105+
def test_case_insensitivity():
106+
assert wildcard_match('Foo', 'Foo', False)
107+
assert wildcard_match('Foo', 'Foo', True)
108+
109+
assert not wildcard_match('Foo', 'FOO', False)
110+
assert wildcard_match('Foo', 'FOO', True)
111+
112+
assert wildcard_match('Fo*', 'Foo0', False)
113+
assert wildcard_match('Fo*', 'Foo0', True)
114+
115+
assert not wildcard_match('Fo*', 'FOo0', False)
116+
assert wildcard_match('Fo*', 'FOo0', True)
117+
118+
assert wildcard_match('Fo?', 'Foo', False)
119+
assert wildcard_match('Fo?', 'Foo', True)
120+
121+
assert not wildcard_match('Fo?', 'FOo', False)
122+
assert wildcard_match('Fo?', 'FoO', False)
123+
assert wildcard_match('Fo?', 'FOO', True)
124+
125+
126+
def test_no_globs():
127+
assert not wildcard_match('abcd', 'abc')
128+
129+
130+
def test_edge_case_globs():
131+
assert wildcard_match('', '')
132+
assert wildcard_match('a', 'a')
133+
assert wildcard_match('*a', 'a')
134+
assert wildcard_match('*a', 'ba')
135+
assert wildcard_match('a*', 'a')
136+
assert wildcard_match('a*', 'ab')
137+
assert wildcard_match('a*a', 'aa')
138+
assert wildcard_match('a*a', 'aba')
139+
assert wildcard_match('a*a', 'aaa')
140+
assert wildcard_match('a*a*', 'aa')
141+
assert wildcard_match('a*a*', 'aba')
142+
assert wildcard_match('a*a*', 'aaa')
143+
assert wildcard_match('a*a*', 'aaaaaaaaaaaaaaaaaaaaaaaaaa')
144+
assert wildcard_match('a*b*a*b*a*b*a*b*a*',
145+
'akljd9gsdfbkjhaabajkhbbyiaahkjbjhbuykjakjhabkjhbabjhkaabbabbaaakljdfsjklababkjbsdabab')
146+
assert not wildcard_match('a*na*ha', 'anananahahanahana')
147+
148+
149+
def test_multi_globs():
150+
assert wildcard_match('*a', 'a')
151+
assert wildcard_match('**a', 'a')
152+
assert wildcard_match('***a', 'a')
153+
assert wildcard_match('**a*', 'a')
154+
assert wildcard_match('**a**', 'a')
155+
156+
assert wildcard_match('a**b', 'ab')
157+
assert wildcard_match('a**b', 'abb')
158+
159+
assert wildcard_match('*?', 'a')
160+
assert wildcard_match('*?', 'aa')
161+
assert wildcard_match('*??', 'aa')
162+
assert not wildcard_match('*???', 'aa')
163+
assert wildcard_match('*?', 'aaa')
164+
165+
assert wildcard_match('?', 'a')
166+
assert not wildcard_match('??', 'a')
167+
168+
assert wildcard_match('?*', 'a')
169+
assert wildcard_match('*?', 'a')
170+
assert not wildcard_match('?*?', 'a')
171+
assert wildcard_match('?*?', 'aa')
172+
assert wildcard_match('*?*', 'a')
173+
174+
assert not wildcard_match('*?*a', 'a')
175+
assert wildcard_match('*?*a*', 'ba')

0 commit comments

Comments
 (0)