Skip to content

Commit b26998f

Browse files
authored
Merge pull request #208 from damelLP/add_string_algos
Added a python implementation of knuth-morris-pratt string search algo
2 parents 7f87515 + 495fdc1 commit b26998f

File tree

1 file changed

+57
-0
lines changed

1 file changed

+57
-0
lines changed

Diff for: strings/knuth-morris-pratt.py

+57
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
def kmp(pattern, text, len_p=None, len_t=None):
    """Return True if ``pattern`` occurs in ``text`` (Knuth-Morris-Pratt).

    The search runs in O(n + m) time, where n is ``len(text)`` and m is
    ``len(pattern)``:

    1) Preprocess the pattern into a failure table. ``failure[q]`` is the
       length of the longest proper prefix of ``pattern[:q + 1]`` that is
       also a suffix of it. This tells us where to continue from in the
       pattern after a mismatch, without re-reading any text.

    2) Step through the text one character at a time, comparing it with the
       current pattern character and falling back through the failure table
       whenever they differ.

    Args:
        pattern: Sequence to look for (typically a ``str``).
        text: Sequence to search in.
        len_p: Accepted for backward compatibility; ignored.
        len_t: Accepted for backward compatibility; ignored.

    Returns:
        True if ``pattern`` is a contiguous sub-sequence of ``text``,
        otherwise False. An empty pattern matches any text, mirroring
        ``"" in text``.
    """
    pattern_length = len(pattern)
    if pattern_length == 0:
        # Avoid indexing into an empty pattern; the empty pattern is
        # trivially contained in every text.
        return True

    # 1) Construct the failure table.
    failure = [0] * pattern_length
    border = 0  # length of the current longest prefix that is also a suffix
    for q in range(1, pattern_length):
        # On mismatch fall back to the next shorter border rather than
        # restarting from zero, otherwise overlapping prefixes are missed.
        while border and pattern[border] != pattern[q]:
            border = failure[border - 1]
        if pattern[border] == pattern[q]:
            border += 1
        failure[q] = border

    # 2) Step through the text searching for the pattern.
    matched = 0  # number of pattern characters currently matched
    for char in text:
        # The fallback is indexed by the last *matched* position
        # (matched - 1), never by the mismatching position itself.
        while matched and pattern[matched] != char:
            matched = failure[matched - 1]
        if pattern[matched] == char:
            matched += 1
            if matched == pattern_length:
                return True
    return False
42+
43+
44+
if __name__ == "__main__":
    # Self-check 1: a pattern whose prefix repeats inside it must be found in
    # a text that contains it, and must not be reported for a text that
    # lacks it.
    pattern = "abc1abc12"
    text1 = "alskfjaldsabc1abc1abc12k23adsfabcabc"
    text2 = "alskfjaldsk23adsfabcabc"
    assert kmp(pattern, text1)
    assert not kmp(pattern, text2)

    # Self-check 2: the match only appears after two near-miss occurrences
    # of the pattern's prefix.
    pattern, text = "ABABX", "ABABZABABYABABX"
    assert kmp(pattern, text)
56+
57+

0 commit comments

Comments
 (0)