1
def kmp(pattern, text):
    """
    Report whether ``pattern`` occurs in ``text`` using the
    Knuth-Morris-Pratt algorithm, with complexity O(n + m).

    The failure array of the pattern is computed first. The text is then
    scanned left to right; when a comparison fails after a partial match,
    only the pattern index falls back (via the failure array) and the text
    index stays where it is, so the text index never moves backwards.

    :param pattern: the sequence to look for (a string in this module's tests)
    :param text: the sequence to search in
    :return: True if ``pattern`` occurs in ``text``, otherwise False; an
        empty pattern is considered to occur in every text
    """
    # An empty pattern has no character to compare against; without this
    # guard pattern[0] below would raise IndexError for any non-empty text.
    if not pattern:
        return True

    # 1) Construct the failure array
    failure = get_failure_array(pattern)

    # 2) Step through text searching for pattern
    last = len(pattern) - 1  # pattern index whose match completes the search
    text_len = len(text)
    i, j = 0, 0  # index into text, pattern
    while i < text_len:
        if pattern[j] == text[i]:
            if j == last:
                return True
            j += 1

        # Mismatch after a partial match: fall back to the longest prefix of
        # the pattern that is also a suffix of what has matched so far, and
        # compare the same text character again.
        elif j > 0:
            j = failure[j - 1]
            continue
        i += 1
    return False
42
34
43
35
44
- if __name__ == '__main__' :
36
def get_failure_array(pattern):
    """
    Calculates the new index we should go to if we fail a comparison

    Entry ``k`` of the result is the length of the longest proper prefix of
    ``pattern[:k + 1]`` that is also a suffix of it.

    :param pattern: the sequence to analyse (a string in this module's tests)
    :return: a list of ints with one entry per element of ``pattern``; an
        empty list for an empty pattern
    """
    # Keep the table the same length as the pattern, including length zero.
    if not pattern:
        return []

    failure = [0]
    border = 0  # length of the prefix currently matched against the suffix
    for j in range(1, len(pattern)):
        # Shrink the candidate prefix until it can be extended by pattern[j]
        # or nothing is left to shrink.
        while border > 0 and pattern[border] != pattern[j]:
            border = failure[border - 1]
        if pattern[border] == pattern[j]:
            border += 1
        failure.append(border)
    return failure
54
+
45
55
56
+ if __name__ == '__main__' :
46
57
# Test 1)
47
58
pattern = "abc1abc12"
48
59
text1 = "alskfjaldsabc1abc1abc12k23adsfabcabc"
@@ -54,4 +65,16 @@ def kmp(pattern, text, len_p=None, len_t=None):
54
65
text = "ABABZABABYABABX"
55
66
assert kmp (pattern , text )
56
67
68
+ # Test 3)
69
+ pattern = "AAAB"
70
+ text = "ABAAAAAB"
71
+ assert kmp (pattern , text )
72
+
73
+ # Test 4)
74
+ pattern = "abcdabcy"
75
+ text = "abcxabcdabxabcdabcdabcy"
76
+ assert kmp (pattern , text )
57
77
78
+ # Test 5)
79
+ pattern = "aabaabaaa"
80
+ assert get_failure_array (pattern ) == [0 , 1 , 0 , 1 , 2 , 3 , 4 , 5 , 2 ]
0 commit comments