Skip to content

Commit 0d36dc6

Browse files
committed
fixed failure function and cleaned up code in kmp + added rabin-karp
1 parent 495fdc1 commit 0d36dc6

File tree

2 files changed

+88
-15
lines changed

2 files changed

+88
-15
lines changed

Diff for: strings/knuth-morris-pratt.py

+38-15
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
def kmp(pattern, text, len_p=None, len_t=None):
1+
def kmp(pattern, text):
    """
    The Knuth-Morris-Pratt Algorithm for finding a pattern within a piece of text
    with complexity O(n + m)

    1) Construct the failure array for the pattern
    2) Step through the text once, using the failure array to decide how far
       to fall back inside the pattern after a mismatch, so the text index
       never moves backwards

    :param pattern: the sequence to look for
    :param text: the sequence to search in
    :return: True if pattern occurs as a contiguous run inside text, else False
    """
    len_p, len_t = len(pattern), len(text)

    # An empty pattern occurs in every text (same convention as `"" in text`).
    # Without this guard the pattern[j] lookup below raises IndexError.
    if len_p == 0:
        return True

    # A pattern longer than the text can never match; skip building the table.
    if len_p > len_t:
        return False

    # 1) Construct the failure array
    failure = get_failure_array(pattern)

    # 2) Step through text searching for pattern
    i, j = 0, 0  # index into text, pattern
    while i < len_t:
        if pattern[j] == text[i]:
            if j == len_p - 1:
                return True
            j += 1

        # if this is a prefix in our pattern
        # just go back far enough to continue
        elif j > 0:
            j = failure[j - 1]
            # retry the same text character against the shorter prefix
            continue
        i += 1
    return False
4234

4335

44-
if __name__ == '__main__':
36+
def get_failure_array(pattern):
    """
    Calculates the new index we should go to if we fail a comparison

    Entry k holds the length of the longest proper prefix of pattern[:k + 1]
    that is also a suffix of it, i.e. the pattern index to resume matching
    from after a mismatch that follows position k.

    :param pattern: the sequence the table is built for
    :return: list of ints with one entry per element of pattern
             (an empty list for an empty pattern)
    """
    # One slot per pattern element, so an empty pattern yields [] instead of
    # a table that is longer than the pattern itself.
    failure = [0] * len(pattern)

    matched = 0  # length of the prefix currently matched as a suffix
    for pos in range(1, len(pattern)):
        # On mismatch fall back to the next shorter prefix that is also a
        # suffix, until one can be extended or nothing is left to try.
        while matched > 0 and pattern[matched] != pattern[pos]:
            matched = failure[matched - 1]
        if pattern[matched] == pattern[pos]:
            matched += 1
        failure[pos] = matched
    return failure
54+
4555

56+
if __name__ == '__main__':
4657
# Test 1)
4758
pattern = "abc1abc12"
4859
text1 = "alskfjaldsabc1abc1abc12k23adsfabcabc"
@@ -54,4 +65,16 @@ def kmp(pattern, text, len_p=None, len_t=None):
5465
text = "ABABZABABYABABX"
5566
assert kmp(pattern, text)
5667

68+
# Test 3)
69+
pattern = "AAAB"
70+
text = "ABAAAAAB"
71+
assert kmp(pattern, text)
72+
73+
# Test 4)
74+
pattern = "abcdabcy"
75+
text = "abcxabcdabxabcdabcdabcy"
76+
assert kmp(pattern, text)
5777

78+
# Test 5)
79+
pattern = "aabaabaaa"
80+
assert get_failure_array(pattern) == [0, 1, 0, 1, 2, 3, 4, 5, 2]

Diff for: strings/rabin-karp.py

+50
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
def rabin_karp(pattern, text):
    """
    The Rabin-Karp Algorithm for finding a pattern within a piece of text.

    This is the simple single-pattern version: every window of the text is
    hashed from scratch with the built-in hash(), so the worst case is O(nm).
    The approach pays off with multiple patterns, where the hash of one window
    can be checked against a set of precomputed pattern hashes in O(1).

    1) Calculate pattern hash

    2) Slide a window with the same length as the pattern over the text one
       position at a time, hashing the window and comparing it with the
       pattern hash. Equality is only tested if the hashes match.

    :param pattern: the sequence to look for
    :param text: the sequence to search in
    :return: True if pattern occurs inside text, else False
    """
    width = len(pattern)
    target = hash(pattern)

    # Every window of pattern length, left to right; empty when the pattern
    # is longer than the text.
    windows = (
        text[start:start + width]
        for start in range(len(text) - width + 1)
    )

    # Matching hashes may still be a collision, so confirm with a real comparison.
    return any(hash(window) == target and window == pattern for window in windows)
28+
29+
30+
if __name__ == '__main__':
    # Self-checks run only when the file is executed as a script.
    # Each case is (pattern, text, whether the pattern is expected to be found).
    cases = [
        # Test 1)
        ("abc1abc12", "alskfjaldsabc1abc1abc12k23adsfabcabc", True),
        ("abc1abc12", "alskfjaldsk23adsfabcabc", False),
        # Test 2)
        ("ABABX", "ABABZABABYABABX", True),
        # Test 3)
        ("AAAB", "ABAAAAAB", True),
        # Test 4)
        ("abcdabcy", "abcxabcdabxabcdabcdabcy", True),
    ]
    for pattern, text, expected in cases:
        assert rabin_karp(pattern, text) == expected

0 commit comments

Comments
 (0)