Skip to content

Changed knuth_morris_pratt to be consistent with str.find() #9079

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 9 commits into from
31 changes: 24 additions & 7 deletions strings/knuth_morris_pratt.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from __future__ import annotations


def kmp(pattern: str, text: str) -> bool:
def knuth_morris_pratt(pattern: str, text: str) -> int:
"""
The Knuth-Morris-Pratt Algorithm for finding a pattern within a piece of text
with complexity O(n + m)
Expand All @@ -14,6 +14,23 @@ def kmp(pattern: str, text: str) -> bool:
2) Step through the text one character at a time and compare it to a character in
the pattern updating our location within the pattern if necessary

>>> kmp = "knuth_morris_pratt"
>>> knuth_morris_pratt("kn", kmp) == kmp.find("kn")
True
>>> knuth_morris_pratt("h_m", kmp) == kmp.find("h_m")
True
>>> knuth_morris_pratt("rr", kmp) == kmp.find("rr")
True
>>> knuth_morris_pratt("tt", kmp) == kmp.find("tt")
True
>>> knuth_morris_pratt("not there", kmp) == kmp.find("not there")
True

# A condensed version...
>>> all(knuth_morris_pratt(s, kmp) == kmp.find(s) for s in (
... "kn", "h_m", "rr", "tt", "not there"
... ))
True
"""

# 1) Construct the failure array
Expand All @@ -24,7 +41,7 @@ def kmp(pattern: str, text: str) -> bool:
while i < len(text):
if pattern[j] == text[i]:
if j == (len(pattern) - 1):
return True
return i - j
j += 1

# if this is a prefix in our pattern
Expand All @@ -33,7 +50,7 @@ def kmp(pattern: str, text: str) -> bool:
j = failure[j - 1]
continue
i += 1
return False
return -1


def get_failure_array(pattern: str) -> list[int]:
Expand Down Expand Up @@ -61,22 +78,22 @@ def get_failure_array(pattern: str) -> list[int]:
pattern = "abc1abc12"
text1 = "alskfjaldsabc1abc1abc12k23adsfabcabc"
text2 = "alskfjaldsk23adsfabcabc"
assert kmp(pattern, text1) and not kmp(pattern, text2)
print(knuth_morris_pratt(pattern, text1), knuth_morris_pratt(pattern, text2))

# Test 2)
pattern = "ABABX"
text = "ABABZABABYABABX"
assert kmp(pattern, text)
print(knuth_morris_pratt(pattern, text))

# Test 3)
pattern = "AAAB"
text = "ABAAAAAB"
assert kmp(pattern, text)
print(knuth_morris_pratt(pattern, text))

# Test 4)
pattern = "abcdabcy"
text = "abcxabcdabxabcdabcdabcy"
assert kmp(pattern, text)
print(knuth_morris_pratt(pattern, text))

# Test 5)
pattern = "aabaabaaa"
Expand Down