|
1 | 1 | """
|
2 | 2 | LCS Problem Statement: Given two sequences, find the length of the longest subsequence present in both of them.
|
3 |
| -A subsequence is a sequence that appears in the same relative order, but not necessarily continious. |
| 3 | +A subsequence is a sequence that appears in the same relative order, but not necessarily continuous. |
4 | 4 | Example: "abc" and "abg" are subsequences of "abcdefgh".
|
5 | 5 | """
|
6 | 6 | from __future__ import print_function
|
7 | 7 |
|
8 |
| -try: |
9 |
| - xrange # Python 2 |
10 |
| -except NameError: |
11 |
| - xrange = range # Python 3 |
12 | 8 |
|
def longest_common_subsequence(x: str, y: str) -> tuple:
    """
    Find the longest common subsequence (LCS) of two strings.

    Fills the classic dynamic-programming table of LCS lengths and then walks
    it backwards from the bottom-right corner to recover one subsequence of
    maximal length.

    Parameters
    ----------

    x: str, one of the strings
    y: str, the other string

    Returns
    -------
    L[m][n]: int, the length of the longest subsequence. Also equal to len(seq)
    seq: str, the subsequence found

    >>> longest_common_subsequence("programming", "gaming")
    (6, 'gaming')
    >>> longest_common_subsequence("physics", "smartphone")
    (2, 'ph')
    >>> longest_common_subsequence("computer", "food")
    (1, 'o')
    >>> longest_common_subsequence("abc", "d")
    (0, '')
    >>> longest_common_subsequence("", "abc")
    (0, '')
    """
    assert x is not None
    assert y is not None

    # find the length of strings
    m = len(x)
    n = len(y)

    # L[i][j] holds the LCS length of x[:i] and y[:j]; row 0 and column 0
    # stay at 0 because an empty prefix shares nothing with anything.
    L = [[0] * (n + 1) for _ in range(m + 1)]

    for i in range(1, m + 1):
        for j in range(1, n + 1):
            match = 1 if x[i - 1] == y[j - 1] else 0
            L[i][j] = max(L[i - 1][j], L[i][j - 1], L[i - 1][j - 1] + match)

    # Walk back from L[m][n]. Both indices must stay positive: letting j run
    # past 0 would index y (and the table) with negative offsets.
    picked = []
    i, j = m, n
    while i > 0 and j > 0:
        match = 1 if x[i - 1] == y[j - 1] else 0

        if L[i][j] == L[i - 1][j - 1] + match:
            if match == 1:
                picked.append(x[i - 1])
            i -= 1
            j -= 1
        elif L[i][j] == L[i - 1][j]:
            i -= 1
        else:
            j -= 1

    # Characters were collected from the end of the strings towards the
    # start, so reverse once instead of prepending on every step.
    seq = "".join(reversed(picked))
    return L[m][n], seq
|
33 | 71 |
|
if __name__ == '__main__':
    # Self-check on a small fixed pair of strings before printing the result.
    first, second = 'AGGTAB', 'GXTXAYB'
    expected = (4, "GTAB")

    ln, subseq = longest_common_subsequence(first, second)
    assert expected[0] == ln
    assert expected[1] == subseq
    print("len =", ln, ", sub-sequence =", subseq)
| 83 | + |
0 commit comments