From 46728784ce5bc7a51938dade9a949324d3ece87b Mon Sep 17 00:00:00 2001 From: Belhadj Ahmed Walid Date: Mon, 21 Aug 2023 00:09:04 +0100 Subject: [PATCH 01/10] added smith waterman algorithm --- dynamic_programming/smith_waterman.py | 98 +++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) create mode 100644 dynamic_programming/smith_waterman.py diff --git a/dynamic_programming/smith_waterman.py b/dynamic_programming/smith_waterman.py new file mode 100644 index 000000000000..f689f5cf38d8 --- /dev/null +++ b/dynamic_programming/smith_waterman.py @@ -0,0 +1,98 @@ + +# https://en.wikipedia.org/wiki/Smith%E2%80%93Waterman_algorithm +# Score constants +""" +Score constants used in the Smith-Waterman algorithm. Matches are given a positive +score while mismatches are given a negative score. Gaps are also penalized. +""" +MATCH = 1 +MISMATCH = -1 +GAP = -2 + + +def score_function(a: str, b: str) -> int: + """ + Calculate the score for a character pair based on whether they match or mismatch. + Returns 1 if the characters match, -1 if they mismatch. + >>> score_function('A', 'A') + 1 + >>> score_function('A', 'C') + -1 + """ + if a == b: + return MATCH + else: + return MISMATCH + + +def smith_waterman(query: str, subject: str) -> list[list[int]]: + """ + Perform the Smith-Waterman local sequence alignment algorithm. + Returns a 2D list representing the score matrix. Each value in the matrix + corresponds to the score of the best local alignment ending at that point. 
+ >>> smith_waterman('ACAC', 'CA') + [[0, 0, 0], [0, 0, 1], [0, 1, 0], [0, 0, 2], [0, 1, 0]] + """ + + # Initialize score matrix + m = len(query) + n = len(subject) + score = [[0] * (n + 1) for _ in range(m + 1)] + + for i in range(1, m + 1): + for j in range(1, n + 1): + # Calculate scores for each cell + match = score[i - 1][j - 1] + score_function(query[i - 1], subject[j - 1]) + delete = score[i - 1][j] + GAP + insert = score[i][j - 1] + GAP + + # Take maximum score + score[i][j] = max(0, match, delete, insert) + + return score + + +def traceback(score: list[list[int]], query: str, subject: str) -> str: + r""" + Perform traceback to find the optimal local alignment. + Starts from the highest scoring cell in the matrix and traces back recursively + until a 0 score is found. Returns the alignment strings. + >>> traceback([[0, 0, 0], [0, 0, 1], [0, 1, 0], [0, 0, 2], [0, 1, 0]], 'ACAC', 'CA') + 'CAC\nCA-' + """ + + # Traceback logic to find optimal alignment + i = len(query) + j = len(subject) + align1 = "" + align2 = "" + + while i > 0 and j > 0: + if score[i][j] == score[i - 1][j - 1] + score_function( + query[i - 1], subject[j - 1] + ): + # optimal path is a diagonal take both letters + align1 = query[i - 1] + align1 + align2 = subject[j - 1] + align2 + i -= 1 + j -= 1 + elif score[i][j] == score[i - 1][j] + GAP: + # optimal path is a vertical + align1 = query[i - 1] + align1 + align2 = "-" + align2 + i -= 1 + else: + # optimal path is a horizontal + align1 = "-" + align1 + align2 = subject[j - 1] + align2 + j -= 1 + + return f'{align1}\n{align2}' + + +if __name__ == "__main__": + query = "HEAGAWGHEE" + subject = "PAWHEAE" + + score = smith_waterman(query, subject) + print(traceback(score, query, subject)) From 44314e4e6d271d048234c19431ae3884b60dd3ca Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 20 Aug 2023 23:11:46 +0000 Subject: [PATCH 02/10] [pre-commit.ci] auto fixes from pre-commit.com 
hooks for more information, see https://pre-commit.ci --- dynamic_programming/smith_waterman.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dynamic_programming/smith_waterman.py b/dynamic_programming/smith_waterman.py index f689f5cf38d8..6af69b30aa29 100644 --- a/dynamic_programming/smith_waterman.py +++ b/dynamic_programming/smith_waterman.py @@ -1,4 +1,3 @@ - # https://en.wikipedia.org/wiki/Smith%E2%80%93Waterman_algorithm # Score constants """ @@ -87,7 +86,7 @@ def traceback(score: list[list[int]], query: str, subject: str) -> str: align2 = subject[j - 1] + align2 j -= 1 - return f'{align1}\n{align2}' + return f"{align1}\n{align2}" if __name__ == "__main__": From 65b95a62b565ffbb61fee5d7d1c2ac76ba3ea10d Mon Sep 17 00:00:00 2001 From: Belhadj Ahmed Walid Date: Tue, 22 Aug 2023 13:25:36 +0100 Subject: [PATCH 03/10] descriptive names for the parameters a and b --- dynamic_programming/smith_waterman.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/dynamic_programming/smith_waterman.py b/dynamic_programming/smith_waterman.py index f689f5cf38d8..6aaf462f4dd3 100644 --- a/dynamic_programming/smith_waterman.py +++ b/dynamic_programming/smith_waterman.py @@ -1,4 +1,3 @@ - # https://en.wikipedia.org/wiki/Smith%E2%80%93Waterman_algorithm # Score constants """ @@ -10,7 +9,7 @@ GAP = -2 -def score_function(a: str, b: str) -> int: +def score_function(source_char: str, target_char: str) -> int: """ Calculate the score for a character pair based on whether they match or mismatch. Returns 1 if the characters match, -1 if they mismatch. 
@@ -19,7 +18,7 @@ def score_function(a: str, b: str) -> int: >>> score_function('A', 'C') -1 """ - if a == b: + if source_char == target_char: return MATCH else: return MISMATCH @@ -87,7 +86,7 @@ def traceback(score: list[list[int]], query: str, subject: str) -> str: align2 = subject[j - 1] + align2 j -= 1 - return f'{align1}\n{align2}' + return f"{align1}\n{align2}" if __name__ == "__main__": From d8a6bcba6a175f5c9b271a0bdc7a6a394e125bbe Mon Sep 17 00:00:00 2001 From: Belhadj Ahmed Walid Date: Tue, 22 Aug 2023 18:08:55 +0100 Subject: [PATCH 04/10] doctesting lowercase upcase empty string cases --- dynamic_programming/smith_waterman.py | 45 ++++++++++++++++++++++----- 1 file changed, 38 insertions(+), 7 deletions(-) diff --git a/dynamic_programming/smith_waterman.py b/dynamic_programming/smith_waterman.py index 6aaf462f4dd3..eaf6da412bb1 100644 --- a/dynamic_programming/smith_waterman.py +++ b/dynamic_programming/smith_waterman.py @@ -12,16 +12,22 @@ def score_function(source_char: str, target_char: str) -> int: """ Calculate the score for a character pair based on whether they match or mismatch. - Returns 1 if the characters match, -1 if they mismatch. + Returns 1 if the characters match, -1 if they mismatch, and -2 if either of the + characters is a gap. >>> score_function('A', 'A') 1 >>> score_function('A', 'C') -1 + >>> score_function('-', 'A') + -2 + >>> score_function('A', '-') + -2 + >>> score_function('-', '-') + -2 """ - if source_char == target_char: - return MATCH - else: - return MISMATCH + if "-" in (source_char, target_char): + return GAP + return MATCH if source_char == target_char else MISMATCH def smith_waterman(query: str, subject: str) -> list[list[int]]: @@ -31,7 +37,20 @@ def smith_waterman(query: str, subject: str) -> list[list[int]]: corresponds to the score of the best local alignment ending at that point. 
>>> smith_waterman('ACAC', 'CA') [[0, 0, 0], [0, 0, 1], [0, 1, 0], [0, 0, 2], [0, 1, 0]] + >>> smith_waterman('acac', 'ca') + [[0, 0, 0], [0, 0, 1], [0, 1, 0], [0, 0, 2], [0, 1, 0]] + >>> smith_waterman('ACAC', 'ca') + [[0, 0, 0], [0, 0, 1], [0, 1, 0], [0, 0, 2], [0, 1, 0]] + >>> smith_waterman('acac', 'CA') + [[0, 0, 0], [0, 0, 1], [0, 1, 0], [0, 0, 2], [0, 1, 0]] + >>> smith_waterman('ACAC', '') + [[0], [0], [0], [0], [0]] + >>> smith_waterman('', 'CA') + [[0, 0, 0]] """ + # make both query and subject uppercase + query = query.upper() + subject = subject.upper() # Initialize score matrix m = len(query) @@ -58,14 +77,26 @@ def traceback(score: list[list[int]], query: str, subject: str) -> str: until a 0 score is found. Returns the alignment strings. >>> traceback([[0, 0, 0], [0, 0, 1], [0, 1, 0], [0, 0, 2], [0, 1, 0]], 'ACAC', 'CA') 'CAC\nCA-' + >>> traceback([[0, 0, 0], [0, 0, 1], [0, 1, 0], [0, 0, 2], [0, 1, 0]], 'acac', 'ca') + 'CAC\nCA-' + >>> traceback([[0, 0, 0], [0, 0, 1], [0, 1, 0], [0, 0, 2], [0, 1, 0]], 'ACAC', 'ca') + 'CAC\nCA-' + >>> traceback([[0, 0, 0], [0, 0, 1], [0, 1, 0], [0, 0, 2], [0, 1, 0]], 'acac', 'CA') + 'CAC\nCA-' + >>> traceback([[0, 0, 0]], 'ACAC', '') + '' """ - + # make both query and subject uppercase + query = query.upper() + subject = subject.upper() # Traceback logic to find optimal alignment i = len(query) j = len(subject) align1 = "" align2 = "" - + # guard against empty query or subject + if i == 0 or j == 0: + return "" while i > 0 and j > 0: if score[i][j] == score[i - 1][j - 1] + score_function( query[i - 1], subject[j - 1] From fc5880158d902df04387de626ba1aff8f7f54022 Mon Sep 17 00:00:00 2001 From: Belhadj Ahmed Walid Date: Sun, 24 Sep 2023 23:41:17 +0100 Subject: [PATCH 05/10] updated block quote, fixed traceback and doctests --- dynamic_programming/smith_waterman.py | 38 ++++++++++++++++++--------- 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/dynamic_programming/smith_waterman.py
b/dynamic_programming/smith_waterman.py index eaf6da412bb1..f6471e249452 100644 --- a/dynamic_programming/smith_waterman.py +++ b/dynamic_programming/smith_waterman.py @@ -1,9 +1,15 @@ -# https://en.wikipedia.org/wiki/Smith%E2%80%93Waterman_algorithm -# Score constants """ -Score constants used in the Smith-Waterman algorithm. Matches are given a positive -score while mismatches are given a negative score. Gaps are also penalized. +https://en.wikipedia.org/wiki/Smith%E2%80%93Waterman_algorithm +The Smith-Waterman algorithm is a dynamic programming algorithm used for sequence +alignment.It is particularly useful for finding similarities between two sequences, +such as DNA or protein sequences. In this implementation, gaps are penalized +linearly,meaning that the scoreis reduced by a fixed amount for each gap introduced +in the alignment. It's important to notethat the Smith-Waterman algorithm supports +other gap penalty methods as well, but in thisspecific implementation, linear gap +penalties are used. """ +# Score constants: matches are given a positive score while mismatches are given a +# negative score. Gaps are also penalized linearly. MATCH = 1 MISMATCH = -1 GAP = -2 @@ -76,22 +82,30 @@ def traceback(score: list[list[int]], query: str, subject: str) -> str: Starts from the highest scoring cell in the matrix and traces back recursively until a 0 score is found. Returns the alignment strings. 
>>> traceback([[0, 0, 0], [0, 0, 1], [0, 1, 0], [0, 0, 2], [0, 1, 0]], 'ACAC', 'CA') - 'CAC\nCA-' + 'CA\nCA' >>> traceback([[0, 0, 0], [0, 0, 1], [0, 1, 0], [0, 0, 2], [0, 1, 0]], 'acac', 'ca') - 'CAC\nCA-' + 'CA\nCA' >>> traceback([[0, 0, 0], [0, 0, 1], [0, 1, 0], [0, 0, 2], [0, 1, 0]], 'ACAC', 'ca') - 'CAC\nCA-' + 'CA\nCA' >>> traceback([[0, 0, 0], [0, 0, 1], [0, 1, 0], [0, 0, 2], [0, 1, 0]], 'acac', 'CA') - 'CAC\nCA-' + 'CA\nCA' >>> traceback([[0, 0, 0]], 'ACAC', '') '' """ # make both query and subject uppercase query = query.upper() subject = subject.upper() + # find the indices of the maximum value in the score matrix + max_value = float("-inf") + i_max = j_max = 0 + for i, row in enumerate(score): + for j, value in enumerate(row): + if value > max_value: + max_value = value + i_max, j_max = i, j # Traceback logic to find optimal alignment - i = len(query) - j = len(subject) + i = i_max + j = j_max align1 = "" align2 = "" # guard against empty query or subject @@ -109,11 +123,11 @@ def traceback(score: list[list[int]], query: str, subject: str) -> str: elif score[i][j] == score[i - 1][j] + GAP: # optimal path is a vertical align1 = query[i - 1] + align1 - align2 = "-" + align2 + align2 = f"-{align2}" i -= 1 else: # optimal path is a horizontal - align1 = "-" + align1 + align1 = f"-{align1}" align2 = subject[j - 1] + align2 j -= 1 From 0662f69006890709efc6f9c8f4b5d60387153b23 Mon Sep 17 00:00:00 2001 From: Belhadj Ahmed Walid <80895522+BAW2501@users.noreply.github.com> Date: Mon, 25 Sep 2023 09:36:53 +0100 Subject: [PATCH 06/10] shorter block quote Co-authored-by: Tianyi Zheng --- dynamic_programming/smith_waterman.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/dynamic_programming/smith_waterman.py b/dynamic_programming/smith_waterman.py index f6471e249452..dccafa727507 100644 --- a/dynamic_programming/smith_waterman.py +++ b/dynamic_programming/smith_waterman.py @@ -1,12 +1,11 @@ """ 
https://en.wikipedia.org/wiki/Smith%E2%80%93Waterman_algorithm The Smith-Waterman algorithm is a dynamic programming algorithm used for sequence -alignment.It is particularly useful for finding similarities between two sequences, +alignment. It is particularly useful for finding similarities between two sequences, such as DNA or protein sequences. In this implementation, gaps are penalized -linearly,meaning that the scoreis reduced by a fixed amount for each gap introduced -in the alignment. It's important to notethat the Smith-Waterman algorithm supports -other gap penalty methods as well, but in thisspecific implementation, linear gap -penalties are used. +linearly, meaning that the score is reduced by a fixed amount for each gap introduced +in the alignment. However, it's important to note that the Smith-Waterman algorithm +supports other gap penalty methods as well. """ # Score constants: matches are given a positive score while mismatches are given a # negative score. Gaps are also penalized linearly. From 892858a84a552dac7a8862a5abea50280f9b3929 Mon Sep 17 00:00:00 2001 From: Belhadj Ahmed Walid Date: Mon, 25 Sep 2023 10:17:53 +0100 Subject: [PATCH 07/10] global vars to func params, more doctests --- dynamic_programming/smith_waterman.py | 87 ++++++++++++++++++++++----- 1 file changed, 71 insertions(+), 16 deletions(-) diff --git a/dynamic_programming/smith_waterman.py b/dynamic_programming/smith_waterman.py index f6471e249452..7a9bc3f7f990 100644 --- a/dynamic_programming/smith_waterman.py +++ b/dynamic_programming/smith_waterman.py @@ -1,21 +1,21 @@ """ https://en.wikipedia.org/wiki/Smith%E2%80%93Waterman_algorithm The Smith-Waterman algorithm is a dynamic programming algorithm used for sequence -alignment.It is particularly useful for finding similarities between two sequences, +alignment. It is particularly useful for finding similarities between two sequences, such as DNA or protein sequences.
In this implementation, gaps are penalized -linearly,meaning that the scoreis reduced by a fixed amount for each gap introduced -in the alignment. It's important to notethat the Smith-Waterman algorithm supports -other gap penalty methods as well, but in thisspecific implementation, linear gap -penalties are used. +linearly, meaning that the score is reduced by a fixed amount for each gap introduced +in the alignment. However, it's important to note that the Smith-Waterman algorithm +supports other gap penalty methods as well. """ -# Score constants: matches are given a positive score while mismatches are given a -# negative score. Gaps are also penalized linearly. -MATCH = 1 -MISMATCH = -1 -GAP = -2 -def score_function(source_char: str, target_char: str) -> int: +def score_function( + source_char: str, + target_char: str, + match: int = 1, + mismatch: int = -1, + gap: int = -2, +) -> int: """ Calculate the score for a character pair based on whether they match or mismatch. Returns 1 if the characters match, -1 if they mismatch, and -2 if either of the @@ -32,8 +32,8 @@ def score_function(source_char: str, target_char: str) -> int: -2 """ if "-" in (source_char, target_char): - return GAP - return MATCH if source_char == target_char else MISMATCH + return gap + return match if source_char == target_char else mismatch def smith_waterman(query: str, subject: str) -> list[list[int]]: @@ -53,6 +53,59 @@ def smith_waterman(query: str, subject: str) -> list[list[int]]: [[0], [0], [0], [0], [0]] >>> smith_waterman('', 'CA') [[0, 0, 0]] + >>> smith_waterman('ACAC', 'CA') + [[0, 0, 0], [0, 0, 1], [0, 1, 0], [0, 0, 2], [0, 1, 0]] + + >>> smith_waterman('acac', 'ca') + [[0, 0, 0], [0, 0, 1], [0, 1, 0], [0, 0, 2], [0, 1, 0]] + + >>> smith_waterman('ACAC', 'ca') + [[0, 0, 0], [0, 0, 1], [0, 1, 0], [0, 0, 2], [0, 1, 0]] + + >>> smith_waterman('acac', 'CA') + [[0, 0, 0], [0, 0, 1], [0, 1, 0], [0, 0, 2], [0, 1, 0]] + + >>> smith_waterman('ACAC', '') + [[0], [0], [0], [0], [0]] + + 
>>> smith_waterman('', 'CA') + [[0, 0, 0]] + + >>> smith_waterman('GTTAC', 'GTTGC') + [[0, 0, 0, 0, 0, 0], [0, 1, 0, 0, 1, 0], [0, 0, 2, 1, 0, 0], [0, 0, 1, 3, 1, 0], + [0, 0, 0, 1, 2, 0], [0, 0, 0, 0, 0, 3]] + + >>> smith_waterman('ACAC', 'ACAC') + [[0, 0, 0, 0, 0], [0, 1, 0, 1, 0], [0, 0, 2, 0, 2], [0, 1, 0, 3, 1], + [0, 0, 2, 1, 4]] + + >>> smith_waterman('ACGT', 'TGCA') + [[0, 0, 0, 0, 0], [0, 0, 0, 0, 1], [0, 0, 0, 1, 0], [0, 0, 1, 0, 0], + [0, 1, 0, 0, 0]] + + >>> smith_waterman('AGT', 'AGT') + [[0, 0, 0, 0], [0, 1, 0, 0], [0, 0, 2, 0], [0, 0, 0, 3]] + + >>> smith_waterman('AGT', 'GTA') + [[0, 0, 0, 0], [0, 0, 0, 1], [0, 1, 0, 0], [0, 0, 2, 0]] + + >>> smith_waterman('AGT', 'GTC') + [[0, 0, 0, 0], [0, 0, 0, 0], [0, 1, 0, 0], [0, 0, 2, 0]] + + >>> smith_waterman('AGT', 'G') + [[0, 0], [0, 0], [0, 1], [0, 0]] + + >>> smith_waterman('G', 'AGT') + [[0, 0, 0, 0], [0, 0, 1, 0]] + + >>> smith_waterman('AGT', 'AGTCT') + [[0, 0, 0, 0, 0, 0], [0, 1, 0, 0, 0, 0], [0, 0, 2, 0, 0, 0], [0, 0, 0, 3, 1, 1]] + + >>> smith_waterman('AGTCT', 'AGT') + [[0, 0, 0, 0], [0, 1, 0, 0], [0, 0, 2, 0], [0, 0, 0, 3], [0, 0, 0, 1], [0, 0, 0, 1]] + + >>> smith_waterman('AGTCT', 'GTC') + [[0, 0, 0, 0], [0, 0, 0, 0], [0, 1, 0, 0], [0, 0, 2, 0], [0, 0, 0, 3], [0, 0, 1, 1]] """ # make both query and subject uppercase query = query.upper() @@ -62,13 +115,14 @@ def smith_waterman(query: str, subject: str) -> list[list[int]]: m = len(query) n = len(subject) score = [[0] * (n + 1) for _ in range(m + 1)] + gap = score_function("-", "-") for i in range(1, m + 1): for j in range(1, n + 1): # Calculate scores for each cell match = score[i - 1][j - 1] + score_function(query[i - 1], subject[j - 1]) - delete = score[i - 1][j] + GAP - insert = score[i][j - 1] + GAP + delete = score[i - 1][j] + gap + insert = score[i][j - 1] + gap # Take maximum score score[i][j] = max(0, match, delete, insert) @@ -108,6 +162,7 @@ def traceback(score: list[list[int]], query: str, subject: str) -> str: j = j_max align1 = "" 
align2 = "" + gap = score_function("-", "-") # guard against empty query or subject if i == 0 or j == 0: return "" @@ -120,7 +175,7 @@ def traceback(score: list[list[int]], query: str, subject: str) -> str: align2 = subject[j - 1] + align2 i -= 1 j -= 1 - elif score[i][j] == score[i - 1][j] + GAP: + elif score[i][j] == score[i - 1][j] + gap: # optimal path is a vertical align1 = query[i - 1] + align1 align2 = f"-{align2}" From 0e199f771638d26b7497c1c5600bccf7b3afe63b Mon Sep 17 00:00:00 2001 From: Belhadj Ahmed Walid Date: Mon, 25 Sep 2023 10:42:59 +0100 Subject: [PATCH 08/10] updated doctests --- dynamic_programming/smith_waterman.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/dynamic_programming/smith_waterman.py b/dynamic_programming/smith_waterman.py index 7a9bc3f7f990..fe363eca519c 100644 --- a/dynamic_programming/smith_waterman.py +++ b/dynamic_programming/smith_waterman.py @@ -71,18 +71,6 @@ def smith_waterman(query: str, subject: str) -> list[list[int]]: >>> smith_waterman('', 'CA') [[0, 0, 0]] - >>> smith_waterman('GTTAC', 'GTTGC') - [[0, 0, 0, 0, 0, 0], [0, 1, 0, 0, 1, 0], [0, 0, 2, 1, 0, 0], [0, 0, 1, 3, 1, 0], - [0, 0, 0, 1, 2, 0], [0, 0, 0, 0, 0, 3]] - - >>> smith_waterman('ACAC', 'ACAC') - [[0, 0, 0, 0, 0], [0, 1, 0, 1, 0], [0, 0, 2, 0, 2], [0, 1, 0, 3, 1], - [0, 0, 2, 1, 4]] - - >>> smith_waterman('ACGT', 'TGCA') - [[0, 0, 0, 0, 0], [0, 0, 0, 0, 1], [0, 0, 0, 1, 0], [0, 0, 1, 0, 0], - [0, 1, 0, 0, 0]] - >>> smith_waterman('AGT', 'AGT') [[0, 0, 0, 0], [0, 1, 0, 0], [0, 0, 2, 0], [0, 0, 0, 3]] From 2729a57145a328fdaecac51747b8df5a1efba8bc Mon Sep 17 00:00:00 2001 From: Belhadj Ahmed Walid Date: Wed, 27 Sep 2023 12:50:27 +0100 Subject: [PATCH 09/10] user access to SW params --- dynamic_programming/smith_waterman.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/dynamic_programming/smith_waterman.py b/dynamic_programming/smith_waterman.py index fe363eca519c..5ed39e163301 100644 --- 
a/dynamic_programming/smith_waterman.py +++ b/dynamic_programming/smith_waterman.py @@ -36,7 +36,13 @@ def score_function( return match if source_char == target_char else mismatch -def smith_waterman(query: str, subject: str) -> list[list[int]]: +def smith_waterman( + query: str, + subject: str, + match: int = 1, + mismatch: int = -1, + gap: int = -2, + ) -> list[list[int]]: """ Perform the Smith-Waterman local sequence alignment algorithm. Returns a 2D list representing the score matrix. Each value in the matrix @@ -103,12 +109,12 @@ def smith_waterman(query: str, subject: str) -> list[list[int]]: m = len(query) n = len(subject) score = [[0] * (n + 1) for _ in range(m + 1)] - gap = score_function("-", "-") + kwargs = {"match": match, "mismatch": mismatch, "gap": gap} for i in range(1, m + 1): for j in range(1, n + 1): # Calculate scores for each cell - match = score[i - 1][j - 1] + score_function(query[i - 1], subject[j - 1]) + match = score[i - 1][j - 1] + score_function(query[i - 1], subject[j - 1],**kwargs) delete = score[i - 1][j] + gap insert = score[i][j - 1] + gap @@ -181,5 +187,5 @@ def traceback(score: list[list[int]], query: str, subject: str) -> str: query = "HEAGAWGHEE" subject = "PAWHEAE" - score = smith_waterman(query, subject) + score = smith_waterman(query, subject, match=1, mismatch=-1, gap=-2) print(traceback(score, query, subject)) From 2a3e20a57c932a6bf036a082dd434267a5a99316 Mon Sep 17 00:00:00 2001 From: Belhadj Ahmed Walid Date: Wed, 27 Sep 2023 12:52:31 +0100 Subject: [PATCH 10/10] formatting --- dynamic_programming/smith_waterman.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/dynamic_programming/smith_waterman.py b/dynamic_programming/smith_waterman.py index 5ed39e163301..4c5d58379f07 100644 --- a/dynamic_programming/smith_waterman.py +++ b/dynamic_programming/smith_waterman.py @@ -37,12 +37,12 @@ def score_function( def smith_waterman( - query: str, + query: str, subject: str, match: int = 1, mismatch: int
= -1, - gap: int = -2, - ) -> list[list[int]]: + gap: int = -2, +) -> list[list[int]]: """ Perform the Smith-Waterman local sequence alignment algorithm. Returns a 2D list representing the score matrix. Each value in the matrix @@ -114,7 +114,9 @@ def smith_waterman( for i in range(1, m + 1): for j in range(1, n + 1): # Calculate scores for each cell - match = score[i - 1][j - 1] + score_function(query[i - 1], subject[j - 1],**kwargs) + match = score[i - 1][j - 1] + score_function( + query[i - 1], subject[j - 1], **kwargs + ) delete = score[i - 1][j] + gap insert = score[i][j - 1] + gap