def edit_distance(source: str, target: str) -> int:
    """
    Calculate the edit distance between two strings using dynamic programming.

    Edit distance is the minimum number of single-character operations
    (insertions, deletions, or substitutions) required to transform one string
    into another. Every operation is assumed to cost 1.

    Args:
        source: The original string.
        target: The string to transform into.

    Returns:
        The minimum number of operations required.

    Examples:
    >>> edit_distance("GATTIC", "GALTIC")
    1
    >>> edit_distance("ATCGCTG", "TAGCTAA")
    4
    >>> edit_distance("", "abc")
    3
    >>> edit_distance("abc", "")
    3
    >>> edit_distance("", "")
    0
    """
    # Each cell only depends on the current and the previous row, so two rows
    # are enough; the full (len(source) + 1) x (len(target) + 1) matrix is not
    # needed.
    # previous[j] holds the distance between the processed prefix of `source`
    # and target[:j]. For the empty prefix that is j insertions.
    previous = list(range(len(target) + 1))

    for i, source_char in enumerate(source, start=1):
        # Turning source[:i] into the empty string takes i removals.
        current = [i]
        for j, target_char in enumerate(target, start=1):
            if source_char == target_char:
                # Matching characters add no cost.
                current.append(previous[j - 1])
            else:
                current.append(
                    1
                    + min(
                        previous[j],  # Remove source_char
                        current[j - 1],  # Insert target_char
                        previous[j - 1],  # Replace source_char by target_char
                    )
                )
        previous = current

    return previous[-1]


if __name__ == "__main__":
    print(edit_distance("GATTIC", "GALTIC"))  # Output: 1
    print(edit_distance("ATCGCTG", "TAGCTAA"))  # Output: 4