From 67767f1c4094da6380847f1f0d385ed8ffcf3663 Mon Sep 17 00:00:00 2001 From: anshul-2010 Date: Mon, 16 Oct 2023 08:42:43 +0530 Subject: [PATCH 1/6] Edit Distance Algorithm for String Matching --- strings/edit_distance.py | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 strings/edit_distance.py diff --git a/strings/edit_distance.py b/strings/edit_distance.py new file mode 100644 index 000000000000..d69139f86c05 --- /dev/null +++ b/strings/edit_distance.py @@ -0,0 +1,40 @@ +def edit_distance(source: str, target: str) -> int: + """ + Edit distance algorithm is a string metric, i.e., it is a way of quantifying + how dissimilar two strings are to one another, that is measured by + counting the minimum number of operations required to transform one string + into another. + In genetic algorithms consisting of A,T, G, and C nucleotides, this matching + becomes essential in understanding the mutation in successive genes. + Hence, this algorithm comes in handy when we are trying to quantify the + mutations in successive generations. + Args: + source (type __string__): This is the source string, the initial string with + respect to which we are calculating the edit_distance for the target + target (type __string__): This is the target string, which is formed after n + number of operations performed on the source string. + Assumptions: + The cost of operations (insertion, deletion and subtraction) is all 1 + Given two integers, return the sum. + :param source: str + :param target: str + :return: int + >>> edit_distance("GATTIC", "GALTIC") + 1 + """ + delta = {True: 0, False: 1} # Substitution + + if len(source) == 0: + return len(target) + elif len(target) == 0: + return len(source) + + return min( + edit_distance(source[:-1], target[:-1]) + delta[source[-1] == target[-1]], + edit_distance(source, target[:-1]) + 1, + edit_distance(source[:-1], target) + 1, + ) + + +print(edit_distance("ATCGCTG", "TAGCTAA")) +# Answer is 4 \ No newline at end of file From 2a3f3852f22f5a2222cecf62ab37fe75219fd182 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 16 Oct 2023 03:20:55 +0000 Subject: [PATCH 2/6] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- strings/edit_distance.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/strings/edit_distance.py b/strings/edit_distance.py index d69139f86c05..b219cb53d149 100644 --- a/strings/edit_distance.py +++ b/strings/edit_distance.py @@ -23,7 +23,7 @@ def edit_distance(source: str, target: str) -> int: 1 """ delta = {True: 0, False: 1} # Substitution - + if len(source) == 0: return len(target) elif len(target) == 0: @@ -37,4 +37,4 @@ def edit_distance(source: str, target: str) -> int: print(edit_distance("ATCGCTG", "TAGCTAA")) -# Answer is 4 \ No newline at end of file +# Answer is 4 From 71a18c82275e618773ac67255d579473189e935c Mon Sep 17 00:00:00 2001 From: Tianyi Zheng Date: Thu, 19 Oct 2023 08:31:30 -0400 Subject: [PATCH 3/6] Apply suggestions from code review --- strings/edit_distance.py | 32 ++++++++++++-------------------- 1 file changed, 12 insertions(+), 20 deletions(-) diff --git a/strings/edit_distance.py b/strings/edit_distance.py index b219cb53d149..769e5c2c99dd 100644 --- a/strings/edit_distance.py +++ b/strings/edit_distance.py @@ -1,40 +1,32 @@ def edit_distance(source: str, target: str) -> int: """ Edit distance algorithm is a string metric, i.e., it is a way of quantifying - how dissimilar two strings are to one another, that is measured by + how dissimilar two strings are to one another. It is measured by counting the minimum number of operations required to transform one string into another. - In genetic algorithms consisting of A,T, G, and C nucleotides, this matching - becomes essential in understanding the mutation in successive genes. - Hence, this algorithm comes in handy when we are trying to quantify the - mutations in successive generations. + + This implementation assumes that the cost of operations (insertion, deletion and + substitution) is always 1 + Args: - source (type __string__): This is the source string, the initial string with - respect to which we are calculating the edit_distance for the target - target (type __string__): This is the target string, which is formed after n - number of operations performed on the source string. - Assumptions: - The cost of operations (insertion, deletion and subtraction) is all 1 - Given two integers, return the sum. - :param source: str - :param target: str - :return: int + source: the initial string with respect to which we are calculating the edit distance + for the target + target: the target string, formed after performing n operations on the source string >>> edit_distance("GATTIC", "GALTIC") 1 """ - delta = {True: 0, False: 1} # Substitution - if len(source) == 0: return len(target) elif len(target) == 0: return len(source) + delta = int(source[-1] != target[-1]) # Substitution return min( - edit_distance(source[:-1], target[:-1]) + delta[source[-1] == target[-1]], + edit_distance(source[:-1], target[:-1]) + delta, edit_distance(source, target[:-1]) + 1, edit_distance(source[:-1], target) + 1, ) -print(edit_distance("ATCGCTG", "TAGCTAA")) -# Answer is 4 +if __name__ == "__main__": + print(edit_distance("ATCGCTG", "TAGCTAA")) # Answer is 4 From f91cc1c3ec2f4a2a9a092f0caef14eeb5848dae5 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 19 Oct 2023 12:32:00 +0000 Subject: [PATCH 4/6] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- strings/edit_distance.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/strings/edit_distance.py b/strings/edit_distance.py index 769e5c2c99dd..03a90b1447eb 100644 --- a/strings/edit_distance.py +++ b/strings/edit_distance.py @@ -4,10 +4,10 @@ def edit_distance(source: str, target: str) -> int: how dissimilar two strings are to one another. It is measured by counting the minimum number of operations required to transform one string into another. - + This implementation assumes that the cost of operations (insertion, deletion and substitution) is always 1 - + Args: source: the initial string with respect to which we are calculating the edit distance for the target From 45e67f1f7c4bd7c39e486c0d60f9fb0b3a6fd3f3 Mon Sep 17 00:00:00 2001 From: Tianyi Zheng Date: Thu, 19 Oct 2023 08:33:39 -0400 Subject: [PATCH 5/6] Update edit_distance.py --- strings/edit_distance.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/strings/edit_distance.py b/strings/edit_distance.py index 03a90b1447eb..f7ae832b17c9 100644 --- a/strings/edit_distance.py +++ b/strings/edit_distance.py @@ -1,17 +1,17 @@ def edit_distance(source: str, target: str) -> int: """ - Edit distance algorithm is a string metric, i.e., it is a way of quantifying - how dissimilar two strings are to one another. It is measured by - counting the minimum number of operations required to transform one string - into another. + Edit distance algorithm is a string metric, i.e., it is a way of quantifying how + dissimilar two strings are to one another. It is measured by counting the minimum + number of operations required to transform one string into another. This implementation assumes that the cost of operations (insertion, deletion and substitution) is always 1 Args: - source: the initial string with respect to which we are calculating the edit distance - for the target + source: the initial string with respect to which we are calculating the edit + distance for the target target: the target string, formed after performing n operations on the source string + >>> edit_distance("GATTIC", "GALTIC") 1 """ From f49453c1f3bd0b24104bad3d4d650937ba22753a Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 19 Oct 2023 12:34:14 +0000 Subject: [PATCH 6/6] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- strings/edit_distance.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/strings/edit_distance.py b/strings/edit_distance.py index f7ae832b17c9..e842c8555c8e 100644 --- a/strings/edit_distance.py +++ b/strings/edit_distance.py @@ -11,7 +11,7 @@ def edit_distance(source: str, target: str) -> int: source: the initial string with respect to which we are calculating the edit distance for the target target: the target string, formed after performing n operations on the source string - + >>> edit_distance("GATTIC", "GALTIC") 1 """