From f574ce1a5372a1277a72d14c3e99720a56dd73f1 Mon Sep 17 00:00:00 2001 From: anshul-2010 Date: Thu, 12 Oct 2023 23:51:45 +0530 Subject: [PATCH 01/10] Edit_Distance algorithm for genetic string matching --- genetic_algorithm/edit_distance.py | 39 ++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 genetic_algorithm/edit_distance.py diff --git a/genetic_algorithm/edit_distance.py b/genetic_algorithm/edit_distance.py new file mode 100644 index 000000000000..9cda97801ab8 --- /dev/null +++ b/genetic_algorithm/edit_distance.py @@ -0,0 +1,39 @@ +def edit_distance(source, target): + """ + Edit distance algorithm is a string metric, i.e., it is a way of quantifying + how dissimilar two strings are to one another, that is measured by + counting the minimum number of operations required to transform one string + into another. + + In genetic algorithms consisting of A,T,G,C ncleotides, this matching + becomes essential in understanding the mutation in succesive genes. + + Hence, this algorithm comes in handy when we are trying to quantify the + mutations in successive generations. + + Args: + source (string): This is the source string, the initial string with + respect to which we are calculating the edit_distance for the target + target (string): This is the target string, which is formed after n + number of operations performed on the source string. 
+ + Assumptions: + Cost of operations insertion, deletion and subtraction is all 1 + """ + delta = {True:0, False:1} # Substitution + + if(len(source) == 0): + return len(target) + elif(len(target) == 0): + return len(source) + + return min(edit_distance(source[:-1], target[:-1]) + delta[source[-1] == target[-1]], + edit_distance(source, target[:-1]) + 1, + edit_distance(source[:-1], target) + 1) + +print(edit_distance("ATCGCTG", "TAGCTAA")) +# Answer is 4 + + + + \ No newline at end of file From caeda9f1440e6d22e846c8882f047bc384d3cb54 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 12 Oct 2023 18:24:45 +0000 Subject: [PATCH 02/10] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- genetic_algorithm/edit_distance.py | 35 +++++++++++++++--------------- 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/genetic_algorithm/edit_distance.py b/genetic_algorithm/edit_distance.py index 9cda97801ab8..6a6e8c7449b9 100644 --- a/genetic_algorithm/edit_distance.py +++ b/genetic_algorithm/edit_distance.py @@ -2,38 +2,37 @@ def edit_distance(source, target): """ Edit distance algorithm is a string metric, i.e., it is a way of quantifying how dissimilar two strings are to one another, that is measured by - counting the minimum number of operations required to transform one string + counting the minimum number of operations required to transform one string into another. - + In genetic algorithms consisting of A,T,G,C ncleotides, this matching becomes essential in understanding the mutation in succesive genes. - + Hence, this algorithm comes in handy when we are trying to quantify the mutations in successive generations. 
Args: source (string): This is the source string, the initial string with respect to which we are calculating the edit_distance for the target - target (string): This is the target string, which is formed after n + target (string): This is the target string, which is formed after n number of operations performed on the source string. - - Assumptions: + + Assumptions: Cost of operations insertion, deletion and subtraction is all 1 """ - delta = {True:0, False:1} # Substitution - - if(len(source) == 0): + delta = {True: 0, False: 1} # Substitution + + if len(source) == 0: return len(target) - elif(len(target) == 0): + elif len(target) == 0: return len(source) - - return min(edit_distance(source[:-1], target[:-1]) + delta[source[-1] == target[-1]], - edit_distance(source, target[:-1]) + 1, - edit_distance(source[:-1], target) + 1) -print(edit_distance("ATCGCTG", "TAGCTAA")) -# Answer is 4 - + return min( + edit_distance(source[:-1], target[:-1]) + delta[source[-1] == target[-1]], + edit_distance(source, target[:-1]) + 1, + edit_distance(source[:-1], target) + 1, + ) - \ No newline at end of file +print(edit_distance("ATCGCTG", "TAGCTAA")) +# Answer is 4 From f3303b062bdb595d76550ef7dc0a449acc71b522 Mon Sep 17 00:00:00 2001 From: anshul-2010 <96651393+anshul-2010@users.noreply.github.com> Date: Fri, 13 Oct 2023 00:09:09 +0530 Subject: [PATCH 03/10] Update edit_distance.py --- genetic_algorithm/edit_distance.py | 38 +++++++++++++----------------- 1 file changed, 17 insertions(+), 21 deletions(-) diff --git a/genetic_algorithm/edit_distance.py b/genetic_algorithm/edit_distance.py index 6a6e8c7449b9..9ceb89d65cbf 100644 --- a/genetic_algorithm/edit_distance.py +++ b/genetic_algorithm/edit_distance.py @@ -1,25 +1,21 @@ def edit_distance(source, target): - """ - Edit distance algorithm is a string metric, i.e., it is a way of quantifying - how dissimilar two strings are to one another, that is measured by - counting the minimum number of operations required to transform one 
string - into another. - - In genetic algorithms consisting of A,T,G,C ncleotides, this matching - becomes essential in understanding the mutation in succesive genes. - - Hence, this algorithm comes in handy when we are trying to quantify the - mutations in successive generations. - - Args: - source (string): This is the source string, the initial string with - respect to which we are calculating the edit_distance for the target - target (string): This is the target string, which is formed after n - number of operations performed on the source string. - - Assumptions: - Cost of operations insertion, deletion and subtraction is all 1 - """ +""" +Edit distance algorithm is a string metric, i.e., it is a way of quantifying +how dissimilar two strings are to one another, that is measured by +counting the minimum number of operations required to transform one string +into another. +In genetic algorithms consisting of A,T, G, and C ncleotides, this matching +becomes essential in understanding the mutation in succesive genes. +Hence, this algorithm comes in handy when we are trying to quantify the +mutations in successive generations. +Args: +source (string): This is the source string, the initial string with +respect to which we are calculating the edit_distance for the target +target (string): This is the target string, which is formed after n +number of operations performed on the source string. 
+Assumptions: +The cost of operations (insertion, deletion and subtraction) is all 1 +""" delta = {True: 0, False: 1} # Substitution if len(source) == 0: From a6ef5bdd64a622b83c621c608bfe7b6bf750a9f1 Mon Sep 17 00:00:00 2001 From: anshul-2010 <96651393+anshul-2010@users.noreply.github.com> Date: Fri, 13 Oct 2023 00:11:08 +0530 Subject: [PATCH 04/10] Update edit_distance.py --- genetic_algorithm/edit_distance.py | 34 +++++++++++++++--------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/genetic_algorithm/edit_distance.py b/genetic_algorithm/edit_distance.py index 9ceb89d65cbf..87d48b172653 100644 --- a/genetic_algorithm/edit_distance.py +++ b/genetic_algorithm/edit_distance.py @@ -1,21 +1,21 @@ def edit_distance(source, target): -""" -Edit distance algorithm is a string metric, i.e., it is a way of quantifying -how dissimilar two strings are to one another, that is measured by -counting the minimum number of operations required to transform one string -into another. -In genetic algorithms consisting of A,T, G, and C ncleotides, this matching -becomes essential in understanding the mutation in succesive genes. -Hence, this algorithm comes in handy when we are trying to quantify the -mutations in successive generations. -Args: -source (string): This is the source string, the initial string with -respect to which we are calculating the edit_distance for the target -target (string): This is the target string, which is formed after n -number of operations performed on the source string. -Assumptions: -The cost of operations (insertion, deletion and subtraction) is all 1 -""" + """ + Edit distance algorithm is a string metric, i.e., it is a way of quantifying + how dissimilar two strings are to one another, that is measured by + counting the minimum number of operations required to transform one string + into another. 
+ In genetic algorithms consisting of A,T, G, and C ncleotides, this matching + becomes essential in understanding the mutation in succesive genes. + Hence, this algorithm comes in handy when we are trying to quantify the + mutations in successive generations. + Args: + source (string): This is the source string, the initial string with + respect to which we are calculating the edit_distance for the target + target (string): This is the target string, which is formed after n + number of operations performed on the source string. + Assumptions: + The cost of operations (insertion, deletion and subtraction) is all 1 + """ delta = {True: 0, False: 1} # Substitution if len(source) == 0: From 00da496565e962cff14b6bf262d0eb84a56270ca Mon Sep 17 00:00:00 2001 From: anshul-2010 <96651393+anshul-2010@users.noreply.github.com> Date: Fri, 13 Oct 2023 00:12:19 +0530 Subject: [PATCH 05/10] Update edit_distance.py --- genetic_algorithm/edit_distance.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/genetic_algorithm/edit_distance.py b/genetic_algorithm/edit_distance.py index 87d48b172653..590388e982e4 100644 --- a/genetic_algorithm/edit_distance.py +++ b/genetic_algorithm/edit_distance.py @@ -9,9 +9,9 @@ def edit_distance(source, target): Hence, this algorithm comes in handy when we are trying to quantify the mutations in successive generations. Args: - source (string): This is the source string, the initial string with + source (type __string__): This is the source string, the initial string with respect to which we are calculating the edit_distance for the target - target (string): This is the target string, which is formed after n + target (type __string__): This is the target string, which is formed after n number of operations performed on the source string. 
 Assumptions: The cost of operations (insertion, deletion and subtraction) is all 1 From 4d1fd42c1088b25e0898acc629e6888c9a82877c Mon Sep 17 00:00:00 2001 From: anshul-2010 <96651393+anshul-2010@users.noreply.github.com> Date: Fri, 13 Oct 2023 00:17:31 +0530 Subject: [PATCH 06/10] Update edit_distance.py --- genetic_algorithm/edit_distance.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/genetic_algorithm/edit_distance.py b/genetic_algorithm/edit_distance.py index 590388e982e4..9ff5981e0dd1 100644 --- a/genetic_algorithm/edit_distance.py +++ b/genetic_algorithm/edit_distance.py @@ -4,7 +4,7 @@ def edit_distance(source, target): how dissimilar two strings are to one another, that is measured by counting the minimum number of operations required to transform one string into another. - In genetic algorithms consisting of A,T, G, and C ncleotides, this matching + In genetic algorithms consisting of A,T, G, and C nucleotides, this matching becomes essential in understanding the mutation in succesive genes. Hence, this algorithm comes in handy when we are trying to quantify the mutations in successive generations. @@ -15,6 +15,14 @@ def edit_distance(source, target): number of operations performed on the source string. Assumptions: The cost of operations (insertion, deletion and subtraction) is all 1 + Given two strings, return the edit distance between them. 
+ + :param source: str + :param target: str + :return: int + + >>> edit_distance("GATTIC, GALTIC) + 1 """ delta = {True: 0, False: 1} # Substitution From 1455e307bef6befabc8c81c61da17f04ed56e547 Mon Sep 17 00:00:00 2001 From: anshul-2010 <96651393+anshul-2010@users.noreply.github.com> Date: Fri, 13 Oct 2023 00:22:45 +0530 Subject: [PATCH 07/10] Update edit_distance.py --- genetic_algorithm/edit_distance.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/genetic_algorithm/edit_distance.py b/genetic_algorithm/edit_distance.py index 9ff5981e0dd1..fea0838bc154 100644 --- a/genetic_algorithm/edit_distance.py +++ b/genetic_algorithm/edit_distance.py @@ -1,4 +1,4 @@ -def edit_distance(source, target): +def edit_distance(source: str, target: str): -> int """ Edit distance algorithm is a string metric, i.e., it is a way of quantifying how dissimilar two strings are to one another, that is measured by From c97c94aac4a344fcc1bfb01efeedf52e23564cc1 Mon Sep 17 00:00:00 2001 From: anshul-2010 <96651393+anshul-2010@users.noreply.github.com> Date: Fri, 13 Oct 2023 00:24:29 +0530 Subject: [PATCH 08/10] Update edit_distance.py --- genetic_algorithm/edit_distance.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/genetic_algorithm/edit_distance.py b/genetic_algorithm/edit_distance.py index fea0838bc154..ec6fd62e01a4 100644 --- a/genetic_algorithm/edit_distance.py +++ b/genetic_algorithm/edit_distance.py @@ -1,4 +1,4 @@ -def edit_distance(source: str, target: str): -> int +def edit_distance(source: str, target: str) -> int: """ Edit distance algorithm is a string metric, i.e., it is a way of quantifying how dissimilar two strings are to one another, that is measured by From a5d5316fb7832efce0941e1250afe7f02dc54f16 Mon Sep 17 00:00:00 2001 From: anshul-2010 <96651393+anshul-2010@users.noreply.github.com> Date: Fri, 13 Oct 2023 00:25:48 +0530 Subject: [PATCH 09/10] Update edit_distance.py --- genetic_algorithm/edit_distance.py | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/genetic_algorithm/edit_distance.py b/genetic_algorithm/edit_distance.py index ec6fd62e01a4..228270d0c53e 100644 --- a/genetic_algorithm/edit_distance.py +++ b/genetic_algorithm/edit_distance.py @@ -5,7 +5,7 @@ def edit_distance(source: str, target: str) -> int: counting the minimum number of operations required to transform one string into another. In genetic algorithms consisting of A,T, G, and C nucleotides, this matching - becomes essential in understanding the mutation in succesive genes. + becomes essential in understanding the mutation in successive genes. Hence, this algorithm comes in handy when we are trying to quantify the mutations in successive generations. Args: From 4d64c3ee1c5109c7bf7adae4ad2882ee308297d6 Mon Sep 17 00:00:00 2001 From: anshul-2010 <96651393+anshul-2010@users.noreply.github.com> Date: Fri, 13 Oct 2023 00:33:09 +0530 Subject: [PATCH 10/10] Update edit_distance.py --- genetic_algorithm/edit_distance.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/genetic_algorithm/edit_distance.py b/genetic_algorithm/edit_distance.py index 228270d0c53e..2e7f6d1b7486 100644 --- a/genetic_algorithm/edit_distance.py +++ b/genetic_algorithm/edit_distance.py @@ -21,7 +21,7 @@ def edit_distance(source: str, target: str) -> int: :param target: str :return: int - >>> edit_distance("GATTIC, GALTIC) + >>> edit_distance("GATTIC", "GALTIC") 1 """ delta = {True: 0, False: 1} # Substitution