From f6bf88b6ed9d70435bd6c29d06c5abfd28151536 Mon Sep 17 00:00:00 2001 From: Pedram_Mohajer <48964282+pedram-mohajer@users.noreply.github.com> Date: Mon, 20 Nov 2023 22:57:14 -0500 Subject: [PATCH 01/11] Update levenshtein_distance.py --- strings/levenshtein_distance.py | 61 +++++++++++++++++---------------- 1 file changed, 31 insertions(+), 30 deletions(-) diff --git a/strings/levenshtein_distance.py b/strings/levenshtein_distance.py index 7be4074dc39b..634ac9dbb6f6 100644 --- a/strings/levenshtein_distance.py +++ b/strings/levenshtein_distance.py @@ -1,42 +1,48 @@ """ -This is a Python implementation of the levenshtein distance. -Levenshtein distance is a string metric for measuring the -difference between two sequences. +This is an optimized Python implementation of the Levenshtein distance algorithm. +Levenshtein distance is a metric for measuring differences between sequences. -For doctests run following command: -python -m doctest -v levenshtein-distance.py +For doctests, run the following command: +python -m doctest -v levenshtein_distance.py or -python3 -m doctest -v levenshtein-distance.py +python3 -m doctest -v levenshtein_distance.py -For manual testing run: -python levenshtein-distance.py +For manual testing, run: +python levenshtein_distance.py """ -def levenshtein_distance(first_word: str, second_word: str) -> int: - """Implementation of the levenshtein distance in Python. - :param first_word: the first word to measure the difference. - :param second_word: the second word to measure the difference. - :return: the levenshtein distance between the two words. +def levenshtein_distance_optimized(first_word: str, second_word: str) -> int: + """ + Compute the Levenshtein distance between two words (strings). + + The function is optimized for efficiency by modifying rows in place. + + Parameters: + first_word (str): The first word to measure the difference. + second_word (str): The second word to measure the difference. + + Returns: + int: The Levenshtein distance between the two words. + Examples: - >>> levenshtein_distance("planet", "planetary") + >>> levenshtein_distance_optimized("planet", "planetary") 3 - >>> levenshtein_distance("", "test") + >>> levenshtein_distance_optimized("", "test") 4 - >>> levenshtein_distance("book", "back") + >>> levenshtein_distance_optimized("book", "back") 2 - >>> levenshtein_distance("book", "book") + >>> levenshtein_distance_optimized("book", "book") 0 - >>> levenshtein_distance("test", "") + >>> levenshtein_distance_optimized("test", "") 4 - >>> levenshtein_distance("", "") + >>> levenshtein_distance_optimized("", "") 0 - >>> levenshtein_distance("orchestration", "container") + >>> levenshtein_distance_optimized("orchestration", "container") 10 """ - # The longer word should come first if len(first_word) < len(second_word): - return levenshtein_distance(second_word, first_word) + return levenshtein_distance_optimized(second_word, first_word) if len(second_word) == 0: return len(first_word) @@ -44,21 +50,16 @@ def levenshtein_distance(first_word: str, second_word: str) -> int: previous_row = list(range(len(second_word) + 1)) for i, c1 in enumerate(first_word): - current_row = [i + 1] + current_row = [i + 1] + [0] * len(second_word) for j, c2 in enumerate(second_word): - # Calculate insertions, deletions and substitutions insertions = previous_row[j + 1] + 1 deletions = current_row[j] + 1 substitutions = previous_row[j] + (c1 != c2) + current_row[j + 1] = min(insertions, deletions, substitutions) - # Get the minimum to append to the current row - current_row.append(min(insertions, deletions, substitutions)) - - # Store the previous row previous_row = current_row - # Returns the last element (distance) return previous_row[-1] @@ -66,5 +67,5 @@ def levenshtein_distance(first_word: str, second_word: str) -> int: first_word = input("Enter the first word:\n").strip() second_word = input("Enter the second word:\n").strip() - result = levenshtein_distance(first_word, second_word) + result = levenshtein_distance_optimized(first_word, second_word) print(f"Levenshtein distance between {first_word} and {second_word} is {result}") From 0f5c583043119e47033f5fca94b09b10bde8939e Mon Sep 17 00:00:00 2001 From: Pedram_Mohajer <48964282+pedram-mohajer@users.noreply.github.com> Date: Sun, 26 Nov 2023 14:21:02 -0500 Subject: [PATCH 02/11] Update levenshtein_distance.py --- strings/levenshtein_distance.py | 108 +++++++++++++++++++++++++++----- 1 file changed, 92 insertions(+), 16 deletions(-) diff --git a/strings/levenshtein_distance.py b/strings/levenshtein_distance.py index 634ac9dbb6f6..1405d1c7a4a3 100644 --- a/strings/levenshtein_distance.py +++ b/strings/levenshtein_distance.py @@ -1,17 +1,65 @@ -""" -This is an optimized Python implementation of the Levenshtein distance algorithm. -Levenshtein distance is a metric for measuring differences between sequences. +import timeit -For doctests, run the following command: -python -m doctest -v levenshtein_distance.py -or -python3 -m doctest -v levenshtein_distance.py +#################################### +# Levenshtein Distance Function +#################################### +def levenshtein_distance(first_word: str, second_word: str) -> int: + """ + Implementation of the Levenshtein distance in Python. -For manual testing, run: -python levenshtein_distance.py -""" + Parameters: + - first_word (str): The first word to measure the difference. + - second_word (str): The second word to measure the difference. + Returns: + int: The Levenshtein distance between the two words. + Examples: + >>> levenshtein_distance("planet", "planetary") + 3 + >>> levenshtein_distance("", "test") + 4 + >>> levenshtein_distance("book", "back") + 2 + >>> levenshtein_distance("book", "book") + 0 + >>> levenshtein_distance("test", "") + 4 + >>> levenshtein_distance("", "") + 0 + >>> levenshtein_distance("orchestration", "container") + 10 + """ + # The longer word should come first + if len(first_word) < len(second_word): + return levenshtein_distance(second_word, first_word) + + if len(second_word) == 0: + return len(first_word) + + previous_row = list(range(len(second_word) + 1)) + + for i, c1 in enumerate(first_word): + current_row = [i + 1] + + for j, c2 in enumerate(second_word): + # Calculate insertions, deletions, and substitutions + insertions = previous_row[j + 1] + 1 + deletions = current_row[j] + 1 + substitutions = previous_row[j] + (c1 != c2) + + # Get the minimum to append to the current row + current_row.append(min(insertions, deletions, substitutions)) + + # Store the previous row + previous_row = current_row + + # Returns the last element (distance) + return previous_row[-1] + +#################################### +# Optimized Levenshtein Distance Function +#################################### def levenshtein_distance_optimized(first_word: str, second_word: str) -> int: """ Compute the Levenshtein distance between two words (strings). @@ -19,8 +67,8 @@ def levenshtein_distance_optimized(first_word: str, second_word: str) -> int: The function is optimized for efficiency by modifying rows in place. Parameters: - first_word (str): The first word to measure the difference. - second_word (str): The second word to measure the difference. + - first_word (str): The first word to measure the difference. + - second_word (str): The second word to measure the difference. Returns: int: The Levenshtein distance between the two words. @@ -62,10 +110,38 @@ def levenshtein_distance_optimized(first_word: str, second_word: str) -> int: return previous_row[-1] +#################################### +# Benchmarking Function +#################################### +def benchmark_levenshtein_distance(name: str, func) -> None: + """ + Benchmark the Levenshtein distance function. + Parameters: + - name (str): The name of the function being benchmarked. + - func: The function to be benchmarked. + """ + stmt = f"{func.__name__}('sitting', 'kitten')" + setup = f"from __main__ import {func.__name__}" + number = 1000 + result = timeit.timeit(stmt=stmt, setup=setup, number=number) + print(f"{name:<35} finished {number:,} runs in {result:.5f} seconds") + +#################################### +# Main Execution +#################################### if __name__ == "__main__": - first_word = input("Enter the first word:\n").strip() - second_word = input("Enter the second word:\n").strip() + # Get user input for words + levenshtein_first_word = input("Enter the first word for Levenshtein distance:\n").strip() + levenshtein_second_word = input("Enter the second word for Levenshtein distance:\n").strip() + + # Calculate and print Levenshtein distances + levenshtein_result = levenshtein_distance(levenshtein_first_word, levenshtein_second_word) + print(f"Levenshtein distance between {levenshtein_first_word} and {levenshtein_second_word} is {levenshtein_result}") + + levenshtein_optimized_result = levenshtein_distance_optimized(levenshtein_first_word, levenshtein_second_word) + print(f"Levenshtein distance (optimized) between {levenshtein_first_word} and {levenshtein_second_word} is {levenshtein_optimized_result}") - result = levenshtein_distance_optimized(first_word, second_word) - print(f"Levenshtein distance between {first_word} and {second_word} is {result}") + # Benchmark the Levenshtein distance functions + benchmark_levenshtein_distance("Levenshtein Distance", levenshtein_distance) + benchmark_levenshtein_distance("Optimized Levenshtein", levenshtein_distance_optimized) From 994f7a49e5aa0da022489ef5da0b71c7535955d5 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 26 Nov 2023 19:21:48 +0000 Subject: [PATCH 03/11] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- strings/levenshtein_distance.py | 34 +++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/strings/levenshtein_distance.py b/strings/levenshtein_distance.py index 1405d1c7a4a3..ce83c89f53d4 100644 --- a/strings/levenshtein_distance.py +++ b/strings/levenshtein_distance.py @@ -1,5 +1,6 @@ import timeit + #################################### # Levenshtein Distance Function #################################### @@ -57,6 +58,7 @@ def levenshtein_distance(first_word: str, second_word: str) -> int: # Returns the last element (distance) return previous_row[-1] + #################################### # Optimized Levenshtein Distance Function #################################### @@ -110,6 +112,7 @@ def levenshtein_distance_optimized(first_word: str, second_word: str) -> int: return previous_row[-1] + #################################### # Benchmarking Function #################################### @@ -127,21 +130,36 @@ def benchmark_levenshtein_distance(name: str, func) -> None: result = timeit.timeit(stmt=stmt, setup=setup, number=number) print(f"{name:<35} finished {number:,} runs in {result:.5f} seconds") + #################################### # Main Execution #################################### if __name__ == "__main__": # Get user input for words - levenshtein_first_word = input("Enter the first word for Levenshtein distance:\n").strip() - levenshtein_second_word = input("Enter the second word for Levenshtein distance:\n").strip() + levenshtein_first_word = input( + "Enter the first word for Levenshtein distance:\n" + ).strip() + levenshtein_second_word = input( + "Enter the second word for Levenshtein distance:\n" + ).strip() # Calculate and print Levenshtein distances - levenshtein_result = levenshtein_distance(levenshtein_first_word, levenshtein_second_word) - print(f"Levenshtein distance between {levenshtein_first_word} and {levenshtein_second_word} is {levenshtein_result}") - - levenshtein_optimized_result = levenshtein_distance_optimized(levenshtein_first_word, levenshtein_second_word) - print(f"Levenshtein distance (optimized) between {levenshtein_first_word} and {levenshtein_second_word} is {levenshtein_optimized_result}") + levenshtein_result = levenshtein_distance( + levenshtein_first_word, levenshtein_second_word + ) + print( + f"Levenshtein distance between {levenshtein_first_word} and {levenshtein_second_word} is {levenshtein_result}" + ) + + levenshtein_optimized_result = levenshtein_distance_optimized( + levenshtein_first_word, levenshtein_second_word + ) + print( + f"Levenshtein distance (optimized) between {levenshtein_first_word} and {levenshtein_second_word} is {levenshtein_optimized_result}" + ) # Benchmark the Levenshtein distance functions benchmark_levenshtein_distance("Levenshtein Distance", levenshtein_distance) - benchmark_levenshtein_distance("Optimized Levenshtein", levenshtein_distance_optimized) + benchmark_levenshtein_distance( + "Optimized Levenshtein", levenshtein_distance_optimized + ) From 7ed8364728bb7eb0a30a4031d7f93e9eb2ecc038 Mon Sep 17 00:00:00 2001 From: Pedram_Mohajer <48964282+pedram-mohajer@users.noreply.github.com> Date: Sun, 26 Nov 2023 14:23:57 -0500 Subject: [PATCH 04/11] Update levenshtein_distance.py --- strings/levenshtein_distance.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/strings/levenshtein_distance.py b/strings/levenshtein_distance.py index ce83c89f53d4..4b0e2a8031b4 100644 --- a/strings/levenshtein_distance.py +++ b/strings/levenshtein_distance.py @@ -148,14 +148,16 @@ def benchmark_levenshtein_distance(name: str, func) -> None: levenshtein_first_word, levenshtein_second_word ) print( - f"Levenshtein distance between {levenshtein_first_word} and {levenshtein_second_word} is {levenshtein_result}" + f"Levenshtein distance between {levenshtein_first_word} and " + f"{levenshtein_second_word} is {levenshtein_result}" ) levenshtein_optimized_result = levenshtein_distance_optimized( levenshtein_first_word, levenshtein_second_word ) print( - f"Levenshtein distance (optimized) between {levenshtein_first_word} and {levenshtein_second_word} is {levenshtein_optimized_result}" + f"Levenshtein distance (optimized) between {levenshtein_first_word} and " + f"{levenshtein_second_word} is {levenshtein_optimized_result}" ) # Benchmark the Levenshtein distance functions From 1b98019716ab3c3689a8ad6c7dbfdb2cf410cacf Mon Sep 17 00:00:00 2001 From: Pedram_Mohajer <48964282+pedram-mohajer@users.noreply.github.com> Date: Sun, 26 Nov 2023 14:33:41 -0500 Subject: [PATCH 05/11] Update levenshtein_distance.py --- strings/levenshtein_distance.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/strings/levenshtein_distance.py b/strings/levenshtein_distance.py index 4b0e2a8031b4..a605fabd7a66 100644 --- a/strings/levenshtein_distance.py +++ b/strings/levenshtein_distance.py @@ -67,13 +67,10 @@ def levenshtein_distance_optimized(first_word: str, second_word: str) -> int: Compute the Levenshtein distance between two words (strings). The function is optimized for efficiency by modifying rows in place. - - Parameters: - - first_word (str): The first word to measure the difference. - - second_word (str): The second word to measure the difference. - - Returns: - int: The Levenshtein distance between the two words. + + :param first_word: the first word to measure the difference. + :param second_word: the second word to measure the difference. + :return: the Levenshtein distance between the two words. Examples: >>> levenshtein_distance_optimized("planet", "planetary") From b01dc7ed1b4127409639ab5c9fda356e14bf904b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 26 Nov 2023 19:34:15 +0000 Subject: [PATCH 06/11] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- strings/levenshtein_distance.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/strings/levenshtein_distance.py b/strings/levenshtein_distance.py index a605fabd7a66..0df642df8965 100644 --- a/strings/levenshtein_distance.py +++ b/strings/levenshtein_distance.py @@ -67,7 +67,7 @@ def levenshtein_distance_optimized(first_word: str, second_word: str) -> int: Compute the Levenshtein distance between two words (strings). The function is optimized for efficiency by modifying rows in place. - + :param first_word: the first word to measure the difference. :param second_word: the second word to measure the difference. :return: the Levenshtein distance between the two words. From 4d0fb186dcf62f882d774b11a0cfc9ff5d6e5278 Mon Sep 17 00:00:00 2001 From: Pedram_Mohajer <48964282+pedram-mohajer@users.noreply.github.com> Date: Sun, 26 Nov 2023 14:48:22 -0500 Subject: [PATCH 07/11] Update levenshtein_distance.py --- strings/levenshtein_distance.py | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/strings/levenshtein_distance.py b/strings/levenshtein_distance.py index 0df642df8965..0af434bbc4e9 100644 --- a/strings/levenshtein_distance.py +++ b/strings/levenshtein_distance.py @@ -7,14 +7,9 @@ def levenshtein_distance(first_word: str, second_word: str) -> int: """ Implementation of the Levenshtein distance in Python. - - Parameters: - - first_word (str): The first word to measure the difference. - - second_word (str): The second word to measure the difference. - - Returns: - int: The Levenshtein distance between the two words. - + :param first_word: the first word to measure the difference. + :param second_word: the second word to measure the difference. + :return: the levenshtein distance between the two words. Examples: >>> levenshtein_distance("planet", "planetary") 3 @@ -65,13 +60,10 @@ def levenshtein_distance(first_word: str, second_word: str) -> int: def levenshtein_distance_optimized(first_word: str, second_word: str) -> int: """ Compute the Levenshtein distance between two words (strings). - The function is optimized for efficiency by modifying rows in place. - :param first_word: the first word to measure the difference. :param second_word: the second word to measure the difference. :return: the Levenshtein distance between the two words. - Examples: >>> levenshtein_distance_optimized("planet", "planetary") 3 @@ -109,17 +101,14 @@ def levenshtein_distance_optimized(first_word: str, second_word: str) -> int: return previous_row[-1] - #################################### # Benchmarking Function #################################### def benchmark_levenshtein_distance(name: str, func) -> None: """ Benchmark the Levenshtein distance function. - - Parameters: - - name (str): The name of the function being benchmarked. - - func: The function to be benchmarked. + :param str: The name of the function being benchmarked. + :param func: The function to be benchmarked. """ stmt = f"{func.__name__}('sitting', 'kitten')" setup = f"from __main__ import {func.__name__}" From ab2e321cf27486823d2d26361a66740421c844d8 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 26 Nov 2023 19:48:55 +0000 Subject: [PATCH 08/11] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- strings/levenshtein_distance.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/strings/levenshtein_distance.py b/strings/levenshtein_distance.py index 0af434bbc4e9..b96d01775261 100644 --- a/strings/levenshtein_distance.py +++ b/strings/levenshtein_distance.py @@ -101,6 +101,7 @@ def levenshtein_distance_optimized(first_word: str, second_word: str) -> int: return previous_row[-1] + #################################### # Benchmarking Function #################################### @@ -108,7 +109,7 @@ def benchmark_levenshtein_distance(name: str, func) -> None: """ Benchmark the Levenshtein distance function. :param str: The name of the function being benchmarked. - :param func: The function to be benchmarked. + :param func: The function to be benchmarked. """ stmt = f"{func.__name__}('sitting', 'kitten')" setup = f"from __main__ import {func.__name__}" From 6952b48e22f15aa195552acb053bb3670e8c9165 Mon Sep 17 00:00:00 2001 From: Pedram_Mohajer <48964282+pedram-mohajer@users.noreply.github.com> Date: Sun, 26 Nov 2023 14:51:58 -0500 Subject: [PATCH 09/11] Update levenshtein_distance.py From 54bb20e84ad43643e1ac6671c2ecc3273302fd76 Mon Sep 17 00:00:00 2001 From: Christian Clauss Date: Sun, 26 Nov 2023 23:43:03 +0100 Subject: [PATCH 10/11] Update levenshtein_distance.py --- strings/levenshtein_distance.py | 51 +++++++++------------------------ 1 file changed, 13 insertions(+), 38 deletions(-) diff --git a/strings/levenshtein_distance.py b/strings/levenshtein_distance.py index b96d01775261..06e1d178c564 100644 --- a/strings/levenshtein_distance.py +++ b/strings/levenshtein_distance.py @@ -1,9 +1,6 @@ -import timeit +from collections.abc import Callable -#################################### -# Levenshtein Distance Function -#################################### def levenshtein_distance(first_word: str, second_word: str) -> int: """ Implementation of the Levenshtein distance in Python. @@ -54,9 +51,6 @@ def levenshtein_distance(first_word: str, second_word: str) -> int: return previous_row[-1] -#################################### -# Optimized Levenshtein Distance Function -#################################### def levenshtein_distance_optimized(first_word: str, second_word: str) -> int: """ Compute the Levenshtein distance between two words (strings). @@ -102,53 +96,34 @@ def levenshtein_distance_optimized(first_word: str, second_word: str) -> int: return previous_row[-1] -#################################### -# Benchmarking Function -#################################### -def benchmark_levenshtein_distance(name: str, func) -> None: +def benchmark_levenshtein_distance(func: Callable) -> None: """ Benchmark the Levenshtein distance function. :param str: The name of the function being benchmarked. :param func: The function to be benchmarked. """ + from timeit import timeit + stmt = f"{func.__name__}('sitting', 'kitten')" setup = f"from __main__ import {func.__name__}" - number = 1000 - result = timeit.timeit(stmt=stmt, setup=setup, number=number) - print(f"{name:<35} finished {number:,} runs in {result:.5f} seconds") + number = 25_000 + result = timeit(stmt=stmt, setup=setup, number=number) + print(f"{func.__name__:<30} finished {number:,} runs in {result:.5f} seconds") -#################################### -# Main Execution -#################################### if __name__ == "__main__": # Get user input for words - levenshtein_first_word = input( + first_word = input( "Enter the first word for Levenshtein distance:\n" ).strip() - levenshtein_second_word = input( + second_word = input( "Enter the second word for Levenshtein distance:\n" ).strip() # Calculate and print Levenshtein distances - levenshtein_result = levenshtein_distance( - levenshtein_first_word, levenshtein_second_word - ) - print( - f"Levenshtein distance between {levenshtein_first_word} and " - f"{levenshtein_second_word} is {levenshtein_result}" - ) - - levenshtein_optimized_result = levenshtein_distance_optimized( - levenshtein_first_word, levenshtein_second_word - ) - print( - f"Levenshtein distance (optimized) between {levenshtein_first_word} and " - f"{levenshtein_second_word} is {levenshtein_optimized_result}" - ) + print(f"{levenshtein_distance(first_word, second_word) = }") + print(f"{levenshtein_distance_optimized(first_word, second_word) = }") # Benchmark the Levenshtein distance functions - benchmark_levenshtein_distance("Levenshtein Distance", levenshtein_distance) - benchmark_levenshtein_distance( - "Optimized Levenshtein", levenshtein_distance_optimized - ) + benchmark_levenshtein_distance(levenshtein_distance) + benchmark_levenshtein_distance(levenshtein_distance_optimized) From 9a64f33538128de68f2c9d7b69c564b222f4de2b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 26 Nov 2023 22:44:16 +0000 Subject: [PATCH 11/11] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- strings/levenshtein_distance.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/strings/levenshtein_distance.py b/strings/levenshtein_distance.py index 06e1d178c564..3af6608723a5 100644 --- a/strings/levenshtein_distance.py +++ b/strings/levenshtein_distance.py @@ -113,12 +113,8 @@ def benchmark_levenshtein_distance(func: Callable) -> None: if __name__ == "__main__": # Get user input for words - first_word = input( - "Enter the first word for Levenshtein distance:\n" - ).strip() - second_word = input( - "Enter the second word for Levenshtein distance:\n" - ).strip() + first_word = input("Enter the first word for Levenshtein distance:\n").strip() + second_word = input("Enter the second word for Levenshtein distance:\n").strip() # Calculate and print Levenshtein distances print(f"{levenshtein_distance(first_word, second_word) = }")