Skip to content

Commit dda3c9c

Browse files
sozelfist and vil02 authored
Refactor Levenshtein distance implementation (#5138)
* ref: refactor Levenshtein distance implementation
  - Rewrite the original levenshtein distance implementation in functional style
  - Add optimized version of levenshtein distance
* ref: make `LevenshteinDistance` class a proper utility
* ref: remove duplicated test data
* ref: update tests

---

Co-authored-by: Piotr Idzik <[email protected]>
1 parent b3903f5 commit dda3c9c

File tree

2 files changed

+106
-42
lines changed

2 files changed

+106
-42
lines changed
Original file line numberDiff line numberDiff line change
@@ -1,49 +1,84 @@
11
package com.thealgorithms.dynamicprogramming;
22

3+
import java.util.stream.IntStream;
4+
35
/**
4-
* @author Kshitij VERMA (github.com/kv19971) LEVENSHTEIN DISTANCE dyamic
5-
* programming implementation to show the difference between two strings
6-
* (https://en.wikipedia.org/wiki/Levenshtein_distance)
6+
* Provides functions to calculate the Levenshtein distance between two strings.
7+
*
8+
* The Levenshtein distance measures how different two strings are: it is the minimum number of single-character
9+
* edits (insertions, deletions, or substitutions) required to change one string into the other.
710
*/
8-
public class LevenshteinDistance {
9-
10-
private static int minimum(int a, int b, int c) {
11-
if (a < b && a < c) {
12-
return a;
13-
} else if (b < a && b < c) {
14-
return b;
15-
} else {
16-
return c;
17-
}
11+
public final class LevenshteinDistance {
    private LevenshteinDistance() {
    }

    /**
     * Calculates the Levenshtein distance between two strings by filling in the complete dynamic programming matrix.
     *
     * <p>Cell {@code [i][j]} of the matrix holds the distance between the first {@code i} characters of
     * {@code string1} and the first {@code j} characters of {@code string2}. The matrix is filled top-to-bottom,
     * left-to-right.
     *
     * <p>Time complexity: O(nm), space complexity: O(nm), where n and m are the lengths of {@code string1} and
     * {@code string2}. No space optimization is applied, so this variant uses more memory on long inputs than
     * {@link #optimizedLevenshteinDistance(String, String)}.
     *
     * @param string1 the first string; must not be {@code null}
     * @param string2 the second string; must not be {@code null}
     * @return the Levenshtein distance between the two input strings
     * @throws NullPointerException if either argument is {@code null}
     */
    public static int naiveLevenshteinDistance(final String string1, final String string2) {
        final int length1 = string1.length();
        final int length2 = string2.length();
        final int[][] distanceMatrix = new int[length1 + 1][length2 + 1];

        // Base cases: turning a prefix into the empty string (or back) costs one edit per character.
        for (int i = 0; i <= length1; i++) {
            distanceMatrix[i][0] = i;
        }
        for (int j = 0; j <= length2; j++) {
            distanceMatrix[0][j] = j;
        }

        for (int i = 1; i <= length1; i++) {
            for (int j = 1; j <= length2; j++) {
                final int cost = (string1.charAt(i - 1) == string2.charAt(j - 1)) ? 0 : 1;
                final int substitution = distanceMatrix[i - 1][j - 1] + cost;
                final int insertion = distanceMatrix[i][j - 1] + 1;
                final int deletion = distanceMatrix[i - 1][j] + 1;
                distanceMatrix[i][j] = Math.min(substitution, Math.min(insertion, deletion));
            }
        }

        return distanceMatrix[length1][length2];
    }

    /**
     * Calculates the Levenshtein distance between two strings while keeping only a single row of the dynamic
     * programming matrix in memory.
     *
     * <p>The edit distance is defined as 1 point per insertion, substitution, or deletion required to make the
     * strings equal. Because the distance is symmetric, the shorter string is always laid out along the row, so the
     * working array never grows beyond the length of the shorter input.
     *
     * <p>Time complexity: O(nm), space complexity: O(min(n, m)), where n and m are the lengths of {@code string1}
     * and {@code string2}.
     *
     * @param string1 the first string; must not be {@code null}
     * @param string2 the second string; must not be {@code null}
     * @return the Levenshtein distance between the two input strings
     * @throws NullPointerException if either argument is {@code null}
     */
    public static int optimizedLevenshteinDistance(final String string1, final String string2) {
        return (string1.length() <= string2.length()) ? singleRowDistance(string1, string2) : singleRowDistance(string2, string1);
    }

    /**
     * Computes the Levenshtein distance with one reusable row indexed by the characters of {@code shorter}.
     *
     * @param shorter the string laid out along the row; callers pass the input that is not longer than the other
     * @param longer the string whose characters are consumed one per outer iteration
     * @return the Levenshtein distance between {@code shorter} and {@code longer}
     */
    private static int singleRowDistance(final String shorter, final String longer) {
        if (shorter.isEmpty()) {
            return longer.length();
        }

        // distances[i] starts as the cost of building the first i characters of `shorter` from nothing.
        final int[] distances = IntStream.rangeClosed(0, shorter.length()).toArray();

        for (int j = 1; j <= longer.length(); j++) {
            // Value diagonally up-left of the cell being computed; saved before the row is overwritten.
            int prevSubstitutionCost = distances[0];
            distances[0] = j;

            for (int i = 1; i <= shorter.length(); i++) {
                final int deletionCost = distances[i] + 1;
                final int insertionCost = distances[i - 1] + 1;
                final int substitutionCost = (shorter.charAt(i - 1) == longer.charAt(j - 1)) ? prevSubstitutionCost : prevSubstitutionCost + 1;
                // The old value of this cell is the diagonal for the next cell in the row.
                prevSubstitutionCost = distances[i];
                distances[i] = Math.min(deletionCost, Math.min(insertionCost, substitutionCost));
            }
        }

        return distances[shorter.length()];
    }
}

src/test/java/com/thealgorithms/dynamicprogramming/LevenshteinDistanceTests.java

+34-5
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,44 @@
22

33
import static org.junit.jupiter.api.Assertions.assertEquals;
44

5+
import java.util.Arrays;
6+
import java.util.List;
7+
import java.util.function.ToIntBiFunction;
8+
import java.util.stream.Stream;
59
import org.junit.jupiter.params.ParameterizedTest;
6-
import org.junit.jupiter.params.provider.CsvSource;
10+
import org.junit.jupiter.params.provider.Arguments;
11+
import org.junit.jupiter.params.provider.MethodSource;
712

813
public class LevenshteinDistanceTests {
914

1015
@ParameterizedTest
11-
@CsvSource({"dog,cat,3", "sunday,saturday,3", "cat,cats,1", "rain,train,1"})
12-
void levenshteinDistanceTest(String str1, String str2, int distance) {
13-
int result = LevenshteinDistance.calculateLevenshteinDistance(str1, str2);
14-
assertEquals(distance, result);
16+
@MethodSource("testCases")
17+
public void testLevenshteinDistance(final int expected, final String str1, final String str2, final ToIntBiFunction<String, String> dist) {
18+
assertEquals(expected, dist.applyAsInt(str1, str2));
19+
assertEquals(expected, dist.applyAsInt(str2, str1));
20+
assertEquals(0, dist.applyAsInt(str1, str1));
21+
assertEquals(0, dist.applyAsInt(str2, str2));
22+
}
23+
24+
private static Stream<Arguments> testCases() {
25+
final Object[][] testData = {
26+
{0, "", ""},
27+
{0, "Hello, World!", "Hello, World!"},
28+
{4, "", "Rust"},
29+
{3, "horse", "ros"},
30+
{6, "tan", "elephant"},
31+
{8, "execute", "intention"},
32+
{1, "a", "b"},
33+
{1, "a", "aa"},
34+
{1, "a", ""},
35+
{1, "a", "ab"},
36+
{1, "a", "ba"},
37+
{2, "a", "bc"},
38+
{2, "a", "cb"},
39+
};
40+
41+
final List<ToIntBiFunction<String, String>> methods = Arrays.asList(LevenshteinDistance::naiveLevenshteinDistance, LevenshteinDistance::optimizedLevenshteinDistance);
42+
43+
return Stream.of(testData).flatMap(input -> methods.stream().map(method -> Arguments.of(input[0], input[1], input[2], method)));
1544
}
1645
}

0 commit comments

Comments
 (0)