Skip to content

Commit 622a3bf

Browse files
authored
refactor: cleanup AhoCorasick (#5358)
1 parent 8a89b42 commit 622a3bf

File tree

2 files changed

+22 -24 lines changed

2 files changed

+22 -24 lines changed

src/main/java/com/thealgorithms/strings/AhoCorasick.java

+16 -19
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@
1414
import java.util.ArrayList;
1515
import java.util.HashMap;
1616
import java.util.LinkedList;
17+
import java.util.List;
1718
import java.util.Map;
1819
import java.util.Queue;
1920

@@ -24,7 +25,7 @@ private AhoCorasick() {
2425
// Trie Node Class
2526
private static class Node {
2627
// Represents a character in the trie
27-
private HashMap<Character, Node> child = new HashMap<>(); // Child nodes of the current node
28+
private final Map<Character, Node> child = new HashMap<>(); // Child nodes of the current node
2829
private Node suffixLink; // Suffix link to another node in the trie
2930
private Node outputLink; // Output link to another node in the trie
3031
private int patternInd; // Index of the pattern that ends at this node
@@ -35,7 +36,7 @@ private static class Node {
3536
this.patternInd = -1;
3637
}
3738

38-
public HashMap<Character, Node> getChild() {
39+
public Map<Character, Node> getChild() {
3940
return child;
4041
}
4142

@@ -148,16 +149,16 @@ private void buildSuffixAndOutputLinks() {
148149
}
149150
}
150151

151-
private ArrayList<ArrayList<Integer>> initializePositionByStringIndexValue() {
152-
ArrayList<ArrayList<Integer>> positionByStringIndexValue = new ArrayList<>(patterns.length); // Stores positions where patterns are found in the text
152+
private List<List<Integer>> initializePositionByStringIndexValue() {
153+
List<List<Integer>> positionByStringIndexValue = new ArrayList<>(patterns.length); // Stores positions where patterns are found in the text
153154
for (int i = 0; i < patterns.length; i++) {
154-
positionByStringIndexValue.add(new ArrayList<Integer>());
155+
positionByStringIndexValue.add(new ArrayList<>());
155156
}
156157
return positionByStringIndexValue;
157158
}
158159

159160
// Searches for patterns in the input text and records their positions
160-
public ArrayList<ArrayList<Integer>> searchIn(final String text) {
161+
public List<List<Integer>> searchIn(final String text) {
161162
var positionByStringIndexValue = initializePositionByStringIndexValue(); // Initialize a list to store positions of the current pattern
162163
Node parent = root; // Start searching from the root node
163164

@@ -187,7 +188,7 @@ public ArrayList<ArrayList<Integer>> searchIn(final String text) {
187188

188189
// by default positionByStringIndexValue contains end-points. This function converts those
189190
// endpoints to start points
190-
private void setUpStartPoints(ArrayList<ArrayList<Integer>> positionByStringIndexValue) {
191+
private void setUpStartPoints(List<List<Integer>> positionByStringIndexValue) {
191192
for (int i = 0; i < patterns.length; i++) {
192193
for (int j = 0; j < positionByStringIndexValue.get(i).size(); j++) {
193194
int endpoint = positionByStringIndexValue.get(i).get(j);
@@ -198,20 +199,15 @@ private void setUpStartPoints(ArrayList<ArrayList<Integer>> positionByStringInde
198199
}
199200

200201
// Class to handle pattern position recording
201-
private static class PatternPositionRecorder {
202-
private ArrayList<ArrayList<Integer>> positionByStringIndexValue;
203-
202+
private record PatternPositionRecorder(List<List<Integer>> positionByStringIndexValue) {
204203
// Constructor to initialize the recorder with the position list
205-
PatternPositionRecorder(final ArrayList<ArrayList<Integer>> positionByStringIndexValue) {
206-
this.positionByStringIndexValue = positionByStringIndexValue;
207-
}
208204

209205
/**
210206
* Records positions for a pattern when it's found in the input text and follows
211207
* output links to record positions of other patterns.
212208
*
213-
* @param parent The current node representing a character in the pattern trie.
214-
* @param currentPosition The current position in the input text.
209+
* @param parent The current node representing a character in the pattern trie.
210+
* @param currentPosition The current position in the input text.
215211
*/
216212
public void recordPatternPositions(final Node parent, final int currentPosition) {
217213
// Check if the current node represents the end of a pattern
@@ -229,19 +225,20 @@ public void recordPatternPositions(final Node parent, final int currentPosition)
229225
}
230226
}
231227
}
228+
232229
// method to search for patterns in text
233-
public static Map<String, ArrayList<Integer>> search(final String text, final String[] patterns) {
230+
public static Map<String, List<Integer>> search(final String text, final String[] patterns) {
234231
final var trie = new Trie(patterns);
235232
final var positionByStringIndexValue = trie.searchIn(text);
236233
return convert(positionByStringIndexValue, patterns);
237234
}
238235

239236
// method for converting results to a map
240-
private static Map<String, ArrayList<Integer>> convert(final ArrayList<ArrayList<Integer>> positionByStringIndexValue, final String[] patterns) {
241-
Map<String, ArrayList<Integer>> positionByString = new HashMap<>();
237+
private static Map<String, List<Integer>> convert(final List<List<Integer>> positionByStringIndexValue, final String[] patterns) {
238+
Map<String, List<Integer>> positionByString = new HashMap<>();
242239
for (int i = 0; i < patterns.length; i++) {
243240
String pattern = patterns[i];
244-
ArrayList<Integer> positions = positionByStringIndexValue.get(i);
241+
List<Integer> positions = positionByStringIndexValue.get(i);
245242
positionByString.put(pattern, new ArrayList<>(positions));
246243
}
247244
return positionByString;

src/test/java/com/thealgorithms/strings/AhoCorasickTest.java

+6 -5
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@
1212

1313
import java.util.ArrayList;
1414
import java.util.Arrays;
15+
import java.util.List;
1516
import java.util.Map;
1617
import org.junit.jupiter.api.BeforeEach;
1718
import org.junit.jupiter.api.Test;
@@ -42,7 +43,7 @@ void setUp() {
4243
@Test
4344
void testSearch() {
4445
// Define the expected results for each pattern
45-
final var expected = Map.of("ACC", new ArrayList<>(Arrays.asList()), "ATC", new ArrayList<>(Arrays.asList(2)), "CAT", new ArrayList<>(Arrays.asList(1)), "GCG", new ArrayList<>(Arrays.asList()), "C", new ArrayList<>(Arrays.asList(1, 4)), "T", new ArrayList<>(Arrays.asList(3)));
46+
final var expected = Map.of("ACC", new ArrayList<>(List.of()), "ATC", new ArrayList<>(List.of(2)), "CAT", new ArrayList<>(List.of(1)), "GCG", new ArrayList<>(List.of()), "C", new ArrayList<>(List.of(1, 4)), "T", new ArrayList<>(List.of(3)));
4647
assertEquals(expected, AhoCorasick.search(text, patterns));
4748
}
4849

@@ -77,7 +78,7 @@ void testPatternNotFound() {
7778
void testPatternAtBeginning() {
7879
// Define patterns that start at the beginning of the text
7980
final var searchPatterns = new String[] {"GC", "GCA", "GCAT"};
80-
final var expected = Map.of("GC", new ArrayList<Integer>(Arrays.asList(0)), "GCA", new ArrayList<Integer>(Arrays.asList(0)), "GCAT", new ArrayList<Integer>(Arrays.asList(0)));
81+
final var expected = Map.of("GC", new ArrayList<>(List.of(0)), "GCA", new ArrayList<>(List.of(0)), "GCAT", new ArrayList<>(List.of(0)));
8182
assertEquals(expected, AhoCorasick.search(text, searchPatterns));
8283
}
8384

@@ -89,7 +90,7 @@ void testPatternAtBeginning() {
8990
void testPatternAtEnd() {
9091
// Define patterns that end at the end of the text
9192
final var searchPatterns = new String[] {"CG", "TCG", "ATCG"};
92-
final var expected = Map.of("CG", new ArrayList<Integer>(Arrays.asList(4)), "TCG", new ArrayList<Integer>(Arrays.asList(3)), "ATCG", new ArrayList<Integer>(Arrays.asList(2)));
93+
final var expected = Map.of("CG", new ArrayList<>(List.of(4)), "TCG", new ArrayList<>(List.of(3)), "ATCG", new ArrayList<>(List.of(2)));
9394
assertEquals(expected, AhoCorasick.search(text, searchPatterns));
9495
}
9596

@@ -102,7 +103,7 @@ void testPatternAtEnd() {
102103
void testMultipleOccurrencesOfPattern() {
103104
// Define patterns with multiple occurrences in the text
104105
final var searchPatterns = new String[] {"AT", "T"};
105-
final var expected = Map.of("AT", new ArrayList<Integer>(Arrays.asList(2)), "T", new ArrayList<Integer>(Arrays.asList(3)));
106+
final var expected = Map.of("AT", new ArrayList<>(List.of(2)), "T", new ArrayList<>(List.of(3)));
106107
assertEquals(expected, AhoCorasick.search(text, searchPatterns));
107108
}
108109

@@ -114,7 +115,7 @@ void testMultipleOccurrencesOfPattern() {
114115
void testCaseInsensitiveSearch() {
115116
// Define patterns with different cases
116117
final var searchPatterns = new String[] {"gca", "aTc", "C"};
117-
final var expected = Map.of("gca", new ArrayList<Integer>(), "aTc", new ArrayList<Integer>(), "C", new ArrayList<Integer>(Arrays.asList(1, 4)));
118+
final var expected = Map.of("gca", new ArrayList<Integer>(), "aTc", new ArrayList<Integer>(), "C", new ArrayList<>(Arrays.asList(1, 4)));
118119
assertEquals(expected, AhoCorasick.search(text, searchPatterns));
119120
}
120121
}

0 commit comments

Comments
 (0)