Skip to content

Commit 90d20b3

Browse files
prayas7102 and alxkm
authored
Add smoothing constant to IDF formula in BM25 to prevent negative scores (#5696)
Co-authored-by: prayas7102 <[email protected]> Co-authored-by: Alex Klymenko <[email protected]>
1 parent d4fff30 commit 90d20b3

File tree

2 files changed

+8
-6
lines changed

2 files changed

+8
-6
lines changed

src/main/java/com/thealgorithms/searches/BM25InvertedIndex.java

+1 -1
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,6 @@ private double computeBM25Score(int termFrequency, double docLength, double idf)
215215
*/
216216
private double computeIDF(int docFrequency) {
217217
// Total number of documents in the index
218-
return Math.log((totalDocuments - docFrequency + 0.5) / (docFrequency + 0.5));
218+
return Math.log((totalDocuments - docFrequency + 0.5) / (docFrequency + 0.5) + 1);
219219
}
220220
}

src/test/java/com/thealgorithms/searches/BM25InvertedIndexTest.java

+7 -5
Original file line numberDiff line numberDiff line change
@@ -50,13 +50,15 @@ void testSearchRanking() {
5050
// Perform search for the term "good"
5151
List<SearchResult> results = index.search("good");
5252
assertFalse(results.isEmpty());
53-
53+
for (SearchResult result : results) {
54+
System.out.println(result);
55+
}
5456
// Validate the ranking based on the provided relevance scores
55-
assertEquals(6, results.get(0).getDocId()); // It's a Wonderful Life should be ranked 1st
56-
assertEquals(7, results.get(1).getDocId()); // The Pursuit of Happyness should be ranked 2nd
57+
assertEquals(1, results.get(0).getDocId()); // The Shawshank Redemption should be ranked 1st
58+
assertEquals(8, results.get(1).getDocId()); // A Few Good Men should be ranked 2nd
5759
assertEquals(5, results.get(2).getDocId()); // Good Will Hunting should be ranked 3rd
58-
assertEquals(8, results.get(3).getDocId()); // A Few Good Men should be ranked 4th
59-
assertEquals(1, results.get(4).getDocId()); // The Shawshank Redemption should be ranked 5th
60+
assertEquals(7, results.get(3).getDocId()); // The Pursuit of Happyness should be ranked 4th
61+
assertEquals(6, results.get(4).getDocId()); // It's a Wonderful Life should be ranked 5th
6062

6163
// Ensure the relevance scores are in descending order
6264
for (int i = 0; i < results.size() - 1; i++) {

0 commit comments

Comments
 (0)