Skip to content

Commit 8c2f2ca

Browse files
authored
chore: merge "Dice" (#703)
* feat: add dice coefficient * chore: link to wikipedia article * chore: convert to esm * refactor: add tests * chore: formatting
1 parent ab65e2a commit 8c2f2ca

File tree

2 files changed

+72
-0
lines changed

2 files changed

+72
-0
lines changed

String/DiceCoefficient.js

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
/* The Sørensen–Dice coefficient is a statistic used to gauge the similarity of two samples.
2+
* Applied to strings, it gives a value between 0 and 1 (inclusive) that tells you how similar they are.
3+
* Dice coefficient is calculated by comparing the bigrams of both strings,
4+
* a bigram is a substring of the string of length 2.
5+
* read more: https://en.wikipedia.org/wiki/S%C3%B8rensen%E2%80%93Dice_coefficient
6+
*/
7+
8+
// Time complexity: O(m + n), m and n being the sizes of string A and string B
9+
10+
// Find the bigrams of a string and return a hashmap (key => bigram, value => count)
11+
/**
 * Build a frequency table of the overlapping two-character substrings
 * (bigrams) of a string.
 *
 * @param {string} string - text to split into bigrams
 * @returns {Map<string, number>} bigram => number of occurrences, in order of
 *   first appearance; empty when the string has fewer than 2 characters
 */
function mapBigrams (string) {
  const occurrences = new Map()
  // `end` is the exclusive end index of the current bigram, so the window
  // [end - 2, end) slides over every adjacent pair of characters.
  for (let end = 2; end <= string.length; end++) {
    const pair = string.substring(end - 2, end)
    const seenSoFar = occurrences.get(pair) || 0
    occurrences.set(pair, seenSoFar + 1)
  }
  return occurrences
}
20+
21+
// Calculate the number of common bigrams between a map of bigrams and a string
22+
23+
/**
 * Count the bigrams shared between a bigram frequency map and a string.
 *
 * The count is the size of the multiset intersection: a bigram that occurs
 * k times in the map can be matched by at most k occurrences in `string`.
 * Without that cap, repeated bigrams are over-counted, which makes the
 * result depend on argument order and lets the Dice coefficient exceed 1.
 *
 * @param {Map<string, number>} bigrams - bigram => occurrence count (not modified)
 * @param {string} string - text whose bigrams are matched against the map
 * @returns {number} number of bigram occurrences common to both
 */
function countCommonBigrams (bigrams, string) {
  // Tracks how many occurrences of each bigram have already been matched,
  // so the caller's map is left untouched.
  const matched = new Map()
  let count = 0
  for (let i = 0; i < string.length - 1; i++) {
    const bigram = string.substring(i, i + 2)
    const available = bigrams.get(bigram) || 0
    const used = matched.get(bigram) || 0
    if (used < available) {
      matched.set(bigram, used + 1)
      count++
    }
  }
  return count
}
31+
32+
// Calculate Dice coeff of 2 strings
33+
/**
 * Calculate the Sørensen–Dice coefficient of two strings from their bigrams:
 * 2 * (common bigrams) / (bigrams in A + bigrams in B).
 *
 * @param {string} stringA - first string
 * @param {string} stringB - second string
 * @returns {number} similarity truncated (not rounded) to two decimals;
 *   1 when the strings are strictly equal, 0 when they differ and either one
 *   has fewer than 2 characters (no bigram can be formed)
 */
function diceCoefficient (stringA, stringB) {
  if (stringA === stringB) return 1
  if (stringA.length < 2 || stringB.length < 2) return 0

  const bigramsA = mapBigrams(stringA)
  const commonCount = countCommonBigrams(bigramsA, stringB)

  // A string of length n contains n - 1 overlapping bigrams.
  const totalBigrams = (stringA.length - 1) + (stringB.length - 1)

  // Cut 0.xxxxxx to 0.xx. The scaling by 100 is applied to the integer
  // numerator before dividing: flooring `ratio * 100` instead can lose a
  // hundredth to floating-point error (0.29 * 100 is 28.999999999999996).
  return Math.floor((200 * commonCount) / totalBigrams) / 100
}
51+
export { diceCoefficient }

String/test/DiceCoefficient.test.js

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
import { diceCoefficient } from '../DiceCoefficient'
2+
3+
// Jest suite for diceCoefficient: identity, too-short inputs, and known values.
describe('diceCoefficient', () => {
  it('should return 1 for equal strings', () => {
    // equal strings return 1 (max possible value)
    expect(diceCoefficient('abc', 'abc')).toBe(1)
    expect(diceCoefficient('', '')).toBe(1)
  })

  it('should return 0 when a string is shorter than 2 characters', () => {
    // string length needs to be at least 2 (unless equal)
    expect(diceCoefficient('a', '')).toBe(0)
    expect(diceCoefficient('', 'a')).toBe(0)
  })

  it('should calculate the dice coefficient of two strings', () => {
    expect(diceCoefficient('skate', 'ate')).toBe(0.66)

    expect(diceCoefficient('money', 'honey')).toBe(0.75)

    expect(diceCoefficient('love', 'hate')).toBe(0)

    expect(diceCoefficient('skilled', 'killed')).toBe(0.9)
  })
})

0 commit comments

Comments
 (0)