Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit cc4258d

Browse files
committed Sep 11, 2024 ·
Jaccard Similarity | Machine Learning
1 parent 90c4446 commit cc4258d

File tree

1 file changed

+63
-0
lines changed

1 file changed

+63
-0
lines changed
 
Lines changed: 63 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,63 @@
1+
import logging
2+
3+
"""
4+
Jaccard Similarity Algorithm - Natural Language Processing (NLP) Algorithm
5+
6+
Use Case:
7+
- Useful in text analysis and natural language processing (NLP) tasks.
8+
- Can be used for document similarity, plagiarism detection,
9+
and information retrieval.
10+
11+
Dependencies:
12+
- Python Standard Library's logging module for error handling.
13+
"""
14+
class JaccardSimilarity:
    """
    Word-level Jaccard similarity between two strings.

    Each string is split on whitespace into a set of unique, case-sensitive
    tokens. The score is the size of the intersection of the two sets divided
    by the size of their union, expressed as a percentage.
    """

    def __init__(self) -> None:
        """
        Initialize the JaccardSimilarity class.

        The class keeps no state; the similarity depends only on the
        arguments passed to ``jaccard_similarity``.
        """

    def jaccard_similarity(self, str1: str, str2: str) -> float:
        """
        Calculate the Jaccard Similarity between two strings.

        Parameters:
        - str1 (str): The first string for comparison.
        - str2 (str): The second string for comparison.

        Returns:
        - float: The Jaccard similarity between the two strings as a
          percentage in the range 0.0 to 100.0.

        Raises:
        - ValueError: If either string is empty, or if neither string
          contains a single non-whitespace word.

        Examples:
        >>> js = JaccardSimilarity()
        >>> round(js.jaccard_similarity("hello world", "hello there"), 2)
        33.33
        >>> js.jaccard_similarity("hello world", "hello")
        50.0
        >>> js.jaccard_similarity("apple orange banana", "banana orange apple")
        100.0
        >>> js.jaccard_similarity("apple", "banana")
        0.0
        >>> js.jaccard_similarity("   ", "   ")
        Traceback (most recent call last):
            ...
        ValueError: Input strings must contain at least one word.
        """
        if not str1 or not str2:
            raise ValueError("Input strings must not be empty.")

        # Only tokenisation can fail unexpectedly (e.g. a non-string argument
        # without ``split``); log that case and re-raise it unchanged.
        try:
            set1 = set(str1.split())
            set2 = set(str2.split())
        except Exception:
            logging.exception("An Error Occurred: ")
            raise

        union = set1 | set2
        # Two whitespace-only strings pass the emptiness check above but yield
        # no tokens at all; report that as invalid input instead of letting
        # the division below fail with ZeroDivisionError.
        if not union:
            raise ValueError("Input strings must contain at least one word.")

        intersection = set1 & set2
        return len(intersection) / len(union) * 100
56+
57+
if __name__ == "__main__":
    # Script entry point: exercise the Jaccard similarity examples by running
    # every doctest embedded in this module's docstrings.
    from doctest import testmod

    testmod()  # Run the Doctests

0 commit comments

Comments
 (0)
Please sign in to comment.