|
| 1 | +import logging |
| 2 | + |
| 3 | +""" |
| 4 | +Jaccard Similarity Algorithm - Natural Language Processing (NLP) Algorithm |
| 5 | +
|
| 6 | +Use Case: |
| 7 | + - Useful in text analysis and natural language processing (NLP) tasks. |
| 8 | + - Can be used for document similarity, plagiarism detection, |
| 9 | +and information retrieval. |
| 10 | +
|
| 11 | +Dependencies: |
| 12 | + - Python Standard Library's logging module for error handling. |
| 13 | +""" |
class JaccardSimilarity:
    """Word-level Jaccard similarity between two strings, as a percentage."""

    def __init__(self) -> None:
        """
        Initialize the JaccardSimilarity class.

        The class keeps no state; it only groups the similarity method.
        """

    def jaccard_similarity(self, str1: str, str2: str) -> float:
        """
        Calculate the Jaccard Similarity between two strings.

        Each string is split on whitespace into a set of distinct words
        (case-sensitive, punctuation is kept as part of a word). The result
        is ``len(intersection) / len(union) * 100``.

        Parameters:
            - str1 (str): The first string for comparison.
            - str2 (str): The second string for comparison.

        Returns:
            - float: The Jaccard similarity between the two strings as a
              percentage in the range 0.0 to 100.0.

        Raises:
            - ValueError: If either string is empty, or if neither string
              contains any word (both consist only of whitespace).

        Examples:
        >>> js = JaccardSimilarity()
        >>> round(js.jaccard_similarity("hello world", "hello there"), 2)
        33.33
        >>> js.jaccard_similarity("apple orange banana", "banana orange apple")
        100.0
        >>> js.jaccard_similarity("apple", "banana")
        0.0
        """
        if not str1 or not str2:
            raise ValueError("Input strings must not be empty.")

        try:
            # Tokenising is the only step that can fail unexpectedly
            # (e.g. a truthy non-string argument without ``split``).
            set1 = set(str1.split())
            set2 = set(str2.split())
        except Exception:
            logging.exception("An Error Occurred: ")
            raise  # bare raise keeps the original traceback intact

        union = set1 | set2
        if not union:
            # Both inputs were whitespace-only: no words to compare, and
            # dividing by len(union) would raise ZeroDivisionError.
            raise ValueError("Input strings must contain at least one word.")

        return len(set1 & set2) / len(union) * 100
| 56 | + |
if __name__ == "__main__":
    # Script entry point: exercise the Jaccard Similarity examples embedded
    # in this module's docstrings.
    import doctest

    doctest.testmod()  # Run the Doctests
0 commit comments