2
2
import logging
3
3
import numpy as np
4
4
5
+
5
6
class Cosine_Similarity :
6
7
"""
7
8
Cosine Similarity Algorithm
@@ -15,13 +16,13 @@ class Cosine_Similarity:
15
16
- spacy: A Natural Language Processing library for Python, used here for Tokenization and Vectorization.
16
17
- numpy: A Library for Numerical Operations in Python, used for Mathematical Computations.
17
18
"""
18
-
19
+
19
20
def __init__(self) -> None:
    """
    Initializes the Cosine Similarity class by loading the SpaCy model.

    The loaded pipeline is stored on ``self.nlp``.
    NOTE(review): presumably the "en_core_web_md" package must already be
    installed for the load to succeed - confirm against the deployment setup.
    """
    # The model name must match the installed package name exactly; a literal
    # with surrounding whitespace does not resolve to the model.
    self.nlp = spacy.load("en_core_web_md")
25
+
25
26
def Tokenize (self , text : str ) -> list :
26
27
"""
27
28
Tokenizes the input text into a list of lowercased tokens.
@@ -39,7 +40,7 @@ def Tokenize(self, text: str) -> list:
39
40
except Exception as e :
40
41
logging .error ("An error occurred during Tokenization: " , exc_info = e )
41
42
raise e
42
-
43
+
43
44
def Vectorize(self, tokens: list) -> list:
    """
    Converts tokens into their corresponding vector representations.

    Each token is passed through ``self.nlp`` exactly once; its ``.vector`` is
    kept only when ``.any()`` reports at least one truthy component, so
    all-zero vectors are dropped from the result.

    Parameters:
    - tokens (list): The tokens to vectorize.

    Returns:
    - list: A list of vectors corresponding to the tokens.

    Raises:
    - Exception: Any error raised during vectorization is logged and re-raised.
    """
    try:
        vectors = []
        for token in tokens:
            # Run the pipeline once and reuse the result for both the filter
            # and the output (it used to be invoked twice per token).
            vector = self.nlp(token).vector
            if vector.any():
                vectors.append(vector)
        return vectors
    except Exception as e:
        logging.error("An error occurred during Vectorization: ", exc_info=e)
        raise e
59
-
64
+
60
65
def Mean_Vector (self , vectors : list ) -> np .ndarray :
61
66
"""
62
67
Computes the mean vector of a list of vectors.
@@ -72,9 +77,11 @@ def Mean_Vector(self, vectors: list) -> np.ndarray:
72
77
return np .zeros (self .nlp .vocab .vectors_length )
73
78
return np .mean (vectors , axis = 0 )
74
79
except Exception as e :
75
- logging .error ("An error occurred while computing the Mean Vector: " , exc_info = e )
80
+ logging .error (
81
+ "An error occurred while computing the Mean Vector: " , exc_info = e
82
+ )
76
83
raise e
77
-
84
+
78
85
def Dot_Product(self, vector1: np.ndarray, vector2: np.ndarray) -> float:
    """
    Computes the dot product between two vectors.

    Parameters:
    - vector1 (np.ndarray): The first operand.
    - vector2 (np.ndarray): The second operand.

    Returns:
    - float: The value produced by ``np.dot(vector1, vector2)``.

    Raises:
    - Exception: Any error raised by the computation is logged and re-raised.
    """
    try:
        product = np.dot(vector1, vector2)
    except Exception as error:
        logging.error(
            "An error occurred during the dot Product Calculation: ",
            exc_info=error,
        )
        raise error
    else:
        return product
94
-
103
+
95
104
def Magnitude(self, vector: np.ndarray) -> float:
    """
    Computes the magnitude (norm) of a vector.

    Parameters:
    - vector (np.ndarray): The vector to measure. Array-likes such as plain
      lists are accepted as well.

    Returns:
    - float: The magnitude of the vector.

    Raises:
    - Exception: Any error raised by the computation is logged and re-raised.
    """
    try:
        # np.linalg.norm converts non-floating input to float before squaring,
        # so small integer dtypes no longer overflow the way ``vector ** 2``
        # does, and list input no longer fails on the ``**`` operator.
        # float() makes the result match the annotated return type.
        return float(np.linalg.norm(vector))
    except Exception as e:
        logging.error(
            "An error occurred while computing the Magnitude: ", exc_info=e
        )
        raise e
110
-
121
+
111
122
def Cosine_Similarity (self , vector1 : np .ndarray , vector2 : np .ndarray ) -> float :
112
123
"""
113
124
Computes the cosine similarity between two vectors.
@@ -126,9 +137,11 @@ def Cosine_Similarity(self, vector1: np.ndarray, vector2: np.ndarray) -> float:
126
137
return 0.0
127
138
return dot / (magnitude1 * magnitude2 )
128
139
except Exception as e :
129
- logging .error ("An error occurred during Cosine Similarity Calculation: " , exc_info = e )
140
+ logging .error (
141
+ "An error occurred during Cosine Similarity Calculation: " , exc_info = e
142
+ )
130
143
raise e
131
-
144
+
132
145
def Cosine_Similarity_Percentage(self, text1: str, text2: str) -> float:
    """
    Computes the cosine similarity percentage between two texts.

    Both texts go through the same stages, one stage at a time: ``Tokenize``,
    then ``Vectorize``, then ``Mean_Vector``. The two mean vectors are compared
    with ``Cosine_Similarity`` and the result is multiplied by 100.

    Parameters:
    - text1 (str): The first text.
    - text2 (str): The second text.

    Returns:
    - float: The similarity returned by ``Cosine_Similarity``, times 100.

    Raises:
    - Exception: Any error raised by a stage is logged and re-raised.
    """
    try:
        # Each stage finishes for both texts before the next stage starts.
        token_lists = [self.Tokenize(text) for text in (text1, text2)]
        vector_lists = [self.Vectorize(tokens) for tokens in token_lists]
        first_mean, second_mean = [
            self.Mean_Vector(vectors) for vectors in vector_lists
        ]
        return self.Cosine_Similarity(first_mean, second_mean) * 100
    except Exception as error:
        logging.error(
            "An error occurred while computing the Cosine Similarity Percentage: ",
            exc_info=error,
        )
        raise error
158
-
174
+
175
+
159
176
if __name__ == "__main__" :
160
177
"""
161
178
Main function to Test the Cosine Similarity between two Texts.
@@ -164,4 +181,4 @@ def Cosine_Similarity_Percentage(self, text1: str, text2: str) -> float:
164
181
text2 = "The name of the talllest Tower in the world is Burj Khalifa"
165
182
166
183
similarity_percentage = Cosine_Similarity ().Cosine_Similarity_Percentage (text1 , text2 )
167
- print (f"Cosine Similarity: { similarity_percentage :.2f} %" )
184
+ print (f"Cosine Similarity: { similarity_percentage :.2f} %" )
0 commit comments