3
3
import numpy as np
4
4
5
5
6
- class Cosine_Similarity :
6
+ class cosine_similarity :
7
7
"""
8
8
Cosine Similarity Algorithm
9
9
@@ -23,7 +23,7 @@ def __init__(self) -> None:
23
23
"""
24
24
self .nlp = spacy .load ("en_core_web_md" )
25
25
26
- def Tokenize (self , text : str ) -> list :
26
+ def tokenize (self , text : str ) -> list :
27
27
"""
28
28
Tokenizes the input text into a list of lowercased tokens.
29
29
@@ -41,7 +41,7 @@ def Tokenize(self, text: str) -> list:
41
41
logging .error ("An error occurred during Tokenization: " , exc_info = e )
42
42
raise e
43
43
44
- def Vectorize (self , tokens : list ) -> list :
44
+ def vectorize (self , tokens : list ) -> list :
45
45
"""
46
46
Converts tokens into their corresponding vector representations.
47
47
@@ -62,7 +62,7 @@ def Vectorize(self, tokens: list) -> list:
62
62
logging .error ("An error occurred during Vectorization: " , exc_info = e )
63
63
raise e
64
64
65
- def Mean_Vector (self , vectors : list ) -> np .ndarray :
65
+ def mean_vector (self , vectors : list ) -> np .ndarray :
66
66
"""
67
67
Computes the mean vector of a list of vectors.
68
68
@@ -82,7 +82,7 @@ def Mean_Vector(self, vectors: list) -> np.ndarray:
82
82
)
83
83
raise e
84
84
85
- def Dot_Product (self , vector1 : np .ndarray , vector2 : np .ndarray ) -> float :
85
+ def dot_product (self , vector1 : np .ndarray , vector2 : np .ndarray ) -> float :
86
86
"""
87
87
Computes the dot product between two vectors.
88
88
@@ -101,7 +101,7 @@ def Dot_Product(self, vector1: np.ndarray, vector2: np.ndarray) -> float:
101
101
)
102
102
raise e
103
103
104
- def Magnitude (self , vector : np .ndarray ) -> float :
104
+ def magnitude (self , vector : np .ndarray ) -> float :
105
105
"""
106
106
Computes the magnitude (norm) of a vector.
107
107
@@ -119,7 +119,7 @@ def Magnitude(self, vector: np.ndarray) -> float:
119
119
)
120
120
raise e
121
121
122
- def Cosine_Similarity (self , vector1 : np .ndarray , vector2 : np .ndarray ) -> float :
122
+ def cosine_similarity (self , vector1 : np .ndarray , vector2 : np .ndarray ) -> float :
123
123
"""
124
124
Computes the cosine similarity between two vectors.
125
125
@@ -131,8 +131,8 @@ def Cosine_Similarity(self, vector1: np.ndarray, vector2: np.ndarray) -> float:
131
131
- float: The cosine similarity between the two vectors.
132
132
"""
133
133
try :
134
- dot = self .Dot_Product (vector1 , vector2 )
135
- magnitude1 , magnitude2 = self .Magnitude (vector1 ), self .Magnitude (vector2 )
134
+ dot = self .dot_product (vector1 , vector2 )
135
+ magnitude1 , magnitude2 = self .magnitude (vector1 ), self .magnitude (vector2 )
136
136
if magnitude1 == 0 or magnitude2 == 0 :
137
137
return 0.0
138
138
return dot / (magnitude1 * magnitude2 )
@@ -142,7 +142,7 @@ def Cosine_Similarity(self, vector1: np.ndarray, vector2: np.ndarray) -> float:
142
142
)
143
143
raise e
144
144
145
- def Cosine_Similarity_Percentage (self , text1 : str , text2 : str ) -> float :
145
+ def cosine_similarity_percentage (self , text1 : str , text2 : str ) -> float :
146
146
"""
147
147
Computes the cosine similarity percentage between two texts.
148
148
@@ -154,16 +154,16 @@ def Cosine_Similarity_Percentage(self, text1: str, text2: str) -> float:
154
154
- float: The cosine similarity percentage between the two texts.
155
155
"""
156
156
try :
157
- tokens1 = self .Tokenize (text1 )
158
- tokens2 = self .Tokenize (text2 )
157
+ tokens1 = self .tokenize (text1 )
158
+ tokens2 = self .tokenize (text2 )
159
159
160
- vectors1 = self .Vectorize (tokens1 )
161
- vectors2 = self .Vectorize (tokens2 )
160
+ vectors1 = self .vectorize (tokens1 )
161
+ vectors2 = self .vectorize (tokens2 )
162
162
163
- mean_vec1 = self .Mean_Vector (vectors1 )
164
- mean_vec2 = self .Mean_Vector (vectors2 )
163
+ mean_vec1 = self .mean_vector (vectors1 )
164
+ mean_vec2 = self .mean_vector (vectors2 )
165
165
166
- similarity = self .Cosine_Similarity (mean_vec1 , mean_vec2 )
166
+ similarity = self .cosine_similarity (mean_vec1 , mean_vec2 )
167
167
return similarity * 100
168
168
except Exception as e :
169
169
logging .error (
@@ -179,8 +179,6 @@ def Cosine_Similarity_Percentage(self, text1: str, text2: str) -> float:
179
179
"""
180
180
text1 = "The biggest Infrastructure in the World is Burj Khalifa"
181
181
text2 = "The name of the talllest Tower in the world is Burj Khalifa"
182
-
183
- similarity_percentage = Cosine_Similarity ().Cosine_Similarity_Percentage (
184
- text1 , text2
185
- )
182
+
183
+ similarity_percentage = cosine_similarity ().cosine_similarity_percentage (text1 , text2 )
186
184
print (f"Cosine Similarity: { similarity_percentage :.2f} %" )
0 commit comments