Skip to content

Commit 1458803

Browse files
committed
Input Fixes
2 parents e8ec6df + 3a62339 commit 1458803

File tree

1 file changed

+38
-21
lines changed

1 file changed

+38
-21
lines changed

machine_learning/cosine_similarity.py

+38 -21
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@
22
import logging
33
import numpy as np
44

5+
56
class Cosine_Similarity:
67
"""
78
Cosine Similarity Algorithm
@@ -15,13 +16,13 @@ class Cosine_Similarity:
1516
- spacy: A Natural Language Processing library for Python, used here for Tokenization and Vectorization.
1617
- numpy: A Library for Numerical Operations in Python, used for Mathematical Computations.
1718
"""
18-
19+
1920
def __init__(self) -> None:
2021
"""
2122
Initializes the Cosine Similarity class by loading the SpaCy model.
2223
"""
23-
self.nlp = spacy.load('en_core_web_md')
24-
24+
self.nlp = spacy.load("en_core_web_md")
25+
2526
def Tokenize(self, text: str) -> list:
2627
"""
2728
Tokenizes the input text into a list of lowercased tokens.
@@ -39,7 +40,7 @@ def Tokenize(self, text: str) -> list:
3940
except Exception as e:
4041
logging.error("An error occurred during Tokenization: ", exc_info=e)
4142
raise e
42-
43+
4344
def Vectorize(self, tokens: list) -> list:
4445
"""
4546
Converts tokens into their corresponding vector representations.
@@ -51,12 +52,16 @@ def Vectorize(self, tokens: list) -> list:
5152
- list: A list of vectors corresponding to the tokens.
5253
"""
5354
try:
54-
vectors = [self.nlp(token).vector for token in tokens if self.nlp(token).vector.any()]
55+
vectors = [
56+
self.nlp(token).vector
57+
for token in tokens
58+
if self.nlp(token).vector.any()
59+
]
5560
return vectors
5661
except Exception as e:
5762
logging.error("An error occurred during Vectorization: ", exc_info=e)
5863
raise e
59-
64+
6065
def Mean_Vector(self, vectors: list) -> np.ndarray:
6166
"""
6267
Computes the mean vector of a list of vectors.
@@ -72,9 +77,11 @@ def Mean_Vector(self, vectors: list) -> np.ndarray:
7277
return np.zeros(self.nlp.vocab.vectors_length)
7378
return np.mean(vectors, axis=0)
7479
except Exception as e:
75-
logging.error("An error occurred while computing the Mean Vector: ", exc_info=e)
80+
logging.error(
81+
"An error occurred while computing the Mean Vector: ", exc_info=e
82+
)
7683
raise e
77-
84+
7885
def Dot_Product(self, vector1: np.ndarray, vector2: np.ndarray) -> float:
7986
"""
8087
Computes the dot product between two vectors.
@@ -89,9 +96,11 @@ def Dot_Product(self, vector1: np.ndarray, vector2: np.ndarray) -> float:
8996
try:
9097
return np.dot(vector1, vector2)
9198
except Exception as e:
92-
logging.error("An error occurred during the dot Product Calculation: ", exc_info=e)
99+
logging.error(
100+
"An error occurred during the dot Product Calculation: ", exc_info=e
101+
)
93102
raise e
94-
103+
95104
def Magnitude(self, vector: np.ndarray) -> float:
96105
"""
97106
Computes the magnitude (norm) of a vector.
@@ -103,11 +112,13 @@ def Magnitude(self, vector: np.ndarray) -> float:
103112
- float: The magnitude of the vector.
104113
"""
105114
try:
106-
return np.sqrt(np.sum(vector ** 2))
115+
return np.sqrt(np.sum(vector**2))
107116
except Exception as e:
108-
logging.error("An error occurred while computing the Magnitude: ", exc_info=e)
117+
logging.error(
118+
"An error occurred while computing the Magnitude: ", exc_info=e
119+
)
109120
raise e
110-
121+
111122
def Cosine_Similarity(self, vector1: np.ndarray, vector2: np.ndarray) -> float:
112123
"""
113124
Computes the cosine similarity between two vectors.
@@ -126,9 +137,11 @@ def Cosine_Similarity(self, vector1: np.ndarray, vector2: np.ndarray) -> float:
126137
return 0.0
127138
return dot / (magnitude1 * magnitude2)
128139
except Exception as e:
129-
logging.error("An error occurred during Cosine Similarity Calculation: ", exc_info=e)
140+
logging.error(
141+
"An error occurred during Cosine Similarity Calculation: ", exc_info=e
142+
)
130143
raise e
131-
144+
132145
def Cosine_Similarity_Percentage(self, text1: str, text2: str) -> float:
133146
"""
134147
Computes the cosine similarity percentage between two texts.
@@ -143,19 +156,23 @@ def Cosine_Similarity_Percentage(self, text1: str, text2: str) -> float:
143156
try:
144157
tokens1 = self.Tokenize(text1)
145158
tokens2 = self.Tokenize(text2)
146-
159+
147160
vectors1 = self.Vectorize(tokens1)
148161
vectors2 = self.Vectorize(tokens2)
149-
162+
150163
mean_vec1 = self.Mean_Vector(vectors1)
151164
mean_vec2 = self.Mean_Vector(vectors2)
152-
165+
153166
similarity = self.Cosine_Similarity(mean_vec1, mean_vec2)
154167
return similarity * 100
155168
except Exception as e:
156-
logging.error("An error occurred while computing the Cosine Similarity Percentage: ", exc_info=e)
169+
logging.error(
170+
"An error occurred while computing the Cosine Similarity Percentage: ",
171+
exc_info=e,
172+
)
157173
raise e
158-
174+
175+
159176
if __name__ == "__main__":
160177
"""
161178
Main function to Test the Cosine Similarity between two Texts.
@@ -164,4 +181,4 @@ def Cosine_Similarity_Percentage(self, text1: str, text2: str) -> float:
164181
text2 = "The name of the talllest Tower in the world is Burj Khalifa"
165182

166183
similarity_percentage = Cosine_Similarity().Cosine_Similarity_Percentage(text1, text2)
167-
print(f"Cosine Similarity: {similarity_percentage:.2f}%")
184+
print(f"Cosine Similarity: {similarity_percentage:.2f}%")

0 commit comments

Comments
 (0)