|
| 1 | +""" |
| 2 | + Linear Discriminant Analysis |
| 3 | +
|
| 4 | +
|
| 5 | + Assumptions About Data : |
| 6 | + 1. The input variables has a gaussian distribution. |
| 7 | + 2. The variance calculated for each input variables by class grouping is the |
| 8 | + same. |
| 9 | + 3. The mix of classes in your training set is representative of the problem. |
| 10 | +
|
| 11 | +
|
| 12 | + Learning The Model : |
| 13 | + The LDA model requires the estimation of statistics from the training data : |
| 14 | + 1. Mean of each input value for each class. |
| 15 | + 2. Probability of an instance belong to each class. |
| 16 | + 3. Covariance for the input data for each class |
| 17 | +
|
| 18 | + Calculate the class means : |
| 19 | + mean(x) = 1/n ( for i = 1 to i = n --> sum(xi)) |
| 20 | +
|
| 21 | + Calculate the class probabilities : |
| 22 | + P(y = 0) = count(y = 0) / (count(y = 0) + count(y = 1)) |
| 23 | + P(y = 1) = count(y = 1) / (count(y = 0) + count(y = 1)) |
| 24 | +
|
| 25 | + Calculate the variance : |
| 26 | + We can calculate the variance for dataset in two steps : |
| 27 | + 1. Calculate the squared difference for each input variable from the |
| 28 | + group mean. |
| 29 | + 2. Calculate the mean of the squared difference. |
| 30 | + ------------------------------------------------ |
| 31 | + Squared_Difference = (x - mean(k)) ** 2 |
| 32 | + Variance = (1 / (count(x) - count(classes))) * |
| 33 | + (for i = 1 to i = n --> sum(Squared_Difference(xi))) |
| 34 | +
|
| 35 | + Making Predictions : |
| 36 | + discriminant(x) = x * (mean / variance) - |
| 37 | + ((mean ** 2) / (2 * variance)) + Ln(probability) |
| 38 | + --------------------------------------------------------------------------- |
| 39 | + After calculating the discriminant value for each class, the class with the |
| 40 | + largest discriminant value is taken as the prediction. |
| 41 | +
|
| 42 | + Author: @EverLookNeverSee |
| 43 | +""" |
| 44 | + |
| 45 | +from math import log |
| 46 | +from os import name, system |
| 47 | +from random import gauss |
| 48 | + |
| 49 | + |
# Make a training dataset drawn from a gaussian distribution
def gaussian_distribution(mean: float, std_dev: float, instance_count: int) -> list:
    """
    Draw `instance_count` samples from a normal (gaussian) distribution.
    :param mean: mean value of the class
    :param std_dev: standard deviation entered by the user or its default value
    :param instance_count: number of instances to generate for the class
    :return: list of `instance_count` values sampled from N(mean, std_dev)
    """
    samples = []
    for _ in range(instance_count):
        samples.append(gauss(mean, std_dev))
    return samples
| 61 | + |
| 62 | + |
# Make corresponding Y flags to detecting classes
def y_generator(class_count: int, instance_count: list) -> list:
    """
    Build the class-label list matching the generated dataset.
    :param class_count: number of classes (data groupings) in the dataset
    :param instance_count: per-class instance counts
    :return: flat list with each class index repeated once per instance
    """
    labels = []
    # Repeat each class index as many times as that class has instances
    for class_index in range(class_count):
        labels.extend([class_index] * instance_count[class_index])
    return labels
| 73 | + |
| 74 | + |
# Calculate the class means
def calculate_mean(instance_count: int, items: list) -> float:
    """
    Compute the mean of one class's values.
    :param instance_count: number of instances in the class
    :param items: values belonging to that class (data grouping)
    :return: arithmetic mean of the class
    """
    total = sum(items)
    # mean = sum of all items divided by the number of instances
    return total / instance_count
| 85 | + |
| 86 | + |
# Calculate the class probabilities
def calculate_probabilities(instance_count: int, total_count: int) -> float:
    """
    Compute the prior probability of one class.
    :param instance_count: number of instances in the class
    :param total_count: number of instances in the whole dataset
    :return: fraction of the dataset that belongs to this class
    """
    # prior = class size / dataset size
    class_fraction = instance_count / total_count
    return class_fraction
| 97 | + |
| 98 | + |
# Calculate the variance
def calculate_variance(items: list, means: list, total_count: int) -> float:
    """
    Compute the pooled variance shared by all classes.
    :param items: per-class value lists (gaussian distribution of all classes)
    :param means: actual mean value of each class, aligned with `items`
    :param total_count: number of instances in the whole dataset
    :return: pooled variance of the dataset
    """
    # Sum the squared deviation of every value from its own class mean
    # (idiomatic zip + generator instead of nested range(len(...)) loops).
    squared_diff_sum = sum(
        (value - class_mean) ** 2
        for class_items, class_mean in zip(items, means)
        for value in class_items
    )
    n_classes = len(means)  # Number of classes in dataset
    # Pooled variance divides by (N - k) degrees of freedom
    return 1 / (total_count - n_classes) * squared_diff_sum
| 120 | + |
| 121 | + |
# Making predictions
def predict_y_values(
    x_items: list, means: list, variance: float, probabilities: list
) -> list:
    """
    Predict the class index (data grouping) for every item in the dataset.
    :param x_items: per-class value lists (gaussian distribution of all classes)
    :param means: actual mean value of each class
    :param variance: pooled variance computed by calculate_variance
    :param probabilities: prior probability of each class
    :return: list of predicted class indexes, one per item
    """
    # One list of discriminant values per item, one value per candidate class
    results = []
    # Iterate values directly instead of double range(len(...)) indexing
    for class_items in x_items:
        for item in class_items:
            # discriminant(x) = x * (mean / var) - mean^2 / (2 * var) + ln(prior)
            discriminants = [
                item * (means[k] / variance)
                - (means[k] ** 2 / (2 * variance))
                + log(probabilities[k])
                for k in range(len(x_items))
            ]
            results.append(discriminants)
    print("Generated Discriminants: \n", results)
    # The predicted class is the one with the largest discriminant value
    return [discriminants.index(max(discriminants)) for discriminants in results]
| 153 | + |
| 154 | + |
# Calculating Accuracy
def accuracy(actual_y: list, predicted_y: list) -> float:
    """
    Compute the percentage of predictions that match the actual labels.
    :param actual_y: ground-truth Y values produced by the 'y_generator' function
    :param predicted_y: Y values produced by the 'predict_y_values' function
    :return: accuracy as a percentage
    """
    correct_count = 0
    # Walk both lists in lockstep; a matching pair is a correct prediction
    for actual, predicted in zip(actual_y, predicted_y):
        if actual == predicted:
            correct_count += 1
    # Fraction of correct predictions, expressed as a percentage
    return (correct_count / len(actual_y)) * 100
| 171 | + |
| 172 | + |
# Main Function
def main():
    """Run the interactive LDA demo: read parameters, build a synthetic
    dataset, fit the statistics, predict, and report accuracy."""
    while True:
        print(" Linear Discriminant Analysis ".center(100, "*"))
        print("*" * 100, "\n")
        print("First of all we should specify the number of classes that")
        print("we want to generate as training dataset")
        # Trying to get number of classes
        n_classes = 0
        while True:
            try:
                user_input = int(
                    input("Enter the number of classes (Data Groupings): ").strip()
                )
                if user_input > 0:
                    n_classes = user_input
                    break
                else:
                    print(
                        f"Your entered value is {user_input} , Number of classes "
                        f"should be positive!"
                    )
                    continue
            except ValueError:
                print("Your entered value is not numerical!")

        print("-" * 100)

        std_dev = 1.0  # Default value for standard deviation of dataset
        # Trying to get the value of standard deviation
        while True:
            try:
                # Empty input falls back to the default of 1.0
                user_sd = float(
                    input(
                        "Enter the value of standard deviation"
                        "(Default value is 1.0 for all classes): "
                    ).strip()
                    or "1.0"
                )
                if user_sd >= 0.0:
                    std_dev = user_sd
                    break
                else:
                    print(
                        f"Your entered value is {user_sd}, Standard deviation should "
                        f"not be negative!"
                    )
                    continue
            except ValueError:
                print("Your entered value is not numerical!")

        print("-" * 100)

        # Trying to get number of instances in classes and theirs means to generate
        # dataset
        counts = []  # An empty list to store instance counts of classes in dataset
        for i in range(n_classes):
            while True:
                try:
                    user_count = int(
                        input(f"Enter The number of instances for class_{i+1}: ")
                    )
                    if user_count > 0:
                        counts.append(user_count)
                        break
                    else:
                        print(
                            f"Your entered value is {user_count}, Number of "
                            f"instances should be positive!"
                        )
                        continue
                except ValueError:
                    print("Your entered value is not numerical!")
        print("-" * 100)

        # An empty list to store values of user-entered means of classes
        user_means = []
        for a in range(n_classes):
            while True:
                try:
                    user_mean = float(
                        input(f"Enter the value of mean for class_{a+1}: ")
                    )
                    # float() already validated the input, so record it directly
                    # (the old isinstance() check was always true and its
                    # "invalid value" message was unreachable dead code)
                    user_means.append(user_mean)
                    break
                except ValueError:
                    print("Your entered value is not numerical!")
        print("-" * 100)

        print("Standard deviation: ", std_dev)
        # print out the number of instances in classes in separated line
        for i, count in enumerate(counts, 1):
            print(f"Number of instances in class_{i} is: {count}")
        print("-" * 100)

        # print out mean values of classes separated line
        for i, user_mean in enumerate(user_means, 1):
            print(f"Mean of class_{i} is: {user_mean}")
        print("-" * 100)

        # Generating training dataset drawn from gaussian distribution
        x = [
            gaussian_distribution(user_means[j], std_dev, counts[j])
            for j in range(n_classes)
        ]
        print("Generated Normal Distribution: \n", x)
        print("-" * 100)

        # Generating Ys to detecting corresponding classes
        y = y_generator(n_classes, counts)
        print("Generated Corresponding Ys: \n", y)
        print("-" * 100)

        # Calculating the value of actual mean for each class
        actual_means = [calculate_mean(counts[k], x[k]) for k in range(n_classes)]
        # for loop iterates over number of elements in 'actual_means' list and print
        # out them in separated line
        for i, actual_mean in enumerate(actual_means, 1):
            print(f"Actual(Real) mean of class_{i} is: {actual_mean}")
        print("-" * 100)

        # Calculating the value of probabilities for each class.
        # BUG FIX: this must be a list, not a generator expression — the
        # printing loop below exhausted the generator, and predict_y_values
        # indexes probabilities[k], which a generator does not support.
        probabilities = [
            calculate_probabilities(counts[i], sum(counts)) for i in range(n_classes)
        ]
        # for loop iterates over number of elements in 'probabilities' list and print
        # out them in separated line
        for i, probability in enumerate(probabilities, 1):
            print("Probability of class_{} is: {}".format(i, probability))
        print("-" * 100)

        # Calculating the values of variance for each class
        variance = calculate_variance(x, actual_means, sum(counts))
        print("Variance: ", variance)
        print("-" * 100)

        # Predicting Y values
        # storing predicted Y values in 'pre_indexes' variable
        pre_indexes = predict_y_values(x, actual_means, variance, probabilities)
        print("-" * 100)

        # Calculating Accuracy of the model
        print(f"Accuracy: {accuracy(y, pre_indexes)}")
        print("-" * 100)
        print(" DONE ".center(100, "+"))

        if input("Press any key to restart or 'q' for quit: ").strip().lower() == "q":
            print("\n" + "GoodBye!".center(100, "-") + "\n")
            break
        # Clear the console between runs (Windows uses 'cls', others 'clear')
        system("cls" if name == "nt" else "clear")
| 327 | + |
| 328 | + |
# Script entry point: start the interactive demo only when run directly
if __name__ == "__main__":
    main()
0 commit comments