From 411130e89dce86ba46a23c2691ed2a1eb9b388b1 Mon Sep 17 00:00:00 2001 From: EverLookNeverSee Date: Mon, 25 Nov 2019 22:21:08 +0330 Subject: [PATCH 01/24] Adding new file to the machine_learning directory --- machine_learning/linear discriminant analysis.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 machine_learning/linear discriminant analysis.py diff --git a/machine_learning/linear discriminant analysis.py b/machine_learning/linear discriminant analysis.py new file mode 100644 index 000000000000..e69de29bb2d1 From c23de59588053031c46483dd818f160b088c11b6 Mon Sep 17 00:00:00 2001 From: EverLookNeverSee Date: Mon, 25 Nov 2019 22:54:51 +0330 Subject: [PATCH 02/24] Adding initial documentation --- .../linear discriminant analysis.py | 40 +++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/machine_learning/linear discriminant analysis.py b/machine_learning/linear discriminant analysis.py index e69de29bb2d1..144d176a4bc2 100644 --- a/machine_learning/linear discriminant analysis.py +++ b/machine_learning/linear discriminant analysis.py @@ -0,0 +1,40 @@ +""" + Linear Discriminant Analysis + + + Assumptions About Data : + 1. The input variables has a gaussian distribution. + 2. The variance calculated for each input variables by class grouping is the same. + 3. The mix of classes in your training set is representative of the problem. + + + Learning The Model : + The LDA model requires the estimation of statistics from the training data : + 1. Mean of each input value for each class. + 2. Probability of an instance belong to each class. + 3. Covariance for the input data for each class + + Calculate the class means : + mean(x) = 1/n ( for i = 1 to i = n --> sum(xi)) + + Calculate the class probabilities : + P(y = 0) = count(y = 0) / (count(y = 0) + count(y = 1)) + P(y = 1) = count(y = 1) / (count(y = 0) + count(y = 1)) + + Calculate the variance : + We can calculate the variance for dataset in two steps : + 1. Calculate the squared difference for each input variable from the group mean. + 2. Calculate the mean of the squared difference. + ------------------------------------------------ + Squared_Difference = (x - mean(k)) ** 2 + Variance = (1 / (count(x) - count(classes))) * (for i = 1 to i = n --> sum(Squared_Difference(xi))) + + Making Predictions : + discriminant(x) = x * (mean / variance) - ((mean ** 2) / (2 * variance)) + Ln(probability) + ------------------------------------------------------------------------------------------ + After calculating the discriminant value for each class, the class with the largest discriminant value + is taken as the prediction. + + Author: @EverLookNeverSee + +""" From 257545a9eba29f5d14a95633593976d70c675301 Mon Sep 17 00:00:00 2001 From: EverLookNeverSee Date: Mon, 25 Nov 2019 22:59:15 +0330 Subject: [PATCH 03/24] importing modules --- machine_learning/linear discriminant analysis.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/machine_learning/linear discriminant analysis.py b/machine_learning/linear discriminant analysis.py index 144d176a4bc2..47dad056282c 100644 --- a/machine_learning/linear discriminant analysis.py +++ b/machine_learning/linear discriminant analysis.py @@ -38,3 +38,8 @@ Author: @EverLookNeverSee """ + +# importing modules +from random import gauss +from math import log +from os import system, name # to use < clear > or < cls > commands in terminal or cmd \ No newline at end of file From b72d0a7d26ad5365d3767ec6b53204c81a5e91c6 Mon Sep 17 00:00:00 2001 From: EverLookNeverSee Date: Mon, 25 Nov 2019 23:02:22 +0330 Subject: [PATCH 04/24] Adding Normal_gen function --- .../linear discriminant analysis.py | 20 ++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/machine_learning/linear discriminant analysis.py b/machine_learning/linear discriminant analysis.py index 47dad056282c..e548231716de 100644 --- a/machine_learning/linear discriminant analysis.py +++ b/machine_learning/linear discriminant analysis.py @@ -42,4 +42,22 @@ # importing modules from random import gauss from math import log -from os import system, name # to use < clear > or < cls > commands in terminal or cmd \ No newline at end of file +from os import system, name # to use < clear > or < cls > commands in terminal or cmd + + +# Making training dataset drawn from a gaussian distribution +def Normal_gen(mean: float, std_dev: float, instance_count: int) -> list: + """ This function generates gaussian distribution instances + based-on given mean and standard deviation + :param mean: mean value of class + :param std_dev: value of standard deviation entered by usr or default value of it + :param instance_count: instance number of class + :return: a list containing generated values based-on given mean, std_dev and instance_count + """ + generated_instances = [] # An empty list to store generated instances + # for loop iterates over instance_count + for r in range(instance_count): + # appending corresponding gaussian distribution to 'generated_instances' list + generated_instances.append(gauss(mean, std_dev)) + + return generated_instances From 2db9260a2b6f87785c65f614105cb36e2d83f1f8 Mon Sep 17 00:00:00 2001 From: EverLookNeverSee Date: Mon, 25 Nov 2019 23:04:02 +0330 Subject: [PATCH 05/24] Adding Y_gen function --- .../linear discriminant analysis.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/machine_learning/linear discriminant analysis.py b/machine_learning/linear discriminant analysis.py index e548231716de..fc3d04a23b4e 100644 --- a/machine_learning/linear discriminant analysis.py +++ b/machine_learning/linear discriminant analysis.py @@ -61,3 +61,20 @@ def Normal_gen(mean: float, std_dev: float, instance_count: int) -> list: generated_instances.append(gauss(mean, std_dev)) return generated_instances + + +# Making corresponding Y flags to detecting classes +def Y_gen(class_count: int, instance_count: list) -> list: + """ This function generates y values for corresponding classes + :param class_count: Number of classes(data groupings) in dataset + :param instance_count: number of instances in class + :return: corresponding values for data groupings in dataset + """ + ys = [] # An empty list to store generated corresponding Ys + # for loop iterates over class_count + for k in range(class_count): + # for loop iterates over related number of instances of each class + for p in range(instance_count[k]): + # appending corresponding Ys to 'ys' list + ys.append(k) + return ys From 13f3fcc67c1deca98dd18632f3df6c9126c9e999 Mon Sep 17 00:00:00 2001 From: EverLookNeverSee Date: Mon, 25 Nov 2019 23:04:49 +0330 Subject: [PATCH 06/24] Adding mean_calc function --- machine_learning/linear discriminant analysis.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/machine_learning/linear discriminant analysis.py b/machine_learning/linear discriminant analysis.py index fc3d04a23b4e..2047799a2531 100644 --- a/machine_learning/linear discriminant analysis.py +++ b/machine_learning/linear discriminant analysis.py @@ -78,3 +78,15 @@ def Y_gen(class_count: int, instance_count: list) -> list: # appending corresponding Ys to 'ys' list ys.append(k) return ys + + +# Calculating the class means +def mean_calc(instance_count: int, items: list) -> float: + """ This function calculates given class mean + :param instance_count: Number of instances in class + :param items: items that related to specific class(data grouping) + :return: calculated actual mean of considered class + """ + # the sum of all items divided by number of instances + class_mean = sum(items) / instance_count + return class_mean From 844309e5e5af286710e7a6bd00063b4b724a84ff Mon Sep 17 00:00:00 2001 From: EverLookNeverSee Date: Mon, 25 Nov 2019 23:05:31 +0330 Subject: [PATCH 07/24] Adding prob_calc function --- machine_learning/linear discriminant analysis.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/machine_learning/linear discriminant analysis.py b/machine_learning/linear discriminant analysis.py index 2047799a2531..09f8fdf5c6d4 100644 --- a/machine_learning/linear discriminant analysis.py +++ b/machine_learning/linear discriminant analysis.py @@ -90,3 +90,16 @@ def mean_calc(instance_count: int, items: list) -> float: # the sum of all items divided by number of instances class_mean = sum(items) / instance_count return class_mean + + +# Calculating the class probabilities +def prob_calc(instance_count: int, total_count: int) -> float: + """ This function calculates the probability that a given instance + will belong to which class + :param instance_count: number of instances in class + :param total_count: the number of all instances + :return: value of probability for considered class + """ + # number of instances in specific class divided by number of all instances + probability = instance_count / total_count + return probability From c576c5333602dfe92a3b767718acffa7bf5a1160 Mon Sep 17 00:00:00 2001 From: EverLookNeverSee Date: Mon, 25 Nov 2019 23:06:16 +0330 Subject: [PATCH 08/24] Adding var_calc function --- .../linear discriminant analysis.py | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/machine_learning/linear discriminant analysis.py b/machine_learning/linear discriminant analysis.py index 09f8fdf5c6d4..a8349bfde0d3 100644 --- a/machine_learning/linear discriminant analysis.py +++ b/machine_learning/linear discriminant analysis.py @@ -103,3 +103,27 @@ def prob_calc(instance_count: int, total_count: int) -> float: # number of instances in specific class divided by number of all instances probability = instance_count / total_count return probability + + +# Calculating the variance +def var_calc(items: list, means: list, total_count: int) -> float: + """ This function calculates the variance + :param items: a list containing all items(gaussian distribution of all classes) + :param means: a list containing real mean values of each class + :param total_count: the number of all instances + :return: calculated variance for considered dataset + """ + + squared_diff = [] # An empty list to store all squared differences + n_classes = len(means) # Number of classes in dataSet + + # for loo iterates over number of elements in items + for i in range(len(items)): + # for loop iterates over number of elements in inner layer of items + for j in range(len(items[i])): + # appending squared differences to 'squared_diff' list + squared_diff.append((items[i][j] - means[i]) ** 2) + + # one divided by (the number of all instances - number of classes) multiplied by sum of all squared differences + variance = 1 / (total_count - n_classes) * sum(squared_diff) + return variance From da7b8ffbe31d4dc8d794af2f8cf46a72a9df0194 Mon Sep 17 00:00:00 2001 From: EverLookNeverSee Date: Mon, 25 Nov 2019 23:08:02 +0330 Subject: [PATCH 09/24] Adding predict function --- .../linear discriminant analysis.py | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/machine_learning/linear discriminant analysis.py b/machine_learning/linear discriminant analysis.py index a8349bfde0d3..c5075831e2ae 100644 --- a/machine_learning/linear discriminant analysis.py +++ b/machine_learning/linear discriminant analysis.py @@ -127,3 +127,38 @@ def var_calc(items: list, means: list, total_count: int) -> float: # one divided by (the number of all instances - number of classes) multiplied by sum of all squared differences variance = 1 / (total_count - n_classes) * sum(squared_diff) return variance + + +# Making predictions +def predict(x_items: list, means: list, variance: float, probabilities: list) -> list: + """ This function predicts new indexes(groups for our data) + :param x_items: a list containing all items(gaussian distribution of all classes) + :param means: a list containing real mean values of each class + :param variance: calculated value of variance by var_calc function + :param probabilities: a list containing all probabilities of classes + :return: a list containing predicted Y values + """ + + results = [] # An empty list to store generated discriminant values of all items in dataset for each class + # for loop iterates over number of elements in list + for i in range(len(x_items)): + # for loop iterates over number of inner items of each element + for j in range(len(x_items[i])): + temp = [] # to store all discriminant values of each item as a list + # for loop iterates over number of classes we have in our dataset + for k in range(len(x_items)): + # appending values of discriminants for each class to 'temp' list + temp.append(x_items[i][j] * (means[k] / variance) - (means[k] ** 2 / (2 * variance)) + + log(probabilities[k])) + # appending discriminant values of each item to 'results' list + results.append(temp) + + print("Generated Discriminants: \n", results) + + predicted_index = [] # An empty list to store predicted indexes + # for loop iterates over elements in 'results' + for l in results: + # after calculating the discriminant value for each class , the class with the largest + # discriminant value is taken as the prediction, than we try to get index of that. + predicted_index.append(l.index(max(l))) + return predicted_index From d67a5ab1608349189434b577d9f0f16cd217786f Mon Sep 17 00:00:00 2001 From: EverLookNeverSee Date: Mon, 25 Nov 2019 23:08:39 +0330 Subject: [PATCH 10/24] Adding accuracy function --- .../linear discriminant analysis.py | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/machine_learning/linear discriminant analysis.py b/machine_learning/linear discriminant analysis.py index c5075831e2ae..97c309256f05 100644 --- a/machine_learning/linear discriminant analysis.py +++ b/machine_learning/linear discriminant analysis.py @@ -162,3 +162,23 @@ def predict(x_items: list, means: list, variance: float, probabilities: list) -> # discriminant value is taken as the prediction, than we try to get index of that. predicted_index.append(l.index(max(l))) return predicted_index + + +# Calculating Accuracy +def accuracy(actual_y: list, predicted_y: list) -> float: + """ This function calculates the value of accuracy based-on predictions + :param actual_y:a list containing initial Y values generated by 'Y_gen' function + :param predicted_y: a list containing predicted Y values generated by 'predict' function + :return: percentage of accuracy + """ + correct = 0 # initial value for number of correct predictions + # for loop iterates over one element of each list at a time (zip mode) + for i, j in zip(actual_y, predicted_y): + # if actual Y value equals to predicted Y value + if i == j: + # prediction is correct + correct += 1 + # percentage of accuracy equals to number of correct predictions divided by number of + # all data and multiplied by 100 + percentage = (correct / len(actual_y)) * 100 + return percentage From 4669d3d94f646c5c555097052f1677cf693108b6 Mon Sep 17 00:00:00 2001 From: EverLookNeverSee Date: Mon, 25 Nov 2019 23:10:18 +0330 Subject: [PATCH 11/24] Adding main function --- .../linear discriminant analysis.py | 165 ++++++++++++++++++ 1 file changed, 165 insertions(+) diff --git a/machine_learning/linear discriminant analysis.py b/machine_learning/linear discriminant analysis.py index 97c309256f05..878432615134 100644 --- a/machine_learning/linear discriminant analysis.py +++ b/machine_learning/linear discriminant analysis.py @@ -182,3 +182,168 @@ def accuracy(actual_y: list, predicted_y: list) -> float: # all data and multiplied by 100 percentage = (correct / len(actual_y)) * 100 return percentage + + +# Main Function +def main(): + """ This function starts execution phase """ + + while True: + + print(" Linear Discriminant Analysis ".center(100, "*")) + print("*" * 100, "\n") + print("First of all we should specify the number of classes that \n" + "we want to generate as training dataset") + + # Trying to get number of classes + n_classes = 0 + while True: + try: + user_input = int(input("Enter the number of classes (Data Groupings): ")) + if user_input > 0: + n_classes = user_input + break + else: + print("Your entered value is {} , Number of classes should be positive!".format(user_input)) + continue + except ValueError: + print("Your entered value is not numerical!") + + print("-" * 100) + + std_dev = 1.0 # Default value for standard deviation of dataset + # Trying to get the value of standard deviation + while True: + try: + user_sd = float(input("Enter the value of standard deviation" + "(Default value is 1.0 for all classes): ") or "1.0") + if user_sd >= 0.0: + std_dev = user_sd + break + else: + print("Your entered value is {}, Standard deviation should not be negative!".format(user_sd)) + continue + except ValueError: + print("Your entered value is not numerical!") + + print("-" * 100) + + # Trying to get number of instances in classes and theirs means to generate dataset + counts = [] # An empty list to store instance counts of classes in dataset + for i in range(n_classes): + while True: + try: + user_count = int(input("Enter The number of instances for class_{}: ".format(i + 1))) + if user_count > 0: + counts.append(user_count) + break + else: + print("Your entered value is {}, Number of instances should be positive!".format(user_count)) + continue + except ValueError: + print("Your entered value is not numerical!") + + print("-" * 100) + + user_means = [] # An empty list to store values of user-entered means of classes + for a in range(n_classes): + while True: + try: + user_mean = float(input("Enter the value of mean for class_{}: ".format(a + 1))) + if isinstance(user_mean, float): + user_means.append(user_mean) + break + else: + print("Your entered value is {}, And this is not valid!".format(user_mean)) + + except ValueError: + print("Your entered value is not numerical!") + + print("-" * 100) + + print("Standard deviation: ", std_dev) + + # print out the number of instances in classes in separated line + for b in range(len(counts)): + print("Number of instances in class_{} is: {}".format(b + 1, counts[b])) + + print("-" * 100) + + # print out mean values of classes separated line + for c in range(len(user_means)): + print("Mean of class_{} is: {}".format(c + 1, user_means[c])) + + print("-" * 100) + + # Generating training dataset drawn from gaussian distribution + x = [] # An empty list to store generated values of gaussian distribution + # for loop iterates over number of classes + for j in range(n_classes): + # appending return values of 'Normal_gen' function to 'x' list + x.append(Normal_gen(user_means[j], std_dev, counts[j])) + print("Generated Normal Distribution: \n", x) + + print("-" * 100) + + # Generating Ys to detecting corresponding classes + y = Y_gen(n_classes, counts) + print("Generated Corresponding Ys: \n", y) + + print("-" * 100) + + # Calculating the value of actual mean for each class + actual_means = [] # An empty list to store value of actual means + # for loop iterates over number of classes(data groupings) + for k in range(n_classes): + # appending return values of 'mean_calc' function to 'actual_means' list + actual_means.append(mean_calc(counts[k], x[k])) + # for loop iterates over number of elements in 'actual_means' list and print out them in separated line + for d in range(len(actual_means)): + print("Actual(Real) mean of class_{} is: {}".format(d + 1, actual_means[d])) + + print("-" * 100) + + # Calculating the value of probabilities for each class + probabilities = [] # An empty list to store values of probabilities for each class + # # for loop iterates over number of classes(data groupings) + for l in range(n_classes): + # appending return values of 'prob_calc' function to 'probabilities' list + probabilities.append(prob_calc(counts[l], sum(counts))) + # for loop iterates over number of elements in 'probabilities' list and print out them in separated line + for e in range(len(probabilities)): + print("Probability of class_{} is: {}".format(e + 1, probabilities[e])) + + print("-" * 100) + + # Calculating the values of variance for each class + variance = var_calc(x, actual_means, sum(counts)) + print("Variance: ", variance) + + print("-" * 100) + + # Predicting Y values + # storing predicted Y values in 'pre_indexes' variable + pre_indexes = predict(x, actual_means, variance, probabilities) + + print("-" * 100) + + # Calculating Accuracy of the model + print("Accuracy: ", accuracy(y, pre_indexes)) + print("-" * 100) + print(" DONE ".center(100, "+")) + + command = input("Press any key to restart and 'q' for quit: ") + if command.lower() == "q": + print("\n" + "GoodBye!".center(100, "-") + "\n") + break + else: + if name == "nt": # Related to Windows OS + system("cls") + continue + else: # Related to Mac OSX and Linux OS + system("clear") + continue + + +if __name__ == '__main__': + main() From 47367c14a9aa5e4fe6a09524f6728a3eff9f6475 Mon Sep 17 00:00:00 2001 From: EverLookNeverSee Date: Mon, 25 Nov 2019 23:55:02 +0330 Subject: [PATCH 12/24] Renaming LDA file --- ...r discriminant analysis.py => linear_discriminant_analysis.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename machine_learning/{linear discriminant analysis.py => linear_discriminant_analysis.py} (100%) diff --git a/machine_learning/linear discriminant analysis.py b/machine_learning/linear_discriminant_analysis.py similarity index 100% rename from machine_learning/linear discriminant analysis.py rename to machine_learning/linear_discriminant_analysis.py From 16a18313adb363fe083dcfde6716ac5d020c6c10 Mon Sep 17 00:00:00 2001 From: EverLookNeverSee Date: Tue, 26 Nov 2019 11:23:07 +0330 Subject: [PATCH 13/24] Adding requested changes --- .../linear_discriminant_analysis.py | 23 +++++-------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/machine_learning/linear_discriminant_analysis.py b/machine_learning/linear_discriminant_analysis.py index 878432615134..a0b8cf44afe1 100644 --- a/machine_learning/linear_discriminant_analysis.py +++ b/machine_learning/linear_discriminant_analysis.py @@ -55,12 +55,7 @@ def Normal_gen(mean: float, std_dev: float, instance_count: int) -> list: :return: a list containing generated values based-on given mean, std_dev and instance_count """ generated_instances = [] # An empty list to store generated instances - # for loop iterates over instance_count - for r in range(instance_count): - # appending corresponding gaussian distribution to 'generated_instances' list - generated_instances.append(gauss(mean, std_dev)) - - return generated_instances + return [gauss(mean, std_dev) for _ in range(instance_count)] # Making corresponding Y flags to detecting classes @@ -73,11 +68,7 @@ def Y_gen(class_count: int, instance_count: list) -> list: ys = [] # An empty list to store generated corresponding Ys # for loop iterates over class_count for k in range(class_count): - # for loop iterates over related number of instances of each class - for p in range(instance_count[k]): - # appending corresponding Ys to 'ys' list - ys.append(k) - return ys + return [k for _ in range(instance_count[k]) for k in range(class_count)] # Calculating the class means @@ -88,12 +79,11 @@ def mean_calc(instance_count: int, items: list) -> float: :return: calculated actual mean of considered class """ # the sum of all items divided by number of instances - class_mean = sum(items) / instance_count - return class_mean + return sum(items) / instance_count # Calculating the class probabilities -def prob_calc(instance_count: int, total_count: int) -> float: +def calculate_probabilities(instance_count: int, total_count: int) -> float: """ This function calculates the probability that a given instance will belong to which class :param instance_count: number of instances in class @@ -101,8 +91,7 @@ def prob_calc(instance_count: int, total_count: int) -> float: :return: value of probability for considered class """ # number of instances in specific class divided by number of all instances - probability = instance_count / total_count - return probability + return instance_count / total_count # Calculating the variance @@ -308,7 +297,7 @@ def main(): # # for loop iterates over number of classes(data groupings) for l in range(n_classes): # appending return values of 'prob_calc' function to 'probabilities' list - probabilities.append(prob_calc(counts[l], sum(counts))) + probabilities.append(calculate_probabilities(counts[l], sum(counts))) # for loop iterates over number of elements in 'probabilities' list and print out them in separated line for e in range(len(probabilities)): print("Probability of class_{} is: {}".format(e + 1, probabilities[e])) From 714fa69fff7200089d327ae02151b6f12c3ed4aa Mon Sep 17 00:00:00 2001 From: EverLookNeverSee Date: Tue, 26 Nov 2019 12:48:24 +0330 Subject: [PATCH 14/24] Renaming some of functions --- .../linear_discriminant_analysis.py | 30 +++++++++---------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/machine_learning/linear_discriminant_analysis.py b/machine_learning/linear_discriminant_analysis.py index a0b8cf44afe1..5993445cb828 100644 --- a/machine_learning/linear_discriminant_analysis.py +++ b/machine_learning/linear_discriminant_analysis.py @@ -46,7 +46,7 @@ # Making training dataset drawn from a gaussian distribution -def Normal_gen(mean: float, std_dev: float, instance_count: int) -> list: +def gaussian_distribution(mean: float, std_dev: float, instance_count: int) -> list: """ This function generates gaussian distribution instances based-on given mean and standard deviation :param mean: mean value of class @@ -59,7 +59,7 @@ def Normal_gen(mean: float, std_dev: float, instance_count: int) -> list: # Making corresponding Y flags to detecting classes -def Y_gen(class_count: int, instance_count: list) -> list: +def y_generator(class_count: int, instance_count: list) -> list: """ This function generates y values for corresponding classes :param class_count: Number of classes(data groupings) in dataset :param instance_count: number of instances in class @@ -72,7 +72,7 @@ def Y_gen(class_count: int, instance_count: list) -> list: # Calculating the class means -def mean_calc(instance_count: int, items: list) -> float: +def calculate_mean(instance_count: int, items: list) -> float: """ This function calculates given class mean :param instance_count: Number of instances in class :param items: items that related to specific class(data grouping) @@ -95,7 +95,7 @@ def calculate_probabilities(instance_count: int, total_count: int) -> float: # Calculating the variance -def var_calc(items: list, means: list, total_count: int) -> float: +def calculate_variance(items: list, means: list, total_count: int) -> float: """ This function calculates the variance :param items: a list containing all items(gaussian distribution of all classes) :param means: a list containing real mean values of each class @@ -119,11 +119,11 @@ def var_calc(items: list, means: list, total_count: int) -> float: # Making predictions -def predict(x_items: list, means: list, variance: float, probabilities: list) -> list: +def predict_y_values(x_items: list, means: list, variance: float, probabilities: list) -> list: """ This function predicts new indexes(groups for our data) :param x_items: a list containing all items(gaussian distribution of all classes) :param means: a list containing real mean values of each class - :param variance: calculated value of variance by var_calc function + :param variance: calculated value of variance by calculate_variance function :param probabilities: a list containing all probabilities of classes :return: a list containing predicted Y values """ @@ -156,8 +156,8 @@ def predict(x_items: list, means: list, variance: float, probabilities: list) -> # Calculating Accuracy def accuracy(actual_y: list, predicted_y: list) -> float: """ This function calculates the value of accuracy based-on predictions - :param actual_y:a list containing initial Y values generated by 'Y_gen' function - :param predicted_y: a list containing predicted Y values generated by 'predict' function + :param actual_y:a list containing initial Y values generated by 'y_generator' function + :param predicted_y: a list containing predicted Y values generated by 'predict_y_values' function :return: percentage of accuracy """ correct = 0 # initial value for number of correct predictions @@ -268,14 +268,14 @@ def main(): x = [] # An empty list to store generated values of gaussian distribution # for loop iterates over number of classes for j in range(n_classes): - # appending return values of 'Normal_gen' function to 'x' list - x.append(Normal_gen(user_means[j], std_dev, counts[j])) + # appending return values of 'gaussian_distribution' function to 'x' list + x.append(gaussian_distribution(user_means[j], std_dev, counts[j])) print("Generated Normal Distribution: \n", x) print("-" * 100) # Generating Ys to detecting corresponding classes - y = Y_gen(n_classes, counts) + y = y_generator(n_classes, counts) print("Generated Corresponding Ys: \n", y) print("-" * 100) @@ -284,8 +284,8 @@ def main(): actual_means = [] # An empty list to store value of actual means # for loop iterates over number of classes(data groupings) for k in range(n_classes): - # appending return values of 'mean_calc' function to 'actual_means' list - actual_means.append(mean_calc(counts[k], x[k])) + # appending return values of 'calculate_mean' function to 'actual_means' list + actual_means.append(calculate_mean(counts[k], x[k])) # for loop iterates over number of elements in 'actual_means' list and print out them in separated line for d in range(len(actual_means)): print("Actual(Real) mean of class_{} is: {}".format(d + 1, actual_means[d])) @@ -305,14 +305,14 @@ def main(): print("-" * 100) # Calculating the values of variance for each class - variance = var_calc(x, actual_means, sum(counts)) + variance = calculate_variance(x, actual_means, sum(counts)) print("Variance: ", variance) print("-" * 100) # Predicting Y values # storing predicted Y values in 'pre_indexes' variable - pre_indexes = predict(x, actual_means, variance, probabilities) + pre_indexes = predict_y_values(x, actual_means, variance, probabilities) print("-" * 100) From b4e3a25953340ff99da337f40f108542f623f720 Mon Sep 17 00:00:00 2001 From: EverLookNeverSee Date: Tue, 26 Nov 2019 13:37:24 +0330 Subject: [PATCH 15/24] Refactoring str.format() statements to f-string --- .../linear_discriminant_analysis.py | 22 ++++++++++--------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/machine_learning/linear_discriminant_analysis.py b/machine_learning/linear_discriminant_analysis.py index 5993445cb828..fffe49055717 100644 --- a/machine_learning/linear_discriminant_analysis.py +++ b/machine_learning/linear_discriminant_analysis.py @@ -193,7 +193,7 @@ def main(): n_classes = user_input break else: - print("Your entered value is {} , Number of classes should be positive!".format(user_input)) + print(f"Your entered value is {user_input} , Number of classes should be positive!") continue except ValueError: print("Your entered value is not numerical!") @@ -210,7 +210,7 @@ def main(): std_dev = user_sd break else: - print("Your entered value is {}, Standard deviation should not be negative!".format(user_sd)) + print(f"Your entered value is {user_sd}, Standard deviation should not be negative!") continue except ValueError: print("Your entered value is not numerical!") @@ -222,12 +222,12 @@ def main(): for i in range(n_classes): while True: try: - user_count = int(input("Enter The number of instances for class_{}: ".format(i + 1))) + user_count = int(input(f"Enter The number of instances for class_{i+1}: ")) if user_count > 0: counts.append(user_count) break else: - print("Your entered value is {}, Number of instances should be positive!".format(user_count)) + print(f"Your entered value is {user_count}, Number of instances should be positive!") continue except ValueError: print("Your entered value is not numerical!") @@ -238,12 +238,12 @@ def main(): for a in range(n_classes): while True: try: - user_mean = float(input("Enter the value of mean for class_{}: ".format(a + 1))) + user_mean = float(input(f"Enter the value of mean for class_{a+1}: ")) if isinstance(user_mean, float): user_means.append(user_mean) break else: - print("Your entered value is {}, And this is not valid!".format(user_mean)) + print(f"Your entered value is {user_mean}, And this is not valid!") except ValueError: print("Your entered value is not numerical!") @@ -254,13 +254,13 @@ def main(): # print out the number of instances in classes in separated line for b in range(len(counts)): - print("Number of instances in class_{} is: {}".format(b + 1, counts[b])) + print(f"Number of instances in class_{b+1} is: {counts[b]}") print("-" * 100) # print out mean values of classes separated line for c in range(len(user_means)): - print("Mean of class_{} is: {}".format(c + 1, user_means[c])) + print(f"Mean of class_{c+1} is: {user_means[c]}") print("-" * 100) @@ -288,12 +288,14 @@ def main(): actual_means.append(calculate_mean(counts[k], x[k])) # for loop iterates over number of elements in 'actual_means' list and print out them in separated line for d in range(len(actual_means)): - print("Actual(Real) mean of class_{} is: {}".format(d + 1, actual_means[d])) + print(f"Actual(Real) mean of class_{d+1} is: {actual_means[d]}") print("-" * 100) # Calculating the value of probabilities for each class - probabilities = [] # An empty list to store values of probabilities for each class + probabilities = ( + [] + ) # An empty list to store values of probabilities for each class # # for loop iterates over number of classes(data groupings) for l in range(n_classes): # appending return values of 'prob_calc' function to 'probabilities' list From 7fc92b0f9ec0804fd646e0db4087a73e89eeb853 Mon Sep 17 00:00:00 2001 From: EverLookNeverSee Date: Tue, 26 Nov 2019 13:46:08 +0330 Subject: [PATCH 16/24] Removing unnecessary list objects inside two functions --- machine_learning/linear_discriminant_analysis.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/machine_learning/linear_discriminant_analysis.py b/machine_learning/linear_discriminant_analysis.py index fffe49055717..23e747e62168 100644 --- a/machine_learning/linear_discriminant_analysis.py +++ b/machine_learning/linear_discriminant_analysis.py @@ -54,7 +54,7 @@ def gaussian_distribution(mean: float, std_dev: float, instance_count: int) -> l :param instance_count: instance number of class :return: a list containing generated values based-on given mean, std_dev and instance_count """ - generated_instances = [] # An empty list to store generated instances + return [gauss(mean, std_dev) for _ in range(instance_count)] @@ -65,7 +65,7 @@ def y_generator(class_count: int, instance_count: list) -> list: :param instance_count: number of instances in class :return: corresponding values for data groupings in dataset """ - ys = [] # An empty list to store generated corresponding Ys + # for loop iterates over class_count for k in range(class_count): return [k for _ in range(instance_count[k]) for k in range(class_count)] From 8a349305135e7accbfbf9af08eb20656123fcf9e Mon Sep 17 00:00:00 2001 From: EverLookNeverSee Date: Tue, 26 Nov 2019 13:52:52 +0330 Subject: [PATCH 17/24] changing code style in some lines --- .../linear_discriminant_analysis.py | 34 +++++++++++++------ 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/machine_learning/linear_discriminant_analysis.py b/machine_learning/linear_discriminant_analysis.py index 23e747e62168..2800517f7a3c 100644 --- a/machine_learning/linear_discriminant_analysis.py +++ b/machine_learning/linear_discriminant_analysis.py @@ -103,7 +103,9 @@ def calculate_variance(items: list, means: list, total_count: int) -> float: :return: calculated variance for considered dataset """ - squared_diff = [] # An empty list to store all squared differences + squared_diff = ( + [] + ) # An empty list to store all squared differences n_classes = len(means) # Number of classes in dataSet # for loo iterates over number of elements in items @@ -114,8 +116,7 @@ def calculate_variance(items: list, means: list, total_count: int) -> float: squared_diff.append((items[i][j] - means[i]) ** 2) # one divided by (the number of all instances - number of classes) multiplied by sum of all squared differences - variance = 1 / (total_count - n_classes) * sum(squared_diff) - return variance + return 1 / (total_count - n_classes) * sum(squared_diff) # Making predictions @@ -128,7 +129,9 @@ def predict_y_values(x_items: list, means: list, variance: float, probabilities: :return: a list containing predicted Y values """ - results = [] # An empty list to store generated discriminant values of all items in dataset for each class + results = ( + [] + ) # An empty list to store generated discriminant values of all items in dataset for each class # for loop iterates over number of elements in list for i in range(len(x_items)): # for loop iterates over number of inner items of each element @@ -144,7 +147,9 @@ def predict_y_values(x_items: list, means: list, variance: float, probabilities: print("Generated Discriminants: \n", results) - predicted_index = [] # An empty list to store predicted indexes + predicted_index = ( + [] + ) # An empty list to store predicted indexes # for loop iterates over elements in 'results' for l in results: # after calculating the discriminant value for each class , the class with the largest @@ -169,8 +174,7 @@ def accuracy(actual_y: list, predicted_y: list) -> float: correct += 1 # percentage of accuracy equals to number of correct predictions divided by number of # all data and multiplied by 100 - percentage = (correct / len(actual_y)) * 100 - return percentage + return (correct / len(actual_y)) * 100 # Main Function @@ -218,7 +222,9 @@ def main(): print("-" * 100) # Trying to get number of instances in classes and theirs means to generate dataset - counts = [] # An empty list to store instance counts of classes in dataset + counts = ( + [] + ) # An empty list to store instance counts of classes in dataset for i in range(n_classes): while True: try: @@ -234,7 +240,9 @@ def main(): print("-" * 100) - user_means = [] # An empty list to store values of user-entered means of classes + user_means = ( + [] + ) # An empty list to store values of user-entered means of classes for a in range(n_classes): while True: try: @@ -265,7 +273,9 @@ def main(): print("-" * 100) # Generating training dataset drawn from gaussian distribution - x = [] # An empty list to store generated values of gaussian distribution + x = ( + [] + ) # An empty list to store generated values of gaussian distribution # for loop iterates over number of classes for j in range(n_classes): # appending return values of 'gaussian_distribution' function to 'x' list @@ -281,7 +291,9 @@ def main(): print("-" * 100) # Calculating the value of actual mean for each class - actual_means = [] # An empty list to store value of actual means + actual_means = ( + [] + ) # An empty list to store value of actual means # for loop iterates over number of classes(data groupings) for k in range(n_classes): # appending return values of 'calculate_mean' function to 'actual_means' list From b5b3e1c2ae9fb0d938cfc03a4ec1e813bff9ed1a Mon Sep 17 00:00:00 2001 From: EverLookNeverSee Date: Tue, 26 Nov 2019 14:08:37 +0330 Subject: [PATCH 18/24] Fixing y_generator function --- machine_learning/linear_discriminant_analysis.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/machine_learning/linear_discriminant_analysis.py b/machine_learning/linear_discriminant_analysis.py index 2800517f7a3c..8fed7dc42be6 100644 --- a/machine_learning/linear_discriminant_analysis.py +++ b/machine_learning/linear_discriminant_analysis.py @@ -66,9 +66,7 @@ def y_generator(class_count: int, instance_count: list) -> list: :return: corresponding values for data groupings in dataset """ - # for loop iterates over class_count - for k in range(class_count): - return [k for _ in range(instance_count[k]) for k in range(class_count)] + return [k for k in range(class_count) for _ in range(instance_count[k])] # Calculating the class means From 67047ce10b6b21ff31de3e55f34a884836bf3e84 Mon Sep 17 00:00:00 2001 From: EverLookNeverSee Date: Tue, 26 Nov 2019 14:16:18 +0330 Subject: [PATCH 19/24] Refactoring 'predict_y_values' function by using list comprehensions --- machine_learning/linear_discriminant_analysis.py | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/machine_learning/linear_discriminant_analysis.py b/machine_learning/linear_discriminant_analysis.py index 8fed7dc42be6..b5327840d52a 100644 --- a/machine_learning/linear_discriminant_analysis.py +++ b/machine_learning/linear_discriminant_analysis.py @@ -145,15 +145,7 @@ def predict_y_values(x_items: list, means: list, variance: float, probabilities: print("Generated Discriminants: \n", results) - predicted_index = ( - [] - ) # An empty list to store predicted indexes - # for loop iterates over elements in 'results' - for l in results: - # after calculating the discriminant value for each class , the class with the largest - # discriminant value is taken as the prediction, than we try to get index of that. - predicted_index.append(l.index(max(l))) - return predicted_index + return [l.index(max(l)) for l in results] # Calculating Accuracy From e62e6ec41dea497a05cdac7d387e472ace2f376c Mon Sep 17 00:00:00 2001 From: EverLookNeverSee Date: Tue, 26 Nov 2019 14:22:29 +0330 Subject: [PATCH 20/24] Changing code style in import statements --- machine_learning/linear_discriminant_analysis.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/machine_learning/linear_discriminant_analysis.py b/machine_learning/linear_discriminant_analysis.py index b5327840d52a..b05d27cc402a 100644 --- a/machine_learning/linear_discriminant_analysis.py +++ b/machine_learning/linear_discriminant_analysis.py @@ -42,7 +42,8 @@ # importing modules from random import gauss from math import log -from os import system, name # to use < clear > or < cls > commands in terminal or cmd +from os import system +from os import name # Making training dataset drawn from a gaussian distribution From 0d5d0a72410126379036ad5d9cda1d41d2caa20e Mon Sep 17 00:00:00 2001 From: EverLookNeverSee Date: Tue, 26 Nov 2019 14:26:40 +0330 Subject: [PATCH 21/24] Refactoring CLI code block --- machine_learning/linear_discriminant_analysis.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/machine_learning/linear_discriminant_analysis.py b/machine_learning/linear_discriminant_analysis.py index b05d27cc402a..67728cf3c694 100644 --- a/machine_learning/linear_discriminant_analysis.py +++ b/machine_learning/linear_discriminant_analysis.py @@ -334,8 +334,8 @@ def main(): if name == "nt": # Related to Windows OS system("cls") continue - else: # Related to Mac OSX and Linux OS - system("clear") + else: + system("cls" if name == "nt" else "clear") continue From 33b86791cf39aca658021111e86dd2ff4719509c Mon Sep 17 00:00:00 2001 From: github-actions <${GITHUB_ACTOR}@users.noreply.github.com> Date: Tue, 26 Nov 2019 11:00:29 +0000 Subject: [PATCH 22/24] fixup! Format Python code with psf/black push --- .../linear_discriminant_analysis.py | 78 ++++++++++++------- 1 file changed, 48 insertions(+), 30 deletions(-) diff --git a/machine_learning/linear_discriminant_analysis.py b/machine_learning/linear_discriminant_analysis.py index 67728cf3c694..a840507cb274 100644 --- a/machine_learning/linear_discriminant_analysis.py +++ b/machine_learning/linear_discriminant_analysis.py @@ -102,9 +102,7 @@ def calculate_variance(items: list, means: list, total_count: int) -> float: :return: calculated variance for considered dataset """ - squared_diff = ( - [] - ) # An empty list to store all squared differences + squared_diff = [] # An empty list to store all squared differences n_classes = len(means) # Number of classes in dataSet # for loo iterates over number of elements in items @@ -119,7 +117,9 @@ def calculate_variance(items: list, means: list, total_count: int) -> float: # Making predictions -def predict_y_values(x_items: list, means: list, variance: float, probabilities: list) -> list: +def predict_y_values( + x_items: list, means: list, variance: float, probabilities: list +) -> list: """ This function predicts new indexes(groups for our data) :param x_items: a list containing all items(gaussian distribution of all classes) :param means: a list containing real mean values of each class @@ -130,17 +130,20 @@ def predict_y_values(x_items: list, means: list, variance: float, probabilities: results = ( [] - ) # An empty list to store generated discriminant values of all items in dataset for each class + ) # An empty list to store generated discriminant values of all items in dataset for each class # for loop iterates over number of elements in list for i in range(len(x_items)): # for loop iterates over number of inner items of each element for j in range(len(x_items[i])): - temp = [] # to store all discriminant values of each item as a list + temp = [] # to store all discriminant values of each item as a list # for loop iterates over number of classes we have in our dataset for k in range(len(x_items)): # appending values of discriminants for each class to 'temp' list - temp.append(x_items[i][j] * (means[k] / variance) - (means[k] ** 2 / (2 * variance)) + - log(probabilities[k])) + temp.append( + x_items[i][j] * (means[k] / variance) + - (means[k] ** 2 / (2 * variance)) + + log(probabilities[k]) + ) # appending discriminant values of each item to 'results' list results.append(temp) @@ -156,7 +159,7 @@ def accuracy(actual_y: list, predicted_y: list) -> float: :param predicted_y: a list containing predicted Y values generated by 'predict_y_values' function :return: percentage of accuracy """ - correct = 0 # initial value for number of correct predictions + correct = 0 # initial value for number of correct predictions # for loop iterates over one element of each list at a time (zip mode) for i, j in zip(actual_y, predicted_y): # if actual Y value equals to predicted Y value @@ -176,19 +179,25 @@ def main(): print(" Linear Discriminant Analysis ".center(100, "*")) print("*" * 100, "\n") - print("First of all we should specify the number of classes that \n" - "we want to generate as training dataset") + print( + "First of all we should specify the number of classes that \n" + "we want to generate as training dataset" + ) # Trying to get number of classes n_classes = 0 while True: try: - user_input = int(input("Enter the number of classes (Data Groupings): ")) + user_input = int( + input("Enter the number of classes (Data Groupings): ") + ) if user_input > 0: n_classes = user_input break else: - print(f"Your entered value is {user_input} , Number of classes should be positive!") + print( + f"Your entered value is {user_input} , Number of classes should be positive!" + ) continue except ValueError: print("Your entered value is not numerical!") @@ -199,13 +208,20 @@ def main(): # Trying to get the value of standard deviation while True: try: - user_sd = float(input("Enter the value of standard deviation" - "(Default value is 1.0 for all classes): ") or "1.0") + user_sd = float( + input( + "Enter the value of standard deviation" + "(Default value is 1.0 for all classes): " + ) + or "1.0" + ) if user_sd >= 0.0: std_dev = user_sd break else: - print(f"Your entered value is {user_sd}, Standard deviation should not be negative!") + print( + f"Your entered value is {user_sd}, Standard deviation should not be negative!" + ) continue except ValueError: print("Your entered value is not numerical!") @@ -213,18 +229,20 @@ def main(): print("-" * 100) # Trying to get number of instances in classes and theirs means to generate dataset - counts = ( - [] - ) # An empty list to store instance counts of classes in dataset + counts = [] # An empty list to store instance counts of classes in dataset for i in range(n_classes): while True: try: - user_count = int(input(f"Enter The number of instances for class_{i+1}: ")) + user_count = int( + input(f"Enter The number of instances for class_{i+1}: ") + ) if user_count > 0: counts.append(user_count) break else: - print(f"Your entered value is {user_count}, Number of instances should be positive!") + print( + f"Your entered value is {user_count}, Number of instances should be positive!" + ) continue except ValueError: print("Your entered value is not numerical!") @@ -237,12 +255,16 @@ def main(): for a in range(n_classes): while True: try: - user_mean = float(input(f"Enter the value of mean for class_{a+1}: ")) + user_mean = float( + input(f"Enter the value of mean for class_{a+1}: ") + ) if isinstance(user_mean, float): user_means.append(user_mean) break else: - print(f"Your entered value is {user_mean}, And this is not valid!") + print( + f"Your entered value is {user_mean}, And this is not valid!" + ) except ValueError: print("Your entered value is not numerical!") @@ -264,9 +286,7 @@ def main(): print("-" * 100) # Generating training dataset drawn from gaussian distribution - x = ( - [] - ) # An empty list to store generated values of gaussian distribution + x = [] # An empty list to store generated values of gaussian distribution # for loop iterates over number of classes for j in range(n_classes): # appending return values of 'gaussian_distribution' function to 'x' list @@ -282,9 +302,7 @@ def main(): print("-" * 100) # Calculating the value of actual mean for each class - actual_means = ( - [] - ) # An empty list to store value of actual means + actual_means = [] # An empty list to store value of actual means # for loop iterates over number of classes(data groupings) for k in range(n_classes): # appending return values of 'calculate_mean' function to 'actual_means' list @@ -339,5 +357,5 @@ def main(): continue -if __name__ == '__main__': +if __name__ == "__main__": main() From 228574198b1ff9c9d9a307ba1ad2b28cc477999f Mon Sep 17 00:00:00 2001 From: Christian Clauss Date: Tue, 26 Nov 2019 12:53:03 +0100 Subject: [PATCH 23/24] No lines longer than 88 characters --- .../linear_discriminant_analysis.py | 219 ++++++++---------- 1 file changed, 94 insertions(+), 125 deletions(-) diff --git a/machine_learning/linear_discriminant_analysis.py b/machine_learning/linear_discriminant_analysis.py index a840507cb274..8a89f6f5922e 100644 --- a/machine_learning/linear_discriminant_analysis.py +++ b/machine_learning/linear_discriminant_analysis.py @@ -4,7 +4,8 @@ Assumptions About Data : 1. The input variables has a gaussian distribution. - 2. The variance calculated for each input variables by class grouping is the same. + 2. The variance calculated for each input variables by class grouping is the + same. 3. The mix of classes in your training set is representative of the problem. @@ -23,45 +24,46 @@ Calculate the variance : We can calculate the variance for dataset in two steps : - 1. Calculate the squared difference for each input variable from the group mean. + 1. Calculate the squared difference for each input variable from the + group mean. 2. Calculate the mean of the squared difference. ------------------------------------------------ Squared_Difference = (x - mean(k)) ** 2 - Variance = (1 / (count(x) - count(classes))) * (for i = 1 to i = n --> sum(Squared_Difference(xi))) + Variance = (1 / (count(x) - count(classes))) * + (for i = 1 to i = n --> sum(Squared_Difference(xi))) Making Predictions : - discriminant(x) = x * (mean / variance) - ((mean ** 2) / (2 * variance)) + Ln(probability) - ------------------------------------------------------------------------------------------ - After calculating the discriminant value for each class, the class with the largest discriminant value - is taken as the prediction. + discriminant(x) = x * (mean / variance) - + ((mean ** 2) / (2 * variance)) + Ln(probability) + --------------------------------------------------------------------------- + After calculating the discriminant value for each class, the class with the + largest discriminant value is taken as the prediction. Author: @EverLookNeverSee - """ -# importing modules -from random import gauss from math import log -from os import system -from os import name +from os import name, system +from random import gauss -# Making training dataset drawn from a gaussian distribution +# Make a training dataset drawn from a gaussian distribution def gaussian_distribution(mean: float, std_dev: float, instance_count: int) -> list: - """ This function generates gaussian distribution instances - based-on given mean and standard deviation - :param mean: mean value of class - :param std_dev: value of standard deviation entered by usr or default value of it - :param instance_count: instance number of class - :return: a list containing generated values based-on given mean, std_dev and instance_count - """ - + """ + Generate gaussian distribution instances based-on given mean and standard deviation + :param mean: mean value of class + :param std_dev: value of standard deviation entered by usr or default value of it + :param instance_count: instance number of class + :return: a list containing generated values based-on given mean, std_dev and + instance_count + """ return [gauss(mean, std_dev) for _ in range(instance_count)] -# Making corresponding Y flags to detecting classes +# Make corresponding Y flags to detecting classes def y_generator(class_count: int, instance_count: list) -> list: - """ This function generates y values for corresponding classes + """ + Generate y values for corresponding classes :param class_count: Number of classes(data groupings) in dataset :param instance_count: number of instances in class :return: corresponding values for data groupings in dataset @@ -70,9 +72,10 @@ def y_generator(class_count: int, instance_count: list) -> list: return [k for k in range(class_count) for _ in range(instance_count[k])] -# Calculating the class means +# Calculate the class means def calculate_mean(instance_count: int, items: list) -> float: - """ This function calculates given class mean + """ + Calculate given class mean :param instance_count: Number of instances in class :param items: items that related to specific class(data grouping) :return: calculated actual mean of considered class @@ -81,38 +84,38 @@ def calculate_mean(instance_count: int, items: list) -> float: return sum(items) / instance_count -# Calculating the class probabilities +# Calculate the class probabilities def calculate_probabilities(instance_count: int, total_count: int) -> float: - """ This function calculates the probability that a given instance - will belong to which class - :param instance_count: number of instances in class - :param total_count: the number of all instances - :return: value of probability for considered class - """ + """ + Calculate the probability that a given instance will belong to which class + :param instance_count: number of instances in class + :param total_count: the number of all instances + :return: value of probability for considered class + """ # number of instances in specific class divided by number of all instances return instance_count / total_count -# Calculating the variance +# Calculate the variance def calculate_variance(items: list, means: list, total_count: int) -> float: - """ This function calculates the variance + """ + Calculate the variance :param items: a list containing all items(gaussian distribution of all classes) :param means: a list containing real mean values of each class :param total_count: the number of all instances :return: calculated variance for considered dataset """ - squared_diff = [] # An empty list to store all squared differences - n_classes = len(means) # Number of classes in dataSet - - # for loo iterates over number of elements in items + # iterate over number of elements in items for i in range(len(items)): # for loop iterates over number of elements in inner layer of items for j in range(len(items[i])): # appending squared differences to 'squared_diff' list squared_diff.append((items[i][j] - means[i]) ** 2) - # one divided by (the number of all instances - number of classes) multiplied by sum of all squared differences + # one divided by (the number of all instances - number of classes) multiplied by + # sum of all squared differences + n_classes = len(means) # Number of classes in dataset return 1 / (total_count - n_classes) * sum(squared_diff) @@ -127,10 +130,9 @@ def predict_y_values( :param probabilities: a list containing all probabilities of classes :return: a list containing predicted Y values """ - - results = ( - [] - ) # An empty list to store generated discriminant values of all items in dataset for each class + # An empty list to store generated discriminant values of all items in dataset for + # each class + results = [] # for loop iterates over number of elements in list for i in range(len(x_items)): # for loop iterates over number of inner items of each element @@ -146,57 +148,50 @@ def predict_y_values( ) # appending discriminant values of each item to 'results' list results.append(temp) - print("Generated Discriminants: \n", results) - return [l.index(max(l)) for l in results] # Calculating Accuracy def accuracy(actual_y: list, predicted_y: list) -> float: - """ This function calculates the value of accuracy based-on predictions - :param actual_y:a list containing initial Y values generated by 'y_generator' function - :param predicted_y: a list containing predicted Y values generated by 'predict_y_values' function + """ + Calculate the value of accuracy based-on predictions + :param actual_y:a list containing initial Y values generated by 'y_generator' + function + :param predicted_y: a list containing predicted Y values generated by + 'predict_y_values' function :return: percentage of accuracy """ - correct = 0 # initial value for number of correct predictions - # for loop iterates over one element of each list at a time (zip mode) - for i, j in zip(actual_y, predicted_y): - # if actual Y value equals to predicted Y value - if i == j: - # prediction is correct - correct += 1 - # percentage of accuracy equals to number of correct predictions divided by number of - # all data and multiplied by 100 + # iterate over one element of each list at a time (zip mode) + # prediction is correct if actual Y value equals to predicted Y value + correct = sum(1 for i, j in zip(actual_y, predicted_y) if i == j) + # percentage of accuracy equals to number of correct predictions divided by number + # of all data and multiplied by 100 return (correct / len(actual_y)) * 100 # Main Function def main(): """ This function starts execution phase """ - while True: - print(" Linear Discriminant Analysis ".center(100, "*")) print("*" * 100, "\n") - print( - "First of all we should specify the number of classes that \n" - "we want to generate as training dataset" - ) - + print("First of all we should specify the number of classes that") + print("we want to generate as training dataset") # Trying to get number of classes n_classes = 0 while True: try: user_input = int( - input("Enter the number of classes (Data Groupings): ") + input("Enter the number of classes (Data Groupings): ").strip() ) if user_input > 0: n_classes = user_input break else: print( - f"Your entered value is {user_input} , Number of classes should be positive!" + f"Your entered value is {user_input} , Number of classes " + f"should be positive!" ) continue except ValueError: @@ -212,7 +207,7 @@ def main(): input( "Enter the value of standard deviation" "(Default value is 1.0 for all classes): " - ) + ).strip() or "1.0" ) if user_sd >= 0.0: @@ -220,7 +215,8 @@ def main(): break else: print( - f"Your entered value is {user_sd}, Standard deviation should not be negative!" + f"Your entered value is {user_sd}, Standard deviation should " + f"not be negative!" ) continue except ValueError: @@ -228,7 +224,8 @@ def main(): print("-" * 100) - # Trying to get number of instances in classes and theirs means to generate dataset + # Trying to get number of instances in classes and theirs means to generate + # dataset counts = [] # An empty list to store instance counts of classes in dataset for i in range(n_classes): while True: @@ -241,17 +238,16 @@ def main(): break else: print( - f"Your entered value is {user_count}, Number of instances should be positive!" + f"Your entered value is {user_count}, Number of " + f"instances should be positive!" ) continue except ValueError: print("Your entered value is not numerical!") - print("-" * 100) - user_means = ( - [] - ) # An empty list to store values of user-entered means of classes + # An empty list to store values of user-entered means of classes + user_means = [] for a in range(n_classes): while True: try: @@ -261,100 +257,73 @@ def main(): if isinstance(user_mean, float): user_means.append(user_mean) break - else: - print( - f"Your entered value is {user_mean}, And this is not valid!" - ) - + print(f"You entered an invalid value: {user_mean}") except ValueError: print("Your entered value is not numerical!") - print("-" * 100) print("Standard deviation: ", std_dev) - # print out the number of instances in classes in separated line - for b in range(len(counts)): - print(f"Number of instances in class_{b+1} is: {counts[b]}") - + for i, count in enumerate(counts, 1): + print(f"Number of instances in class_{i} is: {count}") print("-" * 100) # print out mean values of classes separated line - for c in range(len(user_means)): - print(f"Mean of class_{c+1} is: {user_means[c]}") - + for i, user_mean in enumerate(user_means, 1): + print(f"Mean of class_{i} is: {user_mean}") print("-" * 100) # Generating training dataset drawn from gaussian distribution - x = [] # An empty list to store generated values of gaussian distribution - # for loop iterates over number of classes - for j in range(n_classes): - # appending return values of 'gaussian_distribution' function to 'x' list - x.append(gaussian_distribution(user_means[j], std_dev, counts[j])) + x = [ + gaussian_distribution(user_means[j], std_dev, counts[j]) + for j in range(n_classes) + ] print("Generated Normal Distribution: \n", x) - print("-" * 100) # Generating Ys to detecting corresponding classes y = y_generator(n_classes, counts) print("Generated Corresponding Ys: \n", y) - print("-" * 100) # Calculating the value of actual mean for each class - actual_means = [] # An empty list to store value of actual means - # for loop iterates over number of classes(data groupings) - for k in range(n_classes): - # appending return values of 'calculate_mean' function to 'actual_means' list - actual_means.append(calculate_mean(counts[k], x[k])) - # for loop iterates over number of elements in 'actual_means' list and print out them in separated line - for d in range(len(actual_means)): - print(f"Actual(Real) mean of class_{d+1} is: {actual_means[d]}") - + actual_means = [calculate_mean(counts[k], x[k]) for k in range(n_classes)] + # for loop iterates over number of elements in 'actual_means' list and print + # out them in separated line + for i, actual_mean in enumerate(actual_means, 1): + print(f"Actual(Real) mean of class_{i} is: {actual_mean}") print("-" * 100) # Calculating the value of probabilities for each class + # An empty list to store values of probabilities for each class probabilities = ( - [] - ) # An empty list to store values of probabilities for each class - # # for loop iterates over number of classes(data groupings) - for l in range(n_classes): - # appending return values of 'prob_calc' function to 'probabilities' list - probabilities.append(calculate_probabilities(counts[l], sum(counts))) - # for loop iterates over number of elements in 'probabilities' list and print out them in separated line - for e in range(len(probabilities)): - print("Probability of class_{} is: {}".format(e + 1, probabilities[e])) - + calculate_probabilities(counts[i], sum(counts)) for i in range(n_classes) + ) + # for loop iterates over number of elements in 'probabilities' list and print + # out them in separated line + for i, probability in enumerate(probabilities, 1): + print("Probability of class_{} is: {}".format(i, probability)) print("-" * 100) # Calculating the values of variance for each class variance = calculate_variance(x, actual_means, sum(counts)) print("Variance: ", variance) - print("-" * 100) # Predicting Y values # storing predicted Y values in 'pre_indexes' variable pre_indexes = predict_y_values(x, actual_means, variance, probabilities) - print("-" * 100) # Calculating Accuracy of the model - print("Accuracy: ", accuracy(y, pre_indexes)) + print(f"Accuracy: {accuracy(y, pre_indexes)}") print("-" * 100) print(" DONE ".center(100, "+")) - command = input("Press any key to restart and 'q' for quit: ") - if command.lower() == "q": + if input("Press any key to restart or 'q' for quit: ").strip().lower() == "q": print("\n" + "GoodBye!".center(100, "-") + "\n") break - else: - if name == "nt": # Related to Windows OS - system("cls") - continue - else: - system("cls" if name == "nt" else "clear") - continue + system("cls" if name == "nt" else "clear") if __name__ == "__main__": From 5f7ffcca7db6d21e7e481e52c53687837392179b Mon Sep 17 00:00:00 2001 From: github-actions <${GITHUB_ACTOR}@users.noreply.github.com> Date: Tue, 26 Nov 2019 11:53:34 +0000 Subject: [PATCH 24/24] updating DIRECTORY.md --- DIRECTORY.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/DIRECTORY.md b/DIRECTORY.md index 6e64f034df62..74c63d144e40 100644 --- a/DIRECTORY.md +++ b/DIRECTORY.md @@ -52,6 +52,7 @@ * [Rsa Factorization](https://github.com/TheAlgorithms/Python/blob/master/ciphers/rsa_factorization.py) * [Rsa Key Generator](https://github.com/TheAlgorithms/Python/blob/master/ciphers/rsa_key_generator.py) * [Shuffled Shift Cipher](https://github.com/TheAlgorithms/Python/blob/master/ciphers/shuffled_shift_cipher.py) + * [Simple Keyword Cypher](https://github.com/TheAlgorithms/Python/blob/master/ciphers/simple_keyword_cypher.py) * [Simple Substitution Cipher](https://github.com/TheAlgorithms/Python/blob/master/ciphers/simple_substitution_cipher.py) * [Trafid Cipher](https://github.com/TheAlgorithms/Python/blob/master/ciphers/trafid_cipher.py) * [Transposition Cipher](https://github.com/TheAlgorithms/Python/blob/master/ciphers/transposition_cipher.py) @@ -95,6 +96,7 @@ * [Heap](https://github.com/TheAlgorithms/Python/blob/master/data_structures/heap/heap.py) * [Min Heap](https://github.com/TheAlgorithms/Python/blob/master/data_structures/heap/min_heap.py) * Linked List + * [Circular Linked List](https://github.com/TheAlgorithms/Python/blob/master/data_structures/linked_list/circular_linked_list.py) * [Doubly Linked List](https://github.com/TheAlgorithms/Python/blob/master/data_structures/linked_list/doubly_linked_list.py) * [From Sequence](https://github.com/TheAlgorithms/Python/blob/master/data_structures/linked_list/from_sequence.py) * [Is Palindrome](https://github.com/TheAlgorithms/Python/blob/master/data_structures/linked_list/is_palindrome.py) @@ -102,6 +104,7 @@ * [Singly Linked List](https://github.com/TheAlgorithms/Python/blob/master/data_structures/linked_list/singly_linked_list.py) * [Swap Nodes](https://github.com/TheAlgorithms/Python/blob/master/data_structures/linked_list/swap_nodes.py) * Queue + * [Circular Queue](https://github.com/TheAlgorithms/Python/blob/master/data_structures/queue/circular_queue.py) * [Double Ended Queue](https://github.com/TheAlgorithms/Python/blob/master/data_structures/queue/double_ended_queue.py) * [Linked Queue](https://github.com/TheAlgorithms/Python/blob/master/data_structures/queue/linked_queue.py) * [Queue On List](https://github.com/TheAlgorithms/Python/blob/master/data_structures/queue/queue_on_list.py) @@ -149,6 +152,7 @@ * [Fibonacci](https://github.com/TheAlgorithms/Python/blob/master/dynamic_programming/fibonacci.py) * [Floyd Warshall](https://github.com/TheAlgorithms/Python/blob/master/dynamic_programming/floyd_warshall.py) * [Fractional Knapsack](https://github.com/TheAlgorithms/Python/blob/master/dynamic_programming/fractional_knapsack.py) + * [Fractional Knapsack 2](https://github.com/TheAlgorithms/Python/blob/master/dynamic_programming/fractional_knapsack_2.py) * [Integer Partition](https://github.com/TheAlgorithms/Python/blob/master/dynamic_programming/integer_partition.py) * [K Means Clustering Tensorflow](https://github.com/TheAlgorithms/Python/blob/master/dynamic_programming/k_means_clustering_tensorflow.py) * [Knapsack](https://github.com/TheAlgorithms/Python/blob/master/dynamic_programming/knapsack.py) @@ -224,6 +228,7 @@ * [K Means Clust](https://github.com/TheAlgorithms/Python/blob/master/machine_learning/k_means_clust.py) * [K Nearest Neighbours](https://github.com/TheAlgorithms/Python/blob/master/machine_learning/k_nearest_neighbours.py) * [Knn Sklearn](https://github.com/TheAlgorithms/Python/blob/master/machine_learning/knn_sklearn.py) + * [Linear Discriminant Analysis](https://github.com/TheAlgorithms/Python/blob/master/machine_learning/linear_discriminant_analysis.py) * [Linear Regression](https://github.com/TheAlgorithms/Python/blob/master/machine_learning/linear_regression.py) * [Logistic Regression](https://github.com/TheAlgorithms/Python/blob/master/machine_learning/logistic_regression.py) * [Polymonial Regression](https://github.com/TheAlgorithms/Python/blob/master/machine_learning/polymonial_regression.py) @@ -521,6 +526,7 @@ * [Naive String Search](https://github.com/TheAlgorithms/Python/blob/master/strings/naive_string_search.py) * [Rabin Karp](https://github.com/TheAlgorithms/Python/blob/master/strings/rabin_karp.py) * [Remove Duplicate](https://github.com/TheAlgorithms/Python/blob/master/strings/remove_duplicate.py) + * [Reverse Words](https://github.com/TheAlgorithms/Python/blob/master/strings/reverse_words.py) * [Word Occurence](https://github.com/TheAlgorithms/Python/blob/master/strings/word_occurence.py) ## Traversals @@ -528,6 +534,7 @@ ## Web Programming * [Crawl Google Results](https://github.com/TheAlgorithms/Python/blob/master/web_programming/crawl_google_results.py) + * [Current Stock Price](https://github.com/TheAlgorithms/Python/blob/master/web_programming/current_stock_price.py) * [Fetch Bbc News](https://github.com/TheAlgorithms/Python/blob/master/web_programming/fetch_bbc_news.py) * [Fetch Github Info](https://github.com/TheAlgorithms/Python/blob/master/web_programming/fetch_github_info.py) * [Get Imdbtop](https://github.com/TheAlgorithms/Python/blob/master/web_programming/get_imdbtop.py)