From 44cb41112b215e2d763e3234b00504e6c203effc Mon Sep 17 00:00:00 2001
From: PARIKSHIT SINGH
Date: Sat, 1 Mar 2025 09:47:53 +0530
Subject: [PATCH 1/3] feat: Implement Principal Component Analysis (PCA)

- Added a Python implementation of PCA using NumPy and scikit-learn
- Standardizes the dataset before applying PCA so that no single feature dominates the components
- Computes principal components and explained variance ratio
- Uses the Iris dataset as a sample for demonstration
- Provides a modular structure for easy extension and dataset modification
---
 .../principle_component_analysis.py          | 66 +++++++++++++++++++
 1 file changed, 66 insertions(+)
 create mode 100644 machine_learning/principle_component_analysis.py

diff --git a/machine_learning/principle_component_analysis.py b/machine_learning/principle_component_analysis.py
new file mode 100644
index 000000000000..fb06e12dacea
--- /dev/null
+++ b/machine_learning/principle_component_analysis.py
@@ -0,0 +1,66 @@
+"""
+Principal Component Analysis (PCA) is a dimensionality reduction technique
+commonly used in machine learning. It transforms high-dimensional data into
+lower dimensions while retaining most of the information.
+
+Here, we use the Iris dataset and apply PCA to reduce its
+dimensionality. We compute the principal components and transform the dataset
+into a lower-dimensional space.
+
+We reduce the number of feature columns from 4 to 3.
+
+"""
+
+import numpy as np
+import requests
+from sklearn.decomposition import PCA
+from sklearn.preprocessing import StandardScaler
+from sklearn.datasets import load_iris
+
+
+def collect_dataset():
+    """Collect dataset (Iris dataset)
+    :return: Feature matrix and target values
+    """
+    data = load_iris()
+    return np.array(data.data), np.array(data.target)
+
+
+def apply_pca(data_x, n_components):
+    """Apply Principal Component Analysis (PCA)
+    :param data_x: Original dataset
+    :param n_components: Number of principal components
+    :return: Transformed dataset and explained variance
+    """
+    # Standardizing the features
+    scaler = StandardScaler()
+    data_x_scaled = scaler.fit_transform(data_x)
+
+    # Applying PCA
+    pca = PCA(n_components=n_components)
+    principal_components = pca.fit_transform(data_x_scaled)
+
+    # Explained variance ratio
+    explained_variance = pca.explained_variance_ratio_
+
+    return principal_components, explained_variance
+
+
+def main():
+    """Driver function"""
+    data_x, data_y = collect_dataset()
+    # Set number of principal components
+    n_components = 3
+
+    # Apply PCA
+    transformed_data, variance_ratio = apply_pca(data_x, n_components)
+
+    print("Transformed Dataset (First 5 rows):")
+    print(transformed_data[:5])
+
+    print("\nExplained Variance Ratio:")
+    print(variance_ratio)
+
+
+if __name__ == "__main__":
+    main()
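[Editor's note: illustration only, not part of the patch series.] The commit above reports pca.explained_variance_ratio_ for the standardized Iris features. As a quick sanity check, the same ratios can be obtained from the eigenvalues of the covariance matrix of the standardized data; the sketch below compares the two routes. It assumes NumPy and scikit-learn are installed, and the variable names are illustrative.

    import numpy as np
    from sklearn.datasets import load_iris
    from sklearn.decomposition import PCA
    from sklearn.preprocessing import StandardScaler

    # Standardize the Iris features, as the patched apply_pca() does.
    features = StandardScaler().fit_transform(load_iris().data)

    # Route 1: scikit-learn's PCA, keeping 3 components as in main().
    pca = PCA(n_components=3).fit(features)

    # Route 2: eigenvalues of the sample covariance matrix, largest first.
    eigenvalues = np.linalg.eigvalsh(np.cov(features, rowvar=False))[::-1]
    manual_ratio = eigenvalues / eigenvalues.sum()

    # The first three ratios should agree to numerical precision.
    assert np.allclose(pca.explained_variance_ratio_, manual_ratio[:3])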
From ac605b512e35ef23f6fc5aa61c1cff499c69dce7 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sat, 1 Mar 2025 04:28:50 +0000
Subject: [PATCH 2/3] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 machine_learning/principle_component_analysis.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/machine_learning/principle_component_analysis.py b/machine_learning/principle_component_analysis.py
index fb06e12dacea..96f64cb7b3e0 100644
--- a/machine_learning/principle_component_analysis.py
+++ b/machine_learning/principle_component_analysis.py
@@ -1,13 +1,13 @@
 """
-Principal Component Analysis (PCA) is a dimensionality reduction technique 
-commonly used in machine learning. It transforms high-dimensional data into 
+Principal Component Analysis (PCA) is a dimensionality reduction technique
+commonly used in machine learning. It transforms high-dimensional data into
 lower dimensions while retaining most of the information.
 
-Here, we use the Iris dataset and apply PCA to reduce its 
-dimensionality. We compute the principal components and transform the dataset 
+Here, we use the Iris dataset and apply PCA to reduce its
+dimensionality. We compute the principal components and transform the dataset
 into a lower-dimensional space.
 
-We reduce the number of feature columns from 4 to 3. 
+We reduce the number of feature columns from 4 to 3.
 
 """

From e735cbe5ed7a47caac788233d3b8fe43d89d3a32 Mon Sep 17 00:00:00 2001
From: PARIKSHIT SINGH
Date: Sat, 1 Mar 2025 10:02:51 +0530
Subject: [PATCH 3/3] refactor: Removed unused requests import

---
 machine_learning/principle_component_analysis.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/machine_learning/principle_component_analysis.py b/machine_learning/principle_component_analysis.py
index fb06e12dacea..5d9b15e3649d 100644
--- a/machine_learning/principle_component_analysis.py
+++ b/machine_learning/principle_component_analysis.py
@@ -12,7 +12,6 @@
 """
 
 import numpy as np
-import requests
 from sklearn.decomposition import PCA
 from sklearn.preprocessing import StandardScaler
 from sklearn.datasets import load_iris
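[Editor's note: illustration only, not part of the patch series.] The first commit message advertises a modular structure for easy dataset modification. A hypothetical usage sketch under that assumption: import apply_pca from the new module (assumed here to be run from the machine_learning/ directory, or with that directory on PYTHONPATH) and point it at scikit-learn's Wine dataset instead of Iris.

    from sklearn.datasets import load_wine

    from principle_component_analysis import apply_pca

    # Wine has 13 feature columns; keep only 2 principal components.
    wine_features = load_wine().data
    components, variance_ratio = apply_pca(wine_features, n_components=2)

    print(components.shape)        # (178, 2)
    print(variance_ratio.sum())    # fraction of total variance retained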