Update lgbm_classifier.py to use the Bank Marketing dataset

ArchismanKarmakar · web-flow · commit daf0f8bb9903 · 2024-10-04T23:42:06.000+05:30
diff --git a/machine_learning/lgbm_classifier.py b/machine_learning/lgbm_classifier.py
@@ -1,68 +1,58 @@
-# LGBM Classifier Example
+# LGBM Classifier Example using Bank Marketing Dataset
 import numpy as np
 from matplotlib import pyplot as plt
-from sklearn.datasets import load_iris
+from sklearn.datasets import fetch_openml
 from sklearn.metrics import ConfusionMatrixDisplay
 from sklearn.model_selection import train_test_split
 from lightgbm import LGBMClassifier
 
 
 def data_handling(data: dict) -> tuple:
+    # Split the dataset into features ("data") and target labels ("target").
     """
-    Splits dataset into features and target labels.
-
-    >>> data_handling({'data': '[5.1, 3.5, 1.4, 0.2]', 'target': [0]})
-    ('[5.1, 3.5, 1.4, 0.2]', [0])
-    >>> data_handling({'data': '[4.9, 3.0, 1.4, 0.2], [4.7, 3.2, 1.3, 0.2]', 'target': [0, 0]})
-    ('[4.9, 3.0, 1.4, 0.2], [4.7, 3.2, 1.3, 0.2]', [0, 0])
+    >>> data_handling((
+    ...  {'data':'[0.12, 0.02, 0.01, 0.25, 0.09]',
+    ...  'target':([1])}))
+    ('[0.12, 0.02, 0.01, 0.25, 0.09]', [1])
     """
-    return data["data"], data["target"]
+    return (data["data"], data["target"])
 
 
 def lgbm_classifier(features: np.ndarray, target: np.ndarray) -> LGBMClassifier:
     """
-    Trains an LGBM Classifier on the given features and target labels.
-
-    >>> lgbm_classifier(np.array([[5.1, 3.6, 1.4, 0.2]]), np.array([0]))
-    LGBMClassifier()
+    >>> lgbm_classifier(np.array([[0.12, 0.02, 0.01, 0.25, 0.09]]), np.array([1]))
+    LGBMClassifier(...)
     """
-    classifier = LGBMClassifier()
+    classifier = LGBMClassifier(random_state=42)
     classifier.fit(features, target)
     return classifier
 
 
 def main() -> None:
     """
-    Main function to demonstrate LGBM classification on the Iris dataset.
-
-    URL for LightGBM documentation:
+    Documentation for the LightGBM library used by this algorithm:
     https://lightgbm.readthedocs.io/en/latest/
+    The Bank Marketing dataset is used to demonstrate the algorithm.
     """
-    # Load the Iris dataset
-    iris = load_iris()
-    features, targets = data_handling(iris)
-
-    # Split the dataset into training and testing sets
+    # Load the Bank Marketing dataset from OpenML
+    bank_data = fetch_openml(name='bank-marketing', version=1, as_frame=False)
+    data, target = data_handling(bank_data)
     x_train, x_test, y_train, y_test = train_test_split(
-        features, targets, test_size=0.25, random_state=42
+        data, target, test_size=0.25, random_state=1
     )
+    # Create an LGBM Classifier from the training data
+    lgbm_classifier_model = lgbm_classifier(x_train, y_train)
 
-    # Class names for display
-    names = iris["target_names"]
-
-    # Train the LGBM classifier
-    lgbm_clf = lgbm_classifier(x_train, y_train)
-
-    # Display the confusion matrix for the classifier
+    # Display the confusion matrix of the classifier
     ConfusionMatrixDisplay.from_estimator(
-        lgbm_clf,
+        lgbm_classifier_model,
         x_test,
         y_test,
-        display_labels=names,
+        display_labels=['No', 'Yes'],
         cmap="Blues",
         normalize="true",
     )
-    plt.title("Normalized Confusion Matrix - IRIS Dataset")
+    plt.title("Normalized Confusion Matrix - Bank Marketing Dataset")
     plt.show()