|
1 |
| -# LGBM Classifier Example |
| 1 | +# LGBM Classifier Example using Bank Marketing Dataset |
2 | 2 | import numpy as np
|
3 | 3 | from matplotlib import pyplot as plt
|
4 |
| -from sklearn.datasets import load_iris |
| 4 | +from sklearn.datasets import fetch_openml |
5 | 5 | from sklearn.metrics import ConfusionMatrixDisplay
|
6 | 6 | from sklearn.model_selection import train_test_split
|
7 | 7 | from lightgbm import LGBMClassifier
|
8 | 8 |
|
9 | 9 |
|
def data_handling(data: dict) -> tuple:
    """
    Split a dataset mapping into its features and target labels.

    ``data["data"]`` holds the features and ``data["target"]`` holds the
    labels. Both values are returned unchanged as a ``(features, target)``
    tuple; a mapping that lacks either key raises ``KeyError``.

    >>> data_handling(
    ...     {"data": "[0.12, 0.02, 0.01, 0.25, 0.09]", "target": [1]}
    ... )
    ('[0.12, 0.02, 0.01, 0.25, 0.09]', [1])
    """
    return data["data"], data["target"]
20 | 19 |
|
21 | 20 |
|
def lgbm_classifier(features: np.ndarray, target: np.ndarray) -> LGBMClassifier:
    """
    Train an LGBM classifier on the given features and target labels.

    The model is built with a fixed ``random_state`` of 42, fitted on
    ``features`` and ``target``, and then returned.

    The expected output below relies on doctest's ELLIPSIS option, so the
    directive is enabled on the example itself rather than assumed from the
    test runner's configuration.

    >>> sample = np.array([[0.12, 0.02, 0.01, 0.25, 0.09]])
    >>> lgbm_classifier(sample, np.array([1]))  # doctest: +ELLIPSIS
    LGBMClassifier(...)
    """
    classifier = LGBMClassifier(random_state=42)
    classifier.fit(features, target)
    return classifier
|
32 | 29 |
|
33 | 30 |
|
def main() -> None:
    """
    Demonstrate LGBM classification on the Bank Marketing dataset.

    The URL for this algorithm:
    https://lightgbm.readthedocs.io/en/latest/

    The dataset is fetched from OpenML, split 75/25 into training and test
    sets, used to fit an LGBM classifier, and the normalized confusion matrix
    of that classifier on the test set is plotted.
    """
    # Load Bank Marketing dataset.
    # The parser is pinned to "liac-arff": with as_frame=False it returns
    # categorical columns ordinal-encoded, whereas the pandas parser leaves
    # the categories unencoded (see the scikit-learn fetch_openml docs).
    bank_data = fetch_openml(
        name="bank-marketing", version=1, as_frame=False, parser="liac-arff"
    )
    data, target = data_handling(bank_data)
    x_train, x_test, y_train, y_test = train_test_split(
        data, target, test_size=0.25, random_state=1
    )

    # Create an LGBM Classifier from the training data
    lgbm_classifier_model = lgbm_classifier(x_train, y_train)

    # Display the confusion matrix of the classifier
    # NOTE(review): the labels assume the estimator's class order maps to
    # "No" then "Yes" - confirm against the dataset's target encoding.
    ConfusionMatrixDisplay.from_estimator(
        lgbm_classifier_model,
        x_test,
        y_test,
        display_labels=["No", "Yes"],
        cmap="Blues",
        normalize="true",
    )
    plt.title("Normalized Confusion Matrix - Bank Marketing Dataset")
    plt.show()
|
67 | 57 |
|
68 | 58 |
|
|
0 commit comments