forked from TheAlgorithms/Python
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlgbm_classifier.py
73 lines (57 loc) · 2 KB
/
lgbm_classifier.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# LGBM Classifier Example
import numpy as np
from matplotlib import pyplot as plt
from sklearn.datasets import load_iris
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.model_selection import train_test_split
from lightgbm import LGBMClassifier
def data_handling(data: dict) -> tuple:
    """
    Separate a dataset mapping into its feature values and its target labels.

    The objects stored under the "data" and "target" keys are returned
    unchanged, in that order, as a two-element tuple.

    >>> data_handling({'data': '[5.1, 3.5, 1.4, 0.2]', 'target': [0]})
    ('[5.1, 3.5, 1.4, 0.2]', [0])
    >>> data_handling(
    ...     {'data': '[4.9, 3.0, 1.4, 0.2], [4.7, 3.2, 1.3, 0.2]', 'target': [0, 0]}
    ... )
    ('[4.9, 3.0, 1.4, 0.2], [4.7, 3.2, 1.3, 0.2]', [0, 0])
    """
    features = data["data"]
    labels = data["target"]
    return features, labels
def lgbm_classifier(features: np.ndarray, target: np.ndarray) -> LGBMClassifier:
    """
    Fit an LGBM classifier, built with its default settings, to the given data.

    features: training samples passed to ``fit`` as the feature matrix.
    target: class labels passed to ``fit`` alongside ``features``.
    Returns the fitted ``LGBMClassifier`` instance.

    >>> lgbm_classifier(np.array([[5.1, 3.6, 1.4, 0.2]]), np.array([0]))
    LGBMClassifier()
    """
    # fit() hands back the estimator itself, so construction and training chain.
    return LGBMClassifier().fit(features, target)
def main() -> None:
    """
    Demonstrate LGBM classification on the Iris dataset.

    Loads Iris, holds out 25% of the samples as a test set, trains the
    classifier on the remainder and shows a confusion matrix, normalized
    over the true labels, for the held-out samples.

    URL for LightGBM documentation:
    https://lightgbm.readthedocs.io/en/latest/
    """
    iris = load_iris()
    # Class names are only needed as axis labels on the plot
    class_names = iris["target_names"]
    features, targets = data_handling(iris)

    # Fixed random_state keeps the train/test partition reproducible
    split = train_test_split(features, targets, test_size=0.25, random_state=42)
    x_train, x_test, y_train, y_test = split

    # Train on the training portion only
    model = lgbm_classifier(x_train, y_train)

    # Evaluate on the held-out portion and draw the confusion matrix
    ConfusionMatrixDisplay.from_estimator(
        model,
        x_test,
        y_test,
        display_labels=class_names,
        cmap="Blues",
        normalize="true",
    )
    plt.title("Normalized Confusion Matrix - IRIS Dataset")
    plt.show()
if __name__ == "__main__":
    # Verify the docstring examples first, then run the demonstration.
    from doctest import testmod

    testmod(verbose=True)
    main()