-
-
Notifications
You must be signed in to change notification settings - Fork 46.7k
Created LGBM classifier @ lgbm_classifier.py #11759
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 12 commits
82711a4
3d46dd0
e3086a8
3e93a6c
daf0f8b
6c5fbdc
e9dc1aa
e9425d7
69ec309
75ff081
e3197a2
f0ceb04
7298a54
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
# LGBM Classifier Example using Bank Marketing Dataset | ||
import numpy as np | ||
from lightgbm import LGBMClassifier | ||
from matplotlib import pyplot as plt | ||
from sklearn.datasets import fetch_openml | ||
from sklearn.metrics import ConfusionMatrixDisplay | ||
from sklearn.model_selection import train_test_split | ||
|
||
|
||
def data_handling(data: dict) -> tuple:
    """
    Split a dataset mapping into its features and its target.

    The value stored under ``"data"`` is returned as the features and the
    value stored under ``"target"`` as the target. Neither value is copied
    or converted.

    :param data: mapping that provides the ``"data"`` and ``"target"`` keys
    :return: ``(features, target)`` tuple

    >>> data_handling(
    ...     {'data': '[0.12, 0.02, 0.01, 0.25, 0.09]', 'target': [1]}
    ... )
    ('[0.12, 0.02, 0.01, 0.25, 0.09]', [1])
    """
    return (data["data"], data["target"])
|
||
|
||
def lgbm_classifier(features: np.ndarray, target: np.ndarray) -> LGBMClassifier:
    """
    Fit an LGBM classifier on the given data and return the fitted model.

    The classifier is created with ``random_state=42``; every other
    hyperparameter is left at the library default.

    :param features: feature matrix handed to ``fit``
    :param target: labels handed to ``fit``
    :return: the fitted ``LGBMClassifier`` instance

    The ELLIPSIS directive is required so that ``(...)`` in the expected
    output matches the real parameter list in the repr.

    >>> lgbm_classifier(
    ...     np.array([[0.12, 0.02, 0.01, 0.25, 0.09]]), np.array([1])
    ... )  # doctest: +ELLIPSIS
    LGBMClassifier(...)
    """
    classifier = LGBMClassifier(random_state=42)
    classifier.fit(features, target)
    return classifier
|
||
|
||
def main() -> None:
    """
    Train an LGBM classifier on the Bank Marketing dataset and plot the result.

    The URL for this algorithm:
    https://lightgbm.readthedocs.io/en/latest/
    Bank Marketing Dataset is used to demonstrate the algorithm.
    """
    # Load Bank Marketing dataset
    bank_data = fetch_openml(name="bank-marketing", version=1, as_frame=False)
    data, target = data_handling(bank_data)
    # Hold out 25% of the samples for evaluation; the seed keeps the split fixed.
    x_train, x_test, y_train, y_test = train_test_split(
        data, target, test_size=0.25, random_state=1
    )
    # Create an LGBM Classifier from the training data
    lgbm_classifier_model = lgbm_classifier(x_train, y_train)

    # Display the confusion matrix of the classifier
    ConfusionMatrixDisplay.from_estimator(
        lgbm_classifier_model,
        x_test,
        y_test,
        display_labels=["No", "Yes"],
        cmap="Blues",
        normalize="true",
    )
    plt.title("Normalized Confusion Matrix - Bank Marketing Dataset")
    plt.show()
|
||
|
||
if __name__ == "__main__":
    import doctest

    # Run the embedded doctests first, then the demonstration itself.
    doctest.testmod(verbose=True)
    main()
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
# LGBM Regressor Example using Bank Marketing Dataset | ||
import numpy as np | ||
from lightgbm import LGBMRegressor | ||
from sklearn.datasets import fetch_openml | ||
from sklearn.metrics import mean_absolute_error, mean_squared_error | ||
from sklearn.model_selection import train_test_split | ||
|
||
|
||
def data_handling(data: dict) -> tuple:
    """
    Split a dataset mapping into its features and its target.

    The value stored under ``"data"`` is returned as the features and the
    value stored under ``"target"`` as the target. Neither value is copied
    or converted.

    :param data: mapping that provides the ``"data"`` and ``"target"`` keys
    :return: ``(features, target)`` tuple

    >>> data_handling(
    ...     {'data': '[0.12, 0.02, 0.01, 0.25, 0.09]', 'target': [1]}
    ... )
    ('[0.12, 0.02, 0.01, 0.25, 0.09]', [1])
    """
    return (data["data"], data["target"])
|
||
|
||
def lgbm_regressor(
    features: np.ndarray, target: np.ndarray, test_features: np.ndarray
) -> np.ndarray:
    """
    Fit an LGBM regressor and predict the target for ``test_features``.

    The regressor is created with ``random_state=42``; every other
    hyperparameter is left at the library default.

    :param features: training feature matrix handed to ``fit``
    :param target: training target values handed to ``fit``
    :param test_features: feature matrix to predict for
    :return: predictions reshaped to one column, one row per prediction

    The example checks only the output shape, because the exact predicted
    value and dtype depend on the installed LightGBM version.

    >>> lgbm_regressor(
    ...     np.array([[0.12, 0.02, 0.01, 0.25, 0.09]]),
    ...     np.array([1]),
    ...     np.array([[0.11, 0.03, 0.02, 0.28, 0.08]]),
    ... ).shape
    (1, 1)
    """
    lgbm = LGBMRegressor(random_state=42)
    lgbm.fit(features, target)
    # Predict target for test data
    predictions = lgbm.predict(test_features)
    # Turn the predictions into a single-column 2-D array.
    predictions = predictions.reshape(len(predictions), 1)
    return predictions
|
||
|
||
def main() -> None:
    """
    Train an LGBM regressor on the Bank Marketing dataset and print its errors.

    The URL for this algorithm:
    https://lightgbm.readthedocs.io/en/latest/
    Bank Marketing Dataset is used to demonstrate the algorithm.
    """
    # Load Bank Marketing dataset
    # NOTE(review): the sibling classifier example treats this same dataset as
    # a classification problem; confirm the target is numeric (or cast it)
    # before it is used as a regression target - TODO confirm.
    bank_data = fetch_openml(name="bank-marketing", version=1, as_frame=False)
    data, target = data_handling(bank_data)
    # Hold out 25% of the samples for evaluation; the seed keeps the split fixed.
    x_train, x_test, y_train, y_test = train_test_split(
        data, target, test_size=0.25, random_state=1
    )
    predictions = lgbm_regressor(x_train, y_train, x_test)
    # Error printing
    print(f"Mean Absolute Error: {mean_absolute_error(y_test, predictions)}")
    print(f"Mean Square Error: {mean_squared_error(y_test, predictions)}")
|
||
|
||
if __name__ == "__main__":
    import doctest

    # Run the embedded doctests first, then the demonstration itself.
    doctest.testmod(verbose=True)
    main()
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As there is no test file in this pull request nor any test function or class in the file `machine_learning/lgbm_classifier.py`, please provide a doctest for the function `main`.