-
-
Notifications
You must be signed in to change notification settings - Fork 46.6k
Added Catboost Algorithm in Machine Learning #11654
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 8 commits
86219ce
dca7ae5
a5d8112
e89f71f
23f5e8f
73475dc
002bbd8
f5d45c1
073c62a
5272789
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
# Catboost Classifier Example | ||
import numpy as np | ||
from catboost import CatBoostClassifier | ||
from matplotlib import pyplot as plt | ||
from sklearn.datasets import load_iris | ||
from sklearn.metrics import ConfusionMatrixDisplay | ||
from sklearn.model_selection import train_test_split | ||
|
||
|
||
def data_handling(data: dict) -> tuple:
    """
    Split a dataset mapping into its features and its target.

    :param data: mapping that provides a "data" entry (the features) and a
        "target" entry (the labels)
    :return: the pair ``(data["data"], data["target"])``
    :raises KeyError: if either entry is missing from ``data``

    >>> data_handling(({'data':'[5.1, 3.5, 1.4, 0.2]','target':([0])}))
    ('[5.1, 3.5, 1.4, 0.2]', [0])
    >>> data_handling(
    ...     {'data': '[4.9, 3.0, 1.4, 0.2], [4.7, 3.2, 1.3, 0.2]', 'target': ([0, 0])}
    ... )
    ('[4.9, 3.0, 1.4, 0.2], [4.7, 3.2, 1.3, 0.2]', [0, 0])
    """
    return (data["data"], data["target"])
|
||
|
||
def catboost(features: np.ndarray, target: np.ndarray) -> CatBoostClassifier:
    """
    Fit a CatBoost classifier on the given training data and return it.

    :param features: training samples, passed straight to ``fit``
    :param target: class label for each training sample
    :return: the fitted ``CatBoostClassifier``

    The example uses two distinct classes and checks the returned type rather
    than the object's repr, which contains a memory address.

    >>> model = catboost(
    ...     np.array(
    ...         [[5.1, 3.6, 1.4, 0.2], [4.9, 3.0, 1.4, 0.2], [6.7, 3.0, 5.2, 2.3]]
    ...     ),
    ...     np.array([0, 0, 2]),
    ... )
    >>> isinstance(model, CatBoostClassifier)
    True
    """
    # verbose=0 silences the per-iteration training log
    classifier = CatBoostClassifier(verbose=0)
    classifier.fit(features, target)
    return classifier
|
||
|
||
def main() -> None:
    """
    Train a CatBoost classifier on the Iris dataset and plot its normalized
    confusion matrix for the held-out test split.

    The example is skipped under doctest because it trains a model and opens
    a plot window.

    >>> main()  # doctest: +SKIP

    Url for the algorithm:
    https://catboost.ai/
    Iris type dataset is used to demonstrate algorithm.
    """

    # Load Iris dataset
    iris = load_iris()
    features, targets = data_handling(iris)
    x_train, x_test, y_train, y_test = train_test_split(
        features, targets, test_size=0.25
    )

    names = iris["target_names"]

    # Create a CatBoost Classifier from the training data
    catboost_classifier = catboost(x_train, y_train)

    # Display the confusion matrix of the classifier on the test set
    ConfusionMatrixDisplay.from_estimator(
        catboost_classifier,
        x_test,
        y_test,
        display_labels=names,
        cmap="Blues",
        normalize="true",
    )
    plt.title("Normalized Confusion Matrix - IRIS Dataset (CatBoost)")
    plt.show()
|
||
|
||
if __name__ == "__main__":
    import doctest

    # Run the embedded doctests first, then the demonstration itself.
    doctest.testmod(verbose=True)
    main()
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
# Catboost Regressor Example | ||
import numpy as np | ||
from catboost import CatBoostRegressor | ||
from sklearn.datasets import fetch_california_housing | ||
from sklearn.metrics import mean_absolute_error, mean_squared_error | ||
from sklearn.model_selection import train_test_split | ||
|
||
|
||
def data_handling(data: dict) -> tuple:
    """
    Split a dataset mapping into its features and its target.

    :param data: mapping that provides a "data" entry (the features) and a
        "target" entry (the values to predict)
    :return: the pair ``(data["data"], data["target"])``
    :raises KeyError: if either entry is missing from ``data``

    >>> data_handling((
    ...  {'data':'[ 8.3252 41. 6.9841269 1.02380952 322. 2.55555556 37.88 -122.23 ]'
    ...  ,'target':([4.526])}))
    ('[ 8.3252 41. 6.9841269 1.02380952 322. 2.55555556 37.88 -122.23 ]', [4.526])
    """
    return (data["data"], data["target"])
|
||
|
||
def catboost(
    features: np.ndarray, target: np.ndarray, test_features: np.ndarray
) -> np.ndarray:
    """
    Fit a CatBoost regressor on the training data and predict the test data.

    :param features: training samples, passed straight to ``fit``
    :param target: value to predict for each training sample
    :param test_features: samples to predict after fitting
    :return: the predictions reshaped into a single column, one row per
        test sample

    The example checks the shape and an approximate value instead of an exact
    float repr, because the result is a 2-D column.

    >>> predictions = catboost(
    ...     np.array([[2.3571, 52.0, 6.00813008, 1.06775068,
    ...                907.0, 2.45799458, 40.58, -124.26]]),
    ...     np.array([1.114]),
    ...     np.array([[1.9784, 37.0, 4.98858447, 1.03881279,
    ...                1143.0, 2.60958904, 36.78, -119.78]]),
    ... )
    >>> predictions.shape
    (1, 1)
    >>> bool(np.isclose(predictions[0, 0], 1.114, atol=1e-3))
    True
    """
    # Create and fit the CatBoost Regressor; the fixed seed keeps runs repeatable
    catboost_model = CatBoostRegressor(verbose=0, random_seed=42, loss_function="RMSE")
    catboost_model.fit(features, target)
    # Predict target for test data and return it as a column
    predictions = catboost_model.predict(test_features)
    return predictions.reshape(-1, 1)
|
||
|
||
def main() -> None:
    """
    Train a CatBoost regressor on the California house price dataset and
    print its mean absolute error and mean squared error on the test split.

    The URL for this algorithm:
    https://catboost.ai/
    California house price dataset is used to demonstrate the algorithm.

    Expected error values:
    Mean Absolute Error: 0.30957163379906033
    Mean Square Error: 0.22611560196662744
    """
    # NOTE(review): a doctest for main() was requested in review because the
    # pull request has no test file and this file has no test function or
    # class; none has been added here yet.

    # Load California house price dataset
    california = fetch_california_housing()
    data, target = data_handling(california)
    # Fixed random_state so the split is the same on every run
    x_train, x_test, y_train, y_test = train_test_split(
        data, target, test_size=0.25, random_state=1
    )
    predictions = catboost(x_train, y_train, x_test)
    # Error printing
    print(f"Mean Absolute Error: {mean_absolute_error(y_test, predictions)}")
    print(f"Mean Square Error: {mean_squared_error(y_test, predictions)}")
|
||
|
||
if __name__ == "__main__":
    import doctest

    # Run the embedded doctests first, then the demonstration itself.
    doctest.testmod(verbose=True)
    main()
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,5 @@ | ||
beautifulsoup4 | ||
catboost | ||
fake_useragent | ||
imageio | ||
keras | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As there is no test file in this pull request, nor any test function or class in the file `machine_learning/catboost_regressor.py`, please provide a doctest for the function `main`.