Skip to content

Added Catboost Algorithm in Machine Learning #11654

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 10 commits into from
72 changes: 72 additions & 0 deletions machine_learning/catboost_classifier.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
# CatBoost Classifier Example
import numpy as np
from catboost import CatBoostClassifier
from matplotlib import pyplot as plt
from sklearn.datasets import load_iris
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.model_selection import train_test_split


def data_handling(data: dict) -> tuple:
    """
    Separate a dataset mapping into its features and its target.

    The values stored under the ``"data"`` and ``"target"`` keys are returned
    unchanged, in that order, as a 2-tuple.

    >>> data_handling(({'data':'[5.1, 3.5, 1.4, 0.2]','target':([0])}))
    ('[5.1, 3.5, 1.4, 0.2]', [0])
    >>> data_handling(
    ...     {'data': '[4.9, 3.0, 1.4, 0.2], [4.7, 3.2, 1.3, 0.2]', 'target': ([0, 0])}
    ... )
    ('[4.9, 3.0, 1.4, 0.2], [4.7, 3.2, 1.3, 0.2]', [0, 0])
    """
    features = data["data"]
    target = data["target"]
    return features, target


def catboost(features: np.ndarray, target: np.ndarray) -> CatBoostClassifier:
    """
    Train a CatBoost classifier on the given samples and return the fitted model.

    Parameters:
        features: training samples, one row per sample.
        target: class label of each training sample.

    Returns:
        The ``CatBoostClassifier`` instance after ``fit(features, target)``.

    The example trains on two classes and checks the type of the result rather
    than its ``repr``: a ``0x...`` address placeholder only matches when
    doctest's ELLIPSIS option is enabled.

    >>> features = np.array([[0, 3], [4, 1], [8, 1], [9, 1]])
    >>> target = np.array([0, 0, 1, 1])
    >>> isinstance(catboost(features, target), CatBoostClassifier)
    True
    """
    # verbose=0 keeps CatBoost from logging training progress to stdout,
    # which would otherwise pollute doctest output.
    classifier = CatBoostClassifier(verbose=0)
    classifier.fit(features, target)
    return classifier


def main() -> None:
    """
    Train a CatBoost classifier on Iris and plot its confusion matrix.

    Url for the algorithm:
    https://catboost.ai/
    Iris type dataset is used to demonstrate algorithm.

    >>> main()
    """
    # Load Iris and separate the samples from their class labels
    dataset = load_iris()
    samples, labels = data_handling(dataset)
    class_names = dataset["target_names"]

    # Hold out a quarter of the samples for evaluation
    x_train, x_test, y_train, y_test = train_test_split(
        samples, labels, test_size=0.25
    )

    # Fit the classifier on the training portion only
    model = catboost(x_train, y_train)

    # Plot the normalized confusion matrix computed on the held-out test set
    ConfusionMatrixDisplay.from_estimator(
        model,
        x_test,
        y_test,
        display_labels=class_names,
        cmap="Blues",
        normalize="true",
    )
    plt.title("Normalized Confusion Matrix - IRIS Dataset (CatBoost)")
    plt.show()


if __name__ == "__main__":
    # Run the module's doctests verbosely, then the demonstration itself.
    from doctest import testmod

    testmod(verbose=True)
    main()
65 changes: 65 additions & 0 deletions machine_learning/catboost_regressor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# CatBoost Regressor Example
import numpy as np
from catboost import CatBoostRegressor
from sklearn.datasets import fetch_california_housing
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import train_test_split


def data_handling(data: dict) -> tuple:
    """
    Split a dataset mapping into ``(features, target)``.

    Whatever is stored under ``"data"`` becomes the first element of the
    returned tuple and whatever is stored under ``"target"`` the second;
    neither value is copied or converted.

    >>> data_handling({'data': [[8.3252, 41.0, 322.0]], 'target': [4.526]})
    ([[8.3252, 41.0, 322.0]], [4.526])
    >>> data_handling({'data': '[ 8.3252 41. ]', 'target': ([4.526])})
    ('[ 8.3252 41. ]', [4.526])
    """
    features, target = data["data"], data["target"]
    return (features, target)


def catboost(
    features: np.ndarray, target: np.ndarray, test_features: np.ndarray
) -> np.ndarray:
    """
    Fit a CatBoost regressor and predict the targets of ``test_features``.

    Parameters:
        features: training samples, one row per sample.
        target: regression target of each training sample.
        test_features: samples to predict, with the same columns as ``features``.

    Returns:
        The predictions as a column vector, i.e. an array of shape
        ``(number_of_test_samples, 1)``.

    The example checks the shape of the result instead of exact float values,
    which depend on the installed CatBoost version.

    >>> train = np.array([[1, 4, 5, 6], [4, 5, 6, 7], [30, 40, 50, 60]])
    >>> labels = np.array([10, 20, 30])
    >>> test = np.array([[2, 4, 6, 8], [1, 4, 50, 60]])
    >>> catboost(train, labels, test).shape
    (2, 1)
    """
    # Create and fit the CatBoost Regressor; the fixed seed makes runs repeatable
    catboost_model = CatBoostRegressor(verbose=0, random_seed=42, loss_function="RMSE")
    catboost_model.fit(features, target)
    # Predict target for test data
    predictions = catboost_model.predict(test_features)
    # Return one prediction per row as a column vector
    return predictions.reshape(-1, 1)


def main() -> None:
    """
    Train a CatBoost regressor on California housing and print its errors.

    The URL for this algorithm:
    https://catboost.ai/
    California house price dataset is used to demonstrate the algorithm.

    Expected error values:
    Mean Absolute Error: 0.30957163379906033
    Mean Square Error: 0.22611560196662744

    A doctest for this function was requested in review. None is attached
    here because the function loads the whole dataset and trains a full
    model, and the exact values printed may differ between library versions.
    """
    # Load California house price dataset
    california = fetch_california_housing()
    data, target = data_handling(california)
    # A fixed random_state keeps the 75/25 split the same on every run
    x_train, x_test, y_train, y_test = train_test_split(
        data, target, test_size=0.25, random_state=1
    )
    predictions = catboost(x_train, y_train, x_test)
    # Error printing
    print(f"Mean Absolute Error: {mean_absolute_error(y_test, predictions)}")
    print(f"Mean Square Error: {mean_squared_error(y_test, predictions)}")


if __name__ == "__main__":
    # Check the docstring examples first, then run the regression demo.
    from doctest import testmod

    testmod(verbose=True)
    main()
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
beautifulsoup4
catboost
fake_useragent
imageio
keras
Expand Down
Loading