TheAlgorithms · mathangpeddi · Oct 2, 2024 · Oct 2, 2024 · Oct 2, 2024 · Oct 2, 2024
diff --git a/machine_learning/catboost_classifier.py b/machine_learning/catboost_classifier.py
@@ -0,0 +1,72 @@
+# CatBoost Classifier Example
+import numpy as np
+from catboost import CatBoostClassifier
+from matplotlib import pyplot as plt
+from sklearn.datasets import load_iris
+from sklearn.metrics import ConfusionMatrixDisplay
+from sklearn.model_selection import train_test_split
+
+
+def data_handling(data: dict) -> tuple:
+    """
+    Separate a dataset mapping into its features and its target.
+
+    The value stored under "data" is returned first, followed by the value
+    stored under "target".
+
+    >>> data_handling(({'data':'[5.1, 3.5, 1.4, 0.2]','target':([0])}))
+    ('[5.1, 3.5, 1.4, 0.2]', [0])
+    >>> data_handling(
+    ...     {'data': '[4.9, 3.0, 1.4, 0.2], [4.7, 3.2, 1.3, 0.2]', 'target': ([0, 0])}
+    ... )
+    ('[4.9, 3.0, 1.4, 0.2], [4.7, 3.2, 1.3, 0.2]', [0, 0])
+    """
+    features = data["data"]
+    target = data["target"]
+    return features, target
+
+
+def catboost(features: np.ndarray, target: np.ndarray) -> CatBoostClassifier:
+    """
+    Fit a CatBoost classifier on the given samples and return the fitted model.
+
+    :param features: training samples, one row per sample
+    :param target: class label of every row in ``features``
+    :return: the fitted ``CatBoostClassifier``
+
+    The example trains on two classes: CatBoost is expected to reject a target
+    that holds a single class only. The result is checked with ``isinstance``
+    because the default repr contains a memory address that differs per run.
+
+    >>> model = catboost(
+    ...     np.array(
+    ...         [
+    ...             [5.1, 3.6, 1.4, 0.2],
+    ...             [4.9, 3.0, 1.4, 0.2],
+    ...             [6.7, 3.1, 4.7, 1.5],
+    ...             [6.3, 3.3, 6.0, 2.5],
+    ...         ]
+    ...     ),
+    ...     np.array([0, 0, 1, 1]),
+    ... )
+    >>> isinstance(model, CatBoostClassifier)
+    True
+    >>> model.is_fitted()
+    True
+    """
+    # verbose=0 keeps CatBoost from printing its per-iteration training log
+    classifier = CatBoostClassifier(verbose=0)
+    classifier.fit(features, target)
+    return classifier
+
+
+def main() -> None:
+    """
+    Train a CatBoost classifier on the Iris dataset and plot its confusion matrix.
+
+    Url for the algorithm:
+    https://catboost.ai/
+    Iris type dataset is used to demonstrate algorithm.
+
+    This function is deliberately not exercised by a doctest: it trains a model
+    and opens a plot window through ``plt.show()``.
+    """
+
+    # Load Iris dataset
+    iris = load_iris()
+    features, targets = data_handling(iris)
+    # A fixed random_state keeps the train/test split the same between runs
+    x_train, x_test, y_train, y_test = train_test_split(
+        features, targets, test_size=0.25, random_state=1
+    )
+
+    names = iris["target_names"]
+
+    # Create a CatBoost Classifier from the training data
+    catboost_classifier = catboost(x_train, y_train)
+
+    # Display the confusion matrix of the classifier on the held-out test set,
+    # normalized over the true labels
+    ConfusionMatrixDisplay.from_estimator(
+        catboost_classifier,
+        x_test,
+        y_test,
+        display_labels=names,
+        cmap="Blues",
+        normalize="true",
+    )
+    plt.title("Normalized Confusion Matrix - IRIS Dataset (CatBoost)")
+    plt.show()
+
+
+if __name__ == "__main__":
+    # Imported here because doctest is only needed when the file runs as a script
+    import doctest
+
+    # Run the docstring examples of this module first, then the demonstration
+    doctest.testmod(verbose=True)
+    main()
diff --git a/machine_learning/catboost_regressor.py b/machine_learning/catboost_regressor.py
@@ -0,0 +1,65 @@
+# CatBoost Regressor Example
+import numpy as np
+from catboost import CatBoostRegressor
+from sklearn.datasets import fetch_california_housing
+from sklearn.metrics import mean_absolute_error, mean_squared_error
+from sklearn.model_selection import train_test_split
+
+
+def data_handling(data: dict) -> tuple:
+    """
+    Pull the features and the target out of a dataset mapping.
+
+    Returns a 2-tuple holding the "data" entry followed by the "target" entry.
+
+    >>> data_handling((
+    ...  {'data':'[ 8.3252 41. 6.9841269 1.02380952  322. 2.55555556   37.88 -122.23 ]'
+    ...  ,'target':([4.526])}))
+    ('[ 8.3252 41. 6.9841269 1.02380952  322. 2.55555556   37.88 -122.23 ]', [4.526])
+    """
+    return tuple(data[key] for key in ("data", "target"))
+
+
+def catboost(
+    features: np.ndarray, target: np.ndarray, test_features: np.ndarray
+) -> np.ndarray:
+    """
+    Fit a CatBoost regressor and predict the target of ``test_features``.
+
+    :param features: training samples, one row per sample
+    :param target: target value of every row in ``features``
+    :param test_features: samples to predict, same columns as ``features``
+    :return: the predictions reshaped into a column, one row per prediction
+
+    Only the shape of the result is checked below: the exact predicted values
+    may differ between CatBoost versions.
+
+    >>> train_features = np.array(
+    ...     [[1.0, 4.0, 5.0, 6.0], [4.0, 5.0, 6.0, 7.0], [30.0, 40.0, 50.0, 60.0]]
+    ... )
+    >>> train_target = np.array([10.0, 20.0, 30.0])
+    >>> test_features = np.array([[2.0, 4.0, 6.0, 8.0], [1.0, 4.0, 50.0, 60.0]])
+    >>> catboost(train_features, train_target, test_features).shape
+    (2, 1)
+    """
+    # Create and fit the CatBoost Regressor; the fixed seed keeps runs repeatable
+    catboost_model = CatBoostRegressor(verbose=0, random_seed=42, loss_function="RMSE")
+    catboost_model.fit(features, target)
+    # Predict target for test data
+    predictions = catboost_model.predict(test_features)
+    # Turn the predictions into a column so that every row holds one prediction
+    predictions = predictions.reshape(len(predictions), 1)
+    return predictions
+
+
+def main() -> None:
+    """
+    Demonstrate the CatBoost regressor on the California house price dataset.
+
+    The URL for this algorithm:
+    https://catboost.ai/
+
+    The data is split into a training and a test part, the regressor is fitted
+    on the training part and the errors of its test predictions are printed.
+
+    Expected error values:
+    Mean Absolute Error: 0.30957163379906033
+    Mean Square Error: 0.22611560196662744
+    """
+    # Fetch the dataset and separate its features from the house prices
+    housing = fetch_california_housing()
+    house_features, house_prices = data_handling(housing)
+
+    # Keep a quarter of the samples aside for testing
+    split = train_test_split(
+        house_features, house_prices, test_size=0.25, random_state=1
+    )
+    x_train, x_test, y_train, y_test = split
+
+    predictions = catboost(x_train, y_train, x_test)
+
+    # Report both error measures of the test predictions
+    print(f"Mean Absolute Error: {mean_absolute_error(y_test, predictions)}")
+    print(f"Mean Square Error: {mean_squared_error(y_test, predictions)}")
+
+
+if __name__ == "__main__":
+    # Imported here because doctest is only needed when the file runs as a script
+    import doctest
+
+    # Run the docstring examples of this module first, then the demonstration
+    doctest.testmod(verbose=True)
+    main()
diff --git a/requirements.txt b/requirements.txt
@@ -1,4 +1,5 @@
 beautifulsoup4
+catboost
 fake_useragent
 imageio
 keras