TheAlgorithms · cclauss · Oct 20, 2022 · Oct 13, 2022 · Oct 13, 2022 · Oct 13, 2022
diff --git a/machine_learning/xgboostclassifier.py b/machine_learning/xgboostclassifier.py
@@ -0,0 +1,82 @@
+# XGBoost Classifier Example
+import numpy as np
+from matplotlib import pyplot as plt
+from sklearn.datasets import load_iris
+from sklearn.metrics import plot_confusion_matrix
+from sklearn.model_selection import train_test_split
+from xgboost import XGBClassifier
+
+
+def data_handling(data: dict) -> tuple:
+    # Split dataset into features and target
+    # data is features
+    """
+    >>> data_handling(({'data':'[5.1, 3.5, 1.4, 0.2]','target':([0])}))
+    ('[5.1, 3.5, 1.4, 0.2]', [0])
+    >>> data_handling(
+    ...     {'data': '[4.9, 3.0, 1.4, 0.2], [4.7, 3.2, 1.3, 0.2]', 'target': ([0, 0])}
+    ... )
+    ('[4.9, 3.0, 1.4, 0.2], [4.7, 3.2, 1.3, 0.2]', [0, 0])
+    """
+    return (data["data"], data["target"])
+
+
+def xgboost(features: np.ndarray, target: np.ndarray) -> XGBClassifier:
+    """
+    >>> xgboost(np.array([[5.1, 3.6, 1.4, 0.2]]), np.array([0]))
+    XGBClassifier(base_score=0.5, booster='gbtree', callbacks=None,
+                  colsample_bylevel=1, colsample_bynode=1, colsample_bytree=1,
+                  early_stopping_rounds=None, enable_categorical=False,
+                  eval_metric=None, gamma=0, gpu_id=-1, grow_policy='depthwise',
+                  importance_type=None, interaction_constraints='',
+                  learning_rate=0.300000012, max_bin=256, max_cat_to_onehot=4,
+                  max_delta_step=0, max_depth=6, max_leaves=0, min_child_weight=1,
+                  missing=nan, monotone_constraints='()', n_estimators=100,
+                  n_jobs=0, num_parallel_tree=1, predictor='auto', random_state=0,
+                  reg_alpha=0, reg_lambda=1, ...)
+    """
+    classifier = XGBClassifier()
+    classifier.fit(features, target)
+    return classifier
+
+
+def main() -> None:
+
+    """
+    >>> main()
+
+    Url for the algorithm:
+    https://xgboost.readthedocs.io/en/stable/
+    Iris type dataset is used to demonstrate algorithm.
+    """
+
+    # Load Iris dataset
+    iris = load_iris()
+    features, targets = data_handling(iris)
+    x_train, x_test, y_train, y_test = train_test_split(
+        features, targets, test_size=0.25
+    )
+
+    names = iris["target_names"]
+
+    # Create an XGBoost Classifier from the training data
+    xgboost_classifier = xgboost(x_train, y_train)
+
+    # Display the confusion matrix of the classifier with both training and test sets
+    plot_confusion_matrix(
+        xgboost_classifier,
+        x_test,
+        y_test,
+        display_labels=names,
+        cmap="Blues",
+        normalize="true",
+    )
+    plt.title("Normalized Confusion Matrix - IRIS Dataset")
+    plt.show()
+
+
+if __name__ == "__main__":
+    import doctest
+
+    doctest.testmod(verbose=True)
+    main()