diff --git a/machine_learning/xgboost_classifier.py b/machine_learning/xgboost_classifier.py
index 1da933cf690f..df31cb4631b9 100644
--- a/machine_learning/xgboost_classifier.py
+++ b/machine_learning/xgboost_classifier.py
@@ -1,5 +1,6 @@
 # XGBoost Classifier Example
 import numpy as np
+from decision_tree import DecisionTree
 from matplotlib import pyplot as plt
 from sklearn.datasets import load_iris
 from sklearn.metrics import ConfusionMatrixDisplay
@@ -21,6 +22,140 @@ def data_handling(data: dict) -> tuple:
     return (data["data"], data["target"])
 
 
+class XGBClassifier:
+    """
+    An implementation of a gradient boosting classifier inspired by XGBoost.
+
+    This implementation uses multi-class boosting with a logistic (softmax) loss.
+    It trains one regression tree per class on the negative gradient (residual)
+    at each boosting iteration.
+
+    Parameters
+    ----------
+    n_estimators : int, default=100
+        The number of boosting rounds.
+    learning_rate : float, default=0.3
+        Step size shrinkage used in updates to prevent overfitting.
+    max_depth : int, default=3
+        Maximum depth of the regression trees.
+    random_state : int, default=0
+        Random seed (stored but not currently used).
+
+    **Important:**
+    The custom DecisionTree only supports one-dimensional input, so only the
+    first feature (column 0) of the dataset is used when training each tree.
+    """
+
+    def __init__(
+        self,
+        n_estimators: int = 100,
+        learning_rate: float = 0.3,
+        max_depth: int = 3,
+        random_state: int = 0,
+    ):
+        self.n_estimators = n_estimators
+        self.learning_rate = learning_rate
+        self.max_depth = max_depth
+        self.random_state = random_state
+
+        # List of lists of trees; for each boosting round, we have one tree per class.
+        self.trees = []
+        self.num_class = None
+        self.initial_pred = None  # Initial log-odds per class
+
+    def fit(self, X: np.ndarray, y: np.ndarray) -> None:
+        """
+        Fit the gradient boosting model.
+
+        Parameters
+        ----------
+        X : np.ndarray, shape = (n_samples, n_features)
+            Training data.
+        y : np.ndarray, shape = (n_samples,)
+            Class labels (assumed to be integers 0, 1, ..., K-1).
+        """
+        n_samples = X.shape[0]
+        self.num_class = np.unique(y).shape[0]
+
+        # One-hot encode the labels.
+        y_onehot = np.zeros((n_samples, self.num_class))
+        y_onehot[np.arange(n_samples), y] = 1
+
+        # Initialize predictions F with the log class probabilities (log-odds).
+        class_counts = np.bincount(y, minlength=self.num_class)
+        class_prob = class_counts / n_samples
+        # Add a small constant to avoid log(0).
+        initial_score = np.log(class_prob + 1e-10)
+        self.initial_pred = initial_score  # shape: (num_class,)
+        F = np.tile(initial_score, (n_samples, 1))  # shape: (n_samples, num_class)
+
+        # Boosting rounds.
+        for _ in range(self.n_estimators):
+            # Compute probabilities using softmax.
+            exp_F = np.exp(F)
+            p = exp_F / np.sum(
+                exp_F, axis=1, keepdims=True
+            )  # shape: (n_samples, num_class)
+            trees_per_class = []
+
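+            # For the softmax cross-entropy loss L = -sum_k y_k * log(p_k), with
+            # p_k = exp(F_k) / sum_j exp(F_j), the gradient is dL/dF_k = p_k - y_k.
+            # Each per-class tree below is therefore fit to the negative gradient
+            # y_k - p_k, and its shrunken prediction is added back onto F[:, k].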
+            for k in range(self.num_class):
+                # The negative gradient for class k (logistic loss): (y_true - p)
+                gradient = y_onehot[:, k] - p[:, k]
+
+                # **Note:** Due to our custom DecisionTree limitations,
+                # we use only the first feature.
+                feature_for_tree = X[:, 0]
+
+                # Instantiate and train the decision tree on the (feature, gradient) pair.
+                tree = DecisionTree(depth=self.max_depth, min_leaf_size=5)
+                tree.train(feature_for_tree, gradient)
+                # Predict the update values using the tree.
+                update = np.array([tree.predict(x_val) for x_val in feature_for_tree])
+                # Update the scores for class k.
+                F[:, k] += self.learning_rate * update
+                trees_per_class.append(tree)
+            self.trees.append(trees_per_class)
+
+    def predict_proba(self, X: np.ndarray) -> np.ndarray:
+        """
+        Predict class probabilities for X.
+
+        Parameters
+        ----------
+        X : np.ndarray, shape = (n_samples, n_features)
+
+        Returns
+        -------
+        proba : np.ndarray, shape = (n_samples, num_class)
+            The class probabilities.
+        """
+        n_samples = X.shape[0]
+        F = np.tile(self.initial_pred, (n_samples, 1))
+        # Use the first feature for prediction as done in training.
+        feature_for_tree = X[:, 0]
+        for trees_per_class in self.trees:
+            for k, tree in enumerate(trees_per_class):
+                update = np.array([tree.predict(x_val) for x_val in feature_for_tree])
+                F[:, k] += self.learning_rate * update
+        exp_F = np.exp(F)
+        proba = exp_F / np.sum(exp_F, axis=1, keepdims=True)
+        return proba
+
+    def predict(self, X: np.ndarray) -> np.ndarray:
+        """
+        Predict class labels for X.
+
+        Parameters
+        ----------
+        X : np.ndarray, shape = (n_samples, n_features)
+
+        Returns
+        -------
+        labels : np.ndarray, shape = (n_samples,)
+            The predicted class labels.
+        """
+        proba = self.predict_proba(X)
+        return np.argmax(proba, axis=1)
+
+
 def xgboost(features: np.ndarray, target: np.ndarray) -> XGBClassifier:
     """
     # THIS TEST IS BROKEN!! >>> xgboost(np.array([[5.1, 3.6, 1.4, 0.2]]), np.array([0]))