from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier

+
def data_handling(data: dict) -> tuple:
    # Split dataset into features and target
    # data is features
@@ -20,6 +21,7 @@ def data_handling(data: dict) -> tuple:
    """
    return (data["data"], data["target"])

+
class XGBClassifier:
    """
    An implementation of a gradient boosting classifier inspired by XGBoost.
@@ -38,14 +40,19 @@ class XGBClassifier:
        Maximum depth of the regression trees.
    random_state : int, default=0
        Random seed.
-
-    **Important:**
+
+    **Important:**
    Due to limitations of our custom DecisionTree (which only supports one-dimensional input),
    only the first feature (column 0) of the dataset is used when training each tree.
    """

-    def __init__(self, n_estimators: int = 100, learning_rate: float = 0.3,
-                 max_depth: int = 3, random_state: int = 0):
+    def __init__(
+        self,
+        n_estimators: int = 100,
+        learning_rate: float = 0.3,
+        max_depth: int = 3,
+        random_state: int = 0,
+    ):
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.max_depth = max_depth
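For orientation, here is a minimal construction sketch against the reflowed `__init__` above. Note that this targets the locally defined `XGBClassifier` (which shadows the name imported from the `xgboost` package); the argument values are illustrative only, not part of the diff.

```python
# Illustrative values; the keyword names come from the __init__ signature above.
clf = XGBClassifier(
    n_estimators=100,
    learning_rate=0.3,
    max_depth=3,
    random_state=0,
)
# Per the class docstring, each underlying DecisionTree only sees feature column 0.
```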
@@ -85,7 +92,9 @@ def fit(self, X: np.ndarray, y: np.ndarray) -> None:
        for t in range(self.n_estimators):
            # Compute probabilities using softmax.
            exp_F = np.exp(F)
-            p = exp_F / np.sum(exp_F, axis=1, keepdims=True)  # shape: (n_samples, num_class)
+            p = exp_F / np.sum(
+                exp_F, axis=1, keepdims=True
+            )  # shape: (n_samples, num_class)
            trees_per_class = []

            for k in range(self.num_class):
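As a standalone illustration of the softmax step being reflowed in this hunk: the shape of `F` is taken from the inline comment, and the numbers below are made up.

```python
import numpy as np

# F holds raw per-class scores, shape (n_samples, num_class).
F = np.array([[0.0, 1.0, -1.0],
              [2.0, 0.5, 0.5]])
exp_F = np.exp(F)
p = exp_F / np.sum(exp_F, axis=1, keepdims=True)
print(p.sum(axis=1))  # each row of probabilities sums to 1
```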
@@ -146,6 +155,7 @@ def predict(self, X: np.ndarray) -> np.ndarray:
        proba = self.predict_proba(X)
        return np.argmax(proba, axis=1)

+
def xgboost(features: np.ndarray, target: np.ndarray) -> XGBClassifier:
    """
    # THIS TEST IS BROKEN!! >>> xgboost(np.array([[5.1, 3.6, 1.4, 0.2]]), np.array([0]))
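Since the doctest quoted above is flagged as broken, here is a hedged end-to-end sketch instead. It assumes `data_handling` and `xgboost` behave as their signatures suggest (a Bunch-style dict in, a fitted classifier out); the dataset choice and split parameters are illustrative, not part of the diff.

```python
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

iris = load_iris()
# data_handling returns (data["data"], data["target"]) per the hunk above.
features, target = data_handling({"data": iris["data"], "target": iris["target"]})
x_train, x_test, y_train, y_test = train_test_split(
    features, target, test_size=0.25, random_state=0
)
classifier = xgboost(x_train, y_train)  # assumed to return a fitted XGBClassifier
predictions = classifier.predict(x_test)
print(predictions.shape)  # one predicted class label per test sample
```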