@@ -148,6 +148,108 @@ def categorical_cross_entropy(
148
148
return - np .sum (y_true * np .log (y_pred ))
149
149
150
150
151
def categorical_focal_cross_entropy(
    y_true: np.ndarray,
    y_pred: np.ndarray,
    alpha: np.ndarray = None,
    gamma: float = 2.0,
    epsilon: float = 1e-15,
) -> float:
    """
    Calculate the mean categorical focal cross-entropy (CFCE) loss between true
    labels and predicted probabilities for multi-class classification.

    CFCE loss is a generalization of binary focal cross-entropy for multi-class
    classification. It addresses class imbalance by focusing on hard examples.

    CFCE = -Σ alpha * (1 - y_pred)**gamma * y_true * log(y_pred)

    The sum runs over the classes of each sample; the per-sample losses are then
    averaged over all samples.

    Reference: [Lin et al., 2018](https://arxiv.org/pdf/1708.02002.pdf)

    Parameters:
    - y_true: True labels in one-hot encoded form, shape (n_samples, n_classes).
    - y_pred: Predicted probabilities for each class, same shape as y_true.
    - alpha: Array of weighting factors for each class. When omitted, every
      class gets a weight of 1.
    - gamma: Focusing parameter for modulating the loss (default: 2.0).
    - epsilon: Small constant to avoid numerical instability. It is used both to
      clip y_pred away from 0 and 1 before taking the logarithm and as the
      tolerance when checking that each row of y_pred sums to 1.

    Returns:
    - The mean categorical focal cross-entropy loss.

    Raises:
    - ValueError: If y_true and y_pred differ in shape, are not 2-D, y_true is
      not one-hot encoded, the length of alpha differs from the number of
      classes, or a row of y_pred does not sum to approximately 1.

    >>> true_labels = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
    >>> pred_probs = np.array([[0.9, 0.1, 0.0], [0.2, 0.7, 0.1], [0.0, 0.1, 0.9]])
    >>> alpha = np.array([0.6, 0.2, 0.7])
    >>> categorical_focal_cross_entropy(true_labels, pred_probs, alpha)
    0.0025966118981496423

    >>> true_labels = np.array([[0, 1, 0], [0, 0, 1]])
    >>> pred_probs = np.array([[0.05, 0.95, 0], [0.1, 0.8, 0.1]])
    >>> alpha = np.array([0.25, 0.25, 0.25])
    >>> categorical_focal_cross_entropy(true_labels, pred_probs, alpha)
    0.23315276982014324

    >>> true_labels = np.array([[1, 0], [0, 1]])
    >>> pred_probs = np.array([[0.9, 0.1, 0.0], [0.2, 0.7, 0.1]])
    >>> categorical_focal_cross_entropy(true_labels, pred_probs)
    Traceback (most recent call last):
        ...
    ValueError: Shape of y_true and y_pred must be the same.

    >>> true_labels = np.array([1, 0, 0])
    >>> pred_probs = np.array([0.9, 0.1, 0.0])
    >>> categorical_focal_cross_entropy(true_labels, pred_probs)
    Traceback (most recent call last):
        ...
    ValueError: y_true and y_pred must be 2-D arrays of shape (n_samples, n_classes).

    >>> true_labels = np.array([[2, 0, 1], [1, 0, 0]])
    >>> pred_probs = np.array([[0.9, 0.1, 0.0], [0.2, 0.7, 0.1]])
    >>> categorical_focal_cross_entropy(true_labels, pred_probs)
    Traceback (most recent call last):
        ...
    ValueError: y_true must be one-hot encoded.

    >>> true_labels = np.array([[1, 0, 1], [1, 0, 0]])
    >>> pred_probs = np.array([[0.9, 0.1, 0.0], [0.2, 0.7, 0.1]])
    >>> categorical_focal_cross_entropy(true_labels, pred_probs)
    Traceback (most recent call last):
        ...
    ValueError: y_true must be one-hot encoded.

    >>> true_labels = np.array([[1, 0, 0], [0, 1, 0]])
    >>> pred_probs = np.array([[0.9, 0.1, 0.1], [0.2, 0.7, 0.1]])
    >>> categorical_focal_cross_entropy(true_labels, pred_probs)
    Traceback (most recent call last):
        ...
    ValueError: Predicted probabilities must sum to approximately 1.

    >>> true_labels = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
    >>> pred_probs = np.array([[0.9, 0.1, 0.0], [0.2, 0.7, 0.1], [0.0, 0.1, 0.9]])
    >>> alpha = np.array([0.6, 0.2])
    >>> categorical_focal_cross_entropy(true_labels, pred_probs, alpha)
    Traceback (most recent call last):
        ...
    ValueError: Length of alpha must match the number of classes.
    """
    if y_true.shape != y_pred.shape:
        raise ValueError("Shape of y_true and y_pred must be the same.")

    # Everything below indexes axis 1 as the class axis, so reject other ranks
    # with a clear message instead of an IndexError/AxisError further down.
    if y_true.ndim != 2:
        raise ValueError(
            "y_true and y_pred must be 2-D arrays of shape (n_samples, n_classes)."
        )

    n_classes = y_true.shape[1]

    # Without explicit weights every class contributes equally.
    if alpha is None:
        alpha = np.ones(n_classes)

    # One-hot: only 0/1 entries and exactly one 1 per row.
    if np.any((y_true != 0) & (y_true != 1)) or np.any(y_true.sum(axis=1) != 1):
        raise ValueError("y_true must be one-hot encoded.")

    if len(alpha) != n_classes:
        raise ValueError("Length of alpha must match the number of classes.")

    # epsilon doubles as the tolerance for the row-sum check.
    if not np.all(np.isclose(np.sum(y_pred, axis=1), 1, rtol=epsilon, atol=epsilon)):
        raise ValueError("Predicted probabilities must sum to approximately 1.")

    # Clip predicted probabilities to avoid log(0)
    y_pred = np.clip(y_pred, epsilon, 1 - epsilon)

    # Calculate loss for each class and sum across classes
    cfce_loss = -np.sum(
        alpha * np.power(1 - y_pred, gamma) * y_true * np.log(y_pred), axis=1
    )

    # Return a built-in float so the value matches the annotation and prints
    # the same way regardless of the installed NumPy version.
    return float(np.mean(cfce_loss))
251
+
252
+
151
253
def hinge_loss (y_true : np .ndarray , y_pred : np .ndarray ) -> float :
152
254
"""
153
255
Calculate the mean hinge loss for between true labels and predicted probabilities
0 commit comments