Commit 3767245

ZJUGuoShuai authored and sedatguzelsemme committed

Add categorical focal cross-entropy loss algorithm (TheAlgorithms#11248)

1 parent bfebbc8

1 file changed: +102 −0 lines changed

Diff for: machine_learning/loss_functions.py

@@ -148,6 +148,108 @@ def categorical_cross_entropy(
     return -np.sum(y_true * np.log(y_pred))
 
 
+def categorical_focal_cross_entropy(
+    y_true: np.ndarray,
+    y_pred: np.ndarray,
+    alpha: np.ndarray = None,
+    gamma: float = 2.0,
+    epsilon: float = 1e-15,
+) -> float:
+    """
+    Calculate the mean categorical focal cross-entropy (CFCE) loss between true
+    labels and predicted probabilities for multi-class classification.
+
+    CFCE loss is a generalization of binary focal cross-entropy for multi-class
+    classification. It addresses class imbalance by focusing on hard examples.
+
+    CFCE = -Σ alpha * (1 - y_pred)**gamma * y_true * log(y_pred)
+
+    Reference: [Lin et al., 2018](https://arxiv.org/pdf/1708.02002.pdf)
+
+    Parameters:
+    - y_true: True labels in one-hot encoded form.
+    - y_pred: Predicted probabilities for each class.
+    - alpha: Array of weighting factors for each class.
+    - gamma: Focusing parameter for modulating the loss (default: 2.0).
+    - epsilon: Small constant to avoid numerical instability.
+
+    Returns:
+    - The mean categorical focal cross-entropy loss.
+
+    >>> true_labels = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
+    >>> pred_probs = np.array([[0.9, 0.1, 0.0], [0.2, 0.7, 0.1], [0.0, 0.1, 0.9]])
+    >>> alpha = np.array([0.6, 0.2, 0.7])
+    >>> categorical_focal_cross_entropy(true_labels, pred_probs, alpha)
+    0.0025966118981496423
+
+    >>> true_labels = np.array([[0, 1, 0], [0, 0, 1]])
+    >>> pred_probs = np.array([[0.05, 0.95, 0], [0.1, 0.8, 0.1]])
+    >>> alpha = np.array([0.25, 0.25, 0.25])
+    >>> categorical_focal_cross_entropy(true_labels, pred_probs, alpha)
+    0.23315276982014324
+
+    >>> true_labels = np.array([[1, 0], [0, 1]])
+    >>> pred_probs = np.array([[0.9, 0.1, 0.0], [0.2, 0.7, 0.1]])
+    >>> categorical_focal_cross_entropy(true_labels, pred_probs)
+    Traceback (most recent call last):
+        ...
+    ValueError: Shape of y_true and y_pred must be the same.
+
+    >>> true_labels = np.array([[2, 0, 1], [1, 0, 0]])
+    >>> pred_probs = np.array([[0.9, 0.1, 0.0], [0.2, 0.7, 0.1]])
+    >>> categorical_focal_cross_entropy(true_labels, pred_probs)
+    Traceback (most recent call last):
+        ...
+    ValueError: y_true must be one-hot encoded.
+
+    >>> true_labels = np.array([[1, 0, 1], [1, 0, 0]])
+    >>> pred_probs = np.array([[0.9, 0.1, 0.0], [0.2, 0.7, 0.1]])
+    >>> categorical_focal_cross_entropy(true_labels, pred_probs)
+    Traceback (most recent call last):
+        ...
+    ValueError: y_true must be one-hot encoded.
+
+    >>> true_labels = np.array([[1, 0, 0], [0, 1, 0]])
+    >>> pred_probs = np.array([[0.9, 0.1, 0.1], [0.2, 0.7, 0.1]])
+    >>> categorical_focal_cross_entropy(true_labels, pred_probs)
+    Traceback (most recent call last):
+        ...
+    ValueError: Predicted probabilities must sum to approximately 1.
+
+    >>> true_labels = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
+    >>> pred_probs = np.array([[0.9, 0.1, 0.0], [0.2, 0.7, 0.1], [0.0, 0.1, 0.9]])
+    >>> alpha = np.array([0.6, 0.2])
+    >>> categorical_focal_cross_entropy(true_labels, pred_probs, alpha)
+    Traceback (most recent call last):
+        ...
+    ValueError: Length of alpha must match the number of classes.
+    """
+    if y_true.shape != y_pred.shape:
+        raise ValueError("Shape of y_true and y_pred must be the same.")
+
+    if alpha is None:
+        alpha = np.ones(y_true.shape[1])
+
+    if np.any((y_true != 0) & (y_true != 1)) or np.any(y_true.sum(axis=1) != 1):
+        raise ValueError("y_true must be one-hot encoded.")
+
+    if len(alpha) != y_true.shape[1]:
+        raise ValueError("Length of alpha must match the number of classes.")
+
+    if not np.all(np.isclose(np.sum(y_pred, axis=1), 1, rtol=epsilon, atol=epsilon)):
+        raise ValueError("Predicted probabilities must sum to approximately 1.")
+
+    # Clip predicted probabilities to avoid log(0)
+    y_pred = np.clip(y_pred, epsilon, 1 - epsilon)
+
+    # Calculate loss for each class and sum across classes
+    cfce_loss = -np.sum(
+        alpha * np.power(1 - y_pred, gamma) * y_true * np.log(y_pred), axis=1
+    )
+
+    return np.mean(cfce_loss)
+
+
 def hinge_loss(y_true: np.ndarray, y_pred: np.ndarray) -> float:
     """
     Calculate the mean hinge loss between true labels and predicted probabilities
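For a quick sanity check outside the doctests, here is a minimal usage sketch (not part of the commit). It assumes the repository root is on PYTHONPATH so machine_learning.loss_functions is importable, and it reproduces the first doctest value by evaluating the docstring formula directly:

import numpy as np

from machine_learning.loss_functions import categorical_focal_cross_entropy

# Inputs from the first doctest: one-hot labels and class probabilities.
true_labels = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
pred_probs = np.array([[0.9, 0.1, 0.0], [0.2, 0.7, 0.1], [0.0, 0.1, 0.9]])
alpha = np.array([0.6, 0.2, 0.7])  # per-class weighting factors

loss = categorical_focal_cross_entropy(true_labels, pred_probs, alpha)
print(loss)  # 0.0025966118981496423

# Re-derive the value from the docstring formula with the default gamma=2:
# CFCE = mean over samples of -sum_c alpha_c * (1 - p_c)**gamma * y_c * log(p_c)
eps = 1e-15
p = np.clip(pred_probs, eps, 1 - eps)  # same clipping as the function
per_sample = -np.sum(alpha * (1 - p) ** 2 * true_labels * np.log(p), axis=1)
assert np.isclose(per_sample.mean(), loss)

Passing gamma=0 with the default alpha of ones removes the (1 - y_pred)**gamma modulation, so the function then returns the mean per-sample categorical cross-entropy; larger gamma shrinks the contribution of well-classified examples, which is the focal behavior the docstring describes.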
