Commit efbd292

Feature: CurricularFace (#1013)

* curricularface module
* docs & link to official implementation
* end string
* using `weights` instead of `kernel`
* tests for curricularface
* using `torch.mm` instead of `F.linear`

1 parent 9c467e3 commit efbd292
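For orientation, a summary of the paper's rule as implemented below (wording ours, not part of the commit message): the target logit gets the additive angular margin, hard negatives are modulated by an adaptively updated scalar t, and t is an exponential moving average of the mean target logit. In LaTeX, with r = 0.01 as hard-coded in this commit:

T(\cos\theta_{y_i}) = \cos(\theta_{y_i} + m)

N(t, \cos\theta_j) =
\begin{cases}
  \cos\theta_j & \text{if } \cos(\theta_{y_i} + m) \ge \cos\theta_j \\
  \cos\theta_j \, (t + \cos\theta_j) & \text{otherwise}
\end{cases}

t \leftarrow r \cdot \overline{\cos\theta_{y_i}} + (1 - r) \cdot t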

File tree

- catalyst/contrib/nn/modules/__init__.py
- catalyst/contrib/nn/modules/curricularface.py
- catalyst/contrib/nn/tests/test_modules.py
- docs/api/contrib.rst

4 files changed: +241 -1 lines changed

catalyst/contrib/nn/modules/__init__.py
Lines changed: 1 addition & 0 deletions

@@ -10,6 +10,7 @@
     Normalize,
 )
 from catalyst.contrib.nn.modules.cosface import CosFace, AdaCos
+from catalyst.contrib.nn.modules.curricularface import CurricularFace
 from catalyst.contrib.nn.modules.lama import (
     LamaPooling,
     TemporalLastPooling,
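With this re-export in place, the layer is importable from the package namespace, which is exactly how the updated tests below pull it in:

from catalyst.contrib.nn.modules import CurricularFace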
catalyst/contrib/nn/modules/curricularface.py
Lines changed: 127 additions & 0 deletions

@@ -0,0 +1,127 @@
+import math
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+class CurricularFace(nn.Module):
+    """Implementation of
+    `CurricularFace: Adaptive Curriculum Learning\
+    Loss for Deep Face Recognition`_.
+
+    .. _CurricularFace\: Adaptive Curriculum Learning\
+        Loss for Deep Face Recognition:
+        https://arxiv.org/abs/2004.00288
+
+    Official `pytorch implementation`_.
+
+    .. _pytorch implementation:
+        https://github.com/HuangYG123/CurricularFace
+
+    Args:
+        in_features: size of each input sample.
+        out_features: size of each output sample.
+        s: norm of input feature.
+            Default: ``64.0``.
+        m: margin.
+            Default: ``0.5``.
+
+    Shape:
+        - Input: :math:`(batch, H_{in})` where
+          :math:`H_{in} = in\_features`.
+        - Output: :math:`(batch, H_{out})` where
+          :math:`H_{out} = out\_features`.
+
+    Example:
+        >>> layer = CurricularFace(5, 10, s=1.31, m=0.5)
+        >>> loss_fn = nn.CrossEntropyLoss()
+        >>> embedding = torch.randn(3, 5, requires_grad=True)
+        >>> target = torch.empty(3, dtype=torch.long).random_(10)
+        >>> output = layer(embedding, target)
+        >>> loss = loss_fn(output, target)
+        >>> loss.backward()
+
+    """  # noqa: RST215
+
+    def __init__(  # noqa: D107
+        self,
+        in_features: int,
+        out_features: int,
+        s: float = 64.0,
+        m: float = 0.5,
+    ):
+        super(CurricularFace, self).__init__()
+
+        self.in_features = in_features
+        self.out_features = out_features
+        self.m = m
+        self.s = s
+
+        self.cos_m = math.cos(m)
+        self.sin_m = math.sin(m)
+        self.threshold = math.cos(math.pi - m)
+        self.mm = math.sin(math.pi - m) * m
+
+        self.weight = nn.Parameter(torch.Tensor(in_features, out_features))
+        self.register_buffer("t", torch.zeros(1))
+
+        nn.init.normal_(self.weight, std=0.01)
+
+    def __repr__(self) -> str:  # noqa: D105
+        rep = (
+            "CurricularFace("
+            f"in_features={self.in_features},"
+            f"out_features={self.out_features},"
+            f"m={self.m},s={self.s}"
+            ")"
+        )
+        return rep
+
+    def forward(
+        self, input: torch.Tensor, label: torch.LongTensor
+    ) -> torch.Tensor:
+        """
+        Args:
+            input: input features,
+                expected shapes ``BxF`` where ``B``
+                is batch dimension and ``F`` is an
+                input feature dimension.
+            label: target classes,
+                expected shapes ``B`` where
+                ``B`` is batch dimension.
+
+        Returns:
+            tensor (logits) with shapes ``BxC``
+            where ``C`` is a number of classes.
+        """
+        cos_theta = torch.mm(
+            F.normalize(input), F.normalize(self.weight, dim=0)
+        )
+        cos_theta = cos_theta.clamp(-1, 1)  # for numerical stability
+
+        target_logit = cos_theta[torch.arange(0, input.size(0)), label].view(
+            -1, 1
+        )
+
+        sin_theta = torch.sqrt(1.0 - torch.pow(target_logit, 2))
+        cos_theta_m = (
+            target_logit * self.cos_m - sin_theta * self.sin_m
+        )  # cos(target + margin)
+        mask = cos_theta > cos_theta_m
+        final_target_logit = torch.where(
+            target_logit > self.threshold, cos_theta_m, target_logit - self.mm
+        )
+
+        hard_example = cos_theta[mask]
+        with torch.no_grad():
+            self.t = target_logit.mean() * 0.01 + (1 - 0.01) * self.t
+
+        cos_theta[mask] = hard_example * (self.t + hard_example)
+        cos_theta.scatter_(1, label.view(-1, 1).long(), final_target_logit)
+        output = cos_theta * self.s
+
+        return output
+
+
+__all__ = ["CurricularFace"]
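A minimal usage sketch of the new layer: CurricularFace is a classification head over embeddings, trained with plain cross-entropy; note that the labels must be passed to forward. The toy encoder, sizes, and optimizer here are illustrative assumptions, not part of the commit:

import torch
import torch.nn as nn

from catalyst.contrib.nn.modules import CurricularFace

# hypothetical toy encoder producing 5-d embeddings for 10 classes;
# only CurricularFace itself comes from this commit
encoder = nn.Sequential(nn.Linear(16, 32), nn.ReLU(), nn.Linear(32, 5))
head = CurricularFace(in_features=5, out_features=10, s=64.0, m=0.5)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(
    list(encoder.parameters()) + list(head.parameters()), lr=1e-2
)

x = torch.randn(8, 16)          # batch of raw inputs
y = torch.randint(0, 10, (8,))  # integer class labels

logits = head(encoder(x), y)    # labels are required in forward
loss = criterion(logits, y)     # plain cross-entropy over scaled logits

optimizer.zero_grad()
loss.backward()
optimizer.step()

At evaluation time such margin heads are typically dropped and cosine similarity between embeddings is used directly.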

catalyst/contrib/nn/tests/test_modules.py
Lines changed: 106 additions & 1 deletion

@@ -4,7 +4,12 @@
 import torch
 import torch.nn as nn
 
-from catalyst.contrib.nn.modules import ArcFace, CosFace, SoftMax
+from catalyst.contrib.nn.modules import (
+    ArcFace,
+    CosFace,
+    CurricularFace,
+    SoftMax,
+)
 
 
 def normalize(m: np.ndarray) -> np.ndarray:
@@ -209,3 +214,103 @@ def test_cosface_with_cross_entropy_loss():
         .numpy()
     )
     assert np.isclose(expected_loss.sum(), actual)
+
+
+def test_curricularface_with_cross_entropy_loss():
+    emb_size = 4
+    n_classes = 3
+    s = 3.0
+    m = 0.1
+
+    # fmt: off
+    features = np.array(
+        [
+            [1, 2, 3, 4],
+            [5, 6, 7, 8],
+        ],
+        dtype="f",
+    )
+    target = np.array([0, 2], dtype="l")
+    mask = np.array([[1, 0, 0], [0, 0, 1]], dtype="l")  # one_hot(target)
+
+    weight = np.array(
+        [
+            [0.1, 0.2, 0.3, 0.4],
+            [1.1, 3.2, 5.3, 0.4],
+            [0.1, 0.2, 6.3, 0.4],
+        ],
+        dtype="f",
+    )
+    # fmt: on
+
+    layer = CurricularFace(emb_size, n_classes, s, m)
+    layer.weight.data = torch.from_numpy(weight.T)
+    loss_fn = nn.CrossEntropyLoss(reduction="none")
+
+    normalized_features = normalize(features)  # 2x4
+    normalized_projection = normalize(weight)  # 3x4
+
+    cosine = normalized_features @ normalized_projection.T  # 2x4 * 4x3 = 2x3
+    logit = cosine[mask.astype(bool)].reshape(-1, 1)
+
+    sine = np.sqrt(1.0 - np.power(logit, 2))
+    cos_theta_m = logit * np.cos(m) - sine * np.sin(m)
+
+    final_logit = np.where(
+        logit > np.cos(np.pi - m), cos_theta_m, logit - np.sin(np.pi - m) * m,
+    )
+
+    cos_mask = cosine > cos_theta_m
+    hard = cosine[cos_mask]
+
+    t = np.mean(logit) * 0.01 + (1 - 0.01) * 0  # EMA step from initial t = 0
+
+    cosine[cos_mask] = hard * (t + hard)  # 2x3
+    for r, c in enumerate(target):
+        cosine[r, c] = final_logit[r, 0]
+    cosine = cosine * s  # 2x3
+
+    expected_loss = cross_entropy(cosine, mask, 1)
+    actual = (
+        loss_fn(
+            layer(torch.from_numpy(features), torch.LongTensor(target)),
+            torch.LongTensor(target),
+        )
+        .detach()
+        .numpy()
+    )
+
+    assert np.allclose(expected_loss, actual)
+
+    # reinitialize layer (t is changed)
+    layer = CurricularFace(emb_size, n_classes, s, m)
+    layer.weight.data = torch.from_numpy(weight.T)
+    loss_fn = nn.CrossEntropyLoss(reduction="mean")
+
+    expected_loss = cross_entropy(cosine, mask, 1)
+    actual = (
+        loss_fn(
+            layer(torch.from_numpy(features), torch.LongTensor(target)),
+            torch.LongTensor(target),
+        )
+        .detach()
+        .numpy()
+    )
+
+    assert np.isclose(expected_loss.mean(), actual)
+
+    # reinitialize layer (t is changed)
+    layer = CurricularFace(emb_size, n_classes, s, m)
+    layer.weight.data = torch.from_numpy(weight.T)
+    loss_fn = nn.CrossEntropyLoss(reduction="sum")
+
+    expected_loss = cross_entropy(cosine, mask, 1)
+    actual = (
+        loss_fn(
+            layer(torch.from_numpy(features), torch.LongTensor(target)),
+            torch.LongTensor(target),
+        )
+        .detach()
+        .numpy()
+    )
+    assert np.isclose(expected_loss.sum(), actual)
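The repeated reinitialization in this test is needed because forward mutates the registered buffer ``t`` (an EMA of the mean target logit), so a second pass through the same layer would no longer match the numpy reference computed with t = 0. A small sketch of that side effect (sizes chosen arbitrarily):

import torch

from catalyst.contrib.nn.modules import CurricularFace

layer = CurricularFace(4, 3, s=3.0, m=0.1)
print(layer.t)  # tensor([0.]) -- the buffer starts at zero

_ = layer(torch.randn(2, 4), torch.tensor([0, 2]))
print(layer.t)  # nonzero now: t <- 0.01 * mean(target logit) + 0.99 * t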

docs/api/contrib.rst
Lines changed: 7 additions & 0 deletions

@@ -183,6 +183,13 @@ CosFace and AdaCos
     :undoc-members:
     :show-inheritance:
 
+CurricularFace
+"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.. automodule:: catalyst.contrib.nn.modules.curricularface
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
 Last-Mean-Average-Attention (LAMA)-Pooling
 """"""""""""""""""""""""""""""""""""""""""
 .. automodule:: catalyst.contrib.nn.modules.lama