fixed issues with mypy, ruff

sephml · sephml · commit c243cd8f0d53 · 2025-04-16T08:20:06.000+01:00
diff --git a/machine_learning/mab.py b/machine_learning/mab.py
@@ -24,6 +24,8 @@
 
 """
 
+from abc import ABC, abstractmethod
+
 import matplotlib.pyplot as plt
 import numpy as np
 
@@ -65,7 +67,32 @@ def pull(self, arm_index: int) -> int:
 # Epsilon-Greedy strategy
 
 
-class EpsilonGreedy:
+class Strategy(ABC):
+    """
+    Base class for all strategies.
+    """
+
+    @abstractmethod
+    def select_arm(self) -> int:
+        """
+        Select an arm to pull.
+
+        Returns:
+            The index of the arm to pull.
+        """
+
+    @abstractmethod
+    def update(self, arm_index: int, reward: int) -> None:
+        """
+        Update the strategy's internal state with an observed reward.
+
+        Args:
+            arm_index: The index of the arm that was pulled.
+            reward: The reward received from that pull.
+        """
+
+
+class EpsilonGreedy(Strategy):
     """
     A class for a simple implementation of the Epsilon-Greedy strategy.
     Follow this link to learn more:
@@ -126,7 +153,7 @@ def update(self, arm_index: int, reward: int) -> None:
 # Upper Confidence Bound (UCB)
 
 
-class UCB:
+class UCB(Strategy):
     """
     A class for the Upper Confidence Bound (UCB) strategy.
     Follow this link to learn more:
@@ -185,7 +212,7 @@ def update(self, arm_index: int, reward: int) -> None:
 # Thompson Sampling
 
 
-class ThompsonSampling:
+class ThompsonSampling(Strategy):
     """
     A class for the Thompson Sampling strategy.
     Follow this link to learn more:
@@ -245,7 +272,7 @@ def update(self, arm_index: int, reward: int) -> None:
 
 
 # Random strategy (full exploration)
-class RandomStrategy:
+class RandomStrategy(Strategy):
     """
     A class for choosing totally random at each round to give
     a better comparison with the other optimised strategies.
@@ -292,7 +319,7 @@ def update(self, arm_index: int, reward: int) -> None:
 # Greedy strategy (full exploitation)
 
 
-class GreedyStrategy:
+class GreedyStrategy(Strategy):
     """
     A class for the Greedy strategy to show how full exploitation can be
     detrimental to the performance of the strategy.
@@ -351,7 +378,7 @@ def test_mab_strategies() -> None:
     arms_probabilities = [0.1, 0.3, 0.5, 0.8]  # True probabilities
 
     bandit = Bandit(arms_probabilities)
-    strategies = {
+    strategies: dict[str, Strategy] = {
         "Epsilon-Greedy": EpsilonGreedy(epsilon=0.1, num_arms=num_arms),
         "UCB": UCB(num_arms=num_arms),
         "Thompson Sampling": ThompsonSampling(num_arms=num_arms),