@@ -33,7 +33,7 @@ class Bandit:
33
33
A class to represent a multi-armed bandit.
34
34
"""
35
35
36
- def __init__ (self , probabilities : list [float ]):
36
+ def __init__ (self , probabilities : list [float ]) -> None :
37
37
"""
38
38
Initialize the bandit with a list of probabilities for each arm.
39
39
@@ -72,7 +72,7 @@ class EpsilonGreedy:
72
72
https://medium.com/analytics-vidhya/the-epsilon-greedy-algorithm-for-reinforcement-learning-5fe6f96dc870
73
73
"""
74
74
75
- def __init__ (self , epsilon : float , k : int ):
75
+ def __init__ (self , epsilon : float , k : int ) -> None :
76
76
"""
77
77
Initialize the Epsilon-Greedy strategy.
78
78
@@ -85,7 +85,7 @@ def __init__(self, epsilon: float, k: int):
85
85
self .counts = np .zeros (k )
86
86
self .values = np .zeros (k )
87
87
88
- def select_arm (self ):
88
+ def select_arm (self ) -> int :
89
89
"""
90
90
Select an arm to pull.
91
91
@@ -104,7 +104,7 @@ def select_arm(self):
104
104
else :
105
105
return np .argmax (self .values )
106
106
107
- def update (self , arm_index : int , reward : int ):
107
+ def update (self , arm_index : int , reward : int ) -> None :
108
108
"""
109
109
Update the strategy.
110
110
@@ -133,7 +133,7 @@ class UCB:
133
133
https://people.maths.bris.ac.uk/~maajg/teaching/stochopt/ucb.pdf
134
134
"""
135
135
136
- def __init__ (self , k : int ):
136
+ def __init__ (self , k : int ) -> None :
137
137
"""
138
138
Initialize the UCB strategy.
139
139
@@ -145,7 +145,7 @@ def __init__(self, k: int):
145
145
self .values = np .zeros (k )
146
146
self .total_counts = 0
147
147
148
- def select_arm (self ):
148
+ def select_arm (self ) -> int :
149
149
"""
150
150
Select an arm to pull.
151
151
@@ -159,10 +159,11 @@ def select_arm(self):
159
159
"""
160
160
if self .total_counts < self .k :
161
161
return self .total_counts
162
- ucb_values = self .values + np .sqrt (2 * np .log (self .total_counts ) / self .counts )
162
+ ucb_values = self .values + \
163
+ np .sqrt (2 * np .log (self .total_counts ) / self .counts )
163
164
return np .argmax (ucb_values )
164
165
165
- def update (self , arm_index : int , reward : int ):
166
+ def update (self , arm_index : int , reward : int ) -> None :
166
167
"""
167
168
Update the strategy.
168
169
@@ -192,7 +193,7 @@ class ThompsonSampling:
192
193
https://en.wikipedia.org/wiki/Thompson_sampling
193
194
"""
194
195
195
- def __init__ (self , k : int ):
196
+ def __init__ (self , k : int ) -> None :
196
197
"""
197
198
Initialize the Thompson Sampling strategy.
198
199
@@ -203,7 +204,7 @@ def __init__(self, k: int):
203
204
self .successes = np .zeros (k )
204
205
self .failures = np .zeros (k )
205
206
206
- def select_arm (self ):
207
+ def select_arm (self ) -> int :
207
208
"""
208
209
Select an arm to pull.
209
210
@@ -223,7 +224,7 @@ def select_arm(self):
223
224
]
224
225
return np .argmax (samples )
225
226
226
- def update (self , arm_index : int , reward : int ):
227
+ def update (self , arm_index : int , reward : int ) -> None :
227
228
"""
228
229
Update the strategy.
229
230
@@ -259,7 +260,7 @@ def __init__(self, k: int):
259
260
"""
260
261
self .k = k
261
262
262
- def select_arm (self ):
263
+ def select_arm (self ) -> int :
263
264
"""
264
265
Select an arm to pull.
265
266
@@ -274,7 +275,7 @@ def select_arm(self):
274
275
rng = np .random .default_rng ()
275
276
return rng .integers (self .k )
276
277
277
- def update (self , arm_index : int , reward : int ):
278
+ def update (self , arm_index : int , reward : int ) -> None :
278
279
"""
279
280
Update the strategy.
280
281
@@ -308,7 +309,7 @@ def __init__(self, k: int):
308
309
self .counts = np .zeros (k )
309
310
self .values = np .zeros (k )
310
311
311
- def select_arm (self ):
312
+ def select_arm (self ) -> int :
312
313
"""
313
314
Select an arm to pull.
314
315
@@ -322,7 +323,7 @@ def select_arm(self):
322
323
"""
323
324
return np .argmax (self .values )
324
325
325
- def update (self , arm_index : int , reward : int ):
326
+ def update (self , arm_index : int , reward : int ) -> None :
326
327
"""
327
328
Update the strategy.
328
329
0 commit comments