Skip to content

Commit 2759e3f

Browse files
authored
removed time.sleep and using ticker instead (#648)
* removed time.sleep and using ticker instead Signed-off-by: Nir Rozenbaum <[email protected]> * move ticker creation outside of go routine. make sure refresh internal is valid at the ticker creation time Signed-off-by: Nir Rozenbaum <[email protected]> * add DefaultRefreshPrometheusMetricsInterval for test purposes. once ticker was introduced instead of sleep, having 0 as the refresh internal is not valid. Signed-off-by: Nir Rozenbaum <[email protected]> * wait in test until metrics are available before running tests that rely on the values. up until now, the metrics go routine ran in tests with time.Sleep(0), which means metrics were avaiable immediately. while in tests in might be acceptable to wait few seconds using sleep, in the actual code (not tests) it's a bad practice to use sleep which was replaced with a ticker (to perform periodic task in an endless loop). Signed-off-by: Nir Rozenbaum <[email protected]> --------- Signed-off-by: Nir Rozenbaum <[email protected]>
1 parent 81100ff commit 2759e3f

File tree

4 files changed

+16
-14
lines changed

4 files changed

+16
-14
lines changed

pkg/epp/backend/metrics/logger.go

+7-6
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ const (
3232
// Note currently the EPP treats stale metrics same as fresh.
3333
// TODO: https://github.com/kubernetes-sigs/gateway-api-inference-extension/issues/336
3434
metricsValidityPeriod = 5 * time.Second
35+
debugPrintInterval = 5 * time.Second
3536
)
3637

3738
type Datastore interface {
@@ -46,16 +47,15 @@ type Datastore interface {
4647
// enabled; 2) flushes Prometheus metrics about the backend servers.
4748
func StartMetricsLogger(ctx context.Context, datastore Datastore, refreshPrometheusMetricsInterval time.Duration) {
4849
logger := log.FromContext(ctx)
49-
50-
// Periodically flush prometheus metrics for inference pool
50+
ticker := time.NewTicker(refreshPrometheusMetricsInterval)
5151
go func() {
52+
defer ticker.Stop()
5253
for {
5354
select {
5455
case <-ctx.Done():
5556
logger.V(logutil.DEFAULT).Info("Shutting down prometheus metrics thread")
5657
return
57-
default:
58-
time.Sleep(refreshPrometheusMetricsInterval)
58+
case <-ticker.C: // Periodically flush prometheus metrics for inference pool
5959
flushPrometheusMetricsOnce(logger, datastore)
6060
}
6161
}
@@ -64,13 +64,14 @@ func StartMetricsLogger(ctx context.Context, datastore Datastore, refreshPrometh
6464
// Periodically print out the pods and metrics for DEBUGGING.
6565
if logger := logger.V(logutil.DEBUG); logger.Enabled() {
6666
go func() {
67+
ticker := time.NewTicker(debugPrintInterval)
68+
defer ticker.Stop()
6769
for {
6870
select {
6971
case <-ctx.Done():
7072
logger.V(logutil.DEFAULT).Info("Shutting down metrics logger thread")
7173
return
72-
default:
73-
time.Sleep(5 * time.Second)
74+
case <-ticker.C:
7475
podsWithFreshMetrics := datastore.PodList(func(pm PodMetrics) bool {
7576
return time.Since(pm.GetMetrics().UpdateTime) <= metricsValidityPeriod
7677
})

pkg/epp/backend/metrics/pod_metrics.go

+6-8
Original file line numberDiff line numberDiff line change
@@ -84,21 +84,19 @@ func (pm *podMetrics) startRefreshLoop() {
8484
pm.once.Do(func() {
8585
go func() {
8686
pm.logger.V(logutil.DEFAULT).Info("Starting refresher", "pod", pm.GetPod())
87+
ticker := time.NewTicker(pm.interval)
88+
defer ticker.Stop()
8789
for {
8890
select {
8991
case <-pm.done:
9092
return
9193
case <-pm.parentCtx.Done():
9294
return
93-
default:
95+
case <-ticker.C: // refresh metrics periodically
96+
if err := pm.refreshMetrics(); err != nil {
97+
pm.logger.V(logutil.TRACE).Error(err, "Failed to refresh metrics", "pod", pm.GetPod())
98+
}
9499
}
95-
96-
err := pm.refreshMetrics()
97-
if err != nil {
98-
pm.logger.V(logutil.TRACE).Error(err, "Failed to refresh metrics", "pod", pm.GetPod())
99-
}
100-
101-
time.Sleep(pm.interval)
102100
}
103101
}()
104102
})

pkg/epp/server/runserver.go

+1
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@ func NewDefaultExtProcServerRunner() *ExtProcServerRunner {
7676
PoolName: DefaultPoolName,
7777
PoolNamespace: DefaultPoolNamespace,
7878
SecureServing: DefaultSecureServing,
79+
RefreshPrometheusMetricsInterval: DefaultRefreshPrometheusMetricsInterval,
7980
// Datastore can be assigned later.
8081
}
8182
}

test/integration/epp/hermetic_test.go

+2
Original file line numberDiff line numberDiff line change
@@ -1548,6 +1548,8 @@ func setUpHermeticServer(t *testing.T, podAndMetrics map[backendmetrics.Pod]*bac
15481548
}
15491549
}()
15501550

1551+
time.Sleep(serverRunner.RefreshPrometheusMetricsInterval) // wait for metrics to get available before running tests that rely on these metrics
1552+
15511553
// check if all pods are synced to datastore
15521554
assert.EventuallyWithT(t, func(t *assert.CollectT) {
15531555
assert.Len(t, serverRunner.Datastore.PodGetAll(), len(podAndMetrics), "Datastore not synced")

0 commit comments

Comments
 (0)