Skip to content

Commit de87cd3

Browse files
committed
Add scheduler plugin latency metric
1 parent 71c0f0c commit de87cd3

File tree

5 files changed

+173
-0
lines changed

5 files changed

+173
-0
lines changed

pkg/epp/metrics/metrics.go

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ import (
3030
// Component names used as metric subsystems.
const (
	// InferenceModelComponent is the component name for inference-model metrics.
	InferenceModelComponent = "inference_model"

	// InferencePoolComponent is the component name for inference-pool metrics.
	InferencePoolComponent = "inference_pool"

	// EPPComponent is the component name for endpoint-picker metrics; it is the
	// subsystem of the scheduler plugin latency histogram.
	EPPComponent = "endpoint_picker"
)
3435

3536
var (
@@ -176,6 +177,20 @@ var (
176177
},
177178
[]string{"name"},
178179
)
180+
181+
// Scheduler Plugin Metrics
182+
SchedulerPluginProcessingLatencies = compbasemetrics.NewHistogramVec(
183+
&compbasemetrics.HistogramOpts{
184+
Subsystem: EPPComponent,
185+
Name: "scheduler_plugin_duration_seconds",
186+
Help: "Scheduler plugin processing latency distribution in seconds for each plugin type and plugin name.",
187+
Buckets: []float64{
188+
0.0001, 0.0002, 0.0005, 0.001, 0.002, 0.005, 0.01, 0.02, 0.05, 0.1,
189+
},
190+
StabilityLevel: compbasemetrics.ALPHA,
191+
},
192+
[]string{"plugin_type", "plugin_name"},
193+
)
179194
)
180195

181196
var registerMetrics sync.Once
@@ -196,6 +211,8 @@ func Register() {
196211
legacyregistry.MustRegister(inferencePoolAvgKVCache)
197212
legacyregistry.MustRegister(inferencePoolAvgQueueSize)
198213
legacyregistry.MustRegister(inferencePoolReadyPods)
214+
215+
legacyregistry.MustRegister(SchedulerPluginProcessingLatencies)
199216
})
200217
}
201218

@@ -293,3 +310,8 @@ func RecordInferencePoolAvgQueueSize(name string, queueSize float64) {
293310
func RecordinferencePoolReadyPods(name string, runningPods float64) {
294311
inferencePoolReadyPods.WithLabelValues(name).Set(runningPods)
295312
}
313+
314+
// RecordSchedulerPluginProcessingLatency records the processing latency for a scheduler plugin.
315+
func RecordSchedulerPluginProcessingLatency(pluginType, pluginName string, duration time.Duration) {
316+
SchedulerPluginProcessingLatencies.WithLabelValues(pluginType, pluginName).Observe(duration.Seconds())
317+
}

pkg/epp/metrics/metrics_test.go

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -556,3 +556,67 @@ func TestInferencePoolMetrics(t *testing.T) {
556556
})
557557
}
558558
}
559+
560+
func TestSchedulerPluginProcessingLatencies(t *testing.T) {
561+
type pluginLatency struct {
562+
pluginType string
563+
pluginName string
564+
duration time.Duration
565+
}
566+
scenarios := []struct {
567+
name string
568+
latencies []pluginLatency
569+
}{
570+
{
571+
name: "multiple plugins",
572+
latencies: []pluginLatency{
573+
{
574+
pluginType: "PreSchedule",
575+
pluginName: "PluginA",
576+
duration: 100 * time.Millisecond,
577+
},
578+
{
579+
pluginType: "PostSchedule",
580+
pluginName: "PluginB",
581+
duration: 200 * time.Millisecond,
582+
},
583+
{
584+
pluginType: "Filter",
585+
pluginName: "PluginC",
586+
duration: 50 * time.Millisecond,
587+
},
588+
{
589+
pluginType: "Scorer",
590+
pluginName: "PluginD",
591+
duration: 10 * time.Millisecond,
592+
},
593+
{
594+
pluginType: "Picker",
595+
pluginName: "PluginE",
596+
duration: 10 * time.Microsecond,
597+
},
598+
},
599+
},
600+
}
601+
Register()
602+
for _, scenario := range scenarios {
603+
t.Run(scenario.name, func(t *testing.T) {
604+
for _, latency := range scenario.latencies {
605+
RecordSchedulerPluginProcessingLatency(latency.pluginType, latency.pluginName, latency.duration)
606+
}
607+
608+
wantPluginLatencies, err := os.Open("testdata/scheduler_plugin_processing_latencies_metric")
609+
defer func() {
610+
if err := wantPluginLatencies.Close(); err != nil {
611+
t.Error(err)
612+
}
613+
}()
614+
if err != nil {
615+
t.Fatal(err)
616+
}
617+
if err := testutil.GatherAndCompare(legacyregistry.DefaultGatherer, wantPluginLatencies, "endpoint_picker_scheduler_plugin_processing_latencies"); err != nil {
618+
t.Error(err)
619+
}
620+
})
621+
}
622+
}
pkg/epp/metrics/testdata/scheduler_plugin_processing_latencies_metric

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
# HELP endpoint_picker_scheduler_plugin_duration_seconds [ALPHA] Scheduler plugin processing latency distribution in seconds for each plugin type and plugin name.
2+
# TYPE endpoint_picker_scheduler_plugin_duration_seconds histogram
3+
endpoint_picker_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginA",plugin_type="PreSchedule",le="0.0001"} 0
4+
endpoint_picker_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginA",plugin_type="PreSchedule",le="0.0002"} 0
5+
endpoint_picker_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginA",plugin_type="PreSchedule",le="0.0005"} 0
6+
endpoint_picker_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginA",plugin_type="PreSchedule",le="0.001"} 0
7+
endpoint_picker_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginA",plugin_type="PreSchedule",le="0.002"} 0
8+
endpoint_picker_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginA",plugin_type="PreSchedule",le="0.005"} 0
9+
endpoint_picker_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginA",plugin_type="PreSchedule",le="0.01"} 0
10+
endpoint_picker_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginA",plugin_type="PreSchedule",le="0.02"} 0
11+
endpoint_picker_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginA",plugin_type="PreSchedule",le="0.05"} 0
12+
endpoint_picker_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginA",plugin_type="PreSchedule",le="0.1"} 1
13+
endpoint_picker_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginA",plugin_type="PreSchedule",le="+Inf"} 1
14+
endpoint_picker_scheduler_plugin_duration_seconds_sum{plugin_name="PluginA",plugin_type="PreSchedule"} 0.1
15+
endpoint_picker_scheduler_plugin_duration_seconds_count{plugin_name="PluginA",plugin_type="PreSchedule"} 1
16+
endpoint_picker_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginB",plugin_type="PostSchedule",le="0.0001"} 0
17+
endpoint_picker_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginB",plugin_type="PostSchedule",le="0.0002"} 0
18+
endpoint_picker_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginB",plugin_type="PostSchedule",le="0.0005"} 0
19+
endpoint_picker_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginB",plugin_type="PostSchedule",le="0.001"} 0
20+
endpoint_picker_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginB",plugin_type="PostSchedule",le="0.002"} 0
21+
endpoint_picker_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginB",plugin_type="PostSchedule",le="0.005"} 0
22+
endpoint_picker_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginB",plugin_type="PostSchedule",le="0.01"} 0
23+
endpoint_picker_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginB",plugin_type="PostSchedule",le="0.02"} 0
24+
endpoint_picker_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginB",plugin_type="PostSchedule",le="0.05"} 0
25+
endpoint_picker_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginB",plugin_type="PostSchedule",le="0.1"} 0
26+
endpoint_picker_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginB",plugin_type="PostSchedule",le="+Inf"} 1
27+
endpoint_picker_scheduler_plugin_duration_seconds_sum{plugin_name="PluginB",plugin_type="PostSchedule"} 0.2
28+
endpoint_picker_scheduler_plugin_duration_seconds_count{plugin_name="PluginB",plugin_type="PostSchedule"} 1
29+
endpoint_picker_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginC",plugin_type="Filter",le="0.0001"} 0
30+
endpoint_picker_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginC",plugin_type="Filter",le="0.0002"} 0
31+
endpoint_picker_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginC",plugin_type="Filter",le="0.0005"} 0
32+
endpoint_picker_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginC",plugin_type="Filter",le="0.001"} 0
33+
endpoint_picker_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginC",plugin_type="Filter",le="0.002"} 0
34+
endpoint_picker_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginC",plugin_type="Filter",le="0.005"} 0
35+
endpoint_picker_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginC",plugin_type="Filter",le="0.01"} 0
36+
endpoint_picker_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginC",plugin_type="Filter",le="0.02"} 0
37+
endpoint_picker_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginC",plugin_type="Filter",le="0.05"} 1
38+
endpoint_picker_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginC",plugin_type="Filter",le="0.1"} 1
39+
endpoint_picker_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginC",plugin_type="Filter",le="+Inf"} 1
40+
endpoint_picker_scheduler_plugin_duration_seconds_sum{plugin_name="PluginC",plugin_type="Filter"} 0.05
41+
endpoint_picker_scheduler_plugin_duration_seconds_count{plugin_name="PluginC",plugin_type="Filter"} 1
42+
endpoint_picker_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginD",plugin_type="Scorer",le="0.0001"} 0
43+
endpoint_picker_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginD",plugin_type="Scorer",le="0.0002"} 0
44+
endpoint_picker_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginD",plugin_type="Scorer",le="0.0005"} 0
45+
endpoint_picker_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginD",plugin_type="Scorer",le="0.001"} 0
46+
endpoint_picker_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginD",plugin_type="Scorer",le="0.002"} 0
47+
endpoint_picker_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginD",plugin_type="Scorer",le="0.005"} 0
48+
endpoint_picker_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginD",plugin_type="Scorer",le="0.01"} 1
49+
endpoint_picker_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginD",plugin_type="Scorer",le="0.02"} 1
50+
endpoint_picker_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginD",plugin_type="Scorer",le="0.05"} 1
51+
endpoint_picker_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginD",plugin_type="Scorer",le="0.1"} 1
52+
endpoint_picker_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginD",plugin_type="Scorer",le="+Inf"} 1
53+
endpoint_picker_scheduler_plugin_duration_seconds_sum{plugin_name="PluginD",plugin_type="Scorer"} 0.01
54+
endpoint_picker_scheduler_plugin_duration_seconds_count{plugin_name="PluginD",plugin_type="Scorer"} 1
55+
endpoint_picker_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginE",plugin_type="Picker",le="0.0001"} 1
56+
endpoint_picker_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginE",plugin_type="Picker",le="0.0002"} 1
57+
endpoint_picker_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginE",plugin_type="Picker",le="0.0005"} 1
58+
endpoint_picker_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginE",plugin_type="Picker",le="0.001"} 1
59+
endpoint_picker_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginE",plugin_type="Picker",le="0.002"} 1
60+
endpoint_picker_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginE",plugin_type="Picker",le="0.005"} 1
61+
endpoint_picker_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginE",plugin_type="Picker",le="0.01"} 1
62+
endpoint_picker_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginE",plugin_type="Picker",le="0.02"} 1
63+
endpoint_picker_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginE",plugin_type="Picker",le="0.05"} 1
64+
endpoint_picker_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginE",plugin_type="Picker",le="0.1"} 1
65+
endpoint_picker_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginE",plugin_type="Picker",le="+Inf"} 1
66+
endpoint_picker_scheduler_plugin_duration_seconds_sum{plugin_name="PluginE",plugin_type="Picker"} 1e-05
67+
endpoint_picker_scheduler_plugin_duration_seconds_count{plugin_name="PluginE",plugin_type="Picker"} 1

pkg/epp/scheduling/scheduler.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,11 @@ package scheduling
2020
import (
2121
"context"
2222
"fmt"
23+
"time"
2324

2425
"sigs.k8s.io/controller-runtime/pkg/log"
2526
backendmetrics "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend/metrics"
27+
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/metrics"
2628
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/plugins"
2729
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/types"
2830
logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging"
@@ -111,7 +113,9 @@ func (s *Scheduler) Schedule(ctx context.Context, req *types.LLMRequest) (*types
111113
return nil, err
112114
}
113115

116+
before := time.Now()
114117
res, err := s.picker.Pick(sCtx, pods)
118+
metrics.RecordSchedulerPluginProcessingLatency(types.PickerPluginType, s.picker.Name(), time.Since(before))
115119
if err != nil {
116120
return nil, err
117121
}
@@ -125,14 +129,18 @@ func (s *Scheduler) Schedule(ctx context.Context, req *types.LLMRequest) (*types
125129
// runPreSchedulePlugins calls PreSchedule on every configured pre-schedule
// plugin, in order, and records the latency of each call under the
// pre-schedule plugin type and the plugin's name.
func (s *Scheduler) runPreSchedulePlugins(ctx *types.Context) {
	plugins := s.preSchedulePlugins
	for i := range plugins {
		p := plugins[i]
		ctx.Logger.V(logutil.DEBUG).Info("Running pre-schedule plugin", "plugin", p.Name())

		start := time.Now()
		p.PreSchedule(ctx)
		metrics.RecordSchedulerPluginProcessingLatency(
			types.PreSchedulerPluginType,
			p.Name(),
			time.Since(start),
		)
	}
}
131137

132138
// runPostSchedulePlugins calls PostSchedule with the scheduling result on
// every configured post-schedule plugin, in order, and records the latency of
// each call under the post-schedule plugin type and the plugin's name.
func (s *Scheduler) runPostSchedulePlugins(ctx *types.Context, res *types.Result) {
	plugins := s.postSchedulePlugins
	for i := range plugins {
		p := plugins[i]
		ctx.Logger.V(logutil.DEBUG).Info("Running post-schedule plugin", "plugin", p.Name())

		start := time.Now()
		p.PostSchedule(ctx, res)
		metrics.RecordSchedulerPluginProcessingLatency(
			types.PostSchedulePluginType,
			p.Name(),
			time.Since(start),
		)
	}
}
138146

@@ -142,7 +150,9 @@ func (s *Scheduler) runFilterPlugins(ctx *types.Context) ([]types.Pod, error) {
142150
loggerDebug.Info("Before running filter plugins", "pods", pods)
143151
for _, filter := range s.filters {
144152
loggerDebug.Info("Running filter plugin", "plugin", filter.Name())
153+
before := time.Now()
145154
filteredPods, err := filter.Filter(ctx, pods)
155+
metrics.RecordSchedulerPluginProcessingLatency(types.FilterPluginType, filter.Name(), time.Since(before))
146156
if err != nil || len(filteredPods) == 0 {
147157
return nil, fmt.Errorf("failed to apply filter, resulted %v pods, this should never happen: %w", len(filteredPods), err)
148158
}
@@ -173,7 +183,9 @@ func runScorersForPod(ctx *types.Context, scorers []types.Scorer, pod types.Pod)
173183
score := float64(0)
174184
for _, scorer := range scorers {
175185
logger.Info("Running scorer", "scorer", scorer.Name())
186+
before := time.Now()
176187
oneScore, err := scorer.Score(ctx, pod)
188+
metrics.RecordSchedulerPluginProcessingLatency(types.ScorerPluginType, scorer.Name(), time.Since(before))
177189
if err != nil {
178190
logger.Error(err, "Failed to calculate score for scorer", "scorer", scorer.Name())
179191
return 0, err

pkg/epp/scheduling/types/interfaces.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,14 @@ import (
2020
backendmetrics "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend/metrics"
2121
)
2222

23+
// Scheduler plugin type names. The scheduler passes them as the plugin type
// when recording per-plugin processing latency.
const (
	// PreSchedulePluginType names the plugins run before scheduling.
	PreSchedulePluginType = "PreSchedule"

	// PreSchedulerPluginType is the original spelling of PreSchedulePluginType,
	// kept so existing callers continue to compile.
	//
	// Deprecated: use PreSchedulePluginType, which matches the naming of the
	// other plugin type constants.
	PreSchedulerPluginType = PreSchedulePluginType

	// PostSchedulePluginType names the plugins run after a result is picked.
	PostSchedulePluginType = "PostSchedule"

	// FilterPluginType names the filter plugins.
	FilterPluginType = "Filter"

	// ScorerPluginType names the scorer plugins.
	ScorerPluginType = "Scorer"

	// PickerPluginType names the picker plugin.
	PickerPluginType = "Picker"
)
30+
2331
type Pod interface {
2432
GetPod() *backendmetrics.Pod
2533
GetMetrics() *backendmetrics.Metrics

0 commit comments

Comments
 (0)