Skip to content

Commit 53444a8

Browse files
authored
Add load based scorer (kubernetes-sigs#77)
* Added scorer based on pod's load (length of waiting requests queue)
* Added scorer based on pod's load (length of waiting requests queue)
* Update load based scorer according to scorer API changes
* Fixes according to the PR comments

Fix kubernetes-sigs#66
1 parent ba4a7f3 commit 53444a8

File tree

2 files changed

+73
-7
lines changed

2 files changed

+73
-7
lines changed

pkg/epp/scheduling/config.go

+18-7
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,10 @@ limitations under the License.
1616

1717
package scheduling
1818

19-
import "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/plugins"
19+
import (
20+
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/plugins"
21+
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/plugins/scorers"
22+
)
2023

2124
type SchedulerConfig struct {
2225
preSchedulePlugins []plugins.PreSchedule
@@ -32,10 +35,18 @@ var defPlugin = &defaultPlugin{}
3235
// it's possible to call NewSchedulerWithConfig to pass a different argument.
3336

3437
// For build-time plugin changes, it's recommended to change the defaultConfig variable in this file.
35-
var defaultConfig = &SchedulerConfig{
36-
preSchedulePlugins: []plugins.PreSchedule{},
37-
filters: []plugins.Filter{defPlugin},
38-
scorers: map[plugins.Scorer]int{},
39-
picker: defPlugin,
40-
postSchedulePlugins: []plugins.PostSchedule{},
38+
// The value is produced by createDefaultConfig during package initialization.
var defaultConfig = createDefaultConfig()
39+
40+
func createDefaultConfig() *SchedulerConfig {
41+
defConfig := &SchedulerConfig{
42+
preSchedulePlugins: []plugins.PreSchedule{},
43+
filters: []plugins.Filter{defPlugin},
44+
scorers: map[plugins.Scorer]int{},
45+
picker: defPlugin,
46+
postSchedulePlugins: []plugins.PostSchedule{},
47+
}
48+
49+
defConfig.scorers[scorers.NewLoadBasedScorer()] = 1.0
50+
51+
return defConfig
4152
}
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,55 @@
1+
/*
2+
Copyright 2025 The Kubernetes Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
package scorers
17+
18+
import (
19+
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/config"
20+
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/plugins"
21+
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/types"
22+
)
23+
24+
// LoadBasedScorer is a field-less scorer whose Score method rates pods by the
// size of their waiting-request queue.
type LoadBasedScorer struct{}
25+
26+
func NewLoadBasedScorer() plugins.Scorer {
27+
return LoadBasedScorer{}
28+
}
29+
30+
func (s LoadBasedScorer) Name() string {
31+
return "load based scorer"
32+
}
33+
34+
// Score scores the given pod in range of 0-1
35+
// Currently metrics contains number of requests waiting in the queue, there is no information about number of requests
36+
// that can be processed in the given pod immediately.
37+
// Pod with empty waiting requests queue is scored with 0.5
38+
// Pod with requests in the queue will get score between 0.5 and 0.
39+
// Score 0 will get pod with number of requests in the queue equal to the threshold used in load-based filter (QueueingThresholdLoRA)
40+
// In future pods with additional capacity will get score higher than 0.5
41+
func (s LoadBasedScorer) Score(ctx *types.SchedulingContext, pods []types.Pod) map[types.Pod]float64 {
42+
scoredPods := make(map[types.Pod]float64)
43+
44+
for _, pod := range pods {
45+
waitingRequests := float64(pod.GetMetrics().WaitingQueueSize)
46+
47+
if waitingRequests == 0 {
48+
scoredPods[pod] = 0.5
49+
} else {
50+
scoredPods[pod] = 0.5 * (1.0 - (waitingRequests / float64(config.Conf.QueueingThresholdLoRA)))
51+
}
52+
}
53+
54+
return scoredPods
55+
}

0 commit comments

Comments
 (0)