Add load based scorer (kubernetes-sigs#77)

mayabar · web-flow · commit 53444a845289 · 2025-04-29T09:39:03.000+03:00
* Added scorer based on pod's load (length of waiting requests queue) * Added scorer based on pod's load (length of waiting requests queue) * Update load based scorer according to scorer API changes * Fixes according to the PR comments Fix kubernetes-sigs#66
diff --git a/pkg/epp/scheduling/config.go b/pkg/epp/scheduling/config.go
@@ -16,7 +16,10 @@ limitations under the License.
 
 package scheduling
 
-import "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/plugins"
+import (
+	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/plugins"
+	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/plugins/scorers"
+)
 
 type SchedulerConfig struct {
 	preSchedulePlugins  []plugins.PreSchedule
@@ -32,10 +35,18 @@ var defPlugin = &defaultPlugin{}
 // it's possible to call NewSchedulerWithConfig to pass a different argument.
 
 // For build time plugins changes, it's recommended to change the defaultConfig variable in this file.
-var defaultConfig = &SchedulerConfig{
-	preSchedulePlugins:  []plugins.PreSchedule{},
-	filters:             []plugins.Filter{defPlugin},
-	scorers:             map[plugins.Scorer]int{},
-	picker:              defPlugin,
-	postSchedulePlugins: []plugins.PostSchedule{},
+var defaultConfig = createDefaultConfig()
+
+func createDefaultConfig() *SchedulerConfig {
+	defConfig := &SchedulerConfig{
+		preSchedulePlugins:  []plugins.PreSchedule{},
+		filters:             []plugins.Filter{defPlugin},
+		scorers:             map[plugins.Scorer]int{},
+		picker:              defPlugin,
+		postSchedulePlugins: []plugins.PostSchedule{},
+	}
+
+	defConfig.scorers[scorers.NewLoadBasedScorer()] = 1.0
+
+	return defConfig
 }
diff --git a/pkg/epp/scheduling/plugins/scorers/load_based_scorer.go b/pkg/epp/scheduling/plugins/scorers/load_based_scorer.go
@@ -0,0 +1,55 @@
+/*
+Copyright 2025 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+	http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+package scorers
+
+import (
+	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/config"
+	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/plugins"
+	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/types"
+)
+
+type LoadBasedScorer struct{}
+
+func NewLoadBasedScorer() plugins.Scorer {
+	return LoadBasedScorer{}
+}
+
+func (s LoadBasedScorer) Name() string {
+	return "load based scorer"
+}
+
+// Score scores the given pod in range of 0-1
+// Currently metrics contains number of requests waiting in the queue, there is no information about number of requests
+// that can be processed in the given pod immediately.
+// Pod with empty waiting requests queue is scored with 0.5
+// Pod with requests in the queue will get score between 0.5 and 0.
+// Score 0 will get pod with number of requests in the queue equal to the threshold used in load-based filter (QueueingThresholdLoRA)
+// In future pods with additional capacity will get score higher than 0.5
+func (s LoadBasedScorer) Score(ctx *types.SchedulingContext, pods []types.Pod) map[types.Pod]float64 {
+	scoredPods := make(map[types.Pod]float64)
+
+	for _, pod := range pods {
+		waitingRequests := float64(pod.GetMetrics().WaitingQueueSize)
+
+		if waitingRequests == 0 {
+			scoredPods[pod] = 0.5
+		} else {
+			scoredPods[pod] = 0.5 * (1.0 - (waitingRequests / float64(config.Conf.QueueingThresholdLoRA)))
+		}
+	}
+
+	return scoredPods
+}