Weighted scorers #737

Merged
5 commits merged on Apr 28, 2025
16 changes: 15 additions & 1 deletion pkg/epp/scheduling/config.go
@@ -20,8 +20,22 @@ import "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/plugins"

type SchedulerConfig struct {
preSchedulePlugins []plugins.PreSchedule
scorers []plugins.Scorer
filters []plugins.Filter
scorers map[plugins.Scorer]int // map from scorer to weight
postSchedulePlugins []plugins.PostSchedule
picker plugins.Picker
}

var defPlugin = &defaultPlugin{}

// When the scheduler is initialized with the NewScheduler function, this config is used as the default.
// It's possible to call NewSchedulerWithConfig to pass a different configuration.

// For build-time plugin changes, it's recommended to change the defaultConfig variable in this file.
@nirrozenbaum (Contributor, Author), Apr 24, 2025:

PAY ATTENTION.
When someone wants to change the GIE default filters/scorers/picker/etc. by forking the repo, the flow is the following (see the sketch after this comment):

  • Fork the repo.
  • Add their own plugins in new files.
  • Change ONLY THIS FILE to include their set of plugins, using the defaultConfig variable.
  • That's it. Build the EPP.

The idea behind this small file is to let such users keep their fork synced without drifting from upstream: since they have only this single-file change, they can sync the rest of the code to pick up bug fixes and the like.
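For illustration only, here is a hedged sketch of what such a fork-side edit to config.go might look like; MyCustomFilter, MyKVCacheScorer, and MyQueueScorer are hypothetical plugins (not part of this PR), and the weights are relative to their sum.

```go
// Sketch of a fork's defaultConfig (hypothetical plugins, same package):
// only this variable changes; the rest of the package stays in sync with upstream.
var defaultConfig = &SchedulerConfig{
	preSchedulePlugins: []plugins.PreSchedule{},
	filters:            []plugins.Filter{&MyCustomFilter{}},
	scorers: map[plugins.Scorer]int{
		&MyKVCacheScorer{}: 3, // 3 of 4 => 75% of the weighted score
		&MyQueueScorer{}:   1, // 1 of 4 => 25% of the weighted score
	},
	postSchedulePlugins: []plugins.PostSchedule{},
	picker:              defPlugin,
}
```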

Contributor:

Why not manage two separate files: one for the SchedulerConfig definition, which would not be changed in a personal fork (since it could change upstream), and another that defines the config instance with all the required filters/scorers/picker/...?

Contributor Author:

If SchedulerConfig has changed upstream (e.g., it includes new fields), that means the Scheduler itself changed upstream (since the scheduler uses the fields from the config). In such a case it's not possible to keep the previous config in a personal fork and use the scheduler from upstream.

var defaultConfig = &SchedulerConfig{
preSchedulePlugins: []plugins.PreSchedule{},
filters: []plugins.Filter{defPlugin},
scorers: map[plugins.Scorer]int{},
postSchedulePlugins: []plugins.PostSchedule{},
picker: defPlugin,
}
31 changes: 0 additions & 31 deletions pkg/epp/scheduling/default_config.go

This file was deleted.

6 changes: 2 additions & 4 deletions pkg/epp/scheduling/plugins/filter/filter_test.go
@@ -54,8 +54,7 @@ func TestFilter(t *testing.T) {
ctx := types.NewSchedulingContext(context.Background(), test.req, test.input)
got := test.filter.Filter(ctx, test.input)

opt := cmp.AllowUnexported(types.PodMetrics{})
if diff := cmp.Diff(test.output, got, opt); diff != "" {
if diff := cmp.Diff(test.output, got); diff != "" {
t.Errorf("Unexpected output (-want +got): %v", diff)
}
})
@@ -190,8 +189,7 @@ func TestFilterFunc(t *testing.T) {
ctx := types.NewSchedulingContext(context.Background(), test.req, test.input)
got := test.f(ctx, test.input)

opt := cmp.AllowUnexported(types.PodMetrics{})
if diff := cmp.Diff(test.output, got, opt); diff != "" {
if diff := cmp.Diff(test.output, got); diff != "" {
t.Errorf("Unexpected output (-want +got): %v", diff)
}
})
42 changes: 0 additions & 42 deletions pkg/epp/scheduling/plugins/noop.go

This file was deleted.

22 changes: 18 additions & 4 deletions pkg/epp/scheduling/plugins/picker/random_picker.go
@@ -20,18 +20,32 @@ import (
"fmt"
"math/rand"

"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/plugins"
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/types"
logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging"
)

var _ plugins.Picker = &RandomPicker{}

// RandomPicker picks a random pod from the list of candidates.
type RandomPicker struct{}

func (rp *RandomPicker) Name() string {
return "random"
}

func (rp *RandomPicker) Pick(ctx *types.SchedulingContext, pods []types.Pod) *types.Result {
ctx.Logger.V(logutil.DEBUG).Info(fmt.Sprintf("Selecting a random pod from %d candidates: %+v", len(pods), pods))
i := rand.Intn(len(pods))
return &types.Result{TargetPod: pods[i]}
func (rp *RandomPicker) Pick(ctx *types.SchedulingContext, scoredPods map[types.Pod]float64) *types.Result {
ctx.Logger.V(logutil.DEBUG).Info(fmt.Sprintf("Selecting a random pod from %d candidates", len(scoredPods)))
selectedIndex := rand.Intn(len(scoredPods))
i := 0
var randomPod types.Pod
for pod := range scoredPods {
if selectedIndex == i {
randomPod = pod
break

}
i++
}
return &types.Result{TargetPod: randomPod}
}
7 changes: 4 additions & 3 deletions pkg/epp/scheduling/plugins/plugins.go
@@ -49,10 +49,11 @@ type Filter interface {
Filter(ctx *types.SchedulingContext, pods []types.Pod) []types.Pod
}

// Scorer defines the interface for scoring pods based on context.
// Scorer defines the interface for scoring a list of pods based on context.
// Scorers must score pods with a value within the range of [0,1] where 1 is the highest score.
type Scorer interface {
Plugin
Score(ctx *types.SchedulingContext, pod types.Pod) float64
Score(ctx *types.SchedulingContext, pods []types.Pod) map[types.Pod]float64
}

// PostSchedule is called by the scheduler after it selects a targetPod for the request.
Expand All @@ -64,7 +65,7 @@ type PostSchedule interface {
// Picker picks the final pod(s) to send the request to.
type Picker interface {
Plugin
Pick(ctx *types.SchedulingContext, pods []types.Pod) *types.Result
Pick(ctx *types.SchedulingContext, scoredPods map[types.Pod]float64) *types.Result
}

// PostResponse is called by the scheduler after a successful response was sent.
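For orientation, a minimal scorer satisfying the new batched interface could look like the sketch below; uniformScorer is a hypothetical name and not part of this PR. It scores every candidate identically, purely to show the shape of the Score method.

```go
// uniformScorer is a hypothetical example of the batched Scorer interface:
// it returns a score in [0,1] for every candidate pod in a single call.
type uniformScorer struct{}

func (s *uniformScorer) Name() string { return "uniform" }

func (s *uniformScorer) Score(ctx *types.SchedulingContext, pods []types.Pod) map[types.Pod]float64 {
	scores := make(map[types.Pod]float64, len(pods))
	for _, pod := range pods {
		scores[pod] = 0.5 // same mid-range score for every pod
	}
	return scores
}
```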
69 changes: 37 additions & 32 deletions pkg/epp/scheduling/scheduler.go
@@ -72,13 +72,18 @@ func NewScheduler(datastore Datastore) *Scheduler {
}

func NewSchedulerWithConfig(datastore Datastore, config *SchedulerConfig) *Scheduler {
sumOfScorersWeights := 0
for _, weight := range config.scorers {
sumOfScorersWeights += weight
}
scheduler := &Scheduler{
datastore: datastore,
preSchedulePlugins: config.preSchedulePlugins,
scorers: config.scorers,
filters: config.filters,
scorers: config.scorers,
postSchedulePlugins: config.postSchedulePlugins,
picker: config.picker,
sumOfScorersWeights: sumOfScorersWeights,
}

return scheduler
@@ -88,9 +93,10 @@ type Scheduler struct {
datastore Datastore
preSchedulePlugins []plugins.PreSchedule
filters []plugins.Filter
scorers []plugins.Scorer
scorers map[plugins.Scorer]int // map from scorer to its weight
postSchedulePlugins []plugins.PostSchedule
picker plugins.Picker
sumOfScorersWeights int
}

type Datastore interface {
@@ -106,21 +112,21 @@ func (s *Scheduler) Schedule(ctx context.Context, req *types.LLMRequest) (*types
// 1. Reduce concurrent access to the datastore.
// 2. Ensure consistent data during the scheduling operation of a request.
sCtx := types.NewSchedulingContext(ctx, req, types.ToSchedulerPodMetrics(s.datastore.PodGetAll()))
loggerDebug.Info(fmt.Sprintf("Scheduling a request. Metrics: %+v", sCtx.PodsSnapshot))
loggerDebug.Info(fmt.Sprintf("Scheduling a request, Metrics: %+v", sCtx.PodsSnapshot))

s.runPreSchedulePlugins(sCtx)

pods := s.runFilterPlugins(sCtx)
if len(pods) == 0 {
return nil, errutil.Error{Code: errutil.InferencePoolResourceExhausted, Msg: "failed to find a target pod"}
Collaborator:

I don't think it's in scope of this PR, but we should think about changing this error code. There's no guarantee that resources are exhausted when there are no pods, just that the filter for the specific request came up with nothing. I worry that sends incorrect signals to the reader.

@liu-cong (Contributor), Apr 25, 2025:

This is a regression from b24f948, I believe. Previously this error was returned by the sheddableRequestFilter, which knows this is a resource-exhausted error.

I would argue for keeping the errors from the Filters and Scorers, even if we don't need them today. This is more future-proof and addresses issues like this one. @nirrozenbaum, what do you think about bringing the errors back? Not asking to do it in this PR; it can be a follow-up.

@nirrozenbaum (Contributor, Author), Apr 27, 2025:

As of today, we have the following filters in main: LowQueueFilter, LoRAAffinityFilter, LeastQueueFilter, LeastKVCacheFilter, HasCapacityFilter. In addition to those, we're currently implementing a few more filters and scorers at IBM that will be added to GIE when ready. None of the above uses an error. I think this should give us some hints about the usage of errors in filters/scorers/picker.

I'm in favor of being flexible, but I think we should be use-case driven and not try to be future-proof; otherwise we may end up implementing things that are not used and/or not relevant.

For example, we changed the Score function to receive all pods. This is flexible because it allows batching pods to an external scorer while still allowing each pod to be scored separately. But this change was use-case driven: it is required since we're developing a KVCache scorer at IBM that uses Redis as an external service, and @kfswain also showed in his py-go that it's useful. (A sketch of the batching pattern follows below.)
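A hedged sketch of the batching pattern described above; externalScorer and fetchExternalScores are invented for illustration and do not come from this PR or from the IBM scorer.

```go
// externalScorer is a hypothetical scorer that queries an external service once
// for the whole candidate list and maps the results back onto the pods.
type externalScorer struct {
	// fetchExternalScores is assumed to return a score in [0,1] per pod name.
	fetchExternalScores func(podNames []string) map[string]float64
}

func (s *externalScorer) Name() string { return "external" }

func (s *externalScorer) Score(ctx *types.SchedulingContext, pods []types.Pod) map[types.Pod]float64 {
	names := make([]string, 0, len(pods))
	for _, pod := range pods {
		names = append(names, pod.GetPod().NamespacedName.String())
	}
	remote := s.fetchExternalScores(names) // one round trip for the whole batch

	scores := make(map[types.Pod]float64, len(pods))
	for _, pod := range pods {
		scores[pod] = remote[pod.GetPod().NamespacedName.String()]
	}
	return scores
}
```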

Contributor:

DropRequest is a use case for returning an error. How do we solve that without an error?
My point about being future-proof is that it will take a lot of refactoring effort to add errors back later, once we have many more plugins.

Contributor Author:

We started this discussion on #677. I'll give my interpretation of the question here:
From my understanding, DropRequestFilter == FilterAllPods (please correct me if I'm wrong). In other words, it is a filter whose result is "no pods passed this filter". This is not unique to this filter and can happen in any filter (the scheduler is extensible now; new filters might be written and used).

First point: we can argue whether a filter whose result is an empty list is an error. I claim there was no error in the filter itself, as the filter worked correctly and filtered according to its definition. The result is that we cannot serve the request, but that is not an error in the filter (the scheduler returns an error if len(filtered_pods) == 0).

Second point, referring to the code as it was in #677 (since it has changed since then): this error was used in two places in the code:

To summarize, IMO the DropRequest that was previously used is not a good example of error usage, since it is not an error. It was used (I think incorrectly) to mark that no pods were left; for that check, we can simply check the length of the pods returned by the filter.

Contributor:

You have a lot of great points!

If a filter results in 0 pods, this effectively leads to an error for the scheduler. While it's debatable whether it's an error from the filter's perspective, we need to communicate this to the scheduler. Currently the scheduler simply returns a ResourceExhausted error, which seems too broad. IMO the filter should communicate a reason why it filtered out all pods.

How about adding another "filterReason" return value (perhaps an enum), if an error is too strong, so the scheduler can interpret the empty pod list and return a proper error to consumers?
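One possible shape of the proposed filterReason (not implemented anywhere; names invented for illustration):

```go
// FilterReason is a hypothetical enum a filter could return alongside the
// filtered pods, so the scheduler can surface a more specific error when the
// candidate list comes back empty.
type FilterReason int

const (
	FilterReasonNone       FilterReason = iota // nothing notable to report
	FilterReasonNoCapacity                     // e.g., every pod was saturated
	FilterReasonNoAffinity                     // e.g., no pod matched the requested LoRA adapter
)
```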

Contributor Author:

Yes, I agree that ResourceExhausted is not the right error to use here and that it should be changed. We're in an intermediate phase and I'm trying to break the scheduler refactoring into smaller PRs, to allow easier reviews and merges. I will surely change this in the next PR.
I tend to say this discussion is only about filters. Once there is at least one pod left after the filter phase, I really don't think we should abort a scheduling cycle because of a temporary issue with one of the scorers; that scorer can return a zero value for all candidate pods until the issue is resolved (and write the issue to the log). This is very different from the kube-scheduler framework in this respect.

"filterReason" is another word for error :) If there is no error, we won't use the reason.
So if we decide to add such a thing, it's better to use an error, which can be wrapped in upper-layer errors.

Can you give an example of a "filterReason" you have in mind?
I think including in the returned error which filter filtered out all the pods is useful; this is what I planned for my next PR (maybe I can push it as a separate PR today).
But what reason could there be other than "the candidate pods don't meet the filter criteria"?
In the end, it's a boolean condition. (A sketch of an error that names the filter follows below.)
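A hedged sketch of the kind of error described here, naming the filter that emptied the candidate list; this is illustrative only and not code from this PR:

```go
// Inside the filter loop (sketch): if a filter removes every remaining pod,
// report which filter did it instead of a generic ResourceExhausted error.
filteredPods = filter.Filter(sCtx, filteredPods)
if len(filteredPods) == 0 {
	return nil, fmt.Errorf("filter %q filtered out all candidate pods", filter.Name())
}
```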

}

s.runScorerPlugins(sCtx, pods)
// if we got here, there is at least one pod to score
weightedScorePerPod := s.runScorerPlugins(sCtx, pods)

before := time.Now()
res := s.picker.Pick(sCtx, pods)
res := s.picker.Pick(sCtx, weightedScorePerPod)
metrics.RecordSchedulerPluginProcessingLatency(plugins.PickerPluginType, s.picker.Name(), time.Since(before))
loggerDebug.Info("After running picker plugins", "result", res)
loggerDebug.Info("After running picker plugin", "result", res)

s.runPostSchedulePlugins(sCtx, res)

@@ -136,15 +142,6 @@ func (s *Scheduler) runPreSchedulePlugins(ctx *types.SchedulingContext) {
}
}

func (s *Scheduler) runPostSchedulePlugins(ctx *types.SchedulingContext, res *types.Result) {
for _, plugin := range s.postSchedulePlugins {
ctx.Logger.V(logutil.DEBUG).Info("Running post-schedule plugin", "plugin", plugin.Name())
before := time.Now()
plugin.PostSchedule(ctx, res)
metrics.RecordSchedulerPluginProcessingLatency(plugins.PostSchedulePluginType, plugin.Name(), time.Since(before))
}
}

func (s *Scheduler) runFilterPlugins(ctx *types.SchedulingContext) []types.Pod {
loggerDebug := ctx.Logger.V(logutil.DEBUG)
filteredPods := ctx.PodsSnapshot
@@ -163,29 +160,37 @@ func (s *Scheduler) runFilterPlugins(ctx *types.SchedulingContext) []types.Pod {
return filteredPods
}

func (s *Scheduler) runScorerPlugins(ctx *types.SchedulingContext, pods []types.Pod) {
func (s *Scheduler) runScorerPlugins(ctx *types.SchedulingContext, pods []types.Pod) map[types.Pod]float64 {
loggerDebug := ctx.Logger.V(logutil.DEBUG)
loggerDebug.Info("Before running score plugins", "pods", pods)
loggerDebug.Info("Before running scorer plugins", "pods", pods)

weightedScorePerPod := make(map[types.Pod]float64, len(pods))
for _, pod := range pods {
score := s.runScorersForPod(ctx, pod)
pod.SetScore(score)
weightedScorePerPod[pod] = float64(0) // initialize weighted score per pod with 0 value
Contributor:

nit: I don't think this is necessary.

Contributor Author:

If no scorers are configured (as in main today, for example), removing this line causes errors. Try removing it locally and running the unit tests to see the error.

}
// Iterate through each scorer in the chain and accumulate the weighted scores.
for scorer, weight := range s.scorers {
loggerDebug.Info("Running scorer", "scorer", scorer.Name())
before := time.Now()
scores := scorer.Score(ctx, pods)
metrics.RecordSchedulerPluginProcessingLatency(plugins.ScorerPluginType, scorer.Name(), time.Since(before))
for pod, score := range scores { // weight is relative to the sum of weights
weightedScorePerPod[pod] += score * float64(weight) / float64(s.sumOfScorersWeights) // TODO normalize score before multiply with weight
}
loggerDebug.Info("After running scorer", "scorer", scorer.Name())
}
loggerDebug.Info("After running score plugins", "pods", pods)
loggerDebug.Info("After running scorer plugins", "pods", pods)

return weightedScorePerPod
}
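To make the accumulation concrete, here is a small self-contained sketch (illustrative values only) of how two scorers with weights 2 and 1 combine into one weighted score per pod; because each scorer emits values in [0,1] and the weights are divided by their sum, the result stays in [0,1].

```go
package main

import "fmt"

func main() {
	// Hypothetical scores for a single pod from two scorers.
	scores := map[string]float64{"scorerA": 0.9, "scorerB": 0.3}
	weights := map[string]int{"scorerA": 2, "scorerB": 1}

	sumOfWeights := 0
	for _, w := range weights {
		sumOfWeights += w // 3, mirroring sumOfScorersWeights
	}

	weighted := 0.0
	for name, s := range scores {
		// Same formula as runScorerPlugins: score * weight / sumOfScorersWeights.
		weighted += s * float64(weights[name]) / float64(sumOfWeights)
	}
	fmt.Printf("%.2f\n", weighted) // 0.9*2/3 + 0.3*1/3 = 0.70
}
```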

// Iterate through each scorer in the chain and accumulate the scores.
func (s *Scheduler) runScorersForPod(ctx *types.SchedulingContext, pod types.Pod) float64 {
logger := ctx.Logger.WithValues("pod", pod.GetPod().NamespacedName).V(logutil.DEBUG)
score := float64(0)
for _, scorer := range s.scorers {
logger.Info("Running scorer", "scorer", scorer.Name())
func (s *Scheduler) runPostSchedulePlugins(ctx *types.SchedulingContext, res *types.Result) {
for _, plugin := range s.postSchedulePlugins {
ctx.Logger.V(logutil.DEBUG).Info("Running post-schedule plugin", "plugin", plugin.Name())
before := time.Now()
oneScore := scorer.Score(ctx, pod)
metrics.RecordSchedulerPluginProcessingLatency(plugins.ScorerPluginType, scorer.Name(), time.Since(before))
score += oneScore
logger.Info("After scorer", "scorer", scorer.Name(), "score", oneScore, "total score", score)
plugin.PostSchedule(ctx, res)
metrics.RecordSchedulerPluginProcessingLatency(plugins.PostSchedulePluginType, plugin.Name(), time.Since(before))
}
return score
}

type defaultPlugin struct {