Merge pull request kubernetes-sigs#81 from irar2/load-test

irar2 · web-flow · commit 128d901e49ef · 2025-04-30T08:28:27.000+03:00
test: Test load based scorer
diff --git a/pkg/epp/scheduling/scorers_test.go b/pkg/epp/scheduling/scorers_test.go
@@ -0,0 +1,122 @@
+/*
+Copyright 2025 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package scheduling
+
+import (
+	"context"
+	"testing"
+
+	"github.com/google/go-cmp/cmp"
+	k8stypes "k8s.io/apimachinery/pkg/types"
+	backendmetrics "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend/metrics" // Import config for thresholds
+	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/plugins"
+	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/plugins/picker"
+	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/plugins/scorers"
+	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/types"
+)
+
+func TestScorers(t *testing.T) {
+	tests := []struct {
+		name    string
+		scorer  plugins.Scorer
+		req     *types.LLMRequest
+		input   []*backendmetrics.FakePodMetrics
+		wantRes *types.Result
+		err     bool
+	}{
+		{
+			name:   "load based scorer",
+			scorer: &scorers.LoadBasedScorer{},
+			req: &types.LLMRequest{
+				Model:               "critical",
+				ResolvedTargetModel: "critical",
+				Critical:            true,
+			},
+			// pod2 will be picked because it has the shortest queue
+			input: []*backendmetrics.FakePodMetrics{
+				{
+					Pod: &backendmetrics.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod1"}},
+					Metrics: &backendmetrics.Metrics{
+						WaitingQueueSize:    2,
+						KVCacheUsagePercent: 0.2,
+						MaxActiveModels:     2,
+						ActiveModels: map[string]int{
+							"foo": 1,
+							"bar": 1,
+						},
+					},
+				},
+				{
+					Pod: &backendmetrics.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod2"}},
+					Metrics: &backendmetrics.Metrics{
+						WaitingQueueSize:    0,
+						KVCacheUsagePercent: 0.2,
+						MaxActiveModels:     2,
+						ActiveModels: map[string]int{
+							"foo": 1,
+							"bar": 1,
+						},
+					},
+				},
+				{
+					Pod: &backendmetrics.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod3"}},
+					Metrics: &backendmetrics.Metrics{
+						WaitingQueueSize:    5,
+						KVCacheUsagePercent: 0.2,
+						MaxActiveModels:     2,
+						ActiveModels: map[string]int{
+							"foo": 1,
+							"bar": 1,
+						},
+					},
+				},
+			},
+			wantRes: &types.Result{
+				TargetPod: &types.PodMetrics{
+					Pod: &backendmetrics.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod2"}},
+					Metrics: &backendmetrics.Metrics{
+						WaitingQueueSize:    0,
+						KVCacheUsagePercent: 0.2,
+						MaxActiveModels:     2,
+						ActiveModels: map[string]int{
+							"foo": 1,
+							"bar": 1,
+						},
+						WaitingModels: map[string]int{},
+					},
+				},
+			},
+		},
+	}
+
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			scheduler := NewScheduler(&fakeDataStore{pods: test.input})
+			scheduler.scorers = map[plugins.Scorer]int{test.scorer: 1}
+			scheduler.picker = &picker.MaxScorePicker{}
+			got, err := scheduler.Schedule(context.Background(), test.req)
+			if test.err != (err != nil) {
+				t.Errorf("Unexpected error, got %v, want %v", err, test.err)
+			}
+
+			opt := cmp.AllowUnexported(types.PodMetrics{})
+			if diff := cmp.Diff(test.wantRes, got, opt); diff != "" {
+				t.Errorf("Unexpected output (-want +got): %v", diff)
+			}
+		})
+	}
+}