Skip to content

Commit d9db194

Browse files
committed
Cleanup - organize scheduler plugins by functionality instead of by type
1 parent 2b2b4a6 commit d9db194

15 files changed

+289
-175
lines changed

cmd/epp/main.go

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -45,10 +45,11 @@ import (
4545
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/metrics/collectors"
4646
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling"
4747
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/plugins"
48-
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/plugins/filter"
48+
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/plugins/capacity"
49+
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/plugins/kvcache"
4950
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/plugins/picker"
5051
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/plugins/prefix"
51-
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/plugins/scorer"
52+
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/plugins/queue"
5253
runserver "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/server"
5354
envutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/env"
5455
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging"
@@ -194,11 +195,11 @@ func run() error {
194195

195196
scheduler := scheduling.NewScheduler(datastore)
196197
if schedulerV2 == "true" {
197-
queueScorerWeight := envutil.GetEnvInt("QUEUE_SCORE_WEIGHT", scorer.DefaultQueueScorerWeight, setupLog)
198-
kvCacheScorerWeight := envutil.GetEnvInt("KV_CACHE_SCORE_WEIGHT", scorer.DefaultKVCacheScorerWeight, setupLog)
198+
queueScorerWeight := envutil.GetEnvInt("QUEUE_SCORE_WEIGHT", queue.DefaultQueueScorerWeight, setupLog)
199+
kvCacheScorerWeight := envutil.GetEnvInt("KV_CACHE_SCORE_WEIGHT", kvcache.DefaultKVCacheScorerWeight, setupLog)
199200
scorers := map[plugins.Scorer]int{
200-
&scorer.QueueScorer{}: queueScorerWeight,
201-
&scorer.KVCacheScorer{}: kvCacheScorerWeight,
201+
&queue.QueueScorer{}: queueScorerWeight,
202+
&kvcache.KVCacheScorer{}: kvCacheScorerWeight,
202203
}
203204
schedConfigOpts := []scheduling.ConfigOption{}
204205
if prefixCacheScheduling == "true" {
@@ -207,7 +208,7 @@ func run() error {
207208
}
208209
schedulerConfig := scheduling.NewSchedulerConfig(
209210
[]plugins.PreSchedule{},
210-
[]plugins.Filter{filter.NewSheddableCapacityFilter()},
211+
[]plugins.Filter{capacity.NewSheddableCapacityFilter()},
211212
scorers,
212213
picker.NewMaxScorePicker(),
213214
[]plugins.PostSchedule{},
(new test file, package capacity — file path not shown in this capture)
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
/*
2+
Copyright 2025 The Kubernetes Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package capacity
18+
19+
import (
20+
"context"
21+
"testing"
22+
23+
"github.com/google/go-cmp/cmp"
24+
backendmetrics "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend/metrics"
25+
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/plugins"
26+
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/types"
27+
)
28+
29+
func TestFilter(t *testing.T) {
30+
tests := []struct {
31+
name string
32+
req *types.LLMRequest
33+
filter plugins.Filter
34+
input []types.Pod
35+
output []types.Pod
36+
}{
37+
{
38+
name: "SheddableCapacityFilter, sheddable request",
39+
req: &types.LLMRequest{Critical: false},
40+
filter: &SheddableCapacityFilter{queueThreshold: 0, kvCacheThreshold: 0.8},
41+
input: []types.Pod{
42+
&types.PodMetrics{
43+
// This pod should be returned.
44+
MetricsState: &backendmetrics.MetricsState{
45+
WaitingQueueSize: 0,
46+
KVCacheUsagePercent: 0,
47+
},
48+
},
49+
&types.PodMetrics{
50+
// Queue is non-zero despite low KV cache usage, so this pod should not be returned.
51+
MetricsState: &backendmetrics.MetricsState{
52+
WaitingQueueSize: 1,
53+
KVCacheUsagePercent: 0.3,
54+
},
55+
},
56+
&types.PodMetrics{
57+
// KV cache usage is high despite an empty queue, so this pod should not be returned.
58+
MetricsState: &backendmetrics.MetricsState{
59+
WaitingQueueSize: 0,
60+
KVCacheUsagePercent: 1.0,
61+
},
62+
},
63+
},
64+
output: []types.Pod{
65+
&types.PodMetrics{
66+
MetricsState: &backendmetrics.MetricsState{
67+
WaitingQueueSize: 0,
68+
KVCacheUsagePercent: 0,
69+
},
70+
},
71+
},
72+
},
73+
}
74+
75+
for _, test := range tests {
76+
t.Run(test.name, func(t *testing.T) {
77+
ctx := types.NewSchedulingContext(context.Background(), test.req, nil, test.input)
78+
got := test.filter.Filter(ctx, test.input)
79+
80+
if diff := cmp.Diff(test.output, got); diff != "" {
81+
t.Errorf("Unexpected output (-want +got): %v", diff)
82+
}
83+
})
84+
}
85+
}

pkg/epp/scheduling/plugins/filter/sheddable_capacity_filter.go renamed to pkg/epp/scheduling/plugins/capacity/sheddable_capacity_filter.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ See the License for the specific language governing permissions and
1414
limitations under the License.
1515
*/
1616

17-
package filter
17+
package capacity
1818

1919
import (
2020
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/config"
(new test file, package kvcache — file path not shown in this capture)
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
/*
2+
Copyright 2025 The Kubernetes Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package kvcache
18+
19+
import (
20+
"context"
21+
"testing"
22+
23+
"github.com/google/go-cmp/cmp"
24+
backendmetrics "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend/metrics"
25+
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/plugins"
26+
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/types"
27+
)
28+
29+
func TestFilter(t *testing.T) {
30+
tests := []struct {
31+
name string
32+
req *types.LLMRequest
33+
filter plugins.Filter
34+
input []types.Pod
35+
output []types.Pod
36+
}{
37+
38+
{
39+
name: "least kv cache empty input",
40+
filter: NewLeastKVCacheFilter(),
41+
input: []types.Pod{},
42+
output: []types.Pod{},
43+
},
44+
{
45+
name: "least kv cache",
46+
filter: NewLeastKVCacheFilter(),
47+
input: []types.Pod{
48+
&types.PodMetrics{
49+
MetricsState: &backendmetrics.MetricsState{
50+
KVCacheUsagePercent: 0,
51+
},
52+
},
53+
&types.PodMetrics{
54+
MetricsState: &backendmetrics.MetricsState{
55+
KVCacheUsagePercent: 0.3,
56+
},
57+
},
58+
&types.PodMetrics{
59+
MetricsState: &backendmetrics.MetricsState{
60+
KVCacheUsagePercent: 1.0,
61+
},
62+
},
63+
},
64+
output: []types.Pod{
65+
&types.PodMetrics{
66+
MetricsState: &backendmetrics.MetricsState{
67+
KVCacheUsagePercent: 0,
68+
},
69+
},
70+
&types.PodMetrics{
71+
MetricsState: &backendmetrics.MetricsState{
72+
KVCacheUsagePercent: 0.3,
73+
},
74+
},
75+
},
76+
},
77+
}
78+
79+
for _, test := range tests {
80+
t.Run(test.name, func(t *testing.T) {
81+
ctx := types.NewSchedulingContext(context.Background(), test.req, nil, test.input)
82+
got := test.filter.Filter(ctx, test.input)
83+
84+
if diff := cmp.Diff(test.output, got); diff != "" {
85+
t.Errorf("Unexpected output (-want +got): %v", diff)
86+
}
87+
})
88+
}
89+
}

pkg/epp/scheduling/plugins/scorer/kvcache.go renamed to pkg/epp/scheduling/plugins/kvcache/kvcache_scorer.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ See the License for the specific language governing permissions and
1414
limitations under the License.
1515
*/
1616

17-
package scorer
17+
package kvcache
1818

1919
import (
2020
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/types"

pkg/epp/scheduling/plugins/scorer/kvcache_test.go renamed to pkg/epp/scheduling/plugins/kvcache/kvcache_scorer_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ See the License for the specific language governing permissions and
1414
limitations under the License.
1515
*/
1616

17-
package scorer
17+
package kvcache
1818

1919
import (
2020
"context"

pkg/epp/scheduling/plugins/filter/least_kvcache_filter.go renamed to pkg/epp/scheduling/plugins/kvcache/least_kvcache_filter.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ See the License for the specific language governing permissions and
1414
limitations under the License.
1515
*/
1616

17-
package filter
17+
package kvcache
1818

1919
import (
2020
"math"

0 commit comments

Comments
 (0)