Skip to content

Commit 2dce3ea

Browse files
merge has capacity filter with sheddable filter. (#809)
* merge has capacity filter with sheddable filter. has capacity only use was for sheddable requests (passthrough for critical ones). Signed-off-by: Nir Rozenbaum <[email protected]> * Update pkg/epp/scheduling/plugins/filter/filter_test.go Co-authored-by: Cong Liu <[email protected]> --------- Signed-off-by: Nir Rozenbaum <[email protected]> Co-authored-by: Cong Liu <[email protected]>
1 parent 4029a37 commit 2dce3ea

File tree

4 files changed

+17
-65
lines changed

4 files changed

+17
-65
lines changed

pkg/epp/scheduling/plugins/filter/filter_test.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -131,8 +131,9 @@ func TestFilter(t *testing.T) {
131131
},
132132
},
133133
{
134-
name: "lowQueueAndLessThanKVCacheThresholdPredicate",
135-
filter: &HasCapacityFilter{queueThreshold: 0, kvCacheThreshold: 0.8},
134+
name: "SheddableCapacityFilter, sheddable request",
135+
req: &types.LLMRequest{Critical: false},
136+
filter: &SheddableCapacityFilter{queueThreshold: 0, kvCacheThreshold: 0.8},
136137
input: []types.Pod{
137138
&types.PodMetrics{
138139
// This pod should be returned.

pkg/epp/scheduling/plugins/filter/has_capacity_filter.go renamed to pkg/epp/scheduling/plugins/filter/sheddable_capacity_filter.go

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -23,29 +23,33 @@ import (
2323
)
2424

2525
// compile-time type validation
26-
var _ plugins.Filter = &HasCapacityFilter{}
26+
var _ plugins.Filter = &SheddableCapacityFilter{}
2727

28-
// NewHasCapacityFilter returns a new HasCapacityFilter.
29-
func NewHasCapacityFilter() *HasCapacityFilter {
30-
return &HasCapacityFilter{
28+
// NewSheddableCapacityFilter returns a new SheddableCapacityFilter.
29+
func NewSheddableCapacityFilter() *SheddableCapacityFilter {
30+
return &SheddableCapacityFilter{
3131
queueThreshold: config.Conf.QueueThresholdCritical,
3232
kvCacheThreshold: config.Conf.KVCacheThreshold,
3333
}
3434
}
3535

36-
// HasCapacityFilter filters only pods that has capacity for sheddable requests.
37-
type HasCapacityFilter struct {
36+
// SheddableCapacityFilter filters only pods that has capacity for sheddable requests.
37+
type SheddableCapacityFilter struct {
3838
queueThreshold int
3939
kvCacheThreshold float64
4040
}
4141

4242
// Name returns the name of the filter.
43-
func (f *HasCapacityFilter) Name() string {
44-
return "has-capacity"
43+
func (f *SheddableCapacityFilter) Name() string {
44+
return "sheddable-capacity"
4545
}
4646

4747
// Filter filters out pods that doesn't meet the filter criteria.
48-
func (f *HasCapacityFilter) Filter(ctx *types.SchedulingContext, pods []types.Pod) []types.Pod {
48+
func (f *SheddableCapacityFilter) Filter(ctx *types.SchedulingContext, pods []types.Pod) []types.Pod {
49+
if ctx.Req.Critical {
50+
return pods // // Allow all pods to passthrough if the request is critical, even if all pods reach their capacity.
51+
}
52+
4953
filteredPods := []types.Pod{}
5054

5155
for _, pod := range pods {

pkg/epp/scheduling/plugins/filter/sheddable_request_filter.go

Lines changed: 0 additions & 53 deletions
This file was deleted.

pkg/epp/scheduling/scheduler.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ func NewScheduler(datastore Datastore) *Scheduler {
6666

6767
defaultConfig := &SchedulerConfig{
6868
preSchedulePlugins: []plugins.PreSchedule{},
69-
filters: []plugins.Filter{filter.NewSheddableRequestFilter(), lowLatencyFilter},
69+
filters: []plugins.Filter{filter.NewSheddableCapacityFilter(), lowLatencyFilter},
7070
scorers: map[plugins.Scorer]int{},
7171
picker: &picker.RandomPicker{},
7272
postSchedulePlugins: []plugins.PostSchedule{},

0 commit comments

Comments
 (0)