Skip to content

Commit 1a7cf07

Browse files
committed
Address comments
1 parent de87cd3 commit 1a7cf07

File tree

4 files changed

+16
-17
lines changed

4 files changed

+16
-17
lines changed

pkg/epp/scheduling/plugins/filter.go

Lines changed: 10 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -28,19 +28,19 @@ import (
2828
logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging"
2929
)
3030

31-
type BasicFilter struct {
31+
type Filter struct {
3232
name string
3333
filter filterFunc
3434
}
3535

36-
func (bf *BasicFilter) Name() string {
36+
func (bf *Filter) Name() string {
3737
if bf == nil {
3838
return "nil"
3939
}
4040
return bf.name
4141
}
4242

43-
func (bf *BasicFilter) Filter(ctx *types.Context, pods []types.Pod) ([]types.Pod, error) {
43+
func (bf *Filter) Filter(ctx *types.Context, pods []types.Pod) ([]types.Pod, error) {
4444
loggerTrace := ctx.Logger.V(logutil.TRACE)
4545
loggerTrace.Info("Running a filter", "name", bf.Name(), "podCount", len(pods))
4646

@@ -123,7 +123,7 @@ func toFilterFunc(pp podPredicate) filterFunc {
123123
}
124124
}
125125

126-
var LeastQueueFilter = &BasicFilter{
126+
var LeastQueueFilter = &Filter{
127127
name: "least queuing",
128128
filter: leastQueuingFilterFunc,
129129
}
@@ -157,12 +157,12 @@ func leastQueuingFilterFunc(ctx *types.Context, pods []types.Pod) ([]types.Pod,
157157
return filtered, nil
158158
}
159159

160-
var LowQueueFilter = &BasicFilter{
160+
var LowQueueFilter = &Filter{
161161
name: "low queueing filter",
162162
filter: toFilterFunc((queueThresholdPredicate(config.Conf.QueueingThresholdLoRA))),
163163
}
164164

165-
var LeastKVCacheFilter = &BasicFilter{
165+
var LeastKVCacheFilter = &Filter{
166166
name: "least KV cache percent",
167167
filter: leastKVCacheFilterFunc,
168168
}
@@ -195,7 +195,7 @@ func leastKVCacheFilterFunc(ctx *types.Context, pods []types.Pod) ([]types.Pod,
195195
return filtered, nil
196196
}
197197

198-
var LoRAAffinityFilter = &BasicFilter{
198+
var LoRAAffinityFilter = &Filter{
199199
name: "affinity LoRA",
200200
filter: loRASoftAffinityFilterFunc,
201201
}
@@ -254,12 +254,12 @@ func loRASoftAffinityFilterFunc(ctx *types.Context, pods []types.Pod) ([]types.P
254254
return filtered_available, nil
255255
}
256256

257-
var HasCapacityFilter = &BasicFilter{
257+
var HasCapacityFilter = &Filter{
258258
name: "has capacity for sheddable requests",
259259
filter: toFilterFunc(queueThresholdPredicate(config.Conf.QueueThresholdCritical).and(kvCacheThresholdPredicate(config.Conf.KVCacheThreshold))),
260260
}
261261

262-
var DropRequestFilter = &BasicFilter{
262+
var DropRequestFilter = &Filter{
263263
name: "drop request",
264264
filter: func(ctx *types.Context, pods []types.Pod) ([]types.Pod, error) {
265265
ctx.Logger.V(logutil.DEFAULT).Info("Request dropped", "request", ctx.Req)
@@ -269,7 +269,7 @@ var DropRequestFilter = &BasicFilter{
269269
},
270270
}
271271

272-
var NoopFilter = &BasicFilter{
272+
var NoopFilter = &Filter{
273273
name: "noop",
274274
filter: func(ctx *types.Context, pods []types.Pod) ([]types.Pod, error) {
275275
ctx.Logger.V(logutil.DEBUG).Info("All pods pass")

pkg/epp/scheduling/plugins/filter_test.go

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -40,7 +40,7 @@ func TestFilter(t *testing.T) {
4040
{
4141
name: "simple filter without successor, failure",
4242
filter: &DecisionTreeFilter{
43-
Current: &BasicFilter{
43+
Current: &Filter{
4444
name: "error",
4545
filter: func(ctx *types.Context, pods []types.Pod) ([]types.Pod, error) {
4646
return nil, errors.New("filter error")

pkg/epp/scheduling/scheduler.go

Lines changed: 4 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -70,9 +70,9 @@ func NewScheduler(datastore Datastore) *Scheduler {
7070

7171
return &Scheduler{
7272
datastore: datastore,
73-
preSchedulePlugins: []types.PreSchedule{defaultPlugin},
74-
postSchedulePlugins: []types.PostSchedule{defaultPlugin},
75-
scorers: []types.Scorer{defaultPlugin},
73+
preSchedulePlugins: []types.PreSchedule{},
74+
postSchedulePlugins: []types.PostSchedule{},
75+
scorers: []types.Scorer{},
7676
filters: []types.Filter{defaultPlugin},
7777
picker: defaultPlugin,
7878
}
@@ -97,7 +97,7 @@ func (s *Scheduler) Schedule(ctx context.Context, req *types.LLMRequest) (*types
9797
loggerDebug := logger.V(logutil.DEBUG)
9898

9999
// Snapshot pod metrics from the datastore to:
100-
// 1. Reduce conCurrent access to the datastore.
100+
// 1. Reduce concurrent access to the datastore.
101101
// 2. Ensure consistent data during the scheduling operation of a request.
102102
sCtx := types.NewContext(ctx, req, types.ToSchedulerPodMetrics(s.datastore.PodGetAll()))
103103
loggerDebug.Info(fmt.Sprintf("Scheduling a request. Metrics: %+v", sCtx.PodsSnapshot))
@@ -197,7 +197,6 @@ func runScorersForPod(ctx *types.Context, scorers []types.Scorer, pod types.Pod)
197197
}
198198

199199
type defaultPlugin struct {
200-
plugins.NoopPlugin
201200
plugins.RandomPicker
202201
}
203202

pkg/epp/scheduling/types/types.go

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -37,7 +37,7 @@ type LLMRequest struct {
3737
}
3838

3939
func (r *LLMRequest) String() string {
40-
return fmt.Sprintf("Model: %s, TargetModels: %v, ResolvedTargetModel: %s, Critical: %t, Prompt Size: %v", r.Model, r.TargetModels, r.ResolvedTargetModel, r.Critical, len(r.Prompt))
40+
return fmt.Sprintf("Model: %s, TargetModels: %v, ResolvedTargetModel: %s, Critical: %t, PromptLength: %v", r.Model, r.TargetModels, r.ResolvedTargetModel, r.Critical, len(r.Prompt))
4141
}
4242

4343
// Context holds contextual information during a scheduling operation.

0 commit comments

Comments
 (0)