@@ -14,56 +14,55 @@ See the License for the specific language governing permissions and
limitations under the License.
*/

-package plugins
+package filter

import (
-    "errors"
    "math"
    "math/rand"
    "time"

    "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/config"
+    "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/plugins"
    "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/types"
-    errutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/error"
    logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging"
)

-type Filter struct {
+type baseFilter struct {
    name   string
    filter filterFunc
}

-func (bf *Filter) Name() string {
-    if bf == nil {
+func (f *baseFilter) Name() string {
+    if f == nil {
        return "nil"
    }
-    return bf.name
+    return f.name
}

-func (bf *Filter) Filter(ctx *types.Context, pods []types.Pod) ([]types.Pod, error) {
+func (f *baseFilter) Filter(ctx *types.SchedulingContext, pods []types.Pod) []types.Pod {
    loggerTrace := ctx.Logger.V(logutil.TRACE)
-    loggerTrace.Info("Running a filter", "name", bf.Name(), "podCount", len(pods))
+    loggerTrace.Info("Running a filter", "name", f.Name(), "podCount", len(pods))

-    return bf.filter(ctx, pods)
+    return f.filter(ctx, pods)
}

// DecisionTreeFilter applies current filterFunc, and then recursively applies next filters
// depending success or failure of the current filter.
// It can be used to construct a flow chart algorithm.
type DecisionTreeFilter struct {
-    Current types.Filter
+    Current plugins.Filter
    // NextOnSuccess filter will be applied after successfully applying the current filter.
    // The filtered results will be passed to the next filter.
-    NextOnSuccess types.Filter
+    NextOnSuccess plugins.Filter
    // NextOnFailure filter will be applied if current filter fails.
    // The original input will be passed to the next filter.
-    NextOnFailure types.Filter
+    NextOnFailure plugins.Filter
    // NextOnSuccessOrFailure is a convenience field to configure the next filter regardless of the
    // success or failure of the current filter.
    // NOTE: When using NextOnSuccessOrFailure, both nextOnSuccess and nextOnFailure SHOULD be nil.
    // However if that's not the case, nextOnSuccess and nextOnFailure will be used, instead of
    // NextOnSuccessOrFailure, in the success and failure scenarios, respectively.
-    NextOnSuccessOrFailure types.Filter
+    NextOnSuccessOrFailure plugins.Filter
}

func (f *DecisionTreeFilter) Name() string {
@@ -73,15 +72,15 @@ func (f *DecisionTreeFilter) Name() string {
    return f.Current.Name()
}

-func (f *DecisionTreeFilter) Filter(ctx *types.Context, pods []types.Pod) ([]types.Pod, error) {
+func (f *DecisionTreeFilter) Filter(ctx *types.SchedulingContext, pods []types.Pod) []types.Pod {
    loggerTrace := ctx.Logger.V(logutil.TRACE)
-    filtered, err := f.Current.Filter(ctx, pods)
+    filtered := f.Current.Filter(ctx, pods)

    next := f.NextOnSuccessOrFailure
-    if err == nil && len(filtered) > 0 {
+    if len(filtered) > 0 {
        if f.NextOnSuccess == nil && f.NextOnSuccessOrFailure == nil {
            // No succeeding filters to run, return.
-            return filtered, err
+            return filtered
        }
        if f.NextOnSuccess != nil {
            next = f.NextOnSuccess
@@ -92,7 +91,7 @@ func (f *DecisionTreeFilter) Filter(ctx *types.Context, pods []types.Pod) ([]typ
    } else {
        if f.NextOnFailure == nil && f.NextOnSuccessOrFailure == nil {
            // No succeeding filters to run, return.
-            return filtered, err
+            return filtered
        }
        if f.NextOnFailure != nil {
            next = f.NextOnFailure
@@ -104,26 +103,24 @@ func (f *DecisionTreeFilter) Filter(ctx *types.Context, pods []types.Pod) ([]typ
}

// filterFunc filters a set of input pods to a subset.
-type filterFunc func(ctx *types.Context, pods []types.Pod) ([]types.Pod, error)
+type filterFunc func(ctx *types.SchedulingContext, pods []types.Pod) []types.Pod

// toFilterFunc is a helper function to convert a per pod filter func to the FilterFunc.
func toFilterFunc(pp podPredicate) filterFunc {
-    return func(ctx *types.Context, pods []types.Pod) ([]types.Pod, error) {
+    return func(ctx *types.SchedulingContext, pods []types.Pod) []types.Pod {
        filtered := []types.Pod{}
        for _, pod := range pods {
            pass := pp(ctx.Req, pod)
            if pass {
                filtered = append(filtered, pod)
            }
        }
-        if len(filtered) == 0 {
-            return nil, errors.New("no pods left")
-        }
-        return filtered, nil
+
+        return filtered
    }
}

-var LeastQueueFilter = &Filter{
+var LeastQueueFilter = &baseFilter{
    name:   "least queuing",
    filter: leastQueuingFilterFunc,
}
@@ -135,7 +132,7 @@ var LeastQueueFilter = &Filter{
// the least one as it gives more choices for the next filter, which on aggregate gave better
// results.
// TODO: Compare this strategy with other strategies such as top K.
-func leastQueuingFilterFunc(ctx *types.Context, pods []types.Pod) ([]types.Pod, error) {
+func leastQueuingFilterFunc(ctx *types.SchedulingContext, pods []types.Pod) []types.Pod {
    min := math.MaxInt
    max := 0
    filtered := []types.Pod{}
@@ -154,15 +151,15 @@ func leastQueuingFilterFunc(ctx *types.Context, pods []types.Pod) ([]types.Pod,
            filtered = append(filtered, pod)
        }
    }
-    return filtered, nil
+    return filtered
}

-var LowQueueFilter = &Filter{
+var LowQueueFilter = &baseFilter{
    name:   "low queueing filter",
    filter: toFilterFunc((queueThresholdPredicate(config.Conf.QueueingThresholdLoRA))),
}

-var LeastKVCacheFilter = &Filter{
+var LeastKVCacheFilter = &baseFilter{
    name:   "least KV cache percent",
    filter: leastKVCacheFilterFunc,
}
@@ -173,7 +170,7 @@ var LeastKVCacheFilter = &Filter{
// should consider them all instead of the absolute minimum one. This worked better than picking the
// least one as it gives more choices for the next filter, which on aggregate gave better results.
// TODO: Compare this strategy with other strategies such as top K.
-func leastKVCacheFilterFunc(ctx *types.Context, pods []types.Pod) ([]types.Pod, error) {
+func leastKVCacheFilterFunc(ctx *types.SchedulingContext, pods []types.Pod) []types.Pod {
    min := math.MaxFloat64
    var max float64 = 0
    filtered := []types.Pod{}
@@ -192,10 +189,10 @@ func leastKVCacheFilterFunc(ctx *types.Context, pods []types.Pod) ([]types.Pod,
            filtered = append(filtered, pod)
        }
    }
-    return filtered, nil
+    return filtered
}

-var LoRAAffinityFilter = &Filter{
+var LoRAAffinityFilter = &baseFilter{
    name:   "affinity LoRA",
    filter: loRASoftAffinityFilterFunc,
}
@@ -216,7 +213,7 @@ var LoRAAffinityFilter = &Filter{
// Returns:
//   - Filtered slice of pod metrics based on affinity and availability
//   - Error if any issues occur during filtering
-func loRASoftAffinityFilterFunc(ctx *types.Context, pods []types.Pod) ([]types.Pod, error) {
+func loRASoftAffinityFilterFunc(ctx *types.SchedulingContext, pods []types.Pod) []types.Pod {

    // Pre-allocate slices with estimated capacity
    filtered_affinity := make([]types.Pod, 0, len(pods))
@@ -241,34 +238,24 @@ func loRASoftAffinityFilterFunc(ctx *types.Context, pods []types.Pod) ([]types.P
    // If both groups have pods, use probability to select which group to return
    if len(filtered_affinity) > 0 && len(filtered_available) > 0 {
        if randGen.Float64() < config.Conf.LoraAffinityThreshold {
-            return filtered_affinity, nil
+            return filtered_affinity
        }
-        return filtered_available, nil
+        return filtered_available
    }

    // Return whichever group has pods
    if len(filtered_affinity) > 0 {
-        return filtered_affinity, nil
+        return filtered_affinity
    }

-    return filtered_available, nil
+    return filtered_available
}

-var HasCapacityFilter = &Filter{
+var HasCapacityFilter = &baseFilter{
    name:   "has capacity for sheddable requests",
    filter: toFilterFunc(queueThresholdPredicate(config.Conf.QueueThresholdCritical).and(kvCacheThresholdPredicate(config.Conf.KVCacheThreshold))),
}

-var DropRequestFilter = &Filter{
-    name: "drop request",
-    filter: func(ctx *types.Context, pods []types.Pod) ([]types.Pod, error) {
-        ctx.Logger.V(logutil.DEFAULT).Info("Request dropped", "request", ctx.Req)
-        return []types.Pod{}, errutil.Error{
-            Code: errutil.InferencePoolResourceExhausted, Msg: "dropping request due to limited backend resources",
-        }
-    },
-}
-

// podPredicate is a filter function to check whether a pod is desired.
type podPredicate func(req *types.LLMRequest, pod types.Pod) bool
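Note (not part of the diff): after this change both *baseFilter and *DecisionTreeFilter can satisfy the plugins.Filter interface directly, since Filter no longer returns an error. Below is a minimal sketch of how the refactored, package-level filters could be composed into a decision tree inside this package; the variable name and the exact chain are illustrative assumptions, not wiring taken from this PR.

// Illustrative sketch only (assumed wiring, not from this PR): compose the
// package-level filters into a decision tree. Each field accepts a
// plugins.Filter, which *baseFilter and *DecisionTreeFilter are assumed to
// satisfy after this change.
var exampleTree = &DecisionTreeFilter{
    // Try pods below the LoRA queueing threshold first.
    Current: LowQueueFilter,
    // Among those, prefer LoRA affinity, then the least-queued pods, then the
    // lowest KV-cache utilization.
    NextOnSuccess: &DecisionTreeFilter{
        Current: LoRAAffinityFilter,
        NextOnSuccessOrFailure: &DecisionTreeFilter{
            Current: LeastQueueFilter,
            NextOnSuccessOrFailure: &DecisionTreeFilter{
                Current: LeastKVCacheFilter,
            },
        },
    },
    // If no pod passes the low-queue check, fall back to the capacity check
    // used for sheddable requests.
    NextOnFailure: HasCapacityFilter,
}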