@@ -17,20 +17,15 @@ limitations under the License.
 package scheduling
 
 import (
-	"errors"
 	"math"
 	"math/rand"
 	"time"
 
+	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/plugins"
 	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/types"
 	logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging"
 )
 
-type Filter interface {
-	Name() string
-	Filter(ctx *types.Context, pods []*types.PodMetrics) ([]*types.PodMetrics, error)
-}
-
 type basicFilter struct {
 	name   string
 	filter filterFunc
@@ -43,7 +38,7 @@ func (bf *basicFilter) Name() string {
 	return bf.name
 }
 
-func (bf *basicFilter) Filter(ctx *types.Context, pods []*types.PodMetrics) ([]*types.PodMetrics, error) {
+func (bf *basicFilter) Filter(ctx *types.SchedulingContext, pods []types.Pod) []types.Pod {
 	loggerTrace := ctx.Logger.V(logutil.TRACE)
 	loggerTrace.Info("Running a filter", "name", bf.Name(), "podCount", len(pods))
 
@@ -54,19 +49,19 @@ func (bf *basicFilter) Filter(ctx *types.Context, pods []*types.PodMetrics) ([]*
 // depending success or failure of the current filter.
 // It can be used to construct a flow chart algorithm.
 type decisionTreeFilter struct {
-	current Filter
+	current plugins.Filter
 	// nextOnSuccess filter will be applied after successfully applying the current filter.
 	// The filtered results will be passed to the next filter.
-	nextOnSuccess Filter
+	nextOnSuccess plugins.Filter
 	// nextOnFailure filter will be applied if current filter fails.
 	// The original input will be passed to the next filter.
-	nextOnFailure Filter
+	nextOnFailure plugins.Filter
 	// nextOnSuccessOrFailure is a convenience field to configure the next filter regardless of the
 	// success or failure of the current filter.
 	// NOTE: When using nextOnSuccessOrFailure, both nextOnSuccess and nextOnFailure SHOULD be nil.
 	// However if that's not the case, nextOnSuccess and nextOnFailure will be used, instead of
 	// nextOnSuccessOrFailure, in the success and failure scenarios, respectively.
-	nextOnSuccessOrFailure Filter
+	nextOnSuccessOrFailure plugins.Filter
 }
 
 func (f *decisionTreeFilter) Name() string {
@@ -76,15 +71,15 @@ func (f *decisionTreeFilter) Name() string {
 	return f.current.Name()
 }
 
-func (f *decisionTreeFilter) Filter(ctx *types.Context, pods []*types.PodMetrics) ([]*types.PodMetrics, error) {
+func (f *decisionTreeFilter) Filter(ctx *types.SchedulingContext, pods []types.Pod) []types.Pod {
 	loggerTrace := ctx.Logger.V(logutil.TRACE)
-	filtered, err := f.current.Filter(ctx, pods)
+	filtered := f.current.Filter(ctx, pods)
 
 	next := f.nextOnSuccessOrFailure
-	if err == nil && len(filtered) > 0 {
+	if len(filtered) > 0 {
 		if f.nextOnSuccess == nil && f.nextOnSuccessOrFailure == nil {
 			// No succeeding filters to run, return.
-			return filtered, err
+			return filtered
 		}
 		if f.nextOnSuccess != nil {
 			next = f.nextOnSuccess
@@ -95,7 +90,7 @@ func (f *decisionTreeFilter) Filter(ctx *types.Context, pods []*types.PodMetrics
 	} else {
 		if f.nextOnFailure == nil && f.nextOnSuccessOrFailure == nil {
 			// No succeeding filters to run, return.
-			return filtered, err
+			return filtered
 		}
 		if f.nextOnFailure != nil {
 			next = f.nextOnFailure
@@ -107,22 +102,20 @@ func (f *decisionTreeFilter) Filter(ctx *types.Context, pods []*types.PodMetrics
 }
 
 // filterFunc filters a set of input pods to a subset.
-type filterFunc func(ctx *types.Context, pods []*types.PodMetrics) ([]*types.PodMetrics, error)
+type filterFunc func(ctx *types.SchedulingContext, pods []types.Pod) []types.Pod
 
 // toFilterFunc is a helper function to convert a per pod filter func to the FilterFunc.
 func toFilterFunc(pp podPredicate) filterFunc {
-	return func(ctx *types.Context, pods []*types.PodMetrics) ([]*types.PodMetrics, error) {
-		filtered := []*types.PodMetrics{}
+	return func(ctx *types.SchedulingContext, pods []types.Pod) []types.Pod {
+		filtered := []types.Pod{}
 		for _, pod := range pods {
 			pass := pp(ctx.Req, pod)
 			if pass {
 				filtered = append(filtered, pod)
 			}
 		}
-		if len(filtered) == 0 {
-			return nil, errors.New("no pods left")
-		}
-		return filtered, nil
+
+		return filtered
 	}
 }
 
@@ -138,26 +131,26 @@ var leastQueueFilter = &basicFilter{
 // the least one as it gives more choices for the next filter, which on aggregate gave better
 // results.
 // TODO: Compare this strategy with other strategies such as top K.
-func leastQueuingFilterFunc(ctx *types.Context, pods []*types.PodMetrics) ([]*types.PodMetrics, error) {
+func leastQueuingFilterFunc(ctx *types.SchedulingContext, pods []types.Pod) []types.Pod {
 	min := math.MaxInt
 	max := 0
-	filtered := []*types.PodMetrics{}
+	filtered := []types.Pod{}
 
 	for _, pod := range pods {
-		if pod.WaitingQueueSize <= min {
-			min = pod.WaitingQueueSize
+		if pod.GetMetrics().WaitingQueueSize <= min {
+			min = pod.GetMetrics().WaitingQueueSize
 		}
-		if pod.WaitingQueueSize >= max {
-			max = pod.WaitingQueueSize
+		if pod.GetMetrics().WaitingQueueSize >= max {
+			max = pod.GetMetrics().WaitingQueueSize
 		}
 	}
 
 	for _, pod := range pods {
-		if pod.WaitingQueueSize >= min && pod.WaitingQueueSize <= min+(max-min)/len(pods) {
+		if pod.GetMetrics().WaitingQueueSize >= min && pod.GetMetrics().WaitingQueueSize <= min+(max-min)/len(pods) {
 			filtered = append(filtered, pod)
 		}
 	}
-	return filtered, nil
+	return filtered
 }
 
 var lowQueueFilter = &basicFilter{
@@ -176,26 +169,26 @@ var leastKVCacheFilter = &basicFilter{
 // should consider them all instead of the absolute minimum one. This worked better than picking the
 // least one as it gives more choices for the next filter, which on aggregate gave better results.
 // TODO: Compare this strategy with other strategies such as top K.
-func leastKVCacheFilterFunc(ctx *types.Context, pods []*types.PodMetrics) ([]*types.PodMetrics, error) {
+func leastKVCacheFilterFunc(ctx *types.SchedulingContext, pods []types.Pod) []types.Pod {
 	min := math.MaxFloat64
 	var max float64 = 0
-	filtered := []*types.PodMetrics{}
+	filtered := []types.Pod{}
 
 	for _, pod := range pods {
-		if pod.KVCacheUsagePercent <= min {
-			min = pod.KVCacheUsagePercent
+		if pod.GetMetrics().KVCacheUsagePercent <= min {
+			min = pod.GetMetrics().KVCacheUsagePercent
 		}
-		if pod.KVCacheUsagePercent >= max {
-			max = pod.KVCacheUsagePercent
+		if pod.GetMetrics().KVCacheUsagePercent >= max {
+			max = pod.GetMetrics().KVCacheUsagePercent
 		}
 	}
 
 	for _, pod := range pods {
-		if pod.KVCacheUsagePercent >= min && pod.KVCacheUsagePercent <= min+(max-min)/float64(len(pods)) {
+		if pod.GetMetrics().KVCacheUsagePercent >= min && pod.GetMetrics().KVCacheUsagePercent <= min+(max-min)/float64(len(pods)) {
 			filtered = append(filtered, pod)
 		}
 	}
-	return filtered, nil
+	return filtered
 }
 
 var loRAAffinityFilter = &basicFilter{
@@ -219,20 +212,20 @@ var loRAAffinityFilter = &basicFilter{
 // Returns:
 //   - Filtered slice of pod metrics based on affinity and availability
 //   - Error if any issues occur during filtering
-func loRASoftAffinityFilterFunc(ctx *types.Context, pods []*types.PodMetrics) ([]*types.PodMetrics, error) {
+func loRASoftAffinityFilterFunc(ctx *types.SchedulingContext, pods []types.Pod) []types.Pod {
 
 	// Pre-allocate slices with estimated capacity
-	filtered_affinity := make([]*types.PodMetrics, 0, len(pods))
-	filtered_available := make([]*types.PodMetrics, 0, len(pods))
+	filtered_affinity := make([]types.Pod, 0, len(pods))
+	filtered_available := make([]types.Pod, 0, len(pods))
 
 	// Categorize pods based on affinity and availability
 	for _, pod := range pods {
-		_, active := pod.ActiveModels[ctx.Req.ResolvedTargetModel]
-		_, waiting := pod.WaitingModels[ctx.Req.ResolvedTargetModel]
+		_, active := pod.GetMetrics().ActiveModels[ctx.Req.ResolvedTargetModel]
+		_, waiting := pod.GetMetrics().WaitingModels[ctx.Req.ResolvedTargetModel]
 
 		if active || waiting {
 			filtered_affinity = append(filtered_affinity, pod)
-		} else if len(pod.ActiveModels)+len(pod.WaitingModels) < pod.MaxActiveModels {
+		} else if len(pod.GetMetrics().ActiveModels)+len(pod.GetMetrics().WaitingModels) < pod.GetMetrics().MaxActiveModels {
 			filtered_available = append(filtered_available, pod)
 		}
 	}
@@ -244,36 +237,36 @@ func loRASoftAffinityFilterFunc(ctx *types.Context, pods []*types.PodMetrics) ([
 	// If both groups have pods, use probability to select which group to return
 	if len(filtered_affinity) > 0 && len(filtered_available) > 0 {
 		if randGen.Float64() < config.LoraAffinityThreshold {
-			return filtered_affinity, nil
+			return filtered_affinity
 		}
-		return filtered_available, nil
+		return filtered_available
 	}
 
 	// Return whichever group has pods
 	if len(filtered_affinity) > 0 {
-		return filtered_affinity, nil
+		return filtered_affinity
 	}
 
-	return filtered_available, nil
+	return filtered_available
 }
 
 // podPredicate is a filter function to check whether a pod is desired.
-type podPredicate func(req *types.LLMRequest, pod *types.PodMetrics) bool
+type podPredicate func(req *types.LLMRequest, pod types.Pod) bool
 
 func queueThresholdPredicate(queueThreshold int) podPredicate {
-	return func(req *types.LLMRequest, pod *types.PodMetrics) bool {
-		return pod.WaitingQueueSize <= queueThreshold
+	return func(req *types.LLMRequest, pod types.Pod) bool {
+		return pod.GetMetrics().WaitingQueueSize <= queueThreshold
 	}
 }
 
 func kvCacheThresholdPredicate(kvCacheThreshold float64) podPredicate {
-	return func(req *types.LLMRequest, pod *types.PodMetrics) bool {
-		return pod.KVCacheUsagePercent <= kvCacheThreshold
+	return func(req *types.LLMRequest, pod types.Pod) bool {
+		return pod.GetMetrics().KVCacheUsagePercent <= kvCacheThreshold
 	}
 }
 
 func (pp podPredicate) and(another podPredicate) podPredicate {
-	return func(req *types.LLMRequest, pod *types.PodMetrics) bool {
+	return func(req *types.LLMRequest, pod types.Pod) bool {
 		return pp(req, pod) && another(req, pod)
 	}
 }
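
For context on how the reworked types fit together, below is a minimal sketch of composing filters into the "flow chart" the decisionTreeFilter comments describe. It assumes plugins.Filter mirrors the removed local interface with the new signatures (Name() string and Filter(ctx *types.SchedulingContext, pods []types.Pod) []types.Pod), and it reuses the lowQueueFilter, leastQueueFilter, and leastKVCacheFilter vars defined in this file; the specific tree wiring is illustrative, not the repository's actual scheduler configuration.

// Illustrative composition only (same package as the filters above).
// Assumption: *basicFilter and *decisionTreeFilter both satisfy plugins.Filter.
var exampleTree plugins.Filter = &decisionTreeFilter{
	// First, keep only pods whose waiting queue is under the threshold.
	current: lowQueueFilter,
	// If some pods pass, narrow further: least queuing, then least KV-cache usage.
	nextOnSuccess: &decisionTreeFilter{
		current:                leastQueueFilter,
		nextOnSuccessOrFailure: &decisionTreeFilter{current: leastKVCacheFilter},
	},
	// If no pod passes, the original input flows to nextOnFailure, so fall back
	// to the pods with the least KV-cache usage.
	nextOnFailure: &decisionTreeFilter{current: leastKVCacheFilter},
}

// With this change a filter returns only the surviving pods; an empty slice
// replaces the old "no pods left" error, e.g.:
//	candidates := exampleTree.Filter(schedCtx, allPods)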