@@ -4,43 +4,46 @@ import (
4
4
"errors"
5
5
"math"
6
6
7
+ "google.golang.org/grpc/codes"
8
+ "google.golang.org/grpc/status"
7
9
"inference.networking.x-k8s.io/gateway-api-inference-extension/pkg/ext-proc/backend"
10
+
8
11
klog "k8s.io/klog/v2"
9
12
)
10
13
11
- type Filter interface {
14
+ type FilterChain interface {
12
15
Name () string
13
16
Filter (req * LLMRequest , pods []* backend.PodMetrics ) ([]* backend.PodMetrics , error )
14
17
}
15
18
16
- // filter applies current filterFunc, and then recursively applies next filters depending success or
19
+ // filterChainImpl applies current filterFunc, and then recursively applies next filters depending success or
17
20
// failure of the current filterFunc.
18
21
// It can be used to construct a flow chart algorithm.
19
- type filter struct {
22
+ type filterChainImpl struct {
20
23
name string
21
- filter filterFunc
24
+ filter filter
22
25
// nextOnSuccess filter will be applied after successfully applying the current filter.
23
26
// The filtered results will be passed to the next filter.
24
- nextOnSuccess * filter
27
+ nextOnSuccess * filterChainImpl
25
28
// nextOnFailure filter will be applied if current filter fails.
26
29
// The original input will be passed to the next filter.
27
- nextOnFailure * filter
30
+ nextOnFailure * filterChainImpl
28
31
// nextOnSuccessOrFailure is a convenience field to configure the next filter regardless of the
29
32
// success or failure of the current filter.
30
33
// NOTE: When using nextOnSuccessOrFailure, both nextOnSuccess and nextOnFailure SHOULD be nil.
31
34
// However if that's not the case, nextOnSuccess and nextOnFailure will be used, instead of
32
35
// nextOnSuccessOrFailure, in the success and failure scenarios, respectively.
33
- nextOnSuccessOrFailure * filter
36
+ nextOnSuccessOrFailure * filterChainImpl
34
37
}
35
38
36
- func (f * filter ) Name () string {
39
+ func (f * filterChainImpl ) Name () string {
37
40
if f == nil {
38
41
return "nil"
39
42
}
40
43
return f .name
41
44
}
42
45
43
- func (f * filter ) Filter (req * LLMRequest , pods []* backend.PodMetrics ) ([]* backend.PodMetrics , error ) {
46
+ func (f * filterChainImpl ) Filter (req * LLMRequest , pods []* backend.PodMetrics ) ([]* backend.PodMetrics , error ) {
44
47
klog .V (3 ).Infof ("Running filter %q on request %v with %v pods" , f .name , req , len (pods ))
45
48
46
49
filtered , err := f .filter (req , pods )
@@ -71,11 +74,11 @@ func (f *filter) Filter(req *LLMRequest, pods []*backend.PodMetrics) ([]*backend
71
74
}
72
75
}
73
76
74
- // filterFunc filters a set of input pods to a subset.
75
- type filterFunc func (req * LLMRequest , pods []* backend.PodMetrics ) ([]* backend.PodMetrics , error )
77
+ // filter filters a set of input pods to a subset.
78
+ type filter func (req * LLMRequest , pods []* backend.PodMetrics ) ([]* backend.PodMetrics , error )
76
79
77
- // toFilterFunc is a helper function to convert a per pod filter func to the FilterFunc.
78
- func toFilterFunc (pp podPredicate ) filterFunc {
80
+ // toFilter is a helper function that converts a per-pod predicate (podPredicate) into a filter.
81
+ func toFilter (pp podPredicate ) filter {
79
82
return func (req * LLMRequest , pods []* backend.PodMetrics ) ([]* backend.PodMetrics , error ) {
80
83
filtered := []* backend.PodMetrics {}
81
84
for _ , pod := range pods {
@@ -152,6 +155,12 @@ func leastKVCacheFilterFunc(req *LLMRequest, pods []*backend.PodMetrics) ([]*bac
152
155
return filtered , nil
153
156
}
154
157
158
+ func dropRequestFilterFunc (req * LLMRequest , pods []* backend.PodMetrics ) ([]* backend.PodMetrics , error ) {
159
+ klog .Infof ("Dropping request %v" , req )
160
+ return []* backend.PodMetrics {}, status .Errorf (
161
+ codes .ResourceExhausted , "dropping request due to limited backend resources" )
162
+ }
163
+
155
164
// podPredicate is a filter function to check whether a pod is desired.
156
165
type podPredicate func (req * LLMRequest , pod * backend.PodMetrics ) bool
157
166
0 commit comments