Skip to content

Commit 58f3213

Browse files
authored
Merge pull request kubernetes-sigs#104 from neuralmagic/enable-pd
Provide a way to enable the PDFilter
2 parents f9e6530 + 01c043e commit 58f3213

File tree

2 files changed

+23
-3
lines changed

2 files changed

+23
-3
lines changed

README.md

+7-3
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,11 @@ To enable LoadAwareScorer, the following env vars must be configured:
2222
export ENABLE_LOAD_AWARE_SCORER=true
2323
export LOAD_AWARE_SCORER_WEIGHT=1.0
2424
```
25+
26+
To enable PDFilter, the following env var must be configured:
27+
```
28+
export ENABLE_PD_FILTER=true
29+
```
2530
---
2631
[Inference Gateways]:#concepts-and-definitions
2732

@@ -96,8 +101,8 @@ See our website at https://gateway-api-inference-extension.sigs.k8s.io/ for deta
96101
## Roadmap
97102

98103
As Inference Gateway builds towards a GA release, we will continue to expand our capabilities, namely:
99-
1. Prefix-cache aware load balancing with interfaces for remote caches
100-
1. Recommended LoRA adapter pipeline for automated rollout
104+
1. Prefix-cache aware load balancing with interfaces for remote caches
105+
1. Recommended LoRA adapter pipeline for automated rollout
101106
1. Fairness and priority between workloads within the same criticality band
102107
1. HPA support for autoscaling on aggregate metrics derived from the load balancer
103108
1. Support for large multi-modal inputs and outputs
@@ -121,4 +126,3 @@ Contributions are readily welcomed, follow the [dev guide](./docs/dev.md) to sta
121126
### Code of conduct
122127

123128
Participation in the Kubernetes community is governed by the [Kubernetes Code of Conduct](code-of-conduct.md).
124-

pkg/epp/scheduling/local_config.go

+16
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,9 @@ package scheduling
1818

1919
import (
2020
"context"
21+
2122
"sigs.k8s.io/controller-runtime/pkg/log"
23+
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/plugins/filter"
2224
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/plugins/picker"
2325
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/plugins/scorer"
2426
envutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/env"
@@ -28,6 +30,7 @@ import (
2830
const (
2931
kvCacheScorerEnablementEnvVar = "ENABLE_KVCACHE_AWARE_SCORER"
3032
loadAwareScorerEnablementEnvVar = "ENABLE_LOAD_AWARE_SCORER"
33+
pdFilterEnablementEnvVar = "ENABLE_PD_FILTER"
3134

3235
kvCacheScorerWeightEnvVar = "KVCACHE_AWARE_SCORER_WEIGHT"
3336
loadAwareScorerWeightEnvVar = "LOAD_AWARE_SCORER_WEIGHT"
@@ -38,6 +41,7 @@ func setDefaultConfig() {
3841
// this configuration is a temporary state, it should be better streamlined.
3942
setLoadAwareScorer()
4043
setKVCacheAwareScorer()
44+
setPDFilter()
4145

4246
defaultConfig.picker = picker.NewMaxScorePicker()
4347
}
@@ -75,3 +79,15 @@ func setKVCacheAwareScorer() {
7579
defaultConfig.scorers[kvCacheScorer] = kvCacheScorerWeight
7680
loggerDebug.Info("Initialized KVCacheAwareScorer", "weight", kvCacheScorerWeight)
7781
}
82+
83+
func setPDFilter() {
84+
ctx := context.Background()
85+
loggerDebug := log.FromContext(ctx).WithName("scheduler_config").V(logutil.DEBUG)
86+
87+
if envutil.GetEnvString(pdFilterEnablementEnvVar, "false", loggerDebug) != "true" {
88+
loggerDebug.Info("Skipping PDFilter creation as it is not enabled")
89+
return
90+
}
91+
92+
defaultConfig.filters = append(defaultConfig.filters, filter.PDFilter)
93+
}

0 commit comments

Comments
 (0)