Skip to content

Commit d8ba48d

Browse files
committed
Replace scheduler v2 with config v2
1 parent 415a624 commit d8ba48d

File tree

3 files changed

+30
-17
lines changed

3 files changed

+30
-17
lines changed

cmd/epp/main.go

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -111,19 +111,26 @@ var (
111111
setupLog = ctrl.Log.WithName("setup")
112112

113113
// Environment variables
114-
schedulerV2 = envutil.GetEnvString("EXPERIMENTAL_USE_SCHEDULE_V2", "false", setupLog)
115-
prefixCacheConfig = loadPrefixCacheConfig()
114+
schedulerV2 = envutil.GetEnvString("EXPERIMENTAL_USE_SCHEDULER_V2", "false", setupLog)
116115
)
117116

118117
func loadPrefixCacheConfig() prefix.Config {
119-
// logger := zap.New(zap.RawZapOpts(uberzap.AddCaller()))
120-
// log.SetLogger(logger)
121118
baseLogger := log.Log.WithName("env-config")
122119

123120
return prefix.Config{
124-
HashBlockSize: envutil.GetEnvInt("PREFIX_CACHE_HASH_BLOCK_SIZE", prefix.DefaultCacheBlockSize, baseLogger),
121+
HashBlockSize: envutil.GetEnvInt("PREFIX_CACHE_HASH_BLOCK_SIZE", prefix.DefaultHashBlockSize, baseLogger),
125122
MaxPrefixBlocksToMatch: envutil.GetEnvInt("PREFIX_CACHE_MAX_PREFIX_BLOCKS", prefix.DefaultMaxPrefixBlocks, baseLogger),
126-
LRUIndexerCapacity: envutil.GetEnvInt("PREFIX_CACHE_MAX_CACHE_SIZE_MB", prefix.DefaultLRUIndexerCapacity, baseLogger),
123+
LRUIndexerCapacity: envutil.GetEnvInt("PREFIX_CACHE_LRU_CAPACITY", prefix.DefaultLRUIndexerCapacity, baseLogger),
124+
}
125+
}
126+
127+
func loadSchedulingScorerWeights() scheduling.ScorerWeights {
128+
baseLogger := log.Log.WithName("env-config")
129+
130+
return scheduling.ScorerWeights{
131+
Prefix: envutil.GetEnvInt("PREFIX_CACHE_SCORE_WEIGHT", 3, baseLogger),
132+
Queue: envutil.GetEnvInt("QUEUE_SCORE_WEIGHT", 2, baseLogger),
133+
KVCache: envutil.GetEnvInt("KV_CACHE_SCORE_WEIGHT", 1, baseLogger),
127134
}
128135
}
129136

@@ -191,8 +198,9 @@ func run() error {
191198

192199
scheduler := scheduling.NewScheduler(datastore)
193200
if schedulerV2 == "true" {
194-
setupLog.Info("Creating scheduler with prefixCache plugin", "prefix cache config", prefixCacheConfig)
195-
scheduler = scheduling.NewSchedulerV2(datastore, prefixCacheConfig)
201+
schedConfig := scheduling.CreateConfig(loadSchedulingScorerWeights(), loadPrefixCacheConfig())
202+
setupLog.Info("Creating scheduler", "config", *schedConfig)
203+
scheduler = scheduling.NewSchedulerWithConfig(datastore, schedConfig)
196204
}
197205
serverRunner := &runserver.ExtProcServerRunner{
198206
GrpcPort: *grpcPort,

pkg/epp/scheduling/scheduler_v2.go renamed to pkg/epp/scheduling/config_v2.go

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -26,27 +26,32 @@ import (
2626
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/types"
2727
)
2828

29-
func NewSchedulerV2(datastore Datastore, prefixConfig prefix.Config) *Scheduler {
29+
func CreateConfig(weights ScorerWeights, prefixConfig prefix.Config) *SchedulerConfig {
3030
prefixPlugin := prefix.New(prefixConfig)
3131
queuePlugin := &scorer.QueueScorer{}
3232
kvCachePlugin := &scorer.KVCacheScorer{}
33-
configV2 := &SchedulerConfig{
33+
config := &SchedulerConfig{
3434
PreSchedulePlugins: []plugins.PreSchedule{prefixPlugin},
3535
PostSchedulePlugins: []plugins.PostSchedule{prefixPlugin},
3636
Scorers: map[plugins.Scorer]int{
37-
prefixPlugin: 3,
38-
queuePlugin: 1,
39-
kvCachePlugin: 1,
37+
prefixPlugin: weights.Prefix,
38+
queuePlugin: weights.Queue,
39+
kvCachePlugin: weights.KVCache,
4040
},
4141
Filters: []plugins.Filter{&sheddableRequestFilterV2{}},
4242
Picker: &picker.MaxScorePicker{},
4343
}
44-
return NewSchedulerWithConfig(datastore, configV2)
44+
return config
4545
}
4646

47-
type sheddableRequestFilterV2 struct {
47+
type ScorerWeights struct {
48+
Prefix int
49+
Queue int
50+
KVCache int
4851
}
4952

53+
type sheddableRequestFilterV2 struct{}
54+
5055
func (p *sheddableRequestFilterV2) Name() string {
5156
return "sheddableRequestFilterV2"
5257
}

pkg/epp/scheduling/plugins/prefix/plugin.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ const (
3333
// We optimistically search more than one to give more candidates for the scheduler to choose.
3434
DefaultNumServersToMatch = 2
3535
// vLLM default token block size is 16, and a good guess of average characters per token is 4.
36-
DefaultCacheBlockSize = 64
36+
DefaultHashBlockSize = 64
3737
DefaultMaxPrefixBlocks = 128
3838
// Assume each request reaches DefaultMaxPrefixBlocks = 128, and each BlockHash is cached onto 2
3939
// servers due to load balancing, then it requires 256 entries per request.
@@ -60,7 +60,7 @@ type Config struct {
6060
}
6161

6262
var DefaultConfig = Config{
63-
HashBlockSize: DefaultCacheBlockSize,
63+
HashBlockSize: DefaultHashBlockSize,
6464
MaxPrefixBlocksToMatch: DefaultMaxPrefixBlocks,
6565
LRUIndexerCapacity: DefaultLRUIndexerCapacity,
6666
}

0 commit comments

Comments
 (0)