diff --git a/README.md b/README.md index 76a333eea..229e58b17 100644 --- a/README.md +++ b/README.md @@ -59,6 +59,12 @@ export PREFILL_ENABLE_LOAD_AWARE_SCORER=true export PREFILL_LOAD_AWARE_SCORER_WEIGHT=1.0 ``` +To enable and configure the prefix aware scorer for prefill, the following environment variables must be configured: +``` +export PREFILL_ENABLE_PREFIX_AWARE_SCORER=true +export PREFILL_PREFIX_AWARE_SCORER_WEIGHT=1.0 +``` + Decode configuration: To enable and configure the kv cache scorer for decode, the following environment variables must be configured: @@ -72,6 +78,12 @@ To enable and configure the load aware scorer for decode, the following environm export DECODE_ENABLE_LOAD_AWARE_SCORER=true export DECODE_LOAD_AWARE_SCORER_WEIGHT=1.0 ``` + +To enable and configure the prefix aware scorer for decode, the following environment variables must be configured: +``` +export DECODE_ENABLE_PREFIX_AWARE_SCORER=true +export DECODE_PREFIX_AWARE_SCORER_WEIGHT=1.0 +``` --- [Inference Gateways]:#concepts-and-definitions diff --git a/pkg/epp/scheduling/config_utils.go b/pkg/epp/scheduling/config_utils.go index 4145dbe1b..60e8e885d 100644 --- a/pkg/epp/scheduling/config_utils.go +++ b/pkg/epp/scheduling/config_utils.go @@ -29,13 +29,17 @@ import ( const ( prefillKvCacheScorerEnablementEnvVar = "PREFILL_ENABLE_KVCACHE_AWARE_SCORER" prefillLoadAwareScorerEnablementEnvVar = "PREFILL_ENABLE_LOAD_AWARE_SCORER" + prefillPrefixScorerEnablementEnvVar = "PREFILL_ENABLE_PREFIX_AWARE_SCORER" decodeKvCacheScorerEnablementEnvVar = "DECODE_ENABLE_KVCACHE_AWARE_SCORER" decodeLoadAwareScorerEnablementEnvVar = "DECODE_ENABLE_LOAD_AWARE_SCORER" + decodePrefixScorerEnablementEnvVar = "DECODE_ENABLE_PREFIX_AWARE_SCORER" prefillKvCacheScorerWeightEnvVar = "PREFILL_KVCACHE_AWARE_SCORER_WEIGHT" prefillLoadAwareScorerWeightEnvVar = "PREFILL_LOAD_AWARE_SCORER_WEIGHT" + prefillPrefixScorerWeightEnvVar = "PREFILL_PREFIX_AWARE_SCORER_WEIGHT" decodeKvCacheScorerWeightEnvVar = "DECODE_KVCACHE_AWARE_SCORER_WEIGHT" decodeLoadAwareScorerWeightEnvVar = "DECODE_LOAD_AWARE_SCORER_WEIGHT" + decodePrefixScorerWeightEnvVar = "DECODE_PREFIX_AWARE_SCORER_WEIGHT" pdEnabledEnvKey = "PD_ENABLED" @@ -46,6 +50,7 @@ const ( const ( loadAwareScorerName = "LoadAwareScorer" kvCacheAwareScorerName = "KVCacheAwareScorer" + prefixAwareScorerName = "PrefixAwareScorer" ) func addScorerByEnvironment(ctx context.Context, config *SchedulerConfig, scorerName string, scorerEnabledEnvKey string, weightEnvKey string, logger logr.Logger) { diff --git a/pkg/epp/scheduling/pd_config.go b/pkg/epp/scheduling/pd_config.go index 3371093a1..19916f7ec 100644 --- a/pkg/epp/scheduling/pd_config.go +++ b/pkg/epp/scheduling/pd_config.go @@ -66,12 +66,20 @@ func init() { func loadPrefillConfiguration(ctx context.Context, logger logr.Logger) { // add scorers - addScorerByEnvironment(ctx, prefillConfig, kvCacheAwareScorerName, kvCacheScorerEnablementEnvVar, kvCacheScorerWeightEnvVar, logger) - addScorerByEnvironment(ctx, prefillConfig, loadAwareScorerName, loadAwareScorerEnablementEnvVar, loadAwareScorerWeightEnvVar, logger) + addScorerByEnvironment(ctx, prefillConfig, kvCacheAwareScorerName, prefillKvCacheScorerEnablementEnvVar, + prefillKvCacheScorerWeightEnvVar, logger) + addScorerByEnvironment(ctx, prefillConfig, loadAwareScorerName, prefillLoadAwareScorerEnablementEnvVar, + prefillLoadAwareScorerWeightEnvVar, logger) + addScorerByEnvironment(ctx, prefillConfig, prefixAwareScorerName, prefillPrefixScorerEnablementEnvVar, + prefillPrefixScorerWeightEnvVar, logger) } func loadDecodeConfiguration(ctx context.Context, logger logr.Logger) { // add scorers - addScorerByEnvironment(ctx, decodeConfig, kvCacheAwareScorerName, kvCacheScorerEnablementEnvVar, kvCacheScorerWeightEnvVar, logger) - addScorerByEnvironment(ctx, decodeConfig, loadAwareScorerName, loadAwareScorerEnablementEnvVar, loadAwareScorerWeightEnvVar, logger) + addScorerByEnvironment(ctx, decodeConfig, kvCacheAwareScorerName, decodeKvCacheScorerEnablementEnvVar, + decodeKvCacheScorerWeightEnvVar, logger) + addScorerByEnvironment(ctx, decodeConfig, loadAwareScorerName, decodeLoadAwareScorerEnablementEnvVar, + decodeLoadAwareScorerWeightEnvVar, logger) + addScorerByEnvironment(ctx, decodeConfig, prefixAwareScorerName, decodePrefixScorerEnablementEnvVar, + decodePrefixScorerWeightEnvVar, logger) }