Skip to content

Commit 1a81066

Browse files
committed
initial sketching of interfacing
1 parent a5bf0ac commit 1a81066

File tree

4 files changed

+157
-2
lines changed

4 files changed

+157
-2
lines changed

docs/proposals/0683-epp-architecture-proposal/README.md

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,12 +86,17 @@ Due to the possibility of this becoming a bit of a dumping ground. The API will
8686

8787
The flow controller will consume resource regime data, and enforce proper resource sharing between workloads. This will primarily be done through a queuing mechanism [as described here](https://docs.google.com/document/d/1VZL7opFWuwgWquvgiOzLlXAJ633qZ9U-A0ZixGjBgaI/edit?usp=sharing).
8888

89-
#### Scheduling Layer
89+
#### Scheduling Subsystem
9090

91-
As the Scheduling Layer is the final interface to the entirety of the pool, all configuration will be at the _pool_ level. The default scheduling layer will be an experimentally-backed LB algorithm, with exposed config values.
91+
The Scheduling Subsystem is intended to be
92+
93+
As the Scheduling Subsystem is the final interface to the entirety of the pool, all configuration will be at the _pool_ level. The default scheduling layer will be an experimentally-backed LB algorithm, with exposed config values.
9294

9395
The Scheduler will define a strong interface API, so that new scheduling algos may be plugged & dark-launched to test in production traffic without impacting said traffic. Extension is expected to adhere to the [Scheduler Subsystem definition](https://github.com/kubernetes-sigs/gateway-api-inference-extension/pull/603)
9496

97+
98+
<img src="./images/scheduler_subsystem.svg" alt="Scheduling Algorithm" width="1000" />
99+
95100
### `Non-extensible`
96101

97102
#### Ext-Proc Server
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# names are egregiously long, but attempt to describe custom logic within a name
2+
profileSelection: disagg-token-length
3+
schedulingResult: log-shadowbox-label-pd-result
4+
profiles:
5+
prefill:
6+
preschedule:
7+
- decode-prefix-cache-check
8+
filter:
9+
- is-prefill
10+
- has-required-accelerator
11+
score:
12+
- prefix-cache: 3
13+
- latency-scorer: 2
14+
selection:
15+
- best-score
16+
postschedule:
17+
- log-full-scores
18+
decode:
19+
filter:
20+
- is-decode
21+
score:
22+
- prefix-cache: 3
23+
- kv-cache-util: 5
24+
selection:
25+
- random-top-3
26+
shadowbox-decode:
27+
filter:
28+
- is-decode
29+
- is-tpu
30+
score:
31+
- prefix-cache-v2: 4
32+
- kv-cache-util: 1
33+
selection:
34+
- random-top-3

docs/proposals/0683-epp-architecture-proposal/images/scheduler_subsystem.svg

Lines changed: 1 addition & 0 deletions
Loading
Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
/*
2+
Copyright 2025 The Kubernetes Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package framework
18+
19+
import (
20+
"context"
21+
22+
scheduling "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/types"
23+
)
24+
25+
// Plugin is the parent type for all the scheduling framework plugins.
// The PreSchedule, Filter, Scorer, Picker and PostSchedule interfaces in this
// file all embed it.
26+
type Plugin interface {
27+
// Name returns the plugin's name as a string.
Name() string
28+
}
29+
30+
// Endpoint pairs an EndpointState with a Score. Slices of Endpoint are what
// the plugin interfaces in this file receive and return.
type Endpoint struct {
31+
// State is the EndpointState associated with this endpoint.
State EndpointState
32+
// Score is the endpoint's score.
// NOTE(review): whether this holds a single scorer's value or a weighted
// aggregate across scorers is not defined in this file — confirm.
Score float64
33+
}
34+
35+
// EndpointState holds arbitrary string-keyed data for an Endpoint. Its only
// field is unexported and no accessors are declared in this file.
type EndpointState struct {
36+
// storage is per Scheduling Cycle, and so has no thread-safe concerns.
37+
storage map[string]any
38+
}
39+
40+
// SchedulingResult is the value returned by Scheduler.SchedulingResult. It
// wraps lists of Endpoints keyed by string.
// NOTE(review): the key is presumably the scheduling profile name — confirm.
type SchedulingResult struct {
41+
// results is unexported; no accessors are declared in this file.
results map[string][]Endpoint
42+
}
43+
44+
// Scheduler is the profile-based scheduling interface: it selects which
// registered profiles to run, lists the registered profiles, and turns the
// per-profile results into a SchedulingResult.
type Scheduler interface {
45+
// ProfileSelection selects scheduling profiles through the implemented
46+
// logic, and returns:
47+
// - profiles: a subset of the registered scheduling profiles to be run
48+
// - recurse: a bool flagging whether the results of this profile set should be passed back to ProfileSelection, or whether scheduling should complete with a result
// NOTE(review): results presumably carries the output of profiles already
// run, keyed by profile name — confirm.
49+
ProfileSelection(data scheduling.CycleState, results map[string][]Endpoint) (profiles map[string]SchedulingProfile, recurse bool)
50+
51+
// SchedulingProfiles lists all of the scheduling profiles registered
52+
// with the scheduler.
53+
SchedulingProfiles() map[string]SchedulingProfile
54+
55+
// SchedulingResult takes the output of the result(s) of the scheduling cycle(s)
56+
// and makes sense of the data to be consumed by request control.
57+
// For example: suppose you have 2 profiles, a ShadowBoxing Profile & a Production Profile.
58+
// SchedulingResult would know to simply log the result of the ShadowBoxing
59+
// profile, and do nothing else with it.
60+
SchedulingResult(profileResults map[string][]Endpoint) SchedulingResult
61+
}
62+
63+
// SchedulingProfile is used to describe a profile that will
64+
// run for a given scheduling cycle.
// NOTE(review): this struct has no fields for PreSchedule or PostSchedule
// plugins, although both interfaces are declared in this file.
65+
type SchedulingProfile struct {
66+
// Name of the profile
67+
Name string
68+
// Filters lists all Filter plugins associated with this Profile. Filters
69+
// are optional.
70+
Filters []Filter
71+
// Scorers lists all Score plugins associated with this Profile. At
72+
// least 1 scorer must be registered for a profile to be valid.
// NOTE(review): the int value is presumably the scorer's weight — confirm.
// Because the map key is an interface type, each Scorer's dynamic type
// must be comparable; inserting a non-comparable one panics at run time.
73+
Scorers map[Scorer]int
74+
// Selection is the Picker plugin that picks the endpoint(s).
75+
Selection Picker
76+
}
77+
78+
// PreSchedule will be run at the start of a scheduling cycle. This should be
79+
// scoped to any foundational work needed that is custom to this scheduling
80+
// profile.
81+
type PreSchedule interface {
82+
Plugin
83+
// PreSchedule receives the cycle state and the endpoints; it returns nothing.
PreSchedule(ctx context.Context, state scheduling.CycleState, endpoints []Endpoint)
84+
}
85+
86+
// Filter runs before any scoring, and removes endpoints that are not fit for
87+
// selection. The framework will return an error to the client if the endpoints
88+
// are filtered to zero.
89+
type Filter interface {
90+
Plugin
91+
// Filter takes the candidate endpoints and returns the ones that remain
// after filtering.
Filter(ctx context.Context, state scheduling.CycleState, endpoints []Endpoint) []Endpoint
92+
}
93+
94+
// Scorer applies a score to each remaining endpoint provided. Scorers SHOULD
95+
// keep their score values in a normalized range: [0-1]. Any weighting should
96+
// be added at the SchedulingProfile configuration level.
97+
type Scorer interface {
98+
Plugin
99+
// Score takes the endpoints to be scored and returns a slice of Endpoint.
// NOTE(review): presumably the returned endpoints carry the assigned value
// in Endpoint.Score — confirm.
Score(ctx context.Context, state scheduling.CycleState, endpoints []Endpoint) []Endpoint
100+
}
101+
102+
// Picker selects the endpoint(s) from the provided list of scored endpoints.
103+
// Picker MUST return at least one endpoint.
104+
type Picker interface {
105+
Plugin
106+
// Pick takes the scored endpoints and returns the selected endpoint(s).
Pick(ctx context.Context, state scheduling.CycleState, endpoints []Endpoint) []Endpoint
107+
}
108+
109+
// PostSchedule runs once per scheduling cycle, and is part of a scheduling profile.
110+
// PostSchedule performs any remaining work needed for the scheduling cycle.
111+
// PostSchedule is not expected to change any values of the parameters.
112+
type PostSchedule interface {
113+
Plugin
114+
// PostSchedule receives the cycle state and the selected endpoints; it
// returns nothing.
PostSchedule(ctx context.Context, state scheduling.CycleState, selectedEndpoints []Endpoint)
115+
}

0 commit comments

Comments
 (0)