Skip to content

Commit 3c6bd6e

Browse files
committed
v0.1
1 parent c94bf1f commit 3c6bd6e

File tree

4 files changed

+608
-0
lines changed

4 files changed

+608
-0
lines changed

api/groupversion_info.go

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
/*
2+
Copyright 2024 The Kubernetes Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
// Package v1alpha1 contains API Schema definitions for the inference.networking.x-k8s.io v1alpha1 API group
18+
// +kubebuilder:object:generate=true
19+
// +groupName=inference.networking.x-k8s.io
20+
package v1alpha1
21+
22+
import (
23+
"k8s.io/apimachinery/pkg/runtime/schema"
24+
"sigs.k8s.io/controller-runtime/pkg/scheme"
25+
)
26+
27+
var (
28+
// GroupVersion is the group version used to register these objects.
29+
GroupVersion = schema.GroupVersion{Group: "inference.networking.x-k8s.io", Version: "v1alpha1"}
30+
31+
// SchemeGroupVersion is an alias to GroupVersion for client-go libraries.
32+
// It is required by pkg/client/informers/externalversions/...
33+
SchemeGroupVersion = GroupVersion
34+
35+
// SchemeBuilder is used to add go types to the GroupVersionKind scheme
36+
SchemeBuilder = &scheme.Builder{GroupVersion: GroupVersion}
37+
38+
// AddToScheme adds the types in this group-version to the given scheme.
39+
AddToScheme = SchemeBuilder.AddToScheme
40+
)
41+
42+
// Resource is required by pkg/client/listers/...
43+
func Resource(resource string) schema.GroupResource {
44+
return GroupVersion.WithResource(resource).GroupResource()
45+
}

api/inferencemodel_types.go

Lines changed: 183 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,183 @@
1+
/*
2+
Copyright 2024 The Kubernetes Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package v1alpha1
18+
19+
import (
20+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
21+
)
22+
23+
// InferenceModel is the Schema for the InferenceModels API.
24+
//
25+
// +kubebuilder:object:root=true
26+
// +kubebuilder:subresource:status
27+
// +genclient
28+
type InferenceModel struct {
29+
metav1.TypeMeta `json:",inline"`
30+
metav1.ObjectMeta `json:"metadata,omitempty"`
31+
32+
Spec InferenceModelSpec `json:"spec,omitempty"`
33+
Status InferenceModelStatus `json:"status,omitempty"`
34+
}
35+
36+
// InferenceModelList contains a list of InferenceModel.
37+
//
38+
// +kubebuilder:object:root=true
39+
type InferenceModelList struct {
40+
metav1.TypeMeta `json:",inline"`
41+
metav1.ListMeta `json:"metadata,omitempty"`
42+
Items []InferenceModel `json:"items"`
43+
}
44+
45+
// InferenceModelSpec represents the desired state of a specific model use case. This resource is
46+
// managed by the "Inference Workload Owner" persona.
47+
//
48+
// The Inference Workload Owner persona is someone that trains, verifies, and
49+
// leverages a large language model from a model frontend, drives the lifecycle
50+
// and rollout of new versions of those models, and defines the specific
51+
// performance and latency goals for the model. These workloads are
52+
// expected to operate within an InferencePool sharing compute capacity with other
53+
// InferenceModels, defined by the Inference Platform Admin.
54+
//
55+
// InferenceModel's modelName (not the ObjectMeta name) is unique for a given InferencePool,
56+
// if the name is reused, an error will be shown on the status of an
57+
// InferenceModel that attempted to reuse it. The oldest InferenceModel, based on
58+
// creation timestamp, will be selected to remain valid. In the event of a race
59+
// condition, one will be selected at random.
60+
type InferenceModelSpec struct {
61+
// ModelName is the name of the model as the users set in the "model" parameter in the requests.
62+
// The name should be unique among the workloads that reference the same backend pool.
63+
// This is the parameter that will be used to match the request with. In the future, we may
64+
// allow to match on other request parameters. The other approach to support matching
65+
// on other request parameters is to use a different ModelName per HTTPFilter.
66+
// Names can be reserved without implementing an actual model in the pool.
67+
// This can be done by specifying a target model and setting the weight to zero,
68+
// in which case an error will be returned specifying that no valid target model is found.
69+
//
70+
// +kubebuilder:validation:MaxLength=253
71+
// +kubebuilder:validation:Required
72+
ModelName string `json:"modelName"`
73+
74+
// Criticality defines how important it is to serve the model compared to other models referencing the same pool.
75+
//
76+
// +optional
77+
// +kubebuilder:default="Default"
78+
Criticality *Criticality `json:"criticality,omitempty"`
79+
80+
// TargetModels allow multiple versions of a model for traffic splitting.
81+
// If not specified, the target model name is defaulted to the modelName parameter.
82+
// modelName is often in reference to a LoRA adapter.
83+
//
84+
// +optional
85+
// +kubebuilder:validation:MaxItems=10
86+
TargetModels []TargetModel `json:"targetModels,omitempty"`
87+
88+
// PoolRef is a reference to the inference pool, the pool must exist in the same namespace.
89+
//
90+
// +kubebuilder:validation:Required
91+
PoolRef PoolObjectReference `json:"poolRef"`
92+
}
93+
94+
// PoolObjectReference identifies an API object within the namespace of the
95+
// referrer.
96+
type PoolObjectReference struct {
97+
// Group is the group of the referent.
98+
//
99+
// +optional
100+
// +kubebuilder:default="inference.networking.x-k8s.io"
101+
// +kubebuilder:validation:MaxLength=253
102+
// +kubebuilder:validation:Pattern=`^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$`
103+
Group string `json:"group,omitempty"`
104+
105+
// Kind is the kind of the referent. For example "InferencePool".
106+
//
107+
// +optional
108+
// +kubebuilder:default="InferencePool"
109+
// +kubebuilder:validation:MinLength=1
110+
// +kubebuilder:validation:MaxLength=63
111+
// +kubebuilder:validation:Pattern=`^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$`
112+
Kind string `json:"kind,omitempty"`
113+
114+
// Name is the name of the referent.
115+
//
116+
// +kubebuilder:validation:MinLength=1
117+
// +kubebuilder:validation:MaxLength=253
118+
// +kubebuilder:validation:Required
119+
Name string `json:"name"`
120+
}
121+
122+
// Criticality defines how important it is to serve the model compared to other models.
123+
// +kubebuilder:validation:Enum=Critical;Default;Sheddable
124+
type Criticality string
125+
126+
const (
127+
// Critical defines the highest level of criticality. Requests to this band will be shed last.
128+
Critical Criticality = "Critical"
129+
130+
// Default defines the default criticality level and is more important than Sheddable but less
131+
// important than Critical. Requests in this band will be shed before critical traffic.
132+
Default Criticality = "Default"
133+
134+
// Sheddable defines the lowest level of criticality. Requests to this band will be shed before
135+
// all other bands.
136+
Sheddable Criticality = "Sheddable"
137+
)
138+
139+
// TargetModel represents a deployed model or a LoRA adapter. The
140+
// Name field is expected to match the name of the LoRA adapter
141+
// (or base model) as it is registered within the model server. Inference
142+
// Gateway assumes that the model exists on the model server and it's the
143+
// responsibility of the user to validate a correct match. Should a model fail
144+
// to exist at request time, the error is processed by the Inference Gateway
145+
// and emitted on the appropriate InferenceModel object.
146+
type TargetModel struct {
147+
// Name is the name of the adapter as expected by the ModelServer.
148+
//
149+
// +kubebuilder:validation:MaxLength=253
150+
// +kubebuilder:validation:Required
151+
Name string `json:"name"`
152+
// NOTE(review): Weight is a non-pointer int32 tagged `omitempty`, so
// encoding/json omits an explicit 0 when a Go client serializes this struct.
// Combined with the default=1 marker below, the documented "weight set to 0"
// case may be defaulted back to 1 by the API server. TODO confirm, and
// consider *int32 in a future API revision.

153+
// Weight is used to determine the proportion of traffic that should be
154+
// sent to this model when multiple target models are specified.
155+
//
156+
// Weight defines the proportion of requests forwarded to the specified
157+
// model. This is computed as weight/(sum of all weights in this
158+
// TargetModels list). For non-zero values, there may be some epsilon from
159+
// the exact proportion defined here depending on the precision an
160+
// implementation supports. Weight is not a percentage and the sum of
161+
// weights does not need to equal 100.
162+
//
163+
// If only one model is specified and it has a weight greater than 0, 100%
164+
// of the traffic is forwarded to that model. If weight is set to 0, no
165+
// traffic should be forwarded for this model. If unspecified, weight
166+
// defaults to 1.
167+
//
168+
// +optional
169+
// +kubebuilder:default=1
170+
// +kubebuilder:validation:Minimum=0
171+
// +kubebuilder:validation:Maximum=1000000
172+
Weight int32 `json:"weight,omitempty"`
173+
}
174+
175+
// InferenceModelStatus defines the observed state of InferenceModel
176+
type InferenceModelStatus struct {
177+
// Conditions track the state of the InferenceModel.
178+
Conditions []metav1.Condition `json:"conditions,omitempty"`
179+
}
180+
181+
func init() {
182+
SchemeBuilder.Register(&InferenceModel{}, &InferenceModelList{})
183+
}

api/inferencepool_types.go

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
/*
2+
Copyright 2024 The Kubernetes Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package v1alpha1
18+
19+
import (
20+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
21+
)
22+
23+
// InferencePool is the Schema for the InferencePools API.
24+
//
25+
// +kubebuilder:object:root=true
26+
// +kubebuilder:subresource:status
27+
// +genclient
28+
type InferencePool struct {
29+
metav1.TypeMeta `json:",inline"`
30+
metav1.ObjectMeta `json:"metadata,omitempty"`
31+
32+
Spec InferencePoolSpec `json:"spec,omitempty"`
33+
Status InferencePoolStatus `json:"status,omitempty"`
34+
}
35+
36+
// InferencePoolList contains a list of InferencePool.
37+
//
38+
// +kubebuilder:object:root=true
39+
type InferencePoolList struct {
40+
metav1.TypeMeta `json:",inline"`
41+
metav1.ListMeta `json:"metadata,omitempty"`
42+
Items []InferencePool `json:"items"`
43+
}
44+
45+
// InferencePoolSpec defines the desired state of InferencePool
46+
type InferencePoolSpec struct {
47+
// Selector defines a map of labels used to select the model server pods
48+
// that should be included in the InferencePool. ModelServers should not
49+
// be shared with any other Service or InferencePool; that behavior is not supported
50+
// and will result in sub-optimal utilization.
51+
// In some cases, implementations may translate this to a Service selector, so this matches the simple
52+
// map used for Service selectors instead of the full Kubernetes LabelSelector type.
53+
//
54+
// +kubebuilder:validation:Required
55+
Selector map[LabelKey]LabelValue `json:"selector"`
56+
57+
// TargetPortNumber defines the port number to access the selected model servers.
58+
// The number must be in the range 1 to 65535.
59+
//
60+
// +kubebuilder:validation:Minimum=1
61+
// +kubebuilder:validation:Maximum=65535
62+
// +kubebuilder:validation:Required
63+
TargetPortNumber int32 `json:"targetPortNumber"`
64+
}
65+
66+
// LabelKey was originally copied from: https://github.com/kubernetes-sigs/gateway-api/blob/99a3934c6bc1ce0874f3a4c5f20cafd8977ffcb4/apis/v1/shared_types.go#L694-L731
// Duplicated so as not to take an unexpected dependency on gw's API.
//
// LabelKey is the key of a label. This is used for validation
// of maps. This matches the Kubernetes "qualified name" validation that is used for labels.
//
// Valid values include:
//
// * example
// * example.com
// * example.com/path
// * example.com/path.html
//
// Invalid values include:
//
// * example~ - "~" is an invalid character
// * example.com. - cannot start or end with "."
//
// +kubebuilder:validation:MinLength=1
// +kubebuilder:validation:MaxLength=253
// +kubebuilder:validation:Pattern=`^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?([A-Za-z0-9][-A-Za-z0-9_.]{0,61})?[A-Za-z0-9]$`
type LabelKey string
88+
89+
// LabelValue is the value of a label. This is used for validation
90+
// of maps. This matches the Kubernetes label validation rules:
91+
// * must be 63 characters or less (can be empty),
92+
// * unless empty, must begin and end with an alphanumeric character ([a-z0-9A-Z]),
93+
// * could contain dashes (-), underscores (_), dots (.), and alphanumerics between.
94+
//
95+
// Valid values include:
96+
//
97+
// * MyValue
98+
// * my.name
99+
// * 123-my-value
100+
//
101+
// +kubebuilder:validation:MinLength=0
102+
// +kubebuilder:validation:MaxLength=63
103+
// +kubebuilder:validation:Pattern=`^(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])?$`
104+
type LabelValue string
105+
106+
// InferencePoolStatus defines the observed state of InferencePool
107+
type InferencePoolStatus struct {
108+
// Conditions track the state of the InferencePool.
109+
Conditions []metav1.Condition `json:"conditions,omitempty"`
110+
}
111+
112+
func init() {
113+
SchemeBuilder.Register(&InferencePool{}, &InferencePoolList{})
114+
}

0 commit comments

Comments
 (0)