Skip to content

Commit b0df805

Browse files
committed
Add model server configurations to InferencePool
1 parent adad31c commit b0df805

File tree

6 files changed

+166
-2
lines changed

6 files changed

+166
-2
lines changed

api/v1alpha1/inferencepool_types.go

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,34 @@ type InferencePoolSpec struct {
6161
// +kubebuilder:validation:Maximum=65535
6262
// +kubebuilder:validation:Required
6363
TargetPortNumber int32 `json:"targetPortNumber"`
64+
65+
// ModelServerAttributes defines model server specific attributes.
66+
//
67+
// +kubebuilder:validation:Required
68+
ModelServerAttributes ModelServerAttributes `json:"modelServerAttributes"`
69+
}
70+
71+
// NOTE(review): VLLM and Triton are value-typed structs whose JSON tags lack
// omitempty, so encoding/json writes both keys on every marshal and the
// "exactly one" rule below cannot be read off this Go type. Consider pointer
// fields with omitempty plus a validation rule — TODO confirm with API owners.

// ModelServerAttributes defines model server specific attributes.
type ModelServerAttributes struct {
72+
// LoRAEnabled reports whether LoRA serving is enabled. When it is, the inference extension
73+
// will scrape LoRA metrics and apply the LoRA affinity strategy.
74+
//
75+
// +optional
76+
// +kubebuilder:default=false
77+
LoRAEnabled bool `json:"loraEnabled"`
78+
79+
// VLLM is the vLLM model server type. Exactly one of the model server types (VLLM, Triton) MUST be specified.
80+
//
81+
// +optional
82+
VLLM VLLM `json:"vllm"`
83+
// Triton is the Triton model server type. Exactly one of the model server types (VLLM, Triton) MUST be specified.
84+
// +optional
85+
Triton Triton `json:"triton"`
86+
}
87+
88+
// VLLM is the vLLM model server type referenced by ModelServerAttributes.
// It currently declares no fields.
type VLLM struct {
89+
}
90+
91+
// Triton is the Triton model server type referenced by ModelServerAttributes.
// It currently declares no fields.
type Triton struct {
6492
}
6593

6694
// LabelKey was originally copied from: https://github.com/kubernetes-sigs/gateway-api/blob/99a3934c6bc1ce0874f3a4c5f20cafd8977ffcb4/apis/v1/shared_types.go#L694-L731

api/v1alpha1/zz_generated.deepcopy.go

Lines changed: 48 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

client-go/applyconfiguration/api/v1alpha1/inferencepoolspec.go

Lines changed: 11 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

client-go/applyconfiguration/api/v1alpha1/modelserverattributes.go

Lines changed: 60 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

client-go/applyconfiguration/utils.go

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,22 @@ spec:
3939
spec:
4040
description: InferencePoolSpec defines the desired state of InferencePool
4141
properties:
42+
modelServerAttributes:
43+
description: ModelServerAttributes defines model server specific attributes.
44+
properties:
45+
loraEnabled:
46+
default: false
47+
description: |-
48+
If LoRA serving is enabled, the inference extension will scrape LoRA metrics and apply LoRA
49+
affinity strategy.
50+
type: boolean
51+
triton:
52+
type: object
53+
vllm:
54+
description: One and only one of the following model server types
55+
MUST be specified.
56+
type: object
57+
type: object
4258
selector:
4359
additionalProperties:
4460
description: |-
@@ -74,6 +90,7 @@ spec:
7490
minimum: 1
7591
type: integer
7692
required:
93+
- modelServerAttributes
7794
- selector
7895
- targetPortNumber
7996
type: object

0 commit comments

Comments
 (0)