kubernetes-sigs
diff --git a/api/v1alpha1/inferencepool_types.go b/api/v1alpha1/inferencepool_types.go
Lines changed: 28 additions & 0 deletions
diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go
Lines changed: 48 additions & 0 deletions
diff --git a/client-go/applyconfiguration/api/v1alpha1/inferencepoolspec.go b/client-go/applyconfiguration/api/v1alpha1/inferencepoolspec.go
Lines changed: 11 additions & 2 deletions
diff --git a/client-go/applyconfiguration/api/v1alpha1/modelserverattributes.go b/client-go/applyconfiguration/api/v1alpha1/modelserverattributes.go
Lines changed: 60 additions & 0 deletions
diff --git a/client-go/applyconfiguration/utils.go b/client-go/applyconfiguration/utils.go
Lines changed: 2 additions & 0 deletions
diff --git a/config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml b/config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml
Lines changed: 17 additions & 0 deletions
@@ -61,6 +61,34 @@ type InferencePoolSpec struct {
 	// +kubebuilder:validation:Maximum=65535
 	// +kubebuilder:validation:Required
 	TargetPortNumber int32 `json:"targetPortNumber"`
+
+	// ModelServerAttributes defines model server specific attributes.
+	//
+	// +kubebuilder:validation:Required
+	ModelServerAttributes ModelServerAttributes `json:"modelServerAttributes"`
+}
+
+// ModelServerAttributes groups the model-server-specific settings of an InferencePool:
+// whether LoRA serving is enabled and which model server type is in use.
+type ModelServerAttributes struct {
+	// LoRAEnabled indicates whether LoRA serving is enabled. If it is, the inference extension
+	// will scrape LoRA metrics and apply the LoRA affinity strategy.
+	//
+	// +optional
+	// +kubebuilder:default=false
+	LoRAEnabled bool `json:"loraEnabled,omitempty"`
+
+	// NOTE(review): VLLM and Triton are value-typed structs, so an unset field cannot be told
+	// apart from an empty one, and no validation marker in this struct enforces the
+	// "exactly one" rule stated below. Consider pointer fields plus a CEL XValidation rule.
+	// This note is intentionally detached from any field so it is not part of a field's doc.
+
+	// VLLM is one of the supported model server types.
+	// One and only one of the model server types (VLLM, Triton) MUST be specified.
+	//
+	// +optional
+	VLLM VLLM `json:"vllm,omitempty"`
+
+	// Triton is one of the supported model server types.
+	// One and only one of the model server types (VLLM, Triton) MUST be specified.
+	//
+	// +optional
+	Triton Triton `json:"triton,omitempty"`
+}
+
+// VLLM is the type of the ModelServerAttributes.VLLM field, one of the model server types.
+// It currently declares no fields.
+type VLLM struct {
+}
+
+// Triton is the type of the ModelServerAttributes.Triton field, one of the model server types.
+// It currently declares no fields.
+type Triton struct {
 }
 
 // LabelKey was originally copied from: https://github.com/kubernetes-sigs/gateway-api/blob/99a3934c6bc1ce0874f3a4c5f20cafd8977ffcb4/apis/v1/shared_types.go#L694-L731
 
@@ -39,6 +39,22 @@ spec:
           spec:
             description: InferencePoolSpec defines the desired state of InferencePool
             properties:
+              modelServerAttributes:
+                description: ModelServerAttributes defines model server specific attributes.
+                properties:
+                  loraEnabled:
+                    default: false
+                    description: |-
+                      If LoRA serving is enabled, the inference extension will scrape LoRA metrics and apply LoRA
+                      affinity strategy.
+                    type: boolean
+                  triton:
+                    type: object
+                  vllm:
+                    description: One and only one of the following model server types
+                      MUST be specified.
+                    type: object
+                type: object
               selector:
                 additionalProperties:
                   description: |-
@@ -74,6 +90,7 @@ spec:
                 minimum: 1
                 type: integer
             required:
+            - modelServerAttributes
             - selector
             - targetPortNumber
             type: object