From 050d70790e1e806111c90b5bc0caf502ec82f2b3 Mon Sep 17 00:00:00 2001 From: Kellen Swain Date: Fri, 10 Jan 2025 18:12:49 +0000 Subject: [PATCH 1/6] Feedback updates + code gen --- api/v1alpha1/inferencemodel_types.go | 35 +++++++++++++++++-- api/v1alpha1/inferencepool_types.go | 31 ++++++++++++++++ ...e.networking.x-k8s.io_inferencemodels.yaml | 21 +++++++++-- ...ce.networking.x-k8s.io_inferencepools.yaml | 17 ++++++++- 4 files changed, 98 insertions(+), 6 deletions(-) diff --git a/api/v1alpha1/inferencemodel_types.go b/api/v1alpha1/inferencemodel_types.go index 631031815..242024289 100644 --- a/api/v1alpha1/inferencemodel_types.go +++ b/api/v1alpha1/inferencemodel_types.go @@ -144,7 +144,7 @@ const ( // to exist at request time, the error is processed by the Inference Gateway // and emitted on the appropriate InferenceModel object. type TargetModel struct { - // Name is the name of the adapter as expected by the ModelServer. + // Name is the name of the adapter or base model, as expected by the ModelServer. // // +kubebuilder:validation:MaxLength=253 // +kubebuilder:validation:Required @@ -174,10 +174,41 @@ type TargetModel struct { // InferenceModelStatus defines the observed state of InferenceModel type InferenceModelStatus struct { - // Conditions track the state of the InferencePool. + // Conditions track the state of the InferenceModel. + // + // Known condition types are: + // + // * "Ready" + // + // +optional + // +listType=map + // +listMapKey=type + // +kubebuilder:validation:MaxItems=8 + // +kubebuilder:default={{type: "Ready", status: "Unknown", reason:"Pending", message:"Waiting for controller", lastTransitionTime: "1970-01-01T00:00:00Z"}} Conditions []metav1.Condition `json:"conditions,omitempty"` } +// InferenceModelConditionType is a type of condition for the InferenceModel. +type InferenceModelConditionType string + +// InferenceModelConditionReason is the reason for a given InferenceModelConditionType. +type InferenceModelConditionReason string + +const ( + // This condition indicates whether the model is ready for traffic or not, and why. + ModelConditionReady InferenceModelConditionType = "Ready" + + // Desired state. Model is ready for serving with no conflicts or issues. + ModelReasonReady InferenceModelConditionReason = "Ready" + + // This reason is used when a given ModelName already exists within the pool. + // Details about naming conflict resolution are on the ModelName field itself. + ModelReasonNameInUse InferenceModelConditionReason = "ModelNameInUse" + + // This reason is the initial state, and indicates that the controller has not yet reconciled the InferenceModel. + ModelReasonPending InferenceModelConditionReason = "Pending" +) + func init() { SchemeBuilder.Register(&InferenceModel{}, &InferenceModelList{}) } diff --git a/api/v1alpha1/inferencepool_types.go b/api/v1alpha1/inferencepool_types.go index 852c72674..2938ab473 100644 --- a/api/v1alpha1/inferencepool_types.go +++ b/api/v1alpha1/inferencepool_types.go @@ -68,6 +68,7 @@ type InferencePoolSpec struct { // // LabelKey is the key of a label. This is used for validation // of maps. This matches the Kubernetes "qualified name" validation that is used for labels. +// // Labels are case sensitive, so: my-label and My-Label are considered distinct. // // Valid values include: // @@ -106,9 +107,39 @@ type LabelValue string // InferencePoolStatus defines the observed state of InferencePool type InferencePoolStatus struct { // Conditions track the state of the InferencePool. + // + // Known condition types are: + // + // * "Ready" + // + // +optional + // +listType=map + // +listMapKey=type + // +kubebuilder:validation:MaxItems=8 + // +kubebuilder:default={{type: "Ready", status: "Unknown", reason:"Pending", message:"Waiting for controller", lastTransitionTime: "1970-01-01T00:00:00Z"}} Conditions []metav1.Condition `json:"conditions,omitempty"` } +// InferencePoolConditionType is a type of condition for the InferencePool +type InferencePoolConditionType string + +// InferencePoolConditionReason is the reason for a given InferencePoolConditionType +type InferencePoolConditionReason string + +const ( + // This condition indicates whether the pool is ready for traffic or not, and why. + PoolConditionReady InferencePoolConditionType = "Ready" + + // Desired state. The pool and its components are initialized and ready for traffic. + PoolReasonReady InferencePoolConditionReason = "Ready" + + // This reason is used when the EPP has not yet passed health checks, or has started failing them. + PoolReasonEPPNotHealthy InferencePoolConditionReason = "EndpointPickerNotHealthy" + + // This reason is the initial state, and indicates that the controller has not yet reconciled this pool. + PoolReasonPending InferencePoolConditionReason = "Pending" +) + func init() { SchemeBuilder.Register(&InferencePool{}, &InferencePoolList{}) } diff --git a/config/crd/bases/inference.networking.x-k8s.io_inferencemodels.yaml b/config/crd/bases/inference.networking.x-k8s.io_inferencemodels.yaml index 7fe1d561a..226bda1f8 100644 --- a/config/crd/bases/inference.networking.x-k8s.io_inferencemodels.yaml +++ b/config/crd/bases/inference.networking.x-k8s.io_inferencemodels.yaml @@ -116,8 +116,8 @@ spec: and emitted on the appropriate InferenceModel object. properties: name: - description: Name is the name of the adapter as expected by - the ModelServer. + description: Name is the name of the adapter or base model, + as expected by the ModelServer. maxLength: 253 type: string weight: @@ -154,7 +154,18 @@ spec: description: InferenceModelStatus defines the observed state of InferenceModel properties: conditions: - description: Conditions track the state of the InferencePool. + default: + - lastTransitionTime: "1970-01-01T00:00:00Z" + message: Waiting for controller + reason: Pending + status: Unknown + type: Ready + description: |- + Conditions track the state of the InferenceModel. + + Known condition types are: + + * "Ready" items: description: Condition contains details for one aspect of the current state of this API Resource. @@ -209,7 +220,11 @@ spec: - status - type type: object + maxItems: 8 type: array + x-kubernetes-list-map-keys: + - type + x-kubernetes-list-type: map type: object type: object served: true diff --git a/config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml b/config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml index d4500a135..de5f40bc4 100644 --- a/config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml +++ b/config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml @@ -81,7 +81,18 @@ spec: description: InferencePoolStatus defines the observed state of InferencePool properties: conditions: - description: Conditions track the state of the InferencePool. + default: + - lastTransitionTime: "1970-01-01T00:00:00Z" + message: Waiting for controller + reason: Pending + status: Unknown + type: Ready + description: |- + Conditions track the state of the InferencePool. + + Known condition types are: + + * "Ready" items: description: Condition contains details for one aspect of the current state of this API Resource. @@ -136,7 +147,11 @@ spec: - status - type type: object + maxItems: 8 type: array + x-kubernetes-list-map-keys: + - type + x-kubernetes-list-type: map type: object type: object served: true From a9f18ceb623a353831f78d51da4f805c7d15ae89 Mon Sep 17 00:00:00 2001 From: Kellen Swain Date: Fri, 10 Jan 2025 20:21:01 +0000 Subject: [PATCH 2/6] Adding explanations for condition reasons --- api/v1alpha1/inferencemodel_types.go | 15 ++++++++++++++- api/v1alpha1/inferencepool_types.go | 15 ++++++++++++++- 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/api/v1alpha1/inferencemodel_types.go b/api/v1alpha1/inferencemodel_types.go index 242024289..f5e92352a 100644 --- a/api/v1alpha1/inferencemodel_types.go +++ b/api/v1alpha1/inferencemodel_types.go @@ -195,7 +195,20 @@ type InferenceModelConditionType string type InferenceModelConditionReason string const ( - // This condition indicates whether the model is ready for traffic or not, and why. + // This condition indicates if the model is ready to accept traffic, and if not, why. + // + // Possible reasons for this condition to be True are: + // + // * "Ready" + // + // Possible reasons for this condition to be False are: + // + // * "ModelNameInUse" + // + // Possible reasons for this condition to be Unknown are: + // + // * "Pending" + // ModelConditionReady InferenceModelConditionType = "Ready" // Desired state. Model is ready for serving with no conflicts or issues. diff --git a/api/v1alpha1/inferencepool_types.go b/api/v1alpha1/inferencepool_types.go index 2938ab473..2e909f1d2 100644 --- a/api/v1alpha1/inferencepool_types.go +++ b/api/v1alpha1/inferencepool_types.go @@ -127,7 +127,20 @@ type InferencePoolConditionType string type InferencePoolConditionReason string const ( - // This condition indicates whether the pool is ready for traffic or not, and why. + // This condition indicates if the pool is ready to accept traffic, and if not, why. + // + // Possible reasons for this condition to be True are: + // + // * "Ready" + // + // Possible reasons for this condition to be False are: + // + // * "EndpointPickerNotHealthy" + // + // Possible reasons for this condition to be Unknown are: + // + // * "Pending" + // PoolConditionReady InferencePoolConditionType = "Ready" // Desired state. The pool and its components are initialized and ready for traffic. From 5fab01947e2a5d7687ca20cb279e7f7562abe471 Mon Sep 17 00:00:00 2001 From: Kellen Swain Date: Fri, 10 Jan 2025 20:22:26 +0000 Subject: [PATCH 3/6] typo fix --- api/v1alpha1/inferencepool_types.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/v1alpha1/inferencepool_types.go b/api/v1alpha1/inferencepool_types.go index 2e909f1d2..a9e15d311 100644 --- a/api/v1alpha1/inferencepool_types.go +++ b/api/v1alpha1/inferencepool_types.go @@ -68,7 +68,7 @@ type InferencePoolSpec struct { // // LabelKey is the key of a label. This is used for validation // of maps. This matches the Kubernetes "qualified name" validation that is used for labels. -// // Labels are case sensitive, so: my-label and My-Label are considered distinct. +// Labels are case sensitive, so: my-label and My-Label are considered distinct. // // Valid values include: // From 9942311cf2b2715e34e0a97b7e864acc1f8a8e4c Mon Sep 17 00:00:00 2001 From: Kellen Swain Date: Wed, 15 Jan 2025 21:05:29 +0000 Subject: [PATCH 4/6] Updating Condition Type --- api/v1alpha1/inferencemodel_types.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/api/v1alpha1/inferencemodel_types.go b/api/v1alpha1/inferencemodel_types.go index f5e92352a..7667789c1 100644 --- a/api/v1alpha1/inferencemodel_types.go +++ b/api/v1alpha1/inferencemodel_types.go @@ -178,7 +178,7 @@ type InferenceModelStatus struct { // // Known condition types are: // - // * "Ready" + // * "Accepted" // // +optional // +listType=map @@ -195,11 +195,11 @@ type InferenceModelConditionType string type InferenceModelConditionReason string const ( - // This condition indicates if the model is ready to accept traffic, and if not, why. + // This condition indicates if the model config is accepted, and if not, why. // // Possible reasons for this condition to be True are: // - // * "Ready" + // * "Accepted" // // Possible reasons for this condition to be False are: // @@ -209,10 +209,10 @@ const ( // // * "Pending" // - ModelConditionReady InferenceModelConditionType = "Ready" + ModelConditionReady InferenceModelConditionType = "Accepted" - // Desired state. Model is ready for serving with no conflicts or issues. - ModelReasonReady InferenceModelConditionReason = "Ready" + // Desired state. Model conforms to the state of the pool. + ModelReasonReady InferenceModelConditionReason = "Accepted" // This reason is used when a given ModelName already exists within the pool. // Details about naming conflict resolution are on the ModelName field itself. From 34bd2b26431d29eb8ad23a66c19083f1b4b857c2 Mon Sep 17 00:00:00 2001 From: Kellen Swain Date: Wed, 15 Jan 2025 21:08:24 +0000 Subject: [PATCH 5/6] generated manifests --- .../bases/inference.networking.x-k8s.io_inferencemodels.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/crd/bases/inference.networking.x-k8s.io_inferencemodels.yaml b/config/crd/bases/inference.networking.x-k8s.io_inferencemodels.yaml index 226bda1f8..ffdceddb1 100644 --- a/config/crd/bases/inference.networking.x-k8s.io_inferencemodels.yaml +++ b/config/crd/bases/inference.networking.x-k8s.io_inferencemodels.yaml @@ -165,7 +165,7 @@ spec: Known condition types are: - * "Ready" + * "Accepted" items: description: Condition contains details for one aspect of the current state of this API Resource. From 4fb9e22617fbc327d4a350e0ad00042b754f59df Mon Sep 17 00:00:00 2001 From: Kellen Swain Date: Wed, 15 Jan 2025 22:18:29 +0000 Subject: [PATCH 6/6] var name update --- api/v1alpha1/inferencemodel_types.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/api/v1alpha1/inferencemodel_types.go b/api/v1alpha1/inferencemodel_types.go index 7667789c1..dea51ba45 100644 --- a/api/v1alpha1/inferencemodel_types.go +++ b/api/v1alpha1/inferencemodel_types.go @@ -209,10 +209,10 @@ const ( // // * "Pending" // - ModelConditionReady InferenceModelConditionType = "Accepted" + ModelConditionAccepted InferenceModelConditionType = "Accepted" // Desired state. Model conforms to the state of the pool. - ModelReasonReady InferenceModelConditionReason = "Accepted" + ModelReasonAccepted InferenceModelConditionReason = "Accepted" // This reason is used when a given ModelName already exists within the pool. // Details about naming conflict resolution are on the ModelName field itself.