From 78be0d10de3ebe68452c1e3d9f326c5c389bfd35 Mon Sep 17 00:00:00 2001
From: Zhonghu Xu <xuzhonghu@huawei.com>
Date: Tue, 25 Feb 2025 14:30:55 +0800
Subject: [PATCH 1/3] remove v1alpha1

---
 api/v1alpha1/doc.go                   |  23 --
 api/v1alpha1/groupversion_info.go     |  45 ----
 api/v1alpha1/inferencemodel_types.go  | 234 -----------------
 api/v1alpha1/inferencepool_types.go   | 238 -----------------
 api/v1alpha1/zz_generated.deepcopy.go | 361 --------------------------
 5 files changed, 901 deletions(-)
 delete mode 100644 api/v1alpha1/doc.go
 delete mode 100644 api/v1alpha1/groupversion_info.go
 delete mode 100644 api/v1alpha1/inferencemodel_types.go
 delete mode 100644 api/v1alpha1/inferencepool_types.go
 delete mode 100644 api/v1alpha1/zz_generated.deepcopy.go

diff --git a/api/v1alpha1/doc.go b/api/v1alpha1/doc.go
deleted file mode 100644
index 8e970cedf..000000000
--- a/api/v1alpha1/doc.go
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
-Copyright 2024 The Kubernetes Authors.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-// Package v1alpha1 contains API Schema definitions for the
-// inference.networking.x-k8s.io API group.
-//
-// +k8s:openapi-gen=true
-// +kubebuilder:object:generate=true
-// +groupName=inference.networking.x-k8s.io
-package v1alpha1
diff --git a/api/v1alpha1/groupversion_info.go b/api/v1alpha1/groupversion_info.go
deleted file mode 100644
index 8c0a449f6..000000000
--- a/api/v1alpha1/groupversion_info.go
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
-Copyright 2024 The Kubernetes Authors.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-// Package v1alpha1 contains API Schema definitions for the gateway v1alpha1 API group
-// +kubebuilder:object:generate=true
-// +groupName=inference.networking.x-k8s.io
-package v1alpha1
-
-import (
-	"k8s.io/apimachinery/pkg/runtime/schema"
-	"sigs.k8s.io/controller-runtime/pkg/scheme"
-)
-
-var (
-	// GroupVersion is group version used to register these objects
-	GroupVersion = schema.GroupVersion{Group: "inference.networking.x-k8s.io", Version: "v1alpha1"}
-
-	// SchemeGroupVersion is alias to GroupVersion for client-go libraries.
-	// It is required by pkg/client/informers/externalversions/...
-	SchemeGroupVersion = GroupVersion
-
-	// SchemeBuilder is used to add go types to the GroupVersionKind scheme
-	SchemeBuilder = &scheme.Builder{GroupVersion: GroupVersion}
-
-	// AddToScheme adds the types in this group-version to the given scheme.
-	AddToScheme = SchemeBuilder.AddToScheme
-)
-
-// Resource is required by pkg/client/listers/...
-func Resource(resource string) schema.GroupResource {
-	return GroupVersion.WithResource(resource).GroupResource()
-}
diff --git a/api/v1alpha1/inferencemodel_types.go b/api/v1alpha1/inferencemodel_types.go
deleted file mode 100644
index f171c10ea..000000000
--- a/api/v1alpha1/inferencemodel_types.go
+++ /dev/null
@@ -1,234 +0,0 @@
-/*
-Copyright 2024 The Kubernetes Authors.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-package v1alpha1
-
-import (
-	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
-)
-
-// InferenceModel is the Schema for the InferenceModels API.
-//
-// +kubebuilder:object:root=true
-// +kubebuilder:subresource:status
-// +genclient
-type InferenceModel struct {
-	metav1.TypeMeta   `json:",inline"`
-	metav1.ObjectMeta `json:"metadata,omitempty"`
-
-	Spec   InferenceModelSpec   `json:"spec,omitempty"`
-	Status InferenceModelStatus `json:"status,omitempty"`
-}
-
-// InferenceModelList contains a list of InferenceModel.
-//
-// +kubebuilder:object:root=true
-type InferenceModelList struct {
-	metav1.TypeMeta `json:",inline"`
-	metav1.ListMeta `json:"metadata,omitempty"`
-	Items           []InferenceModel `json:"items"`
-}
-
-// InferenceModelSpec represents the desired state of a specific model use case. This resource is
-// managed by the "Inference Workload Owner" persona.
-//
-// The Inference Workload Owner persona is someone that trains, verifies, and
-// leverages a large language model from a model frontend, drives the lifecycle
-// and rollout of new versions of those models, and defines the specific
-// performance and latency goals for the model. These workloads are
-// expected to operate within an InferencePool sharing compute capacity with other
-// InferenceModels, defined by the Inference Platform Admin.
-//
-// InferenceModel's modelName (not the ObjectMeta name) is unique for a given InferencePool,
-// if the name is reused, an error will be shown on the status of a
-// InferenceModel that attempted to reuse. The oldest InferenceModel, based on
-// creation timestamp, will be selected to remain valid. In the event of a race
-// condition, one will be selected at random.
-type InferenceModelSpec struct {
-	// ModelName is the name of the model as it will be set in the "model" parameter for an incoming request.
-	// ModelNames must be unique for a referencing InferencePool
-	// (names can be reused for a different pool in the same cluster).
-	// The modelName with the oldest creation timestamp is retained, and the incoming
-	// InferenceModel is sets the Ready status to false with a corresponding reason.
-	// In the rare case of a race condition, one Model will be selected randomly to be considered valid, and the other rejected.
-	// Names can be reserved without an underlying model configured in the pool.
-	// This can be done by specifying a target model and setting the weight to zero,
-	// an error will be returned specifying that no valid target model is found.
-	//
-	// +kubebuilder:validation:MaxLength=256
-	// +kubebuilder:validation:Required
-	ModelName string `json:"modelName"`
-
-	// Criticality defines how important it is to serve the model compared to other models referencing the same pool.
-	// Criticality impacts how traffic is handled in resource constrained situations. It handles this by
-	// queuing or rejecting requests of lower criticality. InferenceModels of an equivalent Criticality will
-	// fairly share resources over throughput of tokens. In the future, the metric used to calculate fairness,
-	// and the proportionality of fairness will be configurable.
-	//
-	// Default values for this field will not be set, to allow for future additions of new field that may 'one of' with this field.
-	// Any implementations that may consume this field may treat an unset value as the 'Standard' range.
-	// +optional
-	Criticality *Criticality `json:"criticality,omitempty"`
-
-	// TargetModels allow multiple versions of a model for traffic splitting.
-	// If not specified, the target model name is defaulted to the modelName parameter.
-	// modelName is often in reference to a LoRA adapter.
-	//
-	// +optional
-	// +kubebuilder:validation:MaxItems=10
-	// +kubebuilder:validation:XValidation:message="Weights should be set for all models, or none of the models.",rule="self.all(model, has(model.weight)) || self.all(model, !has(model.weight))"
-	TargetModels []TargetModel `json:"targetModels,omitempty"`
-
-	// PoolRef is a reference to the inference pool, the pool must exist in the same namespace.
-	//
-	// +kubebuilder:validation:Required
-	PoolRef PoolObjectReference `json:"poolRef"`
-}
-
-// PoolObjectReference identifies an API object within the namespace of the
-// referrer.
-type PoolObjectReference struct {
-	// Group is the group of the referent.
-	//
-	// +optional
-	// +kubebuilder:default="inference.networking.x-k8s.io"
-	// +kubebuilder:validation:MaxLength=253
-	// +kubebuilder:validation:Pattern=`^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$`
-	Group string `json:"group,omitempty"`
-
-	// Kind is kind of the referent. For example "InferencePool".
-	//
-	// +optional
-	// +kubebuilder:default="InferencePool"
-	// +kubebuilder:validation:MinLength=1
-	// +kubebuilder:validation:MaxLength=63
-	// +kubebuilder:validation:Pattern=`^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$`
-	Kind string `json:"kind,omitempty"`
-
-	// Name is the name of the referent.
-	//
-	// +kubebuilder:validation:MinLength=1
-	// +kubebuilder:validation:MaxLength=253
-	// +kubebuilder:validation:Required
-	Name string `json:"name"`
-}
-
-// Criticality defines how important it is to serve the model compared to other models.
-// Criticality is intentionally a bounded enum to contain the possibilities that need to be supported by the load balancing algorithm. Any reference to the Criticality field must be optional(use a pointer), and set no default.
-// This allows us to union this with a oneOf field in the future should we wish to adjust/extend this behavior.
-// +kubebuilder:validation:Enum=Critical;Standard;Sheddable
-type Criticality string
-
-const (
-	// Critical defines the highest level of criticality. Requests to this band will be shed last.
-	Critical Criticality = "Critical"
-
-	// Standard defines the base criticality level and is more important than Sheddable but less
-	// important than Critical. Requests in this band will be shed before critical traffic.
-	// Most models are expected to fall within this band.
-	Standard Criticality = "Standard"
-
-	// Sheddable defines the lowest level of criticality. Requests to this band will be shed before
-	// all other bands.
-	Sheddable Criticality = "Sheddable"
-)
-
-// TargetModel represents a deployed model or a LoRA adapter. The
-// Name field is expected to match the name of the LoRA adapter
-// (or base model) as it is registered within the model server. Inference
-// Gateway assumes that the model exists on the model server and it's the
-// responsibility of the user to validate a correct match. Should a model fail
-// to exist at request time, the error is processed by the Inference Gateway
-// and emitted on the appropriate InferenceModel object.
-type TargetModel struct {
-	// Name is the name of the adapter or base model, as expected by the ModelServer.
-	//
-	// +kubebuilder:validation:MaxLength=253
-	// +kubebuilder:validation:Required
-	Name string `json:"name"`
-
-	// Weight is used to determine the proportion of traffic that should be
-	// sent to this model when multiple target models are specified.
-	//
-	// Weight defines the proportion of requests forwarded to the specified
-	// model. This is computed as weight/(sum of all weights in this
-	// TargetModels list). For non-zero values, there may be some epsilon from
-	// the exact proportion defined here depending on the precision an
-	// implementation supports. Weight is not a percentage and the sum of
-	// weights does not need to equal 100.
-	//
-	// If a weight is set for any targetModel, it must be set for all targetModels.
-	// Conversely weights are optional, so long as ALL targetModels do not specify a weight.
-	//
-	// +optional
-	// +kubebuilder:validation:Minimum=0
-	// +kubebuilder:validation:Maximum=1000000
-	Weight *int32 `json:"weight,omitempty"`
-}
-
-// InferenceModelStatus defines the observed state of InferenceModel
-type InferenceModelStatus struct {
-	// Conditions track the state of the InferenceModel.
-	//
-	// Known condition types are:
-	//
-	// * "Accepted"
-	//
-	// +optional
-	// +listType=map
-	// +listMapKey=type
-	// +kubebuilder:validation:MaxItems=8
-	// +kubebuilder:default={{type: "Ready", status: "Unknown", reason:"Pending", message:"Waiting for controller", lastTransitionTime: "1970-01-01T00:00:00Z"}}
-	Conditions []metav1.Condition `json:"conditions,omitempty"`
-}
-
-// InferenceModelConditionType is a type of condition for the InferenceModel.
-type InferenceModelConditionType string
-
-// InferenceModelConditionReason is the reason for a given InferenceModelConditionType.
-type InferenceModelConditionReason string
-
-const (
-	// ModelConditionAccepted indicates if the model config is accepted, and if not, why.
-	//
-	// Possible reasons for this condition to be True are:
-	//
-	// * "Accepted"
-	//
-	// Possible reasons for this condition to be False are:
-	//
-	// * "ModelNameInUse"
-	//
-	// Possible reasons for this condition to be Unknown are:
-	//
-	// * "Pending"
-	//
-	ModelConditionAccepted InferenceModelConditionType = "Accepted"
-
-	// ModelReasonAccepted is the desired state. Model conforms to the state of the pool.
-	ModelReasonAccepted InferenceModelConditionReason = "Accepted"
-
-	// ModelReasonNameInUse is used when a given ModelName already exists within the pool.
-	// Details about naming conflict resolution are on the ModelName field itself.
-	ModelReasonNameInUse InferenceModelConditionReason = "ModelNameInUse"
-
-	// ModelReasonPending is the initial state, and indicates that the controller has not yet reconciled the InferenceModel.
-	ModelReasonPending InferenceModelConditionReason = "Pending"
-)
-
-func init() {
-	SchemeBuilder.Register(&InferenceModel{}, &InferenceModelList{})
-}
diff --git a/api/v1alpha1/inferencepool_types.go b/api/v1alpha1/inferencepool_types.go
deleted file mode 100644
index b4c95d407..000000000
--- a/api/v1alpha1/inferencepool_types.go
+++ /dev/null
@@ -1,238 +0,0 @@
-/*
-Copyright 2024 The Kubernetes Authors.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-package v1alpha1
-
-import (
-	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
-)
-
-// InferencePool is the Schema for the InferencePools API.
-//
-// +kubebuilder:object:root=true
-// +kubebuilder:subresource:status
-// +genclient
-type InferencePool struct {
-	metav1.TypeMeta   `json:",inline"`
-	metav1.ObjectMeta `json:"metadata,omitempty"`
-
-	Spec   InferencePoolSpec   `json:"spec,omitempty"`
-	Status InferencePoolStatus `json:"status,omitempty"`
-}
-
-// InferencePoolList contains a list of InferencePool.
-//
-// +kubebuilder:object:root=true
-type InferencePoolList struct {
-	metav1.TypeMeta `json:",inline"`
-	metav1.ListMeta `json:"metadata,omitempty"`
-	Items           []InferencePool `json:"items"`
-}
-
-// InferencePoolSpec defines the desired state of InferencePool
-type InferencePoolSpec struct {
-	// Selector defines a map of labels to watch model server pods
-	// that should be included in the InferencePool.
-	// In some cases, implementations may translate this field to a Service selector, so this matches the simple
-	// map used for Service selectors instead of the full Kubernetes LabelSelector type.
-	//
-	// +kubebuilder:validation:Required
-	Selector map[LabelKey]LabelValue `json:"selector"`
-
-	// TargetPortNumber defines the port number to access the selected model servers.
-	// The number must be in the range 1 to 65535.
-	//
-	// +kubebuilder:validation:Minimum=1
-	// +kubebuilder:validation:Maximum=65535
-	// +kubebuilder:validation:Required
-	TargetPortNumber int32 `json:"targetPortNumber"`
-
-	// EndpointPickerConfig specifies the configuration needed by the proxy to discover and connect to the endpoint
-	// picker service that picks endpoints for the requests routed to this pool.
-	EndpointPickerConfig `json:",inline"`
-}
-
-// EndpointPickerConfig specifies the configuration needed by the proxy to discover and connect to the endpoint picker extension.
-// This type is intended to be a union of mutually exclusive configuration options that we may add in the future.
-type EndpointPickerConfig struct {
-	// Extension configures an endpoint picker as an extension service.
-	//
-	// +kubebuilder:validation:Required
-	ExtensionRef *Extension `json:"extensionRef,omitempty"`
-}
-
-// Extension specifies how to configure an extension that runs the endpoint picker.
-type Extension struct {
-	// Reference is a reference to a service extension.
-	ExtensionReference `json:",inline"`
-
-	// ExtensionConnection configures the connection between the gateway and the extension.
-	ExtensionConnection `json:",inline"`
-}
-
-// ExtensionReference is a reference to the extension deployment.
-type ExtensionReference struct {
-	// Group is the group of the referent.
-	// When unspecified or empty string, core API group is inferred.
-	//
-	// +optional
-	// +kubebuilder:default=""
-	Group *string `json:"group,omitempty"`
-
-	// Kind is the Kubernetes resource kind of the referent. For example
-	// "Service".
-	//
-	// Defaults to "Service" when not specified.
-	//
-	// ExternalName services can refer to CNAME DNS records that may live
-	// outside of the cluster and as such are difficult to reason about in
-	// terms of conformance. They also may not be safe to forward to (see
-	// CVE-2021-25740 for more information). Implementations MUST NOT
-	// support ExternalName Services.
-	//
-	// +optional
-	// +kubebuilder:default=Service
-	Kind *string `json:"kind,omitempty"`
-
-	// Name is the name of the referent.
-	//
-	// +kubebuilder:validation:Required
-	Name string `json:"name"`
-
-	// The port number on the pods running the extension. When unspecified, implementations SHOULD infer a
-	// default value of 9002 when the Kind is Service.
-	//
-	// +kubebuilder:validation:Minimum=1
-	// +kubebuilder:validation:Maximum=65535
-	// +optional
-	TargetPortNumber *int32 `json:"targetPortNumber,omitempty"`
-}
-
-// ExtensionConnection encapsulates options that configures the connection to the extension.
-type ExtensionConnection struct {
-	// Configures how the gateway handles the case when the extension is not responsive.
-	// Defaults to failClose.
-	//
-	// +optional
-	// +kubebuilder:default="FailClose"
-	FailureMode *ExtensionFailureMode `json:"failureMode"`
-}
-
-// ExtensionFailureMode defines the options for how the gateway handles the case when the extension is not
-// responsive.
-// +kubebuilder:validation:Enum=FailOpen;FailClose
-type ExtensionFailureMode string
-
-const (
-	// FailOpen specifies that the proxy should not drop the request and forward the request to and endpoint of its picking.
-	FailOpen ExtensionFailureMode = "FailOpen"
-	// FailClose specifies that the proxy should drop the request.
-	FailClose ExtensionFailureMode = "FailClose"
-)
-
-// LabelKey was originally copied from: https://github.com/kubernetes-sigs/gateway-api/blob/99a3934c6bc1ce0874f3a4c5f20cafd8977ffcb4/apis/v1/shared_types.go#L694-L731
-// Duplicated as to not take an unexpected dependency on gw's API.
-//
-// LabelKey is the key of a label. This is used for validation
-// of maps. This matches the Kubernetes "qualified name" validation that is used for labels.
-// Labels are case sensitive, so: my-label and My-Label are considered distinct.
-//
-// Valid values include:
-//
-// * example
-// * example.com
-// * example.com/path
-// * example.com/path.html
-//
-// Invalid values include:
-//
-// * example~ - "~" is an invalid character
-// * example.com. - can not start or end with "."
-//
-// +kubebuilder:validation:MinLength=1
-// +kubebuilder:validation:MaxLength=253
-// +kubebuilder:validation:Pattern=`^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?([A-Za-z0-9][-A-Za-z0-9_.]{0,61})?[A-Za-z0-9]$`
-type LabelKey string
-
-// LabelValue is the value of a label. This is used for validation
-// of maps. This matches the Kubernetes label validation rules:
-// * must be 63 characters or less (can be empty),
-// * unless empty, must begin and end with an alphanumeric character ([a-z0-9A-Z]),
-// * could contain dashes (-), underscores (_), dots (.), and alphanumerics between.
-//
-// Valid values include:
-//
-// * MyValue
-// * my.name
-// * 123-my-value
-//
-// +kubebuilder:validation:MinLength=0
-// +kubebuilder:validation:MaxLength=63
-// +kubebuilder:validation:Pattern=`^(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])?$`
-type LabelValue string
-
-// InferencePoolStatus defines the observed state of InferencePool
-type InferencePoolStatus struct {
-	// Conditions track the state of the InferencePool.
-	//
-	// Known condition types are:
-	//
-	// * "Ready"
-	//
-	// +optional
-	// +listType=map
-	// +listMapKey=type
-	// +kubebuilder:validation:MaxItems=8
-	// +kubebuilder:default={{type: "Ready", status: "Unknown", reason:"Pending", message:"Waiting for controller", lastTransitionTime: "1970-01-01T00:00:00Z"}}
-	Conditions []metav1.Condition `json:"conditions,omitempty"`
-}
-
-// InferencePoolConditionType is a type of condition for the InferencePool
-type InferencePoolConditionType string
-
-// InferencePoolConditionReason is the reason for a given InferencePoolConditionType
-type InferencePoolConditionReason string
-
-const (
-	// PoolConditionReady indicates if the pool is ready to accept traffic, and if not, why.
-	//
-	// Possible reasons for this condition to be True are:
-	//
-	// * "Ready"
-	//
-	// Possible reasons for this condition to be False are:
-	//
-	// * "EndpointPickerNotHealthy"
-	//
-	// Possible reasons for this condition to be Unknown are:
-	//
-	// * "Pending"
-	//
-	PoolConditionReady InferencePoolConditionType = "Ready"
-
-	// PoolReasonReady is the desired state. The pool and its components are initialized and ready for traffic.
-	PoolReasonReady InferencePoolConditionReason = "Ready"
-
-	// PoolReasonEPPNotHealthy is used when the EPP has not yet passed health checks, or has started failing them.
-	PoolReasonEPPNotHealthy InferencePoolConditionReason = "EndpointPickerNotHealthy"
-
-	// PoolReasonPending is the initial state, and indicates that the controller has not yet reconciled this pool.
-	PoolReasonPending InferencePoolConditionReason = "Pending"
-)
-
-func init() {
-	SchemeBuilder.Register(&InferencePool{}, &InferencePoolList{})
-}
diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go
deleted file mode 100644
index fd55379ea..000000000
--- a/api/v1alpha1/zz_generated.deepcopy.go
+++ /dev/null
@@ -1,361 +0,0 @@
-//go:build !ignore_autogenerated
-
-/*
-Copyright 2024 The Kubernetes Authors.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-// Code generated by controller-gen. DO NOT EDIT.
-
-package v1alpha1
-
-import (
-	"k8s.io/apimachinery/pkg/apis/meta/v1"
-	runtime "k8s.io/apimachinery/pkg/runtime"
-)
-
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *EndpointPickerConfig) DeepCopyInto(out *EndpointPickerConfig) {
-	*out = *in
-	if in.ExtensionRef != nil {
-		in, out := &in.ExtensionRef, &out.ExtensionRef
-		*out = new(Extension)
-		(*in).DeepCopyInto(*out)
-	}
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EndpointPickerConfig.
-func (in *EndpointPickerConfig) DeepCopy() *EndpointPickerConfig {
-	if in == nil {
-		return nil
-	}
-	out := new(EndpointPickerConfig)
-	in.DeepCopyInto(out)
-	return out
-}
-
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *Extension) DeepCopyInto(out *Extension) {
-	*out = *in
-	in.ExtensionReference.DeepCopyInto(&out.ExtensionReference)
-	in.ExtensionConnection.DeepCopyInto(&out.ExtensionConnection)
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Extension.
-func (in *Extension) DeepCopy() *Extension {
-	if in == nil {
-		return nil
-	}
-	out := new(Extension)
-	in.DeepCopyInto(out)
-	return out
-}
-
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *ExtensionConnection) DeepCopyInto(out *ExtensionConnection) {
-	*out = *in
-	if in.FailureMode != nil {
-		in, out := &in.FailureMode, &out.FailureMode
-		*out = new(ExtensionFailureMode)
-		**out = **in
-	}
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtensionConnection.
-func (in *ExtensionConnection) DeepCopy() *ExtensionConnection {
-	if in == nil {
-		return nil
-	}
-	out := new(ExtensionConnection)
-	in.DeepCopyInto(out)
-	return out
-}
-
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *ExtensionReference) DeepCopyInto(out *ExtensionReference) {
-	*out = *in
-	if in.Group != nil {
-		in, out := &in.Group, &out.Group
-		*out = new(string)
-		**out = **in
-	}
-	if in.Kind != nil {
-		in, out := &in.Kind, &out.Kind
-		*out = new(string)
-		**out = **in
-	}
-	if in.TargetPortNumber != nil {
-		in, out := &in.TargetPortNumber, &out.TargetPortNumber
-		*out = new(int32)
-		**out = **in
-	}
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtensionReference.
-func (in *ExtensionReference) DeepCopy() *ExtensionReference {
-	if in == nil {
-		return nil
-	}
-	out := new(ExtensionReference)
-	in.DeepCopyInto(out)
-	return out
-}
-
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *InferenceModel) DeepCopyInto(out *InferenceModel) {
-	*out = *in
-	out.TypeMeta = in.TypeMeta
-	in.ObjectMeta.DeepCopyInto(&out.ObjectMeta)
-	in.Spec.DeepCopyInto(&out.Spec)
-	in.Status.DeepCopyInto(&out.Status)
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferenceModel.
-func (in *InferenceModel) DeepCopy() *InferenceModel {
-	if in == nil {
-		return nil
-	}
-	out := new(InferenceModel)
-	in.DeepCopyInto(out)
-	return out
-}
-
-// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
-func (in *InferenceModel) DeepCopyObject() runtime.Object {
-	if c := in.DeepCopy(); c != nil {
-		return c
-	}
-	return nil
-}
-
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *InferenceModelList) DeepCopyInto(out *InferenceModelList) {
-	*out = *in
-	out.TypeMeta = in.TypeMeta
-	in.ListMeta.DeepCopyInto(&out.ListMeta)
-	if in.Items != nil {
-		in, out := &in.Items, &out.Items
-		*out = make([]InferenceModel, len(*in))
-		for i := range *in {
-			(*in)[i].DeepCopyInto(&(*out)[i])
-		}
-	}
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferenceModelList.
-func (in *InferenceModelList) DeepCopy() *InferenceModelList {
-	if in == nil {
-		return nil
-	}
-	out := new(InferenceModelList)
-	in.DeepCopyInto(out)
-	return out
-}
-
-// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
-func (in *InferenceModelList) DeepCopyObject() runtime.Object {
-	if c := in.DeepCopy(); c != nil {
-		return c
-	}
-	return nil
-}
-
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *InferenceModelSpec) DeepCopyInto(out *InferenceModelSpec) {
-	*out = *in
-	if in.Criticality != nil {
-		in, out := &in.Criticality, &out.Criticality
-		*out = new(Criticality)
-		**out = **in
-	}
-	if in.TargetModels != nil {
-		in, out := &in.TargetModels, &out.TargetModels
-		*out = make([]TargetModel, len(*in))
-		for i := range *in {
-			(*in)[i].DeepCopyInto(&(*out)[i])
-		}
-	}
-	out.PoolRef = in.PoolRef
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferenceModelSpec.
-func (in *InferenceModelSpec) DeepCopy() *InferenceModelSpec {
-	if in == nil {
-		return nil
-	}
-	out := new(InferenceModelSpec)
-	in.DeepCopyInto(out)
-	return out
-}
-
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *InferenceModelStatus) DeepCopyInto(out *InferenceModelStatus) {
-	*out = *in
-	if in.Conditions != nil {
-		in, out := &in.Conditions, &out.Conditions
-		*out = make([]v1.Condition, len(*in))
-		for i := range *in {
-			(*in)[i].DeepCopyInto(&(*out)[i])
-		}
-	}
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferenceModelStatus.
-func (in *InferenceModelStatus) DeepCopy() *InferenceModelStatus {
-	if in == nil {
-		return nil
-	}
-	out := new(InferenceModelStatus)
-	in.DeepCopyInto(out)
-	return out
-}
-
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *InferencePool) DeepCopyInto(out *InferencePool) {
-	*out = *in
-	out.TypeMeta = in.TypeMeta
-	in.ObjectMeta.DeepCopyInto(&out.ObjectMeta)
-	in.Spec.DeepCopyInto(&out.Spec)
-	in.Status.DeepCopyInto(&out.Status)
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferencePool.
-func (in *InferencePool) DeepCopy() *InferencePool {
-	if in == nil {
-		return nil
-	}
-	out := new(InferencePool)
-	in.DeepCopyInto(out)
-	return out
-}
-
-// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
-func (in *InferencePool) DeepCopyObject() runtime.Object {
-	if c := in.DeepCopy(); c != nil {
-		return c
-	}
-	return nil
-}
-
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *InferencePoolList) DeepCopyInto(out *InferencePoolList) {
-	*out = *in
-	out.TypeMeta = in.TypeMeta
-	in.ListMeta.DeepCopyInto(&out.ListMeta)
-	if in.Items != nil {
-		in, out := &in.Items, &out.Items
-		*out = make([]InferencePool, len(*in))
-		for i := range *in {
-			(*in)[i].DeepCopyInto(&(*out)[i])
-		}
-	}
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferencePoolList.
-func (in *InferencePoolList) DeepCopy() *InferencePoolList {
-	if in == nil {
-		return nil
-	}
-	out := new(InferencePoolList)
-	in.DeepCopyInto(out)
-	return out
-}
-
-// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
-func (in *InferencePoolList) DeepCopyObject() runtime.Object {
-	if c := in.DeepCopy(); c != nil {
-		return c
-	}
-	return nil
-}
-
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *InferencePoolSpec) DeepCopyInto(out *InferencePoolSpec) {
-	*out = *in
-	if in.Selector != nil {
-		in, out := &in.Selector, &out.Selector
-		*out = make(map[LabelKey]LabelValue, len(*in))
-		for key, val := range *in {
-			(*out)[key] = val
-		}
-	}
-	in.EndpointPickerConfig.DeepCopyInto(&out.EndpointPickerConfig)
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferencePoolSpec.
-func (in *InferencePoolSpec) DeepCopy() *InferencePoolSpec {
-	if in == nil {
-		return nil
-	}
-	out := new(InferencePoolSpec)
-	in.DeepCopyInto(out)
-	return out
-}
-
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *InferencePoolStatus) DeepCopyInto(out *InferencePoolStatus) {
-	*out = *in
-	if in.Conditions != nil {
-		in, out := &in.Conditions, &out.Conditions
-		*out = make([]v1.Condition, len(*in))
-		for i := range *in {
-			(*in)[i].DeepCopyInto(&(*out)[i])
-		}
-	}
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferencePoolStatus.
-func (in *InferencePoolStatus) DeepCopy() *InferencePoolStatus {
-	if in == nil {
-		return nil
-	}
-	out := new(InferencePoolStatus)
-	in.DeepCopyInto(out)
-	return out
-}
-
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *PoolObjectReference) DeepCopyInto(out *PoolObjectReference) {
-	*out = *in
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PoolObjectReference.
-func (in *PoolObjectReference) DeepCopy() *PoolObjectReference {
-	if in == nil {
-		return nil
-	}
-	out := new(PoolObjectReference)
-	in.DeepCopyInto(out)
-	return out
-}
-
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *TargetModel) DeepCopyInto(out *TargetModel) {
-	*out = *in
-	if in.Weight != nil {
-		in, out := &in.Weight, &out.Weight
-		*out = new(int32)
-		**out = **in
-	}
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TargetModel.
-func (in *TargetModel) DeepCopy() *TargetModel {
-	if in == nil {
-		return nil
-	}
-	out := new(TargetModel)
-	in.DeepCopyInto(out)
-	return out
-}

From 6baca05c0c538aa5b0b9fc389329069237b20cc7 Mon Sep 17 00:00:00 2001
From: Zhonghu Xu <xuzhonghu@huawei.com>
Date: Tue, 25 Feb 2025 14:37:12 +0800
Subject: [PATCH 2/3] auto gen

---
 .../api/v1alpha1/endpointpickerconfig.go      |  38 ---
 .../api/v1alpha1/extension.go                 |  75 ------
 .../api/v1alpha1/extensionconnection.go       |  42 ----
 .../api/v1alpha1/extensionreference.go        |  65 -----
 .../api/v1alpha1/inferencemodel.go            | 224 ------------------
 .../api/v1alpha1/inferencemodelspec.go        |  74 ------
 .../api/v1alpha1/inferencemodelstatus.go      |  47 ----
 .../api/v1alpha1/inferencepool.go             | 224 ------------------
 .../api/v1alpha1/inferencepoolspec.go         |  66 ------
 .../api/v1alpha1/inferencepoolstatus.go       |  47 ----
 .../api/v1alpha1/poolobjectreference.go       |  56 -----
 .../api/v1alpha1/targetmodel.go               |  47 ----
 client-go/applyconfiguration/utils.go         |  30 +--
 client-go/clientset/versioned/clientset.go    |  13 -
 .../versioned/fake/clientset_generated.go     |   7 -
 .../clientset/versioned/fake/register.go      |   2 -
 .../clientset/versioned/scheme/register.go    |   2 -
 .../typed/api/v1alpha1/api_client.go          | 111 ---------
 .../versioned/typed/api/v1alpha1/doc.go       |  19 --
 .../versioned/typed/api/v1alpha1/fake/doc.go  |  19 --
 .../api/v1alpha1/fake/fake_api_client.go      |  43 ----
 .../api/v1alpha1/fake/fake_inferencemodel.go  |  52 ----
 .../api/v1alpha1/fake/fake_inferencepool.go   |  52 ----
 .../typed/api/v1alpha1/generated_expansion.go |  22 --
 .../typed/api/v1alpha1/inferencemodel.go      |  73 ------
 .../typed/api/v1alpha1/inferencepool.go       |  73 ------
 .../externalversions/api/interface.go         |   8 -
 .../api/v1alpha1/inferencemodel.go            |  89 -------
 .../api/v1alpha1/inferencepool.go             |  89 -------
 .../api/v1alpha1/interface.go                 |  51 ----
 .../informers/externalversions/generic.go     |   9 +-
 .../api/v1alpha1/expansion_generated.go       |  34 ---
 .../listers/api/v1alpha1/inferencemodel.go    |  69 ------
 .../listers/api/v1alpha1/inferencepool.go     |  69 ------
 cmd/epp/main.go                               |   3 -
 ...e.networking.x-k8s.io_inferencemodels.yaml | 224 ------------------
 ...ce.networking.x-k8s.io_inferencepools.yaml | 190 ---------------
 37 files changed, 2 insertions(+), 2356 deletions(-)
 delete mode 100644 client-go/applyconfiguration/api/v1alpha1/endpointpickerconfig.go
 delete mode 100644 client-go/applyconfiguration/api/v1alpha1/extension.go
 delete mode 100644 client-go/applyconfiguration/api/v1alpha1/extensionconnection.go
 delete mode 100644 client-go/applyconfiguration/api/v1alpha1/extensionreference.go
 delete mode 100644 client-go/applyconfiguration/api/v1alpha1/inferencemodel.go
 delete mode 100644 client-go/applyconfiguration/api/v1alpha1/inferencemodelspec.go
 delete mode 100644 client-go/applyconfiguration/api/v1alpha1/inferencemodelstatus.go
 delete mode 100644 client-go/applyconfiguration/api/v1alpha1/inferencepool.go
 delete mode 100644 client-go/applyconfiguration/api/v1alpha1/inferencepoolspec.go
 delete mode 100644 client-go/applyconfiguration/api/v1alpha1/inferencepoolstatus.go
 delete mode 100644 client-go/applyconfiguration/api/v1alpha1/poolobjectreference.go
 delete mode 100644 client-go/applyconfiguration/api/v1alpha1/targetmodel.go
 delete mode 100644 client-go/clientset/versioned/typed/api/v1alpha1/api_client.go
 delete mode 100644 client-go/clientset/versioned/typed/api/v1alpha1/doc.go
 delete mode 100644 client-go/clientset/versioned/typed/api/v1alpha1/fake/doc.go
 delete mode 100644 client-go/clientset/versioned/typed/api/v1alpha1/fake/fake_api_client.go
 delete mode 100644 client-go/clientset/versioned/typed/api/v1alpha1/fake/fake_inferencemodel.go
 delete mode 100644 client-go/clientset/versioned/typed/api/v1alpha1/fake/fake_inferencepool.go
 delete mode 100644 client-go/clientset/versioned/typed/api/v1alpha1/generated_expansion.go
 delete mode 100644 client-go/clientset/versioned/typed/api/v1alpha1/inferencemodel.go
 delete mode 100644 client-go/clientset/versioned/typed/api/v1alpha1/inferencepool.go
 delete mode 100644 client-go/informers/externalversions/api/v1alpha1/inferencemodel.go
 delete mode 100644 client-go/informers/externalversions/api/v1alpha1/inferencepool.go
 delete mode 100644 client-go/informers/externalversions/api/v1alpha1/interface.go
 delete mode 100644 client-go/listers/api/v1alpha1/expansion_generated.go
 delete mode 100644 client-go/listers/api/v1alpha1/inferencemodel.go
 delete mode 100644 client-go/listers/api/v1alpha1/inferencepool.go

diff --git a/client-go/applyconfiguration/api/v1alpha1/endpointpickerconfig.go b/client-go/applyconfiguration/api/v1alpha1/endpointpickerconfig.go
deleted file mode 100644
index 91895ddce..000000000
--- a/client-go/applyconfiguration/api/v1alpha1/endpointpickerconfig.go
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
-Copyright 2024 The Kubernetes Authors.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-// Code generated by applyconfiguration-gen. DO NOT EDIT.
-
-package v1alpha1
-
-// EndpointPickerConfigApplyConfiguration represents a declarative configuration of the EndpointPickerConfig type for use
-// with apply.
-type EndpointPickerConfigApplyConfiguration struct {
-	ExtensionRef *ExtensionApplyConfiguration `json:"extensionRef,omitempty"`
-}
-
-// EndpointPickerConfigApplyConfiguration constructs a declarative configuration of the EndpointPickerConfig type for use with
-// apply.
-func EndpointPickerConfig() *EndpointPickerConfigApplyConfiguration {
-	return &EndpointPickerConfigApplyConfiguration{}
-}
-
-// WithExtensionRef sets the ExtensionRef field in the declarative configuration to the given value
-// and returns the receiver, so that objects can be built by chaining "With" function invocations.
-// If called multiple times, the ExtensionRef field is set to the value of the last call.
-func (b *EndpointPickerConfigApplyConfiguration) WithExtensionRef(value *ExtensionApplyConfiguration) *EndpointPickerConfigApplyConfiguration {
-	b.ExtensionRef = value
-	return b
-}
diff --git a/client-go/applyconfiguration/api/v1alpha1/extension.go b/client-go/applyconfiguration/api/v1alpha1/extension.go
deleted file mode 100644
index 4213af883..000000000
--- a/client-go/applyconfiguration/api/v1alpha1/extension.go
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
-Copyright 2024 The Kubernetes Authors.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-// Code generated by applyconfiguration-gen. DO NOT EDIT.
-
-package v1alpha1
-
-import (
-	apiv1alpha1 "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha1"
-)
-
-// ExtensionApplyConfiguration represents a declarative configuration of the Extension type for use
-// with apply.
-type ExtensionApplyConfiguration struct {
-	ExtensionReferenceApplyConfiguration  `json:",inline"`
-	ExtensionConnectionApplyConfiguration `json:",inline"`
-}
-
-// ExtensionApplyConfiguration constructs a declarative configuration of the Extension type for use with
-// apply.
-func Extension() *ExtensionApplyConfiguration {
-	return &ExtensionApplyConfiguration{}
-}
-
-// WithGroup sets the Group field in the declarative configuration to the given value
-// and returns the receiver, so that objects can be built by chaining "With" function invocations.
-// If called multiple times, the Group field is set to the value of the last call.
-func (b *ExtensionApplyConfiguration) WithGroup(value string) *ExtensionApplyConfiguration {
-	b.ExtensionReferenceApplyConfiguration.Group = &value
-	return b
-}
-
-// WithKind sets the Kind field in the declarative configuration to the given value
-// and returns the receiver, so that objects can be built by chaining "With" function invocations.
-// If called multiple times, the Kind field is set to the value of the last call.
-func (b *ExtensionApplyConfiguration) WithKind(value string) *ExtensionApplyConfiguration {
-	b.ExtensionReferenceApplyConfiguration.Kind = &value
-	return b
-}
-
-// WithName sets the Name field in the declarative configuration to the given value
-// and returns the receiver, so that objects can be built by chaining "With" function invocations.
-// If called multiple times, the Name field is set to the value of the last call.
-func (b *ExtensionApplyConfiguration) WithName(value string) *ExtensionApplyConfiguration {
-	b.ExtensionReferenceApplyConfiguration.Name = &value
-	return b
-}
-
-// WithTargetPortNumber sets the TargetPortNumber field in the declarative configuration to the given value
-// and returns the receiver, so that objects can be built by chaining "With" function invocations.
-// If called multiple times, the TargetPortNumber field is set to the value of the last call.
-func (b *ExtensionApplyConfiguration) WithTargetPortNumber(value int32) *ExtensionApplyConfiguration {
-	b.ExtensionReferenceApplyConfiguration.TargetPortNumber = &value
-	return b
-}
-
-// WithFailureMode sets the FailureMode field in the declarative configuration to the given value
-// and returns the receiver, so that objects can be built by chaining "With" function invocations.
-// If called multiple times, the FailureMode field is set to the value of the last call.
-func (b *ExtensionApplyConfiguration) WithFailureMode(value apiv1alpha1.ExtensionFailureMode) *ExtensionApplyConfiguration {
-	b.ExtensionConnectionApplyConfiguration.FailureMode = &value
-	return b
-}
diff --git a/client-go/applyconfiguration/api/v1alpha1/extensionconnection.go b/client-go/applyconfiguration/api/v1alpha1/extensionconnection.go
deleted file mode 100644
index ff8752a90..000000000
--- a/client-go/applyconfiguration/api/v1alpha1/extensionconnection.go
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
-Copyright 2024 The Kubernetes Authors.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-// Code generated by applyconfiguration-gen. DO NOT EDIT.
-
-package v1alpha1
-
-import (
-	apiv1alpha1 "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha1"
-)
-
-// ExtensionConnectionApplyConfiguration represents a declarative configuration of the ExtensionConnection type for use
-// with apply.
-type ExtensionConnectionApplyConfiguration struct {
-	FailureMode *apiv1alpha1.ExtensionFailureMode `json:"failureMode,omitempty"`
-}
-
-// ExtensionConnectionApplyConfiguration constructs a declarative configuration of the ExtensionConnection type for use with
-// apply.
-func ExtensionConnection() *ExtensionConnectionApplyConfiguration {
-	return &ExtensionConnectionApplyConfiguration{}
-}
-
-// WithFailureMode sets the FailureMode field in the declarative configuration to the given value
-// and returns the receiver, so that objects can be built by chaining "With" function invocations.
-// If called multiple times, the FailureMode field is set to the value of the last call.
-func (b *ExtensionConnectionApplyConfiguration) WithFailureMode(value apiv1alpha1.ExtensionFailureMode) *ExtensionConnectionApplyConfiguration {
-	b.FailureMode = &value
-	return b
-}
diff --git a/client-go/applyconfiguration/api/v1alpha1/extensionreference.go b/client-go/applyconfiguration/api/v1alpha1/extensionreference.go
deleted file mode 100644
index c72c03066..000000000
--- a/client-go/applyconfiguration/api/v1alpha1/extensionreference.go
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
-Copyright 2024 The Kubernetes Authors.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-// Code generated by applyconfiguration-gen. DO NOT EDIT.
-
-package v1alpha1
-
-// ExtensionReferenceApplyConfiguration represents a declarative configuration of the ExtensionReference type for use
-// with apply.
-type ExtensionReferenceApplyConfiguration struct {
-	Group            *string `json:"group,omitempty"`
-	Kind             *string `json:"kind,omitempty"`
-	Name             *string `json:"name,omitempty"`
-	TargetPortNumber *int32  `json:"targetPortNumber,omitempty"`
-}
-
-// ExtensionReferenceApplyConfiguration constructs a declarative configuration of the ExtensionReference type for use with
-// apply.
-func ExtensionReference() *ExtensionReferenceApplyConfiguration {
-	return &ExtensionReferenceApplyConfiguration{}
-}
-
-// WithGroup sets the Group field in the declarative configuration to the given value
-// and returns the receiver, so that objects can be built by chaining "With" function invocations.
-// If called multiple times, the Group field is set to the value of the last call.
-func (b *ExtensionReferenceApplyConfiguration) WithGroup(value string) *ExtensionReferenceApplyConfiguration {
-	b.Group = &value
-	return b
-}
-
-// WithKind sets the Kind field in the declarative configuration to the given value
-// and returns the receiver, so that objects can be built by chaining "With" function invocations.
-// If called multiple times, the Kind field is set to the value of the last call.
-func (b *ExtensionReferenceApplyConfiguration) WithKind(value string) *ExtensionReferenceApplyConfiguration {
-	b.Kind = &value
-	return b
-}
-
-// WithName sets the Name field in the declarative configuration to the given value
-// and returns the receiver, so that objects can be built by chaining "With" function invocations.
-// If called multiple times, the Name field is set to the value of the last call.
-func (b *ExtensionReferenceApplyConfiguration) WithName(value string) *ExtensionReferenceApplyConfiguration {
-	b.Name = &value
-	return b
-}
-
-// WithTargetPortNumber sets the TargetPortNumber field in the declarative configuration to the given value
-// and returns the receiver, so that objects can be built by chaining "With" function invocations.
-// If called multiple times, the TargetPortNumber field is set to the value of the last call.
-func (b *ExtensionReferenceApplyConfiguration) WithTargetPortNumber(value int32) *ExtensionReferenceApplyConfiguration {
-	b.TargetPortNumber = &value
-	return b
-}
diff --git a/client-go/applyconfiguration/api/v1alpha1/inferencemodel.go b/client-go/applyconfiguration/api/v1alpha1/inferencemodel.go
deleted file mode 100644
index d2a5b2b45..000000000
--- a/client-go/applyconfiguration/api/v1alpha1/inferencemodel.go
+++ /dev/null
@@ -1,224 +0,0 @@
-/*
-Copyright 2024 The Kubernetes Authors.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-// Code generated by applyconfiguration-gen. DO NOT EDIT.
-
-package v1alpha1
-
-import (
-	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
-	types "k8s.io/apimachinery/pkg/types"
-	v1 "k8s.io/client-go/applyconfigurations/meta/v1"
-)
-
-// InferenceModelApplyConfiguration represents a declarative configuration of the InferenceModel type for use
-// with apply.
-type InferenceModelApplyConfiguration struct {
-	v1.TypeMetaApplyConfiguration    `json:",inline"`
-	*v1.ObjectMetaApplyConfiguration `json:"metadata,omitempty"`
-	Spec                             *InferenceModelSpecApplyConfiguration   `json:"spec,omitempty"`
-	Status                           *InferenceModelStatusApplyConfiguration `json:"status,omitempty"`
-}
-
-// InferenceModel constructs a declarative configuration of the InferenceModel type for use with
-// apply.
-func InferenceModel(name, namespace string) *InferenceModelApplyConfiguration {
-	b := &InferenceModelApplyConfiguration{}
-	b.WithName(name)
-	b.WithNamespace(namespace)
-	b.WithKind("InferenceModel")
-	b.WithAPIVersion("inference.networking.x-k8s.io/v1alpha1")
-	return b
-}
-
-// WithKind sets the Kind field in the declarative configuration to the given value
-// and returns the receiver, so that objects can be built by chaining "With" function invocations.
-// If called multiple times, the Kind field is set to the value of the last call.
-func (b *InferenceModelApplyConfiguration) WithKind(value string) *InferenceModelApplyConfiguration {
-	b.TypeMetaApplyConfiguration.Kind = &value
-	return b
-}
-
-// WithAPIVersion sets the APIVersion field in the declarative configuration to the given value
-// and returns the receiver, so that objects can be built by chaining "With" function invocations.
-// If called multiple times, the APIVersion field is set to the value of the last call.
-func (b *InferenceModelApplyConfiguration) WithAPIVersion(value string) *InferenceModelApplyConfiguration {
-	b.TypeMetaApplyConfiguration.APIVersion = &value
-	return b
-}
-
-// WithName sets the Name field in the declarative configuration to the given value
-// and returns the receiver, so that objects can be built by chaining "With" function invocations.
-// If called multiple times, the Name field is set to the value of the last call.
-func (b *InferenceModelApplyConfiguration) WithName(value string) *InferenceModelApplyConfiguration {
-	b.ensureObjectMetaApplyConfigurationExists()
-	b.ObjectMetaApplyConfiguration.Name = &value
-	return b
-}
-
-// WithGenerateName sets the GenerateName field in the declarative configuration to the given value
-// and returns the receiver, so that objects can be built by chaining "With" function invocations.
-// If called multiple times, the GenerateName field is set to the value of the last call.
-func (b *InferenceModelApplyConfiguration) WithGenerateName(value string) *InferenceModelApplyConfiguration {
-	b.ensureObjectMetaApplyConfigurationExists()
-	b.ObjectMetaApplyConfiguration.GenerateName = &value
-	return b
-}
-
-// WithNamespace sets the Namespace field in the declarative configuration to the given value
-// and returns the receiver, so that objects can be built by chaining "With" function invocations.
-// If called multiple times, the Namespace field is set to the value of the last call.
-func (b *InferenceModelApplyConfiguration) WithNamespace(value string) *InferenceModelApplyConfiguration {
-	b.ensureObjectMetaApplyConfigurationExists()
-	b.ObjectMetaApplyConfiguration.Namespace = &value
-	return b
-}
-
-// WithUID sets the UID field in the declarative configuration to the given value
-// and returns the receiver, so that objects can be built by chaining "With" function invocations.
-// If called multiple times, the UID field is set to the value of the last call.
-func (b *InferenceModelApplyConfiguration) WithUID(value types.UID) *InferenceModelApplyConfiguration {
-	b.ensureObjectMetaApplyConfigurationExists()
-	b.ObjectMetaApplyConfiguration.UID = &value
-	return b
-}
-
-// WithResourceVersion sets the ResourceVersion field in the declarative configuration to the given value
-// and returns the receiver, so that objects can be built by chaining "With" function invocations.
-// If called multiple times, the ResourceVersion field is set to the value of the last call.
-func (b *InferenceModelApplyConfiguration) WithResourceVersion(value string) *InferenceModelApplyConfiguration {
-	b.ensureObjectMetaApplyConfigurationExists()
-	b.ObjectMetaApplyConfiguration.ResourceVersion = &value
-	return b
-}
-
-// WithGeneration sets the Generation field in the declarative configuration to the given value
-// and returns the receiver, so that objects can be built by chaining "With" function invocations.
-// If called multiple times, the Generation field is set to the value of the last call.
-func (b *InferenceModelApplyConfiguration) WithGeneration(value int64) *InferenceModelApplyConfiguration {
-	b.ensureObjectMetaApplyConfigurationExists()
-	b.ObjectMetaApplyConfiguration.Generation = &value
-	return b
-}
-
-// WithCreationTimestamp sets the CreationTimestamp field in the declarative configuration to the given value
-// and returns the receiver, so that objects can be built by chaining "With" function invocations.
-// If called multiple times, the CreationTimestamp field is set to the value of the last call.
-func (b *InferenceModelApplyConfiguration) WithCreationTimestamp(value metav1.Time) *InferenceModelApplyConfiguration {
-	b.ensureObjectMetaApplyConfigurationExists()
-	b.ObjectMetaApplyConfiguration.CreationTimestamp = &value
-	return b
-}
-
-// WithDeletionTimestamp sets the DeletionTimestamp field in the declarative configuration to the given value
-// and returns the receiver, so that objects can be built by chaining "With" function invocations.
-// If called multiple times, the DeletionTimestamp field is set to the value of the last call.
-func (b *InferenceModelApplyConfiguration) WithDeletionTimestamp(value metav1.Time) *InferenceModelApplyConfiguration {
-	b.ensureObjectMetaApplyConfigurationExists()
-	b.ObjectMetaApplyConfiguration.DeletionTimestamp = &value
-	return b
-}
-
-// WithDeletionGracePeriodSeconds sets the DeletionGracePeriodSeconds field in the declarative configuration to the given value
-// and returns the receiver, so that objects can be built by chaining "With" function invocations.
-// If called multiple times, the DeletionGracePeriodSeconds field is set to the value of the last call.
-func (b *InferenceModelApplyConfiguration) WithDeletionGracePeriodSeconds(value int64) *InferenceModelApplyConfiguration {
-	b.ensureObjectMetaApplyConfigurationExists()
-	b.ObjectMetaApplyConfiguration.DeletionGracePeriodSeconds = &value
-	return b
-}
-
-// WithLabels puts the entries into the Labels field in the declarative configuration
-// and returns the receiver, so that objects can be build by chaining "With" function invocations.
-// If called multiple times, the entries provided by each call will be put on the Labels field,
-// overwriting an existing map entries in Labels field with the same key.
-func (b *InferenceModelApplyConfiguration) WithLabels(entries map[string]string) *InferenceModelApplyConfiguration {
-	b.ensureObjectMetaApplyConfigurationExists()
-	if b.ObjectMetaApplyConfiguration.Labels == nil && len(entries) > 0 {
-		b.ObjectMetaApplyConfiguration.Labels = make(map[string]string, len(entries))
-	}
-	for k, v := range entries {
-		b.ObjectMetaApplyConfiguration.Labels[k] = v
-	}
-	return b
-}
-
-// WithAnnotations puts the entries into the Annotations field in the declarative configuration
-// and returns the receiver, so that objects can be build by chaining "With" function invocations.
-// If called multiple times, the entries provided by each call will be put on the Annotations field,
-// overwriting an existing map entries in Annotations field with the same key.
-func (b *InferenceModelApplyConfiguration) WithAnnotations(entries map[string]string) *InferenceModelApplyConfiguration {
-	b.ensureObjectMetaApplyConfigurationExists()
-	if b.ObjectMetaApplyConfiguration.Annotations == nil && len(entries) > 0 {
-		b.ObjectMetaApplyConfiguration.Annotations = make(map[string]string, len(entries))
-	}
-	for k, v := range entries {
-		b.ObjectMetaApplyConfiguration.Annotations[k] = v
-	}
-	return b
-}
-
-// WithOwnerReferences adds the given value to the OwnerReferences field in the declarative configuration
-// and returns the receiver, so that objects can be build by chaining "With" function invocations.
-// If called multiple times, values provided by each call will be appended to the OwnerReferences field.
-func (b *InferenceModelApplyConfiguration) WithOwnerReferences(values ...*v1.OwnerReferenceApplyConfiguration) *InferenceModelApplyConfiguration {
-	b.ensureObjectMetaApplyConfigurationExists()
-	for i := range values {
-		if values[i] == nil {
-			panic("nil value passed to WithOwnerReferences")
-		}
-		b.ObjectMetaApplyConfiguration.OwnerReferences = append(b.ObjectMetaApplyConfiguration.OwnerReferences, *values[i])
-	}
-	return b
-}
-
-// WithFinalizers adds the given value to the Finalizers field in the declarative configuration
-// and returns the receiver, so that objects can be build by chaining "With" function invocations.
-// If called multiple times, values provided by each call will be appended to the Finalizers field.
-func (b *InferenceModelApplyConfiguration) WithFinalizers(values ...string) *InferenceModelApplyConfiguration {
-	b.ensureObjectMetaApplyConfigurationExists()
-	for i := range values {
-		b.ObjectMetaApplyConfiguration.Finalizers = append(b.ObjectMetaApplyConfiguration.Finalizers, values[i])
-	}
-	return b
-}
-
-func (b *InferenceModelApplyConfiguration) ensureObjectMetaApplyConfigurationExists() {
-	if b.ObjectMetaApplyConfiguration == nil {
-		b.ObjectMetaApplyConfiguration = &v1.ObjectMetaApplyConfiguration{}
-	}
-}
-
-// WithSpec sets the Spec field in the declarative configuration to the given value
-// and returns the receiver, so that objects can be built by chaining "With" function invocations.
-// If called multiple times, the Spec field is set to the value of the last call.
-func (b *InferenceModelApplyConfiguration) WithSpec(value *InferenceModelSpecApplyConfiguration) *InferenceModelApplyConfiguration {
-	b.Spec = value
-	return b
-}
-
-// WithStatus sets the Status field in the declarative configuration to the given value
-// and returns the receiver, so that objects can be built by chaining "With" function invocations.
-// If called multiple times, the Status field is set to the value of the last call.
-func (b *InferenceModelApplyConfiguration) WithStatus(value *InferenceModelStatusApplyConfiguration) *InferenceModelApplyConfiguration {
-	b.Status = value
-	return b
-}
-
-// GetName retrieves the value of the Name field in the declarative configuration.
-func (b *InferenceModelApplyConfiguration) GetName() *string {
-	b.ensureObjectMetaApplyConfigurationExists()
-	return b.ObjectMetaApplyConfiguration.Name
-}
diff --git a/client-go/applyconfiguration/api/v1alpha1/inferencemodelspec.go b/client-go/applyconfiguration/api/v1alpha1/inferencemodelspec.go
deleted file mode 100644
index 2b1a4cbf1..000000000
--- a/client-go/applyconfiguration/api/v1alpha1/inferencemodelspec.go
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
-Copyright 2024 The Kubernetes Authors.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-// Code generated by applyconfiguration-gen. DO NOT EDIT.
-
-package v1alpha1
-
-import (
-	apiv1alpha1 "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha1"
-)
-
-// InferenceModelSpecApplyConfiguration represents a declarative configuration of the InferenceModelSpec type for use
-// with apply.
-type InferenceModelSpecApplyConfiguration struct {
-	ModelName    *string                                `json:"modelName,omitempty"`
-	Criticality  *apiv1alpha1.Criticality               `json:"criticality,omitempty"`
-	TargetModels []TargetModelApplyConfiguration        `json:"targetModels,omitempty"`
-	PoolRef      *PoolObjectReferenceApplyConfiguration `json:"poolRef,omitempty"`
-}
-
-// InferenceModelSpecApplyConfiguration constructs a declarative configuration of the InferenceModelSpec type for use with
-// apply.
-func InferenceModelSpec() *InferenceModelSpecApplyConfiguration {
-	return &InferenceModelSpecApplyConfiguration{}
-}
-
-// WithModelName sets the ModelName field in the declarative configuration to the given value
-// and returns the receiver, so that objects can be built by chaining "With" function invocations.
-// If called multiple times, the ModelName field is set to the value of the last call.
-func (b *InferenceModelSpecApplyConfiguration) WithModelName(value string) *InferenceModelSpecApplyConfiguration {
-	b.ModelName = &value
-	return b
-}
-
-// WithCriticality sets the Criticality field in the declarative configuration to the given value
-// and returns the receiver, so that objects can be built by chaining "With" function invocations.
-// If called multiple times, the Criticality field is set to the value of the last call.
-func (b *InferenceModelSpecApplyConfiguration) WithCriticality(value apiv1alpha1.Criticality) *InferenceModelSpecApplyConfiguration {
-	b.Criticality = &value
-	return b
-}
-
-// WithTargetModels adds the given value to the TargetModels field in the declarative configuration
-// and returns the receiver, so that objects can be build by chaining "With" function invocations.
-// If called multiple times, values provided by each call will be appended to the TargetModels field.
-func (b *InferenceModelSpecApplyConfiguration) WithTargetModels(values ...*TargetModelApplyConfiguration) *InferenceModelSpecApplyConfiguration {
-	for i := range values {
-		if values[i] == nil {
-			panic("nil value passed to WithTargetModels")
-		}
-		b.TargetModels = append(b.TargetModels, *values[i])
-	}
-	return b
-}
-
-// WithPoolRef sets the PoolRef field in the declarative configuration to the given value
-// and returns the receiver, so that objects can be built by chaining "With" function invocations.
-// If called multiple times, the PoolRef field is set to the value of the last call.
-func (b *InferenceModelSpecApplyConfiguration) WithPoolRef(value *PoolObjectReferenceApplyConfiguration) *InferenceModelSpecApplyConfiguration {
-	b.PoolRef = value
-	return b
-}
diff --git a/client-go/applyconfiguration/api/v1alpha1/inferencemodelstatus.go b/client-go/applyconfiguration/api/v1alpha1/inferencemodelstatus.go
deleted file mode 100644
index b0b003bb4..000000000
--- a/client-go/applyconfiguration/api/v1alpha1/inferencemodelstatus.go
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
-Copyright 2024 The Kubernetes Authors.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-// Code generated by applyconfiguration-gen. DO NOT EDIT.
-
-package v1alpha1
-
-import (
-	v1 "k8s.io/client-go/applyconfigurations/meta/v1"
-)
-
-// InferenceModelStatusApplyConfiguration represents a declarative configuration of the InferenceModelStatus type for use
-// with apply.
-type InferenceModelStatusApplyConfiguration struct {
-	Conditions []v1.ConditionApplyConfiguration `json:"conditions,omitempty"`
-}
-
-// InferenceModelStatusApplyConfiguration constructs a declarative configuration of the InferenceModelStatus type for use with
-// apply.
-func InferenceModelStatus() *InferenceModelStatusApplyConfiguration {
-	return &InferenceModelStatusApplyConfiguration{}
-}
-
-// WithConditions adds the given value to the Conditions field in the declarative configuration
-// and returns the receiver, so that objects can be build by chaining "With" function invocations.
-// If called multiple times, values provided by each call will be appended to the Conditions field.
-func (b *InferenceModelStatusApplyConfiguration) WithConditions(values ...*v1.ConditionApplyConfiguration) *InferenceModelStatusApplyConfiguration {
-	for i := range values {
-		if values[i] == nil {
-			panic("nil value passed to WithConditions")
-		}
-		b.Conditions = append(b.Conditions, *values[i])
-	}
-	return b
-}
diff --git a/client-go/applyconfiguration/api/v1alpha1/inferencepool.go b/client-go/applyconfiguration/api/v1alpha1/inferencepool.go
deleted file mode 100644
index 2940143ef..000000000
--- a/client-go/applyconfiguration/api/v1alpha1/inferencepool.go
+++ /dev/null
@@ -1,224 +0,0 @@
-/*
-Copyright 2024 The Kubernetes Authors.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-// Code generated by applyconfiguration-gen. DO NOT EDIT.
-
-package v1alpha1
-
-import (
-	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
-	types "k8s.io/apimachinery/pkg/types"
-	v1 "k8s.io/client-go/applyconfigurations/meta/v1"
-)
-
-// InferencePoolApplyConfiguration represents a declarative configuration of the InferencePool type for use
-// with apply.
-type InferencePoolApplyConfiguration struct {
-	v1.TypeMetaApplyConfiguration    `json:",inline"`
-	*v1.ObjectMetaApplyConfiguration `json:"metadata,omitempty"`
-	Spec                             *InferencePoolSpecApplyConfiguration   `json:"spec,omitempty"`
-	Status                           *InferencePoolStatusApplyConfiguration `json:"status,omitempty"`
-}
-
-// InferencePool constructs a declarative configuration of the InferencePool type for use with
-// apply.
-func InferencePool(name, namespace string) *InferencePoolApplyConfiguration {
-	b := &InferencePoolApplyConfiguration{}
-	b.WithName(name)
-	b.WithNamespace(namespace)
-	b.WithKind("InferencePool")
-	b.WithAPIVersion("inference.networking.x-k8s.io/v1alpha1")
-	return b
-}
-
-// WithKind sets the Kind field in the declarative configuration to the given value
-// and returns the receiver, so that objects can be built by chaining "With" function invocations.
-// If called multiple times, the Kind field is set to the value of the last call.
-func (b *InferencePoolApplyConfiguration) WithKind(value string) *InferencePoolApplyConfiguration {
-	b.TypeMetaApplyConfiguration.Kind = &value
-	return b
-}
-
-// WithAPIVersion sets the APIVersion field in the declarative configuration to the given value
-// and returns the receiver, so that objects can be built by chaining "With" function invocations.
-// If called multiple times, the APIVersion field is set to the value of the last call.
-func (b *InferencePoolApplyConfiguration) WithAPIVersion(value string) *InferencePoolApplyConfiguration {
-	b.TypeMetaApplyConfiguration.APIVersion = &value
-	return b
-}
-
-// WithName sets the Name field in the declarative configuration to the given value
-// and returns the receiver, so that objects can be built by chaining "With" function invocations.
-// If called multiple times, the Name field is set to the value of the last call.
-func (b *InferencePoolApplyConfiguration) WithName(value string) *InferencePoolApplyConfiguration {
-	b.ensureObjectMetaApplyConfigurationExists()
-	b.ObjectMetaApplyConfiguration.Name = &value
-	return b
-}
-
-// WithGenerateName sets the GenerateName field in the declarative configuration to the given value
-// and returns the receiver, so that objects can be built by chaining "With" function invocations.
-// If called multiple times, the GenerateName field is set to the value of the last call.
-func (b *InferencePoolApplyConfiguration) WithGenerateName(value string) *InferencePoolApplyConfiguration {
-	b.ensureObjectMetaApplyConfigurationExists()
-	b.ObjectMetaApplyConfiguration.GenerateName = &value
-	return b
-}
-
-// WithNamespace sets the Namespace field in the declarative configuration to the given value
-// and returns the receiver, so that objects can be built by chaining "With" function invocations.
-// If called multiple times, the Namespace field is set to the value of the last call.
-func (b *InferencePoolApplyConfiguration) WithNamespace(value string) *InferencePoolApplyConfiguration {
-	b.ensureObjectMetaApplyConfigurationExists()
-	b.ObjectMetaApplyConfiguration.Namespace = &value
-	return b
-}
-
-// WithUID sets the UID field in the declarative configuration to the given value
-// and returns the receiver, so that objects can be built by chaining "With" function invocations.
-// If called multiple times, the UID field is set to the value of the last call.
-func (b *InferencePoolApplyConfiguration) WithUID(value types.UID) *InferencePoolApplyConfiguration {
-	b.ensureObjectMetaApplyConfigurationExists()
-	b.ObjectMetaApplyConfiguration.UID = &value
-	return b
-}
-
-// WithResourceVersion sets the ResourceVersion field in the declarative configuration to the given value
-// and returns the receiver, so that objects can be built by chaining "With" function invocations.
-// If called multiple times, the ResourceVersion field is set to the value of the last call.
-func (b *InferencePoolApplyConfiguration) WithResourceVersion(value string) *InferencePoolApplyConfiguration {
-	b.ensureObjectMetaApplyConfigurationExists()
-	b.ObjectMetaApplyConfiguration.ResourceVersion = &value
-	return b
-}
-
-// WithGeneration sets the Generation field in the declarative configuration to the given value
-// and returns the receiver, so that objects can be built by chaining "With" function invocations.
-// If called multiple times, the Generation field is set to the value of the last call.
-func (b *InferencePoolApplyConfiguration) WithGeneration(value int64) *InferencePoolApplyConfiguration {
-	b.ensureObjectMetaApplyConfigurationExists()
-	b.ObjectMetaApplyConfiguration.Generation = &value
-	return b
-}
-
-// WithCreationTimestamp sets the CreationTimestamp field in the declarative configuration to the given value
-// and returns the receiver, so that objects can be built by chaining "With" function invocations.
-// If called multiple times, the CreationTimestamp field is set to the value of the last call.
-func (b *InferencePoolApplyConfiguration) WithCreationTimestamp(value metav1.Time) *InferencePoolApplyConfiguration {
-	b.ensureObjectMetaApplyConfigurationExists()
-	b.ObjectMetaApplyConfiguration.CreationTimestamp = &value
-	return b
-}
-
-// WithDeletionTimestamp sets the DeletionTimestamp field in the declarative configuration to the given value
-// and returns the receiver, so that objects can be built by chaining "With" function invocations.
-// If called multiple times, the DeletionTimestamp field is set to the value of the last call.
-func (b *InferencePoolApplyConfiguration) WithDeletionTimestamp(value metav1.Time) *InferencePoolApplyConfiguration {
-	b.ensureObjectMetaApplyConfigurationExists()
-	b.ObjectMetaApplyConfiguration.DeletionTimestamp = &value
-	return b
-}
-
-// WithDeletionGracePeriodSeconds sets the DeletionGracePeriodSeconds field in the declarative configuration to the given value
-// and returns the receiver, so that objects can be built by chaining "With" function invocations.
-// If called multiple times, the DeletionGracePeriodSeconds field is set to the value of the last call.
-func (b *InferencePoolApplyConfiguration) WithDeletionGracePeriodSeconds(value int64) *InferencePoolApplyConfiguration {
-	b.ensureObjectMetaApplyConfigurationExists()
-	b.ObjectMetaApplyConfiguration.DeletionGracePeriodSeconds = &value
-	return b
-}
-
-// WithLabels puts the entries into the Labels field in the declarative configuration
-// and returns the receiver, so that objects can be build by chaining "With" function invocations.
-// If called multiple times, the entries provided by each call will be put on the Labels field,
-// overwriting an existing map entries in Labels field with the same key.
-func (b *InferencePoolApplyConfiguration) WithLabels(entries map[string]string) *InferencePoolApplyConfiguration {
-	b.ensureObjectMetaApplyConfigurationExists()
-	if b.ObjectMetaApplyConfiguration.Labels == nil && len(entries) > 0 {
-		b.ObjectMetaApplyConfiguration.Labels = make(map[string]string, len(entries))
-	}
-	for k, v := range entries {
-		b.ObjectMetaApplyConfiguration.Labels[k] = v
-	}
-	return b
-}
-
-// WithAnnotations puts the entries into the Annotations field in the declarative configuration
-// and returns the receiver, so that objects can be build by chaining "With" function invocations.
-// If called multiple times, the entries provided by each call will be put on the Annotations field,
-// overwriting an existing map entries in Annotations field with the same key.
-func (b *InferencePoolApplyConfiguration) WithAnnotations(entries map[string]string) *InferencePoolApplyConfiguration {
-	b.ensureObjectMetaApplyConfigurationExists()
-	if b.ObjectMetaApplyConfiguration.Annotations == nil && len(entries) > 0 {
-		b.ObjectMetaApplyConfiguration.Annotations = make(map[string]string, len(entries))
-	}
-	for k, v := range entries {
-		b.ObjectMetaApplyConfiguration.Annotations[k] = v
-	}
-	return b
-}
-
-// WithOwnerReferences adds the given value to the OwnerReferences field in the declarative configuration
-// and returns the receiver, so that objects can be build by chaining "With" function invocations.
-// If called multiple times, values provided by each call will be appended to the OwnerReferences field.
-func (b *InferencePoolApplyConfiguration) WithOwnerReferences(values ...*v1.OwnerReferenceApplyConfiguration) *InferencePoolApplyConfiguration {
-	b.ensureObjectMetaApplyConfigurationExists()
-	for i := range values {
-		if values[i] == nil {
-			panic("nil value passed to WithOwnerReferences")
-		}
-		b.ObjectMetaApplyConfiguration.OwnerReferences = append(b.ObjectMetaApplyConfiguration.OwnerReferences, *values[i])
-	}
-	return b
-}
-
-// WithFinalizers adds the given value to the Finalizers field in the declarative configuration
-// and returns the receiver, so that objects can be build by chaining "With" function invocations.
-// If called multiple times, values provided by each call will be appended to the Finalizers field.
-func (b *InferencePoolApplyConfiguration) WithFinalizers(values ...string) *InferencePoolApplyConfiguration {
-	b.ensureObjectMetaApplyConfigurationExists()
-	for i := range values {
-		b.ObjectMetaApplyConfiguration.Finalizers = append(b.ObjectMetaApplyConfiguration.Finalizers, values[i])
-	}
-	return b
-}
-
-func (b *InferencePoolApplyConfiguration) ensureObjectMetaApplyConfigurationExists() {
-	if b.ObjectMetaApplyConfiguration == nil {
-		b.ObjectMetaApplyConfiguration = &v1.ObjectMetaApplyConfiguration{}
-	}
-}
-
-// WithSpec sets the Spec field in the declarative configuration to the given value
-// and returns the receiver, so that objects can be built by chaining "With" function invocations.
-// If called multiple times, the Spec field is set to the value of the last call.
-func (b *InferencePoolApplyConfiguration) WithSpec(value *InferencePoolSpecApplyConfiguration) *InferencePoolApplyConfiguration {
-	b.Spec = value
-	return b
-}
-
-// WithStatus sets the Status field in the declarative configuration to the given value
-// and returns the receiver, so that objects can be built by chaining "With" function invocations.
-// If called multiple times, the Status field is set to the value of the last call.
-func (b *InferencePoolApplyConfiguration) WithStatus(value *InferencePoolStatusApplyConfiguration) *InferencePoolApplyConfiguration {
-	b.Status = value
-	return b
-}
-
-// GetName retrieves the value of the Name field in the declarative configuration.
-func (b *InferencePoolApplyConfiguration) GetName() *string {
-	b.ensureObjectMetaApplyConfigurationExists()
-	return b.ObjectMetaApplyConfiguration.Name
-}
diff --git a/client-go/applyconfiguration/api/v1alpha1/inferencepoolspec.go b/client-go/applyconfiguration/api/v1alpha1/inferencepoolspec.go
deleted file mode 100644
index 5f69a154c..000000000
--- a/client-go/applyconfiguration/api/v1alpha1/inferencepoolspec.go
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
-Copyright 2024 The Kubernetes Authors.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-// Code generated by applyconfiguration-gen. DO NOT EDIT.
-
-package v1alpha1
-
-import (
-	apiv1alpha1 "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha1"
-)
-
-// InferencePoolSpecApplyConfiguration represents a declarative configuration of the InferencePoolSpec type for use
-// with apply.
-type InferencePoolSpecApplyConfiguration struct {
-	Selector                               map[apiv1alpha1.LabelKey]apiv1alpha1.LabelValue `json:"selector,omitempty"`
-	TargetPortNumber                       *int32                                          `json:"targetPortNumber,omitempty"`
-	EndpointPickerConfigApplyConfiguration `json:",inline"`
-}
-
-// InferencePoolSpecApplyConfiguration constructs a declarative configuration of the InferencePoolSpec type for use with
-// apply.
-func InferencePoolSpec() *InferencePoolSpecApplyConfiguration {
-	return &InferencePoolSpecApplyConfiguration{}
-}
-
-// WithSelector puts the entries into the Selector field in the declarative configuration
-// and returns the receiver, so that objects can be build by chaining "With" function invocations.
-// If called multiple times, the entries provided by each call will be put on the Selector field,
-// overwriting an existing map entries in Selector field with the same key.
-func (b *InferencePoolSpecApplyConfiguration) WithSelector(entries map[apiv1alpha1.LabelKey]apiv1alpha1.LabelValue) *InferencePoolSpecApplyConfiguration {
-	if b.Selector == nil && len(entries) > 0 {
-		b.Selector = make(map[apiv1alpha1.LabelKey]apiv1alpha1.LabelValue, len(entries))
-	}
-	for k, v := range entries {
-		b.Selector[k] = v
-	}
-	return b
-}
-
-// WithTargetPortNumber sets the TargetPortNumber field in the declarative configuration to the given value
-// and returns the receiver, so that objects can be built by chaining "With" function invocations.
-// If called multiple times, the TargetPortNumber field is set to the value of the last call.
-func (b *InferencePoolSpecApplyConfiguration) WithTargetPortNumber(value int32) *InferencePoolSpecApplyConfiguration {
-	b.TargetPortNumber = &value
-	return b
-}
-
-// WithExtensionRef sets the ExtensionRef field in the declarative configuration to the given value
-// and returns the receiver, so that objects can be built by chaining "With" function invocations.
-// If called multiple times, the ExtensionRef field is set to the value of the last call.
-func (b *InferencePoolSpecApplyConfiguration) WithExtensionRef(value *ExtensionApplyConfiguration) *InferencePoolSpecApplyConfiguration {
-	b.EndpointPickerConfigApplyConfiguration.ExtensionRef = value
-	return b
-}
diff --git a/client-go/applyconfiguration/api/v1alpha1/inferencepoolstatus.go b/client-go/applyconfiguration/api/v1alpha1/inferencepoolstatus.go
deleted file mode 100644
index f61a81b36..000000000
--- a/client-go/applyconfiguration/api/v1alpha1/inferencepoolstatus.go
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
-Copyright 2024 The Kubernetes Authors.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-// Code generated by applyconfiguration-gen. DO NOT EDIT.
-
-package v1alpha1
-
-import (
-	v1 "k8s.io/client-go/applyconfigurations/meta/v1"
-)
-
-// InferencePoolStatusApplyConfiguration represents a declarative configuration of the InferencePoolStatus type for use
-// with apply.
-type InferencePoolStatusApplyConfiguration struct {
-	Conditions []v1.ConditionApplyConfiguration `json:"conditions,omitempty"`
-}
-
-// InferencePoolStatusApplyConfiguration constructs a declarative configuration of the InferencePoolStatus type for use with
-// apply.
-func InferencePoolStatus() *InferencePoolStatusApplyConfiguration {
-	return &InferencePoolStatusApplyConfiguration{}
-}
-
-// WithConditions adds the given value to the Conditions field in the declarative configuration
-// and returns the receiver, so that objects can be build by chaining "With" function invocations.
-// If called multiple times, values provided by each call will be appended to the Conditions field.
-func (b *InferencePoolStatusApplyConfiguration) WithConditions(values ...*v1.ConditionApplyConfiguration) *InferencePoolStatusApplyConfiguration {
-	for i := range values {
-		if values[i] == nil {
-			panic("nil value passed to WithConditions")
-		}
-		b.Conditions = append(b.Conditions, *values[i])
-	}
-	return b
-}
diff --git a/client-go/applyconfiguration/api/v1alpha1/poolobjectreference.go b/client-go/applyconfiguration/api/v1alpha1/poolobjectreference.go
deleted file mode 100644
index 692a185ef..000000000
--- a/client-go/applyconfiguration/api/v1alpha1/poolobjectreference.go
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
-Copyright 2024 The Kubernetes Authors.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-// Code generated by applyconfiguration-gen. DO NOT EDIT.
-
-package v1alpha1
-
-// PoolObjectReferenceApplyConfiguration represents a declarative configuration of the PoolObjectReference type for use
-// with apply.
-type PoolObjectReferenceApplyConfiguration struct {
-	Group *string `json:"group,omitempty"`
-	Kind  *string `json:"kind,omitempty"`
-	Name  *string `json:"name,omitempty"`
-}
-
-// PoolObjectReferenceApplyConfiguration constructs a declarative configuration of the PoolObjectReference type for use with
-// apply.
-func PoolObjectReference() *PoolObjectReferenceApplyConfiguration {
-	return &PoolObjectReferenceApplyConfiguration{}
-}
-
-// WithGroup sets the Group field in the declarative configuration to the given value
-// and returns the receiver, so that objects can be built by chaining "With" function invocations.
-// If called multiple times, the Group field is set to the value of the last call.
-func (b *PoolObjectReferenceApplyConfiguration) WithGroup(value string) *PoolObjectReferenceApplyConfiguration {
-	b.Group = &value
-	return b
-}
-
-// WithKind sets the Kind field in the declarative configuration to the given value
-// and returns the receiver, so that objects can be built by chaining "With" function invocations.
-// If called multiple times, the Kind field is set to the value of the last call.
-func (b *PoolObjectReferenceApplyConfiguration) WithKind(value string) *PoolObjectReferenceApplyConfiguration {
-	b.Kind = &value
-	return b
-}
-
-// WithName sets the Name field in the declarative configuration to the given value
-// and returns the receiver, so that objects can be built by chaining "With" function invocations.
-// If called multiple times, the Name field is set to the value of the last call.
-func (b *PoolObjectReferenceApplyConfiguration) WithName(value string) *PoolObjectReferenceApplyConfiguration {
-	b.Name = &value
-	return b
-}
diff --git a/client-go/applyconfiguration/api/v1alpha1/targetmodel.go b/client-go/applyconfiguration/api/v1alpha1/targetmodel.go
deleted file mode 100644
index f6ac83f8b..000000000
--- a/client-go/applyconfiguration/api/v1alpha1/targetmodel.go
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
-Copyright 2024 The Kubernetes Authors.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-// Code generated by applyconfiguration-gen. DO NOT EDIT.
-
-package v1alpha1
-
-// TargetModelApplyConfiguration represents a declarative configuration of the TargetModel type for use
-// with apply.
-type TargetModelApplyConfiguration struct {
-	Name   *string `json:"name,omitempty"`
-	Weight *int32  `json:"weight,omitempty"`
-}
-
-// TargetModelApplyConfiguration constructs a declarative configuration of the TargetModel type for use with
-// apply.
-func TargetModel() *TargetModelApplyConfiguration {
-	return &TargetModelApplyConfiguration{}
-}
-
-// WithName sets the Name field in the declarative configuration to the given value
-// and returns the receiver, so that objects can be built by chaining "With" function invocations.
-// If called multiple times, the Name field is set to the value of the last call.
-func (b *TargetModelApplyConfiguration) WithName(value string) *TargetModelApplyConfiguration {
-	b.Name = &value
-	return b
-}
-
-// WithWeight sets the Weight field in the declarative configuration to the given value
-// and returns the receiver, so that objects can be built by chaining "With" function invocations.
-// If called multiple times, the Weight field is set to the value of the last call.
-func (b *TargetModelApplyConfiguration) WithWeight(value int32) *TargetModelApplyConfiguration {
-	b.Weight = &value
-	return b
-}
diff --git a/client-go/applyconfiguration/utils.go b/client-go/applyconfiguration/utils.go
index eacc9c439..e1ad5ea41 100644
--- a/client-go/applyconfiguration/utils.go
+++ b/client-go/applyconfiguration/utils.go
@@ -21,9 +21,7 @@ import (
 	runtime "k8s.io/apimachinery/pkg/runtime"
 	schema "k8s.io/apimachinery/pkg/runtime/schema"
 	testing "k8s.io/client-go/testing"
-	v1alpha1 "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha1"
 	v1alpha2 "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha2"
-	apiv1alpha1 "sigs.k8s.io/gateway-api-inference-extension/client-go/applyconfiguration/api/v1alpha1"
 	apiv1alpha2 "sigs.k8s.io/gateway-api-inference-extension/client-go/applyconfiguration/api/v1alpha2"
 	internal "sigs.k8s.io/gateway-api-inference-extension/client-go/applyconfiguration/internal"
 )
@@ -32,33 +30,7 @@ import (
 // apply configuration type exists for the given GroupVersionKind.
 func ForKind(kind schema.GroupVersionKind) interface{} {
 	switch kind {
-	// Group=inference.networking.x-k8s.io, Version=v1alpha1
-	case v1alpha1.SchemeGroupVersion.WithKind("EndpointPickerConfig"):
-		return &apiv1alpha1.EndpointPickerConfigApplyConfiguration{}
-	case v1alpha1.SchemeGroupVersion.WithKind("Extension"):
-		return &apiv1alpha1.ExtensionApplyConfiguration{}
-	case v1alpha1.SchemeGroupVersion.WithKind("ExtensionConnection"):
-		return &apiv1alpha1.ExtensionConnectionApplyConfiguration{}
-	case v1alpha1.SchemeGroupVersion.WithKind("ExtensionReference"):
-		return &apiv1alpha1.ExtensionReferenceApplyConfiguration{}
-	case v1alpha1.SchemeGroupVersion.WithKind("InferenceModel"):
-		return &apiv1alpha1.InferenceModelApplyConfiguration{}
-	case v1alpha1.SchemeGroupVersion.WithKind("InferenceModelSpec"):
-		return &apiv1alpha1.InferenceModelSpecApplyConfiguration{}
-	case v1alpha1.SchemeGroupVersion.WithKind("InferenceModelStatus"):
-		return &apiv1alpha1.InferenceModelStatusApplyConfiguration{}
-	case v1alpha1.SchemeGroupVersion.WithKind("InferencePool"):
-		return &apiv1alpha1.InferencePoolApplyConfiguration{}
-	case v1alpha1.SchemeGroupVersion.WithKind("InferencePoolSpec"):
-		return &apiv1alpha1.InferencePoolSpecApplyConfiguration{}
-	case v1alpha1.SchemeGroupVersion.WithKind("InferencePoolStatus"):
-		return &apiv1alpha1.InferencePoolStatusApplyConfiguration{}
-	case v1alpha1.SchemeGroupVersion.WithKind("PoolObjectReference"):
-		return &apiv1alpha1.PoolObjectReferenceApplyConfiguration{}
-	case v1alpha1.SchemeGroupVersion.WithKind("TargetModel"):
-		return &apiv1alpha1.TargetModelApplyConfiguration{}
-
-		// Group=inference.networking.x-k8s.io, Version=v1alpha2
+	// Group=inference.networking.x-k8s.io, Version=v1alpha2
 	case v1alpha2.SchemeGroupVersion.WithKind("EndpointPickerConfig"):
 		return &apiv1alpha2.EndpointPickerConfigApplyConfiguration{}
 	case v1alpha2.SchemeGroupVersion.WithKind("Extension"):
diff --git a/client-go/clientset/versioned/clientset.go b/client-go/clientset/versioned/clientset.go
index 4266285a7..c56d11c7d 100644
--- a/client-go/clientset/versioned/clientset.go
+++ b/client-go/clientset/versioned/clientset.go
@@ -24,28 +24,20 @@ import (
 	discovery "k8s.io/client-go/discovery"
 	rest "k8s.io/client-go/rest"
 	flowcontrol "k8s.io/client-go/util/flowcontrol"
-	inferencev1alpha1 "sigs.k8s.io/gateway-api-inference-extension/client-go/clientset/versioned/typed/api/v1alpha1"
 	inferencev1alpha2 "sigs.k8s.io/gateway-api-inference-extension/client-go/clientset/versioned/typed/api/v1alpha2"
 )
 
 type Interface interface {
 	Discovery() discovery.DiscoveryInterface
-	InferenceV1alpha1() inferencev1alpha1.InferenceV1alpha1Interface
 	InferenceV1alpha2() inferencev1alpha2.InferenceV1alpha2Interface
 }
 
 // Clientset contains the clients for groups.
 type Clientset struct {
 	*discovery.DiscoveryClient
-	inferenceV1alpha1 *inferencev1alpha1.InferenceV1alpha1Client
 	inferenceV1alpha2 *inferencev1alpha2.InferenceV1alpha2Client
 }
 
-// InferenceV1alpha1 retrieves the InferenceV1alpha1Client
-func (c *Clientset) InferenceV1alpha1() inferencev1alpha1.InferenceV1alpha1Interface {
-	return c.inferenceV1alpha1
-}
-
 // InferenceV1alpha2 retrieves the InferenceV1alpha2Client
 func (c *Clientset) InferenceV1alpha2() inferencev1alpha2.InferenceV1alpha2Interface {
 	return c.inferenceV1alpha2
@@ -95,10 +87,6 @@ func NewForConfigAndClient(c *rest.Config, httpClient *http.Client) (*Clientset,
 
 	var cs Clientset
 	var err error
-	cs.inferenceV1alpha1, err = inferencev1alpha1.NewForConfigAndClient(&configShallowCopy, httpClient)
-	if err != nil {
-		return nil, err
-	}
 	cs.inferenceV1alpha2, err = inferencev1alpha2.NewForConfigAndClient(&configShallowCopy, httpClient)
 	if err != nil {
 		return nil, err
@@ -124,7 +112,6 @@ func NewForConfigOrDie(c *rest.Config) *Clientset {
 // New creates a new Clientset for the given RESTClient.
 func New(c rest.Interface) *Clientset {
 	var cs Clientset
-	cs.inferenceV1alpha1 = inferencev1alpha1.New(c)
 	cs.inferenceV1alpha2 = inferencev1alpha2.New(c)
 
 	cs.DiscoveryClient = discovery.NewDiscoveryClient(c)
diff --git a/client-go/clientset/versioned/fake/clientset_generated.go b/client-go/clientset/versioned/fake/clientset_generated.go
index f4f33032d..b0ecd50b3 100644
--- a/client-go/clientset/versioned/fake/clientset_generated.go
+++ b/client-go/clientset/versioned/fake/clientset_generated.go
@@ -25,8 +25,6 @@ import (
 	"k8s.io/client-go/testing"
 	applyconfiguration "sigs.k8s.io/gateway-api-inference-extension/client-go/applyconfiguration"
 	clientset "sigs.k8s.io/gateway-api-inference-extension/client-go/clientset/versioned"
-	inferencev1alpha1 "sigs.k8s.io/gateway-api-inference-extension/client-go/clientset/versioned/typed/api/v1alpha1"
-	fakeinferencev1alpha1 "sigs.k8s.io/gateway-api-inference-extension/client-go/clientset/versioned/typed/api/v1alpha1/fake"
 	inferencev1alpha2 "sigs.k8s.io/gateway-api-inference-extension/client-go/clientset/versioned/typed/api/v1alpha2"
 	fakeinferencev1alpha2 "sigs.k8s.io/gateway-api-inference-extension/client-go/clientset/versioned/typed/api/v1alpha2/fake"
 )
@@ -117,11 +115,6 @@ var (
 	_ testing.FakeClient  = &Clientset{}
 )
 
-// InferenceV1alpha1 retrieves the InferenceV1alpha1Client
-func (c *Clientset) InferenceV1alpha1() inferencev1alpha1.InferenceV1alpha1Interface {
-	return &fakeinferencev1alpha1.FakeInferenceV1alpha1{Fake: &c.Fake}
-}
-
 // InferenceV1alpha2 retrieves the InferenceV1alpha2Client
 func (c *Clientset) InferenceV1alpha2() inferencev1alpha2.InferenceV1alpha2Interface {
 	return &fakeinferencev1alpha2.FakeInferenceV1alpha2{Fake: &c.Fake}
diff --git a/client-go/clientset/versioned/fake/register.go b/client-go/clientset/versioned/fake/register.go
index bc8e69035..365ccb75e 100644
--- a/client-go/clientset/versioned/fake/register.go
+++ b/client-go/clientset/versioned/fake/register.go
@@ -23,7 +23,6 @@ import (
 	schema "k8s.io/apimachinery/pkg/runtime/schema"
 	serializer "k8s.io/apimachinery/pkg/runtime/serializer"
 	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
-	inferencev1alpha1 "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha1"
 	inferencev1alpha2 "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha2"
 )
 
@@ -31,7 +30,6 @@ var scheme = runtime.NewScheme()
 var codecs = serializer.NewCodecFactory(scheme)
 
 var localSchemeBuilder = runtime.SchemeBuilder{
-	inferencev1alpha1.AddToScheme,
 	inferencev1alpha2.AddToScheme,
 }
 
diff --git a/client-go/clientset/versioned/scheme/register.go b/client-go/clientset/versioned/scheme/register.go
index 5727d404f..b656f1215 100644
--- a/client-go/clientset/versioned/scheme/register.go
+++ b/client-go/clientset/versioned/scheme/register.go
@@ -23,7 +23,6 @@ import (
 	schema "k8s.io/apimachinery/pkg/runtime/schema"
 	serializer "k8s.io/apimachinery/pkg/runtime/serializer"
 	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
-	inferencev1alpha1 "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha1"
 	inferencev1alpha2 "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha2"
 )
 
@@ -31,7 +30,6 @@ var Scheme = runtime.NewScheme()
 var Codecs = serializer.NewCodecFactory(Scheme)
 var ParameterCodec = runtime.NewParameterCodec(Scheme)
 var localSchemeBuilder = runtime.SchemeBuilder{
-	inferencev1alpha1.AddToScheme,
 	inferencev1alpha2.AddToScheme,
 }
 
diff --git a/client-go/clientset/versioned/typed/api/v1alpha1/api_client.go b/client-go/clientset/versioned/typed/api/v1alpha1/api_client.go
deleted file mode 100644
index 8cc8a643c..000000000
--- a/client-go/clientset/versioned/typed/api/v1alpha1/api_client.go
+++ /dev/null
@@ -1,111 +0,0 @@
-/*
-Copyright 2024 The Kubernetes Authors.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-// Code generated by client-gen. DO NOT EDIT.
-
-package v1alpha1
-
-import (
-	http "net/http"
-
-	rest "k8s.io/client-go/rest"
-	apiv1alpha1 "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha1"
-	scheme "sigs.k8s.io/gateway-api-inference-extension/client-go/clientset/versioned/scheme"
-)
-
-type InferenceV1alpha1Interface interface {
-	RESTClient() rest.Interface
-	InferenceModelsGetter
-	InferencePoolsGetter
-}
-
-// InferenceV1alpha1Client is used to interact with features provided by the inference.networking.x-k8s.io group.
-type InferenceV1alpha1Client struct {
-	restClient rest.Interface
-}
-
-func (c *InferenceV1alpha1Client) InferenceModels(namespace string) InferenceModelInterface {
-	return newInferenceModels(c, namespace)
-}
-
-func (c *InferenceV1alpha1Client) InferencePools(namespace string) InferencePoolInterface {
-	return newInferencePools(c, namespace)
-}
-
-// NewForConfig creates a new InferenceV1alpha1Client for the given config.
-// NewForConfig is equivalent to NewForConfigAndClient(c, httpClient),
-// where httpClient was generated with rest.HTTPClientFor(c).
-func NewForConfig(c *rest.Config) (*InferenceV1alpha1Client, error) {
-	config := *c
-	if err := setConfigDefaults(&config); err != nil {
-		return nil, err
-	}
-	httpClient, err := rest.HTTPClientFor(&config)
-	if err != nil {
-		return nil, err
-	}
-	return NewForConfigAndClient(&config, httpClient)
-}
-
-// NewForConfigAndClient creates a new InferenceV1alpha1Client for the given config and http client.
-// Note the http client provided takes precedence over the configured transport values.
-func NewForConfigAndClient(c *rest.Config, h *http.Client) (*InferenceV1alpha1Client, error) {
-	config := *c
-	if err := setConfigDefaults(&config); err != nil {
-		return nil, err
-	}
-	client, err := rest.RESTClientForConfigAndClient(&config, h)
-	if err != nil {
-		return nil, err
-	}
-	return &InferenceV1alpha1Client{client}, nil
-}
-
-// NewForConfigOrDie creates a new InferenceV1alpha1Client for the given config and
-// panics if there is an error in the config.
-func NewForConfigOrDie(c *rest.Config) *InferenceV1alpha1Client {
-	client, err := NewForConfig(c)
-	if err != nil {
-		panic(err)
-	}
-	return client
-}
-
-// New creates a new InferenceV1alpha1Client for the given RESTClient.
-func New(c rest.Interface) *InferenceV1alpha1Client {
-	return &InferenceV1alpha1Client{c}
-}
-
-func setConfigDefaults(config *rest.Config) error {
-	gv := apiv1alpha1.SchemeGroupVersion
-	config.GroupVersion = &gv
-	config.APIPath = "/apis"
-	config.NegotiatedSerializer = rest.CodecFactoryForGeneratedClient(scheme.Scheme, scheme.Codecs).WithoutConversion()
-
-	if config.UserAgent == "" {
-		config.UserAgent = rest.DefaultKubernetesUserAgent()
-	}
-
-	return nil
-}
-
-// RESTClient returns a RESTClient that is used to communicate
-// with API server by this client implementation.
-func (c *InferenceV1alpha1Client) RESTClient() rest.Interface {
-	if c == nil {
-		return nil
-	}
-	return c.restClient
-}
diff --git a/client-go/clientset/versioned/typed/api/v1alpha1/doc.go b/client-go/clientset/versioned/typed/api/v1alpha1/doc.go
deleted file mode 100644
index 28991e22c..000000000
--- a/client-go/clientset/versioned/typed/api/v1alpha1/doc.go
+++ /dev/null
@@ -1,19 +0,0 @@
-/*
-Copyright 2024 The Kubernetes Authors.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-// Code generated by client-gen. DO NOT EDIT.
-
-// This package has the automatically generated typed clients.
-package v1alpha1
diff --git a/client-go/clientset/versioned/typed/api/v1alpha1/fake/doc.go b/client-go/clientset/versioned/typed/api/v1alpha1/fake/doc.go
deleted file mode 100644
index fbfccbb91..000000000
--- a/client-go/clientset/versioned/typed/api/v1alpha1/fake/doc.go
+++ /dev/null
@@ -1,19 +0,0 @@
-/*
-Copyright 2024 The Kubernetes Authors.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-// Code generated by client-gen. DO NOT EDIT.
-
-// Package fake has the automatically generated clients.
-package fake
diff --git a/client-go/clientset/versioned/typed/api/v1alpha1/fake/fake_api_client.go b/client-go/clientset/versioned/typed/api/v1alpha1/fake/fake_api_client.go
deleted file mode 100644
index 1dee0f20d..000000000
--- a/client-go/clientset/versioned/typed/api/v1alpha1/fake/fake_api_client.go
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
-Copyright 2024 The Kubernetes Authors.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-// Code generated by client-gen. DO NOT EDIT.
-
-package fake
-
-import (
-	rest "k8s.io/client-go/rest"
-	testing "k8s.io/client-go/testing"
-	v1alpha1 "sigs.k8s.io/gateway-api-inference-extension/client-go/clientset/versioned/typed/api/v1alpha1"
-)
-
-type FakeInferenceV1alpha1 struct {
-	*testing.Fake
-}
-
-func (c *FakeInferenceV1alpha1) InferenceModels(namespace string) v1alpha1.InferenceModelInterface {
-	return newFakeInferenceModels(c, namespace)
-}
-
-func (c *FakeInferenceV1alpha1) InferencePools(namespace string) v1alpha1.InferencePoolInterface {
-	return newFakeInferencePools(c, namespace)
-}
-
-// RESTClient returns a RESTClient that is used to communicate
-// with API server by this client implementation.
-func (c *FakeInferenceV1alpha1) RESTClient() rest.Interface {
-	var ret *rest.RESTClient
-	return ret
-}
diff --git a/client-go/clientset/versioned/typed/api/v1alpha1/fake/fake_inferencemodel.go b/client-go/clientset/versioned/typed/api/v1alpha1/fake/fake_inferencemodel.go
deleted file mode 100644
index 44007ae7e..000000000
--- a/client-go/clientset/versioned/typed/api/v1alpha1/fake/fake_inferencemodel.go
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
-Copyright 2024 The Kubernetes Authors.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-// Code generated by client-gen. DO NOT EDIT.
-
-package fake
-
-import (
-	gentype "k8s.io/client-go/gentype"
-	v1alpha1 "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha1"
-	apiv1alpha1 "sigs.k8s.io/gateway-api-inference-extension/client-go/applyconfiguration/api/v1alpha1"
-	typedapiv1alpha1 "sigs.k8s.io/gateway-api-inference-extension/client-go/clientset/versioned/typed/api/v1alpha1"
-)
-
-// fakeInferenceModels implements InferenceModelInterface
-type fakeInferenceModels struct {
-	*gentype.FakeClientWithListAndApply[*v1alpha1.InferenceModel, *v1alpha1.InferenceModelList, *apiv1alpha1.InferenceModelApplyConfiguration]
-	Fake *FakeInferenceV1alpha1
-}
-
-func newFakeInferenceModels(fake *FakeInferenceV1alpha1, namespace string) typedapiv1alpha1.InferenceModelInterface {
-	return &fakeInferenceModels{
-		gentype.NewFakeClientWithListAndApply[*v1alpha1.InferenceModel, *v1alpha1.InferenceModelList, *apiv1alpha1.InferenceModelApplyConfiguration](
-			fake.Fake,
-			namespace,
-			v1alpha1.SchemeGroupVersion.WithResource("inferencemodels"),
-			v1alpha1.SchemeGroupVersion.WithKind("InferenceModel"),
-			func() *v1alpha1.InferenceModel { return &v1alpha1.InferenceModel{} },
-			func() *v1alpha1.InferenceModelList { return &v1alpha1.InferenceModelList{} },
-			func(dst, src *v1alpha1.InferenceModelList) { dst.ListMeta = src.ListMeta },
-			func(list *v1alpha1.InferenceModelList) []*v1alpha1.InferenceModel {
-				return gentype.ToPointerSlice(list.Items)
-			},
-			func(list *v1alpha1.InferenceModelList, items []*v1alpha1.InferenceModel) {
-				list.Items = gentype.FromPointerSlice(items)
-			},
-		),
-		fake,
-	}
-}
diff --git a/client-go/clientset/versioned/typed/api/v1alpha1/fake/fake_inferencepool.go b/client-go/clientset/versioned/typed/api/v1alpha1/fake/fake_inferencepool.go
deleted file mode 100644
index cd0764aa6..000000000
--- a/client-go/clientset/versioned/typed/api/v1alpha1/fake/fake_inferencepool.go
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
-Copyright 2024 The Kubernetes Authors.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-// Code generated by client-gen. DO NOT EDIT.
-
-package fake
-
-import (
-	gentype "k8s.io/client-go/gentype"
-	v1alpha1 "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha1"
-	apiv1alpha1 "sigs.k8s.io/gateway-api-inference-extension/client-go/applyconfiguration/api/v1alpha1"
-	typedapiv1alpha1 "sigs.k8s.io/gateway-api-inference-extension/client-go/clientset/versioned/typed/api/v1alpha1"
-)
-
-// fakeInferencePools implements InferencePoolInterface
-type fakeInferencePools struct {
-	*gentype.FakeClientWithListAndApply[*v1alpha1.InferencePool, *v1alpha1.InferencePoolList, *apiv1alpha1.InferencePoolApplyConfiguration]
-	Fake *FakeInferenceV1alpha1
-}
-
-func newFakeInferencePools(fake *FakeInferenceV1alpha1, namespace string) typedapiv1alpha1.InferencePoolInterface {
-	return &fakeInferencePools{
-		gentype.NewFakeClientWithListAndApply[*v1alpha1.InferencePool, *v1alpha1.InferencePoolList, *apiv1alpha1.InferencePoolApplyConfiguration](
-			fake.Fake,
-			namespace,
-			v1alpha1.SchemeGroupVersion.WithResource("inferencepools"),
-			v1alpha1.SchemeGroupVersion.WithKind("InferencePool"),
-			func() *v1alpha1.InferencePool { return &v1alpha1.InferencePool{} },
-			func() *v1alpha1.InferencePoolList { return &v1alpha1.InferencePoolList{} },
-			func(dst, src *v1alpha1.InferencePoolList) { dst.ListMeta = src.ListMeta },
-			func(list *v1alpha1.InferencePoolList) []*v1alpha1.InferencePool {
-				return gentype.ToPointerSlice(list.Items)
-			},
-			func(list *v1alpha1.InferencePoolList, items []*v1alpha1.InferencePool) {
-				list.Items = gentype.FromPointerSlice(items)
-			},
-		),
-		fake,
-	}
-}
diff --git a/client-go/clientset/versioned/typed/api/v1alpha1/generated_expansion.go b/client-go/clientset/versioned/typed/api/v1alpha1/generated_expansion.go
deleted file mode 100644
index 65c88eb10..000000000
--- a/client-go/clientset/versioned/typed/api/v1alpha1/generated_expansion.go
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
-Copyright 2024 The Kubernetes Authors.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-// Code generated by client-gen. DO NOT EDIT.
-
-package v1alpha1
-
-type InferenceModelExpansion interface{}
-
-type InferencePoolExpansion interface{}
diff --git a/client-go/clientset/versioned/typed/api/v1alpha1/inferencemodel.go b/client-go/clientset/versioned/typed/api/v1alpha1/inferencemodel.go
deleted file mode 100644
index 4c7c59418..000000000
--- a/client-go/clientset/versioned/typed/api/v1alpha1/inferencemodel.go
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
-Copyright 2024 The Kubernetes Authors.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-// Code generated by client-gen. DO NOT EDIT.
-
-package v1alpha1
-
-import (
-	context "context"
-
-	v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
-	types "k8s.io/apimachinery/pkg/types"
-	watch "k8s.io/apimachinery/pkg/watch"
-	gentype "k8s.io/client-go/gentype"
-	apiv1alpha1 "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha1"
-	applyconfigurationapiv1alpha1 "sigs.k8s.io/gateway-api-inference-extension/client-go/applyconfiguration/api/v1alpha1"
-	scheme "sigs.k8s.io/gateway-api-inference-extension/client-go/clientset/versioned/scheme"
-)
-
-// InferenceModelsGetter has a method to return a InferenceModelInterface.
-// A group's client should implement this interface.
-type InferenceModelsGetter interface {
-	InferenceModels(namespace string) InferenceModelInterface
-}
-
-// InferenceModelInterface has methods to work with InferenceModel resources.
-type InferenceModelInterface interface {
-	Create(ctx context.Context, inferenceModel *apiv1alpha1.InferenceModel, opts v1.CreateOptions) (*apiv1alpha1.InferenceModel, error)
-	Update(ctx context.Context, inferenceModel *apiv1alpha1.InferenceModel, opts v1.UpdateOptions) (*apiv1alpha1.InferenceModel, error)
-	// Add a +genclient:noStatus comment above the type to avoid generating UpdateStatus().
-	UpdateStatus(ctx context.Context, inferenceModel *apiv1alpha1.InferenceModel, opts v1.UpdateOptions) (*apiv1alpha1.InferenceModel, error)
-	Delete(ctx context.Context, name string, opts v1.DeleteOptions) error
-	DeleteCollection(ctx context.Context, opts v1.DeleteOptions, listOpts v1.ListOptions) error
-	Get(ctx context.Context, name string, opts v1.GetOptions) (*apiv1alpha1.InferenceModel, error)
-	List(ctx context.Context, opts v1.ListOptions) (*apiv1alpha1.InferenceModelList, error)
-	Watch(ctx context.Context, opts v1.ListOptions) (watch.Interface, error)
-	Patch(ctx context.Context, name string, pt types.PatchType, data []byte, opts v1.PatchOptions, subresources ...string) (result *apiv1alpha1.InferenceModel, err error)
-	Apply(ctx context.Context, inferenceModel *applyconfigurationapiv1alpha1.InferenceModelApplyConfiguration, opts v1.ApplyOptions) (result *apiv1alpha1.InferenceModel, err error)
-	// Add a +genclient:noStatus comment above the type to avoid generating ApplyStatus().
-	ApplyStatus(ctx context.Context, inferenceModel *applyconfigurationapiv1alpha1.InferenceModelApplyConfiguration, opts v1.ApplyOptions) (result *apiv1alpha1.InferenceModel, err error)
-	InferenceModelExpansion
-}
-
-// inferenceModels implements InferenceModelInterface
-type inferenceModels struct {
-	*gentype.ClientWithListAndApply[*apiv1alpha1.InferenceModel, *apiv1alpha1.InferenceModelList, *applyconfigurationapiv1alpha1.InferenceModelApplyConfiguration]
-}
-
-// newInferenceModels returns a InferenceModels
-func newInferenceModels(c *InferenceV1alpha1Client, namespace string) *inferenceModels {
-	return &inferenceModels{
-		gentype.NewClientWithListAndApply[*apiv1alpha1.InferenceModel, *apiv1alpha1.InferenceModelList, *applyconfigurationapiv1alpha1.InferenceModelApplyConfiguration](
-			"inferencemodels",
-			c.RESTClient(),
-			scheme.ParameterCodec,
-			namespace,
-			func() *apiv1alpha1.InferenceModel { return &apiv1alpha1.InferenceModel{} },
-			func() *apiv1alpha1.InferenceModelList { return &apiv1alpha1.InferenceModelList{} },
-		),
-	}
-}
diff --git a/client-go/clientset/versioned/typed/api/v1alpha1/inferencepool.go b/client-go/clientset/versioned/typed/api/v1alpha1/inferencepool.go
deleted file mode 100644
index 9af918017..000000000
--- a/client-go/clientset/versioned/typed/api/v1alpha1/inferencepool.go
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
-Copyright 2024 The Kubernetes Authors.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-// Code generated by client-gen. DO NOT EDIT.
-
-package v1alpha1
-
-import (
-	context "context"
-
-	v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
-	types "k8s.io/apimachinery/pkg/types"
-	watch "k8s.io/apimachinery/pkg/watch"
-	gentype "k8s.io/client-go/gentype"
-	apiv1alpha1 "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha1"
-	applyconfigurationapiv1alpha1 "sigs.k8s.io/gateway-api-inference-extension/client-go/applyconfiguration/api/v1alpha1"
-	scheme "sigs.k8s.io/gateway-api-inference-extension/client-go/clientset/versioned/scheme"
-)
-
-// InferencePoolsGetter has a method to return a InferencePoolInterface.
-// A group's client should implement this interface.
-type InferencePoolsGetter interface {
-	InferencePools(namespace string) InferencePoolInterface
-}
-
-// InferencePoolInterface has methods to work with InferencePool resources.
-type InferencePoolInterface interface {
-	Create(ctx context.Context, inferencePool *apiv1alpha1.InferencePool, opts v1.CreateOptions) (*apiv1alpha1.InferencePool, error)
-	Update(ctx context.Context, inferencePool *apiv1alpha1.InferencePool, opts v1.UpdateOptions) (*apiv1alpha1.InferencePool, error)
-	// Add a +genclient:noStatus comment above the type to avoid generating UpdateStatus().
-	UpdateStatus(ctx context.Context, inferencePool *apiv1alpha1.InferencePool, opts v1.UpdateOptions) (*apiv1alpha1.InferencePool, error)
-	Delete(ctx context.Context, name string, opts v1.DeleteOptions) error
-	DeleteCollection(ctx context.Context, opts v1.DeleteOptions, listOpts v1.ListOptions) error
-	Get(ctx context.Context, name string, opts v1.GetOptions) (*apiv1alpha1.InferencePool, error)
-	List(ctx context.Context, opts v1.ListOptions) (*apiv1alpha1.InferencePoolList, error)
-	Watch(ctx context.Context, opts v1.ListOptions) (watch.Interface, error)
-	Patch(ctx context.Context, name string, pt types.PatchType, data []byte, opts v1.PatchOptions, subresources ...string) (result *apiv1alpha1.InferencePool, err error)
-	Apply(ctx context.Context, inferencePool *applyconfigurationapiv1alpha1.InferencePoolApplyConfiguration, opts v1.ApplyOptions) (result *apiv1alpha1.InferencePool, err error)
-	// Add a +genclient:noStatus comment above the type to avoid generating ApplyStatus().
-	ApplyStatus(ctx context.Context, inferencePool *applyconfigurationapiv1alpha1.InferencePoolApplyConfiguration, opts v1.ApplyOptions) (result *apiv1alpha1.InferencePool, err error)
-	InferencePoolExpansion
-}
-
-// inferencePools implements InferencePoolInterface
-type inferencePools struct {
-	*gentype.ClientWithListAndApply[*apiv1alpha1.InferencePool, *apiv1alpha1.InferencePoolList, *applyconfigurationapiv1alpha1.InferencePoolApplyConfiguration]
-}
-
-// newInferencePools returns a InferencePools
-func newInferencePools(c *InferenceV1alpha1Client, namespace string) *inferencePools {
-	return &inferencePools{
-		gentype.NewClientWithListAndApply[*apiv1alpha1.InferencePool, *apiv1alpha1.InferencePoolList, *applyconfigurationapiv1alpha1.InferencePoolApplyConfiguration](
-			"inferencepools",
-			c.RESTClient(),
-			scheme.ParameterCodec,
-			namespace,
-			func() *apiv1alpha1.InferencePool { return &apiv1alpha1.InferencePool{} },
-			func() *apiv1alpha1.InferencePoolList { return &apiv1alpha1.InferencePoolList{} },
-		),
-	}
-}
diff --git a/client-go/informers/externalversions/api/interface.go b/client-go/informers/externalversions/api/interface.go
index 210b89f84..10eef3976 100644
--- a/client-go/informers/externalversions/api/interface.go
+++ b/client-go/informers/externalversions/api/interface.go
@@ -18,15 +18,12 @@ limitations under the License.
 package api
 
 import (
-	v1alpha1 "sigs.k8s.io/gateway-api-inference-extension/client-go/informers/externalversions/api/v1alpha1"
 	v1alpha2 "sigs.k8s.io/gateway-api-inference-extension/client-go/informers/externalversions/api/v1alpha2"
 	internalinterfaces "sigs.k8s.io/gateway-api-inference-extension/client-go/informers/externalversions/internalinterfaces"
 )
 
 // Interface provides access to each of this group's versions.
 type Interface interface {
-	// V1alpha1 provides access to shared informers for resources in V1alpha1.
-	V1alpha1() v1alpha1.Interface
 	// V1alpha2 provides access to shared informers for resources in V1alpha2.
 	V1alpha2() v1alpha2.Interface
 }
@@ -42,11 +39,6 @@ func New(f internalinterfaces.SharedInformerFactory, namespace string, tweakList
 	return &group{factory: f, namespace: namespace, tweakListOptions: tweakListOptions}
 }
 
-// V1alpha1 returns a new v1alpha1.Interface.
-func (g *group) V1alpha1() v1alpha1.Interface {
-	return v1alpha1.New(g.factory, g.namespace, g.tweakListOptions)
-}
-
 // V1alpha2 returns a new v1alpha2.Interface.
 func (g *group) V1alpha2() v1alpha2.Interface {
 	return v1alpha2.New(g.factory, g.namespace, g.tweakListOptions)
diff --git a/client-go/informers/externalversions/api/v1alpha1/inferencemodel.go b/client-go/informers/externalversions/api/v1alpha1/inferencemodel.go
deleted file mode 100644
index a1522e48a..000000000
--- a/client-go/informers/externalversions/api/v1alpha1/inferencemodel.go
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
-Copyright 2024 The Kubernetes Authors.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-// Code generated by informer-gen. DO NOT EDIT.
-
-package v1alpha1
-
-import (
-	context "context"
-	time "time"
-
-	v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
-	runtime "k8s.io/apimachinery/pkg/runtime"
-	watch "k8s.io/apimachinery/pkg/watch"
-	cache "k8s.io/client-go/tools/cache"
-	gatewayapiinferenceextensionapiv1alpha1 "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha1"
-	versioned "sigs.k8s.io/gateway-api-inference-extension/client-go/clientset/versioned"
-	internalinterfaces "sigs.k8s.io/gateway-api-inference-extension/client-go/informers/externalversions/internalinterfaces"
-	apiv1alpha1 "sigs.k8s.io/gateway-api-inference-extension/client-go/listers/api/v1alpha1"
-)
-
-// InferenceModelInformer provides access to a shared informer and lister for
-// InferenceModels.
-type InferenceModelInformer interface {
-	Informer() cache.SharedIndexInformer
-	Lister() apiv1alpha1.InferenceModelLister
-}
-
-type inferenceModelInformer struct {
-	factory          internalinterfaces.SharedInformerFactory
-	tweakListOptions internalinterfaces.TweakListOptionsFunc
-	namespace        string
-}
-
-// NewInferenceModelInformer constructs a new informer for InferenceModel type.
-// Always prefer using an informer factory to get a shared informer instead of getting an independent
-// one. This reduces memory footprint and number of connections to the server.
-func NewInferenceModelInformer(client versioned.Interface, namespace string, resyncPeriod time.Duration, indexers cache.Indexers) cache.SharedIndexInformer {
-	return NewFilteredInferenceModelInformer(client, namespace, resyncPeriod, indexers, nil)
-}
-
-// NewFilteredInferenceModelInformer constructs a new informer for InferenceModel type.
-// Always prefer using an informer factory to get a shared informer instead of getting an independent
-// one. This reduces memory footprint and number of connections to the server.
-func NewFilteredInferenceModelInformer(client versioned.Interface, namespace string, resyncPeriod time.Duration, indexers cache.Indexers, tweakListOptions internalinterfaces.TweakListOptionsFunc) cache.SharedIndexInformer {
-	return cache.NewSharedIndexInformer(
-		&cache.ListWatch{
-			ListFunc: func(options v1.ListOptions) (runtime.Object, error) {
-				if tweakListOptions != nil {
-					tweakListOptions(&options)
-				}
-				return client.InferenceV1alpha1().InferenceModels(namespace).List(context.TODO(), options)
-			},
-			WatchFunc: func(options v1.ListOptions) (watch.Interface, error) {
-				if tweakListOptions != nil {
-					tweakListOptions(&options)
-				}
-				return client.InferenceV1alpha1().InferenceModels(namespace).Watch(context.TODO(), options)
-			},
-		},
-		&gatewayapiinferenceextensionapiv1alpha1.InferenceModel{},
-		resyncPeriod,
-		indexers,
-	)
-}
-
-func (f *inferenceModelInformer) defaultInformer(client versioned.Interface, resyncPeriod time.Duration) cache.SharedIndexInformer {
-	return NewFilteredInferenceModelInformer(client, f.namespace, resyncPeriod, cache.Indexers{cache.NamespaceIndex: cache.MetaNamespaceIndexFunc}, f.tweakListOptions)
-}
-
-func (f *inferenceModelInformer) Informer() cache.SharedIndexInformer {
-	return f.factory.InformerFor(&gatewayapiinferenceextensionapiv1alpha1.InferenceModel{}, f.defaultInformer)
-}
-
-func (f *inferenceModelInformer) Lister() apiv1alpha1.InferenceModelLister {
-	return apiv1alpha1.NewInferenceModelLister(f.Informer().GetIndexer())
-}
diff --git a/client-go/informers/externalversions/api/v1alpha1/inferencepool.go b/client-go/informers/externalversions/api/v1alpha1/inferencepool.go
deleted file mode 100644
index 27f2d29eb..000000000
--- a/client-go/informers/externalversions/api/v1alpha1/inferencepool.go
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
-Copyright 2024 The Kubernetes Authors.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-// Code generated by informer-gen. DO NOT EDIT.
-
-package v1alpha1
-
-import (
-	context "context"
-	time "time"
-
-	v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
-	runtime "k8s.io/apimachinery/pkg/runtime"
-	watch "k8s.io/apimachinery/pkg/watch"
-	cache "k8s.io/client-go/tools/cache"
-	gatewayapiinferenceextensionapiv1alpha1 "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha1"
-	versioned "sigs.k8s.io/gateway-api-inference-extension/client-go/clientset/versioned"
-	internalinterfaces "sigs.k8s.io/gateway-api-inference-extension/client-go/informers/externalversions/internalinterfaces"
-	apiv1alpha1 "sigs.k8s.io/gateway-api-inference-extension/client-go/listers/api/v1alpha1"
-)
-
-// InferencePoolInformer provides access to a shared informer and lister for
-// InferencePools.
-type InferencePoolInformer interface {
-	Informer() cache.SharedIndexInformer
-	Lister() apiv1alpha1.InferencePoolLister
-}
-
-type inferencePoolInformer struct {
-	factory          internalinterfaces.SharedInformerFactory
-	tweakListOptions internalinterfaces.TweakListOptionsFunc
-	namespace        string
-}
-
-// NewInferencePoolInformer constructs a new informer for InferencePool type.
-// Always prefer using an informer factory to get a shared informer instead of getting an independent
-// one. This reduces memory footprint and number of connections to the server.
-func NewInferencePoolInformer(client versioned.Interface, namespace string, resyncPeriod time.Duration, indexers cache.Indexers) cache.SharedIndexInformer {
-	return NewFilteredInferencePoolInformer(client, namespace, resyncPeriod, indexers, nil)
-}
-
-// NewFilteredInferencePoolInformer constructs a new informer for InferencePool type.
-// Always prefer using an informer factory to get a shared informer instead of getting an independent
-// one. This reduces memory footprint and number of connections to the server.
-func NewFilteredInferencePoolInformer(client versioned.Interface, namespace string, resyncPeriod time.Duration, indexers cache.Indexers, tweakListOptions internalinterfaces.TweakListOptionsFunc) cache.SharedIndexInformer {
-	return cache.NewSharedIndexInformer(
-		&cache.ListWatch{
-			ListFunc: func(options v1.ListOptions) (runtime.Object, error) {
-				if tweakListOptions != nil {
-					tweakListOptions(&options)
-				}
-				return client.InferenceV1alpha1().InferencePools(namespace).List(context.TODO(), options)
-			},
-			WatchFunc: func(options v1.ListOptions) (watch.Interface, error) {
-				if tweakListOptions != nil {
-					tweakListOptions(&options)
-				}
-				return client.InferenceV1alpha1().InferencePools(namespace).Watch(context.TODO(), options)
-			},
-		},
-		&gatewayapiinferenceextensionapiv1alpha1.InferencePool{},
-		resyncPeriod,
-		indexers,
-	)
-}
-
-func (f *inferencePoolInformer) defaultInformer(client versioned.Interface, resyncPeriod time.Duration) cache.SharedIndexInformer {
-	return NewFilteredInferencePoolInformer(client, f.namespace, resyncPeriod, cache.Indexers{cache.NamespaceIndex: cache.MetaNamespaceIndexFunc}, f.tweakListOptions)
-}
-
-func (f *inferencePoolInformer) Informer() cache.SharedIndexInformer {
-	return f.factory.InformerFor(&gatewayapiinferenceextensionapiv1alpha1.InferencePool{}, f.defaultInformer)
-}
-
-func (f *inferencePoolInformer) Lister() apiv1alpha1.InferencePoolLister {
-	return apiv1alpha1.NewInferencePoolLister(f.Informer().GetIndexer())
-}
diff --git a/client-go/informers/externalversions/api/v1alpha1/interface.go b/client-go/informers/externalversions/api/v1alpha1/interface.go
deleted file mode 100644
index 3ea6d988a..000000000
--- a/client-go/informers/externalversions/api/v1alpha1/interface.go
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
-Copyright 2024 The Kubernetes Authors.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-// Code generated by informer-gen. DO NOT EDIT.
-
-package v1alpha1
-
-import (
-	internalinterfaces "sigs.k8s.io/gateway-api-inference-extension/client-go/informers/externalversions/internalinterfaces"
-)
-
-// Interface provides access to all the informers in this group version.
-type Interface interface {
-	// InferenceModels returns a InferenceModelInformer.
-	InferenceModels() InferenceModelInformer
-	// InferencePools returns a InferencePoolInformer.
-	InferencePools() InferencePoolInformer
-}
-
-type version struct {
-	factory          internalinterfaces.SharedInformerFactory
-	namespace        string
-	tweakListOptions internalinterfaces.TweakListOptionsFunc
-}
-
-// New returns a new Interface.
-func New(f internalinterfaces.SharedInformerFactory, namespace string, tweakListOptions internalinterfaces.TweakListOptionsFunc) Interface {
-	return &version{factory: f, namespace: namespace, tweakListOptions: tweakListOptions}
-}
-
-// InferenceModels returns a InferenceModelInformer.
-func (v *version) InferenceModels() InferenceModelInformer {
-	return &inferenceModelInformer{factory: v.factory, namespace: v.namespace, tweakListOptions: v.tweakListOptions}
-}
-
-// InferencePools returns a InferencePoolInformer.
-func (v *version) InferencePools() InferencePoolInformer {
-	return &inferencePoolInformer{factory: v.factory, namespace: v.namespace, tweakListOptions: v.tweakListOptions}
-}
diff --git a/client-go/informers/externalversions/generic.go b/client-go/informers/externalversions/generic.go
index 9f363d884..4186b2f65 100644
--- a/client-go/informers/externalversions/generic.go
+++ b/client-go/informers/externalversions/generic.go
@@ -22,7 +22,6 @@ import (
 
 	schema "k8s.io/apimachinery/pkg/runtime/schema"
 	cache "k8s.io/client-go/tools/cache"
-	v1alpha1 "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha1"
 	v1alpha2 "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha2"
 )
 
@@ -52,13 +51,7 @@ func (f *genericInformer) Lister() cache.GenericLister {
 // TODO extend this to unknown resources with a client pool
 func (f *sharedInformerFactory) ForResource(resource schema.GroupVersionResource) (GenericInformer, error) {
 	switch resource {
-	// Group=inference.networking.x-k8s.io, Version=v1alpha1
-	case v1alpha1.SchemeGroupVersion.WithResource("inferencemodels"):
-		return &genericInformer{resource: resource.GroupResource(), informer: f.Inference().V1alpha1().InferenceModels().Informer()}, nil
-	case v1alpha1.SchemeGroupVersion.WithResource("inferencepools"):
-		return &genericInformer{resource: resource.GroupResource(), informer: f.Inference().V1alpha1().InferencePools().Informer()}, nil
-
-		// Group=inference.networking.x-k8s.io, Version=v1alpha2
+	// Group=inference.networking.x-k8s.io, Version=v1alpha2
 	case v1alpha2.SchemeGroupVersion.WithResource("inferencemodels"):
 		return &genericInformer{resource: resource.GroupResource(), informer: f.Inference().V1alpha2().InferenceModels().Informer()}, nil
 	case v1alpha2.SchemeGroupVersion.WithResource("inferencepools"):
diff --git a/client-go/listers/api/v1alpha1/expansion_generated.go b/client-go/listers/api/v1alpha1/expansion_generated.go
deleted file mode 100644
index ffbe67cf3..000000000
--- a/client-go/listers/api/v1alpha1/expansion_generated.go
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
-Copyright 2024 The Kubernetes Authors.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-// Code generated by lister-gen. DO NOT EDIT.
-
-package v1alpha1
-
-// InferenceModelListerExpansion allows custom methods to be added to
-// InferenceModelLister.
-type InferenceModelListerExpansion interface{}
-
-// InferenceModelNamespaceListerExpansion allows custom methods to be added to
-// InferenceModelNamespaceLister.
-type InferenceModelNamespaceListerExpansion interface{}
-
-// InferencePoolListerExpansion allows custom methods to be added to
-// InferencePoolLister.
-type InferencePoolListerExpansion interface{}
-
-// InferencePoolNamespaceListerExpansion allows custom methods to be added to
-// InferencePoolNamespaceLister.
-type InferencePoolNamespaceListerExpansion interface{}
diff --git a/client-go/listers/api/v1alpha1/inferencemodel.go b/client-go/listers/api/v1alpha1/inferencemodel.go
deleted file mode 100644
index b43428424..000000000
--- a/client-go/listers/api/v1alpha1/inferencemodel.go
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
-Copyright 2024 The Kubernetes Authors.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-// Code generated by lister-gen. DO NOT EDIT.
-
-package v1alpha1
-
-import (
-	labels "k8s.io/apimachinery/pkg/labels"
-	listers "k8s.io/client-go/listers"
-	cache "k8s.io/client-go/tools/cache"
-	apiv1alpha1 "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha1"
-)
-
-// InferenceModelLister helps list InferenceModels.
-// All objects returned here must be treated as read-only.
-type InferenceModelLister interface {
-	// List lists all InferenceModels in the indexer.
-	// Objects returned here must be treated as read-only.
-	List(selector labels.Selector) (ret []*apiv1alpha1.InferenceModel, err error)
-	// InferenceModels returns an object that can list and get InferenceModels.
-	InferenceModels(namespace string) InferenceModelNamespaceLister
-	InferenceModelListerExpansion
-}
-
-// inferenceModelLister implements the InferenceModelLister interface.
-type inferenceModelLister struct {
-	listers.ResourceIndexer[*apiv1alpha1.InferenceModel]
-}
-
-// NewInferenceModelLister returns a new InferenceModelLister.
-func NewInferenceModelLister(indexer cache.Indexer) InferenceModelLister {
-	return &inferenceModelLister{listers.New[*apiv1alpha1.InferenceModel](indexer, apiv1alpha1.Resource("inferencemodel"))}
-}
-
-// InferenceModels returns an object that can list and get InferenceModels.
-func (s *inferenceModelLister) InferenceModels(namespace string) InferenceModelNamespaceLister {
-	return inferenceModelNamespaceLister{listers.NewNamespaced[*apiv1alpha1.InferenceModel](s.ResourceIndexer, namespace)}
-}
-
-// InferenceModelNamespaceLister helps list and get InferenceModels.
-// All objects returned here must be treated as read-only.
-type InferenceModelNamespaceLister interface {
-	// List lists all InferenceModels in the indexer for a given namespace.
-	// Objects returned here must be treated as read-only.
-	List(selector labels.Selector) (ret []*apiv1alpha1.InferenceModel, err error)
-	// Get retrieves the InferenceModel from the indexer for a given namespace and name.
-	// Objects returned here must be treated as read-only.
-	Get(name string) (*apiv1alpha1.InferenceModel, error)
-	InferenceModelNamespaceListerExpansion
-}
-
-// inferenceModelNamespaceLister implements the InferenceModelNamespaceLister
-// interface.
-type inferenceModelNamespaceLister struct {
-	listers.ResourceIndexer[*apiv1alpha1.InferenceModel]
-}
diff --git a/client-go/listers/api/v1alpha1/inferencepool.go b/client-go/listers/api/v1alpha1/inferencepool.go
deleted file mode 100644
index 387daf393..000000000
--- a/client-go/listers/api/v1alpha1/inferencepool.go
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
-Copyright 2024 The Kubernetes Authors.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-// Code generated by lister-gen. DO NOT EDIT.
-
-package v1alpha1
-
-import (
-	labels "k8s.io/apimachinery/pkg/labels"
-	listers "k8s.io/client-go/listers"
-	cache "k8s.io/client-go/tools/cache"
-	apiv1alpha1 "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha1"
-)
-
-// InferencePoolLister helps list InferencePools.
-// All objects returned here must be treated as read-only.
-type InferencePoolLister interface {
-	// List lists all InferencePools in the indexer.
-	// Objects returned here must be treated as read-only.
-	List(selector labels.Selector) (ret []*apiv1alpha1.InferencePool, err error)
-	// InferencePools returns an object that can list and get InferencePools.
-	InferencePools(namespace string) InferencePoolNamespaceLister
-	InferencePoolListerExpansion
-}
-
-// inferencePoolLister implements the InferencePoolLister interface.
-type inferencePoolLister struct {
-	listers.ResourceIndexer[*apiv1alpha1.InferencePool]
-}
-
-// NewInferencePoolLister returns a new InferencePoolLister.
-func NewInferencePoolLister(indexer cache.Indexer) InferencePoolLister {
-	return &inferencePoolLister{listers.New[*apiv1alpha1.InferencePool](indexer, apiv1alpha1.Resource("inferencepool"))}
-}
-
-// InferencePools returns an object that can list and get InferencePools.
-func (s *inferencePoolLister) InferencePools(namespace string) InferencePoolNamespaceLister {
-	return inferencePoolNamespaceLister{listers.NewNamespaced[*apiv1alpha1.InferencePool](s.ResourceIndexer, namespace)}
-}
-
-// InferencePoolNamespaceLister helps list and get InferencePools.
-// All objects returned here must be treated as read-only.
-type InferencePoolNamespaceLister interface {
-	// List lists all InferencePools in the indexer for a given namespace.
-	// Objects returned here must be treated as read-only.
-	List(selector labels.Selector) (ret []*apiv1alpha1.InferencePool, err error)
-	// Get retrieves the InferencePool from the indexer for a given namespace and name.
-	// Objects returned here must be treated as read-only.
-	Get(name string) (*apiv1alpha1.InferencePool, error)
-	InferencePoolNamespaceListerExpansion
-}
-
-// inferencePoolNamespaceLister implements the InferencePoolNamespaceLister
-// interface.
-type inferencePoolNamespaceLister struct {
-	listers.ResourceIndexer[*apiv1alpha1.InferencePool]
-}
diff --git a/cmd/epp/main.go b/cmd/epp/main.go
index dd47fa27e..ce15e3b20 100644
--- a/cmd/epp/main.go
+++ b/cmd/epp/main.go
@@ -39,7 +39,6 @@ import (
 	"sigs.k8s.io/controller-runtime/pkg/log/zap"
 	"sigs.k8s.io/controller-runtime/pkg/manager"
 	"sigs.k8s.io/controller-runtime/pkg/metrics/filters"
-	"sigs.k8s.io/gateway-api-inference-extension/api/v1alpha1"
 	"sigs.k8s.io/gateway-api-inference-extension/api/v1alpha2"
 	"sigs.k8s.io/gateway-api-inference-extension/internal/runnable"
 	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend"
@@ -104,9 +103,7 @@ var (
 
 func init() {
 	utilruntime.Must(clientgoscheme.AddToScheme(scheme))
-	utilruntime.Must(v1alpha1.AddToScheme(scheme))
 	utilruntime.Must(v1alpha2.AddToScheme(scheme))
-
 }
 
 func main() {
diff --git a/config/crd/bases/inference.networking.x-k8s.io_inferencemodels.yaml b/config/crd/bases/inference.networking.x-k8s.io_inferencemodels.yaml
index 09258c204..2995e8634 100644
--- a/config/crd/bases/inference.networking.x-k8s.io_inferencemodels.yaml
+++ b/config/crd/bases/inference.networking.x-k8s.io_inferencemodels.yaml
@@ -14,230 +14,6 @@ spec:
     singular: inferencemodel
   scope: Namespaced
   versions:
-  - name: v1alpha1
-    schema:
-      openAPIV3Schema:
-        description: InferenceModel is the Schema for the InferenceModels API.
-        properties:
-          apiVersion:
-            description: |-
-              APIVersion defines the versioned schema of this representation of an object.
-              Servers should convert recognized schemas to the latest internal value, and
-              may reject unrecognized values.
-              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
-            type: string
-          kind:
-            description: |-
-              Kind is a string value representing the REST resource this object represents.
-              Servers may infer this from the endpoint the client submits requests to.
-              Cannot be updated.
-              In CamelCase.
-              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
-            type: string
-          metadata:
-            type: object
-          spec:
-            description: |-
-              InferenceModelSpec represents the desired state of a specific model use case. This resource is
-              managed by the "Inference Workload Owner" persona.
-
-              The Inference Workload Owner persona is someone that trains, verifies, and
-              leverages a large language model from a model frontend, drives the lifecycle
-              and rollout of new versions of those models, and defines the specific
-              performance and latency goals for the model. These workloads are
-              expected to operate within an InferencePool sharing compute capacity with other
-              InferenceModels, defined by the Inference Platform Admin.
-
-              InferenceModel's modelName (not the ObjectMeta name) is unique for a given InferencePool,
-              if the name is reused, an error will be shown on the status of a
-              InferenceModel that attempted to reuse. The oldest InferenceModel, based on
-              creation timestamp, will be selected to remain valid. In the event of a race
-              condition, one will be selected at random.
-            properties:
-              criticality:
-                description: |-
-                  Criticality defines how important it is to serve the model compared to other models referencing the same pool.
-                  Criticality impacts how traffic is handled in resource constrained situations. It handles this by
-                  queuing or rejecting requests of lower criticality. InferenceModels of an equivalent Criticality will
-                  fairly share resources over throughput of tokens. In the future, the metric used to calculate fairness,
-                  and the proportionality of fairness will be configurable.
-
-                  Default values for this field will not be set, to allow for future additions of new field that may 'one of' with this field.
-                  Any implementations that may consume this field may treat an unset value as the 'Standard' range.
-                enum:
-                - Critical
-                - Standard
-                - Sheddable
-                type: string
-              modelName:
-                description: |-
-                  ModelName is the name of the model as it will be set in the "model" parameter for an incoming request.
-                  ModelNames must be unique for a referencing InferencePool
-                  (names can be reused for a different pool in the same cluster).
-                  The modelName with the oldest creation timestamp is retained, and the incoming
-                  InferenceModel is sets the Ready status to false with a corresponding reason.
-                  In the rare case of a race condition, one Model will be selected randomly to be considered valid, and the other rejected.
-                  Names can be reserved without an underlying model configured in the pool.
-                  This can be done by specifying a target model and setting the weight to zero,
-                  an error will be returned specifying that no valid target model is found.
-                maxLength: 256
-                type: string
-              poolRef:
-                description: PoolRef is a reference to the inference pool, the pool
-                  must exist in the same namespace.
-                properties:
-                  group:
-                    default: inference.networking.x-k8s.io
-                    description: Group is the group of the referent.
-                    maxLength: 253
-                    pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$
-                    type: string
-                  kind:
-                    default: InferencePool
-                    description: Kind is kind of the referent. For example "InferencePool".
-                    maxLength: 63
-                    minLength: 1
-                    pattern: ^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$
-                    type: string
-                  name:
-                    description: Name is the name of the referent.
-                    maxLength: 253
-                    minLength: 1
-                    type: string
-                required:
-                - name
-                type: object
-              targetModels:
-                description: |-
-                  TargetModels allow multiple versions of a model for traffic splitting.
-                  If not specified, the target model name is defaulted to the modelName parameter.
-                  modelName is often in reference to a LoRA adapter.
-                items:
-                  description: |-
-                    TargetModel represents a deployed model or a LoRA adapter. The
-                    Name field is expected to match the name of the LoRA adapter
-                    (or base model) as it is registered within the model server. Inference
-                    Gateway assumes that the model exists on the model server and it's the
-                    responsibility of the user to validate a correct match. Should a model fail
-                    to exist at request time, the error is processed by the Inference Gateway
-                    and emitted on the appropriate InferenceModel object.
-                  properties:
-                    name:
-                      description: Name is the name of the adapter or base model,
-                        as expected by the ModelServer.
-                      maxLength: 253
-                      type: string
-                    weight:
-                      description: |-
-                        Weight is used to determine the proportion of traffic that should be
-                        sent to this model when multiple target models are specified.
-
-                        Weight defines the proportion of requests forwarded to the specified
-                        model. This is computed as weight/(sum of all weights in this
-                        TargetModels list). For non-zero values, there may be some epsilon from
-                        the exact proportion defined here depending on the precision an
-                        implementation supports. Weight is not a percentage and the sum of
-                        weights does not need to equal 100.
-
-                        If a weight is set for any targetModel, it must be set for all targetModels.
-                        Conversely weights are optional, so long as ALL targetModels do not specify a weight.
-                      format: int32
-                      maximum: 1000000
-                      minimum: 0
-                      type: integer
-                  required:
-                  - name
-                  type: object
-                maxItems: 10
-                type: array
-                x-kubernetes-validations:
-                - message: Weights should be set for all models, or none of the models.
-                  rule: self.all(model, has(model.weight)) || self.all(model, !has(model.weight))
-            required:
-            - modelName
-            - poolRef
-            type: object
-          status:
-            description: InferenceModelStatus defines the observed state of InferenceModel
-            properties:
-              conditions:
-                default:
-                - lastTransitionTime: "1970-01-01T00:00:00Z"
-                  message: Waiting for controller
-                  reason: Pending
-                  status: Unknown
-                  type: Ready
-                description: |-
-                  Conditions track the state of the InferenceModel.
-
-                  Known condition types are:
-
-                  * "Accepted"
-                items:
-                  description: Condition contains details for one aspect of the current
-                    state of this API Resource.
-                  properties:
-                    lastTransitionTime:
-                      description: |-
-                        lastTransitionTime is the last time the condition transitioned from one status to another.
-                        This should be when the underlying condition changed.  If that is not known, then using the time when the API field changed is acceptable.
-                      format: date-time
-                      type: string
-                    message:
-                      description: |-
-                        message is a human readable message indicating details about the transition.
-                        This may be an empty string.
-                      maxLength: 32768
-                      type: string
-                    observedGeneration:
-                      description: |-
-                        observedGeneration represents the .metadata.generation that the condition was set based upon.
-                        For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date
-                        with respect to the current state of the instance.
-                      format: int64
-                      minimum: 0
-                      type: integer
-                    reason:
-                      description: |-
-                        reason contains a programmatic identifier indicating the reason for the condition's last transition.
-                        Producers of specific condition types may define expected values and meanings for this field,
-                        and whether the values are considered a guaranteed API.
-                        The value should be a CamelCase string.
-                        This field may not be empty.
-                      maxLength: 1024
-                      minLength: 1
-                      pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
-                      type: string
-                    status:
-                      description: status of the condition, one of True, False, Unknown.
-                      enum:
-                      - "True"
-                      - "False"
-                      - Unknown
-                      type: string
-                    type:
-                      description: type of condition in CamelCase or in foo.example.com/CamelCase.
-                      maxLength: 316
-                      pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
-                      type: string
-                  required:
-                  - lastTransitionTime
-                  - message
-                  - reason
-                  - status
-                  - type
-                  type: object
-                maxItems: 8
-                type: array
-                x-kubernetes-list-map-keys:
-                - type
-                x-kubernetes-list-type: map
-            type: object
-        type: object
-    served: true
-    storage: false
-    subresources:
-      status: {}
   - name: v1alpha2
     schema:
       openAPIV3Schema:
diff --git a/config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml b/config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml
index 918e95cb7..5b938590b 100644
--- a/config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml
+++ b/config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml
@@ -14,196 +14,6 @@ spec:
     singular: inferencepool
   scope: Namespaced
   versions:
-  - name: v1alpha1
-    schema:
-      openAPIV3Schema:
-        description: InferencePool is the Schema for the InferencePools API.
-        properties:
-          apiVersion:
-            description: |-
-              APIVersion defines the versioned schema of this representation of an object.
-              Servers should convert recognized schemas to the latest internal value, and
-              may reject unrecognized values.
-              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
-            type: string
-          kind:
-            description: |-
-              Kind is a string value representing the REST resource this object represents.
-              Servers may infer this from the endpoint the client submits requests to.
-              Cannot be updated.
-              In CamelCase.
-              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
-            type: string
-          metadata:
-            type: object
-          spec:
-            description: InferencePoolSpec defines the desired state of InferencePool
-            properties:
-              extensionRef:
-                description: Extension configures an endpoint picker as an extension
-                  service.
-                properties:
-                  failureMode:
-                    default: FailClose
-                    description: |-
-                      Configures how the gateway handles the case when the extension is not responsive.
-                      Defaults to failClose.
-                    enum:
-                    - FailOpen
-                    - FailClose
-                    type: string
-                  group:
-                    default: ""
-                    description: |-
-                      Group is the group of the referent.
-                      When unspecified or empty string, core API group is inferred.
-                    type: string
-                  kind:
-                    default: Service
-                    description: |-
-                      Kind is the Kubernetes resource kind of the referent. For example
-                      "Service".
-
-                      Defaults to "Service" when not specified.
-
-                      ExternalName services can refer to CNAME DNS records that may live
-                      outside of the cluster and as such are difficult to reason about in
-                      terms of conformance. They also may not be safe to forward to (see
-                      CVE-2021-25740 for more information). Implementations MUST NOT
-                      support ExternalName Services.
-                    type: string
-                  name:
-                    description: Name is the name of the referent.
-                    type: string
-                  targetPortNumber:
-                    description: |-
-                      The port number on the pods running the extension. When unspecified, implementations SHOULD infer a
-                      default value of 9002 when the Kind is Service.
-                    format: int32
-                    maximum: 65535
-                    minimum: 1
-                    type: integer
-                required:
-                - name
-                type: object
-              selector:
-                additionalProperties:
-                  description: |-
-                    LabelValue is the value of a label. This is used for validation
-                    of maps. This matches the Kubernetes label validation rules:
-                    * must be 63 characters or less (can be empty),
-                    * unless empty, must begin and end with an alphanumeric character ([a-z0-9A-Z]),
-                    * could contain dashes (-), underscores (_), dots (.), and alphanumerics between.
-
-                    Valid values include:
-
-                    * MyValue
-                    * my.name
-                    * 123-my-value
-                  maxLength: 63
-                  minLength: 0
-                  pattern: ^(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])?$
-                  type: string
-                description: |-
-                  Selector defines a map of labels to watch model server pods
-                  that should be included in the InferencePool.
-                  In some cases, implementations may translate this field to a Service selector, so this matches the simple
-                  map used for Service selectors instead of the full Kubernetes LabelSelector type.
-                type: object
-              targetPortNumber:
-                description: |-
-                  TargetPortNumber defines the port number to access the selected model servers.
-                  The number must be in the range 1 to 65535.
-                format: int32
-                maximum: 65535
-                minimum: 1
-                type: integer
-            required:
-            - extensionRef
-            - selector
-            - targetPortNumber
-            type: object
-          status:
-            description: InferencePoolStatus defines the observed state of InferencePool
-            properties:
-              conditions:
-                default:
-                - lastTransitionTime: "1970-01-01T00:00:00Z"
-                  message: Waiting for controller
-                  reason: Pending
-                  status: Unknown
-                  type: Ready
-                description: |-
-                  Conditions track the state of the InferencePool.
-
-                  Known condition types are:
-
-                  * "Ready"
-                items:
-                  description: Condition contains details for one aspect of the current
-                    state of this API Resource.
-                  properties:
-                    lastTransitionTime:
-                      description: |-
-                        lastTransitionTime is the last time the condition transitioned from one status to another.
-                        This should be when the underlying condition changed.  If that is not known, then using the time when the API field changed is acceptable.
-                      format: date-time
-                      type: string
-                    message:
-                      description: |-
-                        message is a human readable message indicating details about the transition.
-                        This may be an empty string.
-                      maxLength: 32768
-                      type: string
-                    observedGeneration:
-                      description: |-
-                        observedGeneration represents the .metadata.generation that the condition was set based upon.
-                        For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date
-                        with respect to the current state of the instance.
-                      format: int64
-                      minimum: 0
-                      type: integer
-                    reason:
-                      description: |-
-                        reason contains a programmatic identifier indicating the reason for the condition's last transition.
-                        Producers of specific condition types may define expected values and meanings for this field,
-                        and whether the values are considered a guaranteed API.
-                        The value should be a CamelCase string.
-                        This field may not be empty.
-                      maxLength: 1024
-                      minLength: 1
-                      pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
-                      type: string
-                    status:
-                      description: status of the condition, one of True, False, Unknown.
-                      enum:
-                      - "True"
-                      - "False"
-                      - Unknown
-                      type: string
-                    type:
-                      description: type of condition in CamelCase or in foo.example.com/CamelCase.
-                      maxLength: 316
-                      pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
-                      type: string
-                  required:
-                  - lastTransitionTime
-                  - message
-                  - reason
-                  - status
-                  - type
-                  type: object
-                maxItems: 8
-                type: array
-                x-kubernetes-list-map-keys:
-                - type
-                x-kubernetes-list-type: map
-            type: object
-        type: object
-    served: true
-    storage: false
-    subresources:
-      status: {}
   - name: v1alpha2
     schema:
       openAPIV3Schema:

From 7001c3270a27ce40c8cc259bbc8494fcbf6ca7a5 Mon Sep 17 00:00:00 2001
From: Zhonghu Xu <xuzhonghu@huawei.com>
Date: Wed, 26 Feb 2025 15:10:19 +0800
Subject: [PATCH 3/3] Add document to disallow cross namespace match explicitly

---
 api/v1alpha2/inferencepool_types.go                             | 2 ++
 .../crd/bases/inference.networking.x-k8s.io_inferencepools.yaml | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/api/v1alpha2/inferencepool_types.go b/api/v1alpha2/inferencepool_types.go
index 716bfb116..0781f0441 100644
--- a/api/v1alpha2/inferencepool_types.go
+++ b/api/v1alpha2/inferencepool_types.go
@@ -50,6 +50,8 @@ type InferencePoolSpec struct {
 	// that should be included in the InferencePool.
 	// In some cases, implementations may translate this field to a Service selector, so this matches the simple
 	// map used for Service selectors instead of the full Kubernetes LabelSelector type.
+	// If specified, it will be applied to match the model server pods in the same namespace as the InferencePool.
+	// Cross namespace selector is not supported.
 	//
 	// +kubebuilder:validation:Required
 	Selector map[LabelKey]LabelValue `json:"selector"`
diff --git a/config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml b/config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml
index 5b938590b..8a7ad9382 100644
--- a/config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml
+++ b/config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml
@@ -109,6 +109,8 @@ spec:
                   that should be included in the InferencePool.
                   In some cases, implementations may translate this field to a Service selector, so this matches the simple
                   map used for Service selectors instead of the full Kubernetes LabelSelector type.
+                  If specified, it will be applied to match the model server pods in the same namespace as the InferencePool.
+                  Cross namespace selector is not supported.
                 type: object
               targetPortNumber:
                 description: |-