diff --git a/api/v1alpha2/doc.go b/api/v1alpha2/doc.go
new file mode 100644
index 000000000..90a35f58c
--- /dev/null
+++ b/api/v1alpha2/doc.go
@@ -0,0 +1,23 @@
+/*
+Copyright 2025 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+// Package v1alpha2 contains API Schema definitions for the
+// inference.networking.x-k8s.io API group.
+//
+// +k8s:openapi-gen=true
+// +kubebuilder:object:generate=true
+// +groupName=inference.networking.x-k8s.io
+package v1alpha2
diff --git a/api/v1alpha2/groupversion_info.go b/api/v1alpha2/groupversion_info.go
new file mode 100644
index 000000000..f9eb9b1e2
--- /dev/null
+++ b/api/v1alpha2/groupversion_info.go
@@ -0,0 +1,45 @@
+/*
+Copyright 2025 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+// Package v1alpha2 contains API Schema definitions for the inference.networking.x-k8s.io v1alpha2 API group
+// +kubebuilder:object:generate=true
+// +groupName=inference.networking.x-k8s.io
+package v1alpha2
+
+import (
+	"k8s.io/apimachinery/pkg/runtime/schema"
+	"sigs.k8s.io/controller-runtime/pkg/scheme"
+)
+
+var (
+	// GroupVersion is the group version used to register these objects.
+	GroupVersion = schema.GroupVersion{Group: "inference.networking.x-k8s.io", Version: "v1alpha2"}
+
+	// SchemeGroupVersion is an alias of GroupVersion for client-go libraries.
+	// It is required by pkg/client/informers/externalversions/...
+	SchemeGroupVersion = GroupVersion
+
+	// SchemeBuilder is used to add Go types to the GroupVersionKind scheme.
+	SchemeBuilder = &scheme.Builder{GroupVersion: GroupVersion}
+
+	// AddToScheme adds the types in this group-version to the given scheme.
+	AddToScheme = SchemeBuilder.AddToScheme
+)
+
+// Resource qualifies the given resource name with this package's API group; required by pkg/client/listers/...
+func Resource(resource string) schema.GroupResource {
+	return schema.GroupResource{Group: GroupVersion.Group, Resource: resource}
+}
diff --git a/api/v1alpha2/inferencemodel_types.go b/api/v1alpha2/inferencemodel_types.go
new file mode 100644
index 000000000..9ab1fd86f
--- /dev/null
+++ b/api/v1alpha2/inferencemodel_types.go
@@ -0,0 +1,235 @@
+/*
+Copyright 2025 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package v1alpha2
+
+import (
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+)
+
+// InferenceModel is the Schema for the InferenceModels API, pairing a desired Spec with an observed Status.
+//
+// +kubebuilder:object:root=true
+// +kubebuilder:subresource:status
+// +kubebuilder:storageversion
+// +genclient
+type InferenceModel struct {
+	metav1.TypeMeta   `json:",inline"`
+	metav1.ObjectMeta `json:"metadata,omitempty"`
+
+	Spec   InferenceModelSpec   `json:"spec,omitempty"`
+	Status InferenceModelStatus `json:"status,omitempty"`
+}
+
+// InferenceModelList contains a list of InferenceModel objects.
+//
+// +kubebuilder:object:root=true
+type InferenceModelList struct {
+	metav1.TypeMeta `json:",inline"`
+	metav1.ListMeta `json:"metadata,omitempty"`
+	Items           []InferenceModel `json:"items"`
+}
+
+// InferenceModelSpec represents the desired state of a specific model use case. This resource is
+// managed by the "Inference Workload Owner" persona.
+//
+// The Inference Workload Owner persona is someone that trains, verifies, and
+// leverages a large language model from a model frontend, drives the lifecycle
+// and rollout of new versions of those models, and defines the specific
+// performance and latency goals for the model. These workloads are
+// expected to operate within an InferencePool sharing compute capacity with other
+// InferenceModels, defined by the Inference Platform Admin.
+//
+// InferenceModel's modelName (not the ObjectMeta name) is unique for a given InferencePool.
+// If the name is reused, an error will be shown on the status of the
+// InferenceModel that attempted to reuse it. The oldest InferenceModel, based on
+// creation timestamp, will be selected to remain valid. In the event of a race
+// condition, one will be selected at random.
+type InferenceModelSpec struct {
+	// ModelName is the name of the model as it will be set in the "model" parameter for an incoming request.
+	// ModelNames must be unique for a referencing InferencePool
+	// (names can be reused for a different pool in the same cluster).
+	// The modelName with the oldest creation timestamp is retained, and the incoming
+	// InferenceModel has its Accepted condition set to False with a corresponding reason.
+	// In the rare case of a race condition, one Model will be selected randomly to be considered valid, and the other rejected.
+	// Names can be reserved without an underlying model configured in the pool.
+	// This can be done by specifying a target model and setting the weight to zero;
+	// an error will be returned specifying that no valid target model is found.
+	//
+	// +kubebuilder:validation:MaxLength=256
+	// +kubebuilder:validation:Required
+	ModelName string `json:"modelName"`
+
+	// Criticality defines how important it is to serve the model compared to other models referencing the same pool.
+	// Criticality impacts how traffic is handled in resource-constrained situations. It handles this by
+	// queuing or rejecting requests of lower criticality. InferenceModels of an equivalent Criticality will
+	// fairly share resources over throughput of tokens. In the future, the metric used to calculate fairness,
+	// and the proportionality of fairness will be configurable.
+	//
+	// Default values for this field will not be set, to allow for future additions of new fields that may 'one of' with this field.
+	// Any implementations that may consume this field may treat an unset value as the 'Standard' range.
+	// +optional
+	Criticality *Criticality `json:"criticality,omitempty"`
+
+	// TargetModels allows multiple versions of a model for traffic splitting.
+	// If not specified, the target model name is defaulted to the modelName parameter.
+	// modelName is often in reference to a LoRA adapter.
+	//
+	// +optional
+	// +kubebuilder:validation:MaxItems=10
+	// +kubebuilder:validation:XValidation:message="Weights should be set for all models, or none of the models.",rule="self.all(model, has(model.weight)) || self.all(model, !has(model.weight))"
+	TargetModels []TargetModel `json:"targetModels,omitempty"`
+
+	// PoolRef is a reference to the inference pool; the pool must exist in the same namespace.
+	//
+	// +kubebuilder:validation:Required
+	PoolRef PoolObjectReference `json:"poolRef"`
+}
+
+// PoolObjectReference identifies an API object within the namespace of the
+// referrer. Only Name is required; Group and Kind carry schema defaults.
+type PoolObjectReference struct {
+	// Group is the API group of the referent.
+	//
+	// +optional
+	// +kubebuilder:default="inference.networking.x-k8s.io"
+	// +kubebuilder:validation:MaxLength=253
+	// +kubebuilder:validation:Pattern=`^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$`
+	Group string `json:"group,omitempty"`
+
+	// Kind is the kind of the referent. For example "InferencePool".
+	//
+	// +optional
+	// +kubebuilder:default="InferencePool"
+	// +kubebuilder:validation:MinLength=1
+	// +kubebuilder:validation:MaxLength=63
+	// +kubebuilder:validation:Pattern=`^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$`
+	Kind string `json:"kind,omitempty"`
+
+	// Name is the name of the referent. It must not be empty.
+	//
+	// +kubebuilder:validation:MinLength=1
+	// +kubebuilder:validation:MaxLength=253
+	// +kubebuilder:validation:Required
+	Name string `json:"name"`
+}
+
+// Criticality defines how important it is to serve the model compared to other models.
+// Criticality is intentionally a bounded enum to contain the possibilities that need to be supported by the load balancing algorithm. Any reference to the Criticality field must be optional (use a pointer), and set no default.
+// This allows us to union this with a oneOf field in the future should we wish to adjust/extend this behavior.
+// +kubebuilder:validation:Enum=Critical;Standard;Sheddable
+type Criticality string
+
+const (
+	// Critical defines the highest level of criticality. Requests to this band will be shed last.
+	Critical Criticality = "Critical"
+
+	// Standard defines the base criticality level and is more important than Sheddable but less
+	// important than Critical. Requests in this band will be shed before Critical traffic.
+	// Most models are expected to fall within this band.
+	Standard Criticality = "Standard"
+
+	// Sheddable defines the lowest level of criticality. Requests to this band will be shed before
+	// those of all other bands.
+	Sheddable Criticality = "Sheddable"
+)
+
+// TargetModel represents a deployed model or a LoRA adapter. The
+// Name field is expected to match the name of the LoRA adapter
+// (or base model) as it is registered within the model server. Inference
+// Gateway assumes that the model exists on the model server and it's the
+// responsibility of the user to validate a correct match. Should a model fail
+// to exist at request time, the error is processed by the Inference Gateway
+// and emitted on the appropriate InferenceModel object.
+type TargetModel struct {
+	// Name is the name of the adapter or base model, as expected by the ModelServer.
+	//
+	// +kubebuilder:validation:MaxLength=253
+	// +kubebuilder:validation:Required
+	Name string `json:"name"`
+
+	// Weight is used to determine the proportion of traffic that should be
+	// sent to this model when multiple target models are specified.
+	//
+	// Weight defines the proportion of requests forwarded to the specified
+	// model. This is computed as weight/(sum of all weights in this
+	// TargetModels list). For non-zero values, there may be some epsilon from
+	// the exact proportion defined here depending on the precision an
+	// implementation supports. Weight is not a percentage and the sum of
+	// weights does not need to equal 100.
+	//
+	// If a weight is set for any targetModel, it must be set for all targetModels.
+	// Conversely, weights may be omitted as long as none of the targetModels specifies a weight.
+	//
+	// +optional
+	// +kubebuilder:validation:Minimum=0
+	// +kubebuilder:validation:Maximum=1000000
+	Weight *int32 `json:"weight,omitempty"`
+}
+
+// InferenceModelStatus defines the observed state of InferenceModel.
+type InferenceModelStatus struct {
+	// Conditions track the state of the InferenceModel.
+	//
+	// Known condition types are:
+	//
+	// * "Accepted"
+	//
+	// +optional
+	// +listType=map
+	// +listMapKey=type
+	// +kubebuilder:validation:MaxItems=8
+	// +kubebuilder:default={{type: "Accepted", status: "Unknown", reason:"Pending", message:"Waiting for controller", lastTransitionTime: "1970-01-01T00:00:00Z"}}
+	Conditions []metav1.Condition `json:"conditions,omitempty"`
+}
+
+// InferenceModelConditionType is a type of condition for the InferenceModel, for example "Accepted".
+type InferenceModelConditionType string
+
+// InferenceModelConditionReason is the reason for a given InferenceModelConditionType, for example "Pending".
+type InferenceModelConditionReason string
+
+const (
+	// ModelConditionAccepted indicates whether the model config is accepted, and if not, why.
+	//
+	// Possible reasons for this condition to be True are:
+	//
+	// * "Accepted"
+	//
+	// Possible reasons for this condition to be False are:
+	//
+	// * "ModelNameInUse"
+	//
+	// Possible reasons for this condition to be Unknown are:
+	//
+	// * "Pending"
+	//
+	ModelConditionAccepted InferenceModelConditionType = "Accepted"
+
+	// ModelReasonAccepted is the desired state. The model conforms to the state of the pool.
+	ModelReasonAccepted InferenceModelConditionReason = "Accepted"
+
+	// ModelReasonNameInUse is used when a given ModelName already exists within the pool.
+	// Details about naming conflict resolution are documented on the ModelName field itself.
+	ModelReasonNameInUse InferenceModelConditionReason = "ModelNameInUse"
+
+	// ModelReasonPending is the initial state, and indicates that the controller has not yet reconciled the InferenceModel.
+	ModelReasonPending InferenceModelConditionReason = "Pending"
+)
+
+func init() {
+	SchemeBuilder.Register(new(InferenceModel), new(InferenceModelList)) // make both kinds known to SchemeBuilder
+}
diff --git a/api/v1alpha2/inferencepool_types.go b/api/v1alpha2/inferencepool_types.go
new file mode 100644
index 000000000..716bfb116
--- /dev/null
+++ b/api/v1alpha2/inferencepool_types.go
@@ -0,0 +1,255 @@
+/*
+Copyright 2025 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package v1alpha2
+
+import (
+	corev1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+)
+
+// InferencePool is the Schema for the InferencePools API, pairing a desired Spec with an observed Status.
+//
+// +kubebuilder:object:root=true
+// +kubebuilder:subresource:status
+// +kubebuilder:storageversion
+// +genclient
+type InferencePool struct {
+	metav1.TypeMeta   `json:",inline"`
+	metav1.ObjectMeta `json:"metadata,omitempty"`
+
+	Spec   InferencePoolSpec   `json:"spec,omitempty"`
+	Status InferencePoolStatus `json:"status,omitempty"`
+}
+
+// InferencePoolList contains a list of InferencePool objects.
+//
+// +kubebuilder:object:root=true
+type InferencePoolList struct {
+	metav1.TypeMeta `json:",inline"`
+	metav1.ListMeta `json:"metadata,omitempty"`
+	Items           []InferencePool `json:"items"`
+}
+
+// InferencePoolSpec defines the desired state of InferencePool.
+type InferencePoolSpec struct {
+	// Selector defines a map of labels used to select the model server pods
+	// that should be included in the InferencePool.
+	// In some cases, implementations may translate this field to a Service selector, so this matches the simple
+	// map used for Service selectors instead of the full Kubernetes LabelSelector type.
+	//
+	// +kubebuilder:validation:Required
+	Selector map[LabelKey]LabelValue `json:"selector"`
+
+	// TargetPortNumber defines the port number used to access the selected model servers.
+	// The number must be in the range 1 to 65535.
+	//
+	// +kubebuilder:validation:Minimum=1
+	// +kubebuilder:validation:Maximum=65535
+	// +kubebuilder:validation:Required
+	TargetPortNumber int32 `json:"targetPortNumber"`
+
+	// EndpointPickerConfig specifies the configuration needed by the proxy to discover and connect to the endpoint
+	// picker service that picks endpoints for the requests routed to this pool. Its fields are inlined into the spec.
+	EndpointPickerConfig `json:",inline"`
+}
+
+// EndpointPickerConfig specifies the configuration needed by the proxy to discover and connect to the endpoint picker extension.
+// This type is intended to be a union of mutually exclusive configuration options that we may add in the future.
+type EndpointPickerConfig struct {
+	// ExtensionRef configures an endpoint picker as an extension service.
+	//
+	// +kubebuilder:validation:Required
+	ExtensionRef *Extension `json:"extensionRef,omitempty"`
+}
+
+// Extension specifies how to configure an extension that runs the endpoint picker.
+type Extension struct {
+	// ExtensionReference is a reference to a service extension; its fields are inlined.
+	ExtensionReference `json:",inline"`
+
+	// ExtensionConnection configures the connection between the gateway and the extension; its fields are inlined.
+	ExtensionConnection `json:",inline"`
+}
+
+// ExtensionReference is a reference to the extension deployment.
+type ExtensionReference struct {
+	// Group is the API group of the referent.
+	// When unspecified or empty string, core API group is inferred.
+	//
+	// +optional
+	// +kubebuilder:default=""
+	Group *string `json:"group,omitempty"`
+
+	// Kind is the Kubernetes resource kind of the referent. For example
+	// "Service".
+	//
+	// Defaults to "Service" when not specified.
+	//
+	// ExternalName services can refer to CNAME DNS records that may live
+	// outside of the cluster and as such are difficult to reason about in
+	// terms of conformance. They also may not be safe to forward to (see
+	// CVE-2021-25740 for more information). Implementations MUST NOT
+	// support ExternalName Services.
+	//
+	// +optional
+	// +kubebuilder:default=Service
+	Kind *string `json:"kind,omitempty"`
+
+	// Name is the name of the referent.
+	//
+	// +kubebuilder:validation:Required
+	Name string `json:"name"`
+
+	// PortNumber is the port number on the service running the extension (JSON key "targetPortNumber").
+	// When unspecified, implementations SHOULD infer a default value of 9002 when the Kind is Service.
+	//
+	// +kubebuilder:validation:Minimum=1
+	// +kubebuilder:validation:Maximum=65535
+	// +optional
+	PortNumber *int32 `json:"targetPortNumber,omitempty"`
+}
+
+// ExtensionConnection encapsulates options that configure the connection to the extension.
+type ExtensionConnection struct {
+	// FailureMode configures how the gateway handles the case when the extension is not responsive.
+	// Defaults to FailClose.
+	//
+	// +optional
+	// +kubebuilder:default="FailClose"
+	FailureMode *ExtensionFailureMode `json:"failureMode,omitempty"`
+}
+
+// ExtensionFailureMode defines the options for how the gateway handles the case when the extension is not
+// responsive. Allowed values are FailOpen and FailClose.
+// +kubebuilder:validation:Enum=FailOpen;FailClose
+type ExtensionFailureMode string
+
+const (
+	// FailOpen specifies that the proxy should not drop the request and should forward it to an endpoint of its picking.
+	FailOpen ExtensionFailureMode = "FailOpen"
+	// FailClose specifies that the proxy should drop the request.
+	FailClose ExtensionFailureMode = "FailClose"
+)
+
+// LabelKey was originally copied from: https://github.com/kubernetes-sigs/gateway-api/blob/99a3934c6bc1ce0874f3a4c5f20cafd8977ffcb4/apis/v1/shared_types.go#L694-L731
+// Duplicated as to not take an unexpected dependency on gw's API.
+//
+// LabelKey is the key of a label. This is used for validation
+// of maps. This matches the Kubernetes "qualified name" validation that is used for labels.
+// Labels are case sensitive, so: my-label and My-Label are considered distinct.
+//
+// Valid values include:
+//
+// * example
+// * example.com
+// * example.com/path
+// * example.com/path.html
+//
+// Invalid values include:
+//
+// * example~ - "~" is an invalid character
+// * example.com. - cannot start or end with "."
+//
+// +kubebuilder:validation:MinLength=1
+// +kubebuilder:validation:MaxLength=253
+// +kubebuilder:validation:Pattern=`^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?([A-Za-z0-9][-A-Za-z0-9_.]{0,61})?[A-Za-z0-9]$`
+type LabelKey string
+
+// LabelValue is the value of a label. This is used for validation
+// of maps. This matches the Kubernetes label validation rules:
+// * must be 63 characters or less (can be empty),
+// * unless empty, must begin and end with an alphanumeric character ([a-z0-9A-Z]),
+// * may contain dashes (-), underscores (_), dots (.), and alphanumerics in between.
+//
+// Valid values include:
+//
+// * MyValue
+// * my.name
+// * 123-my-value
+//
+// +kubebuilder:validation:MinLength=0
+// +kubebuilder:validation:MaxLength=63
+// +kubebuilder:validation:Pattern=`^(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])?$`
+type LabelValue string
+
+// InferencePoolStatus defines the observed state of InferencePool.
+type InferencePoolStatus struct {
+	// Parents is a list of parent resources (usually Gateways) that are
+	// associated with the InferencePool, and the status of the InferencePool with respect to
+	// each parent. It is serialized under the "parent" JSON key.
+	//
+	// A maximum of 32 Gateways will be represented in this list. An empty list
+	// means the InferencePool has not been attached to any Gateway.
+	//
+	// +kubebuilder:validation:MaxItems=32
+	Parents []PoolStatus `json:"parent,omitempty"`
+}
+
+// PoolStatus defines the observed state of InferencePool from a gateway.
+type PoolStatus struct {
+	// GatewayRef indicates the gateway that observed the state of the InferencePool (JSON key "parentRef").
+	GatewayRef corev1.ObjectReference `json:"parentRef"`
+	// Conditions track the state of the InferencePool.
+	//
+	// Known condition types are:
+	//
+	// * "Ready"
+	//
+	// +optional
+	// +listType=map
+	// +listMapKey=type
+	// +kubebuilder:validation:MaxItems=8
+	// +kubebuilder:default={{type: "Ready", status: "Unknown", reason:"Pending", message:"Waiting for controller", lastTransitionTime: "1970-01-01T00:00:00Z"}}
+	Conditions []metav1.Condition `json:"conditions,omitempty"`
+}
+
+// InferencePoolConditionType is a type of condition for the InferencePool, for example "Ready".
+type InferencePoolConditionType string
+
+// InferencePoolConditionReason is the reason for a given InferencePoolConditionType, for example "Pending".
+type InferencePoolConditionReason string
+
+const (
+	// PoolConditionReady indicates whether the pool is ready to accept traffic, and if not, why.
+	//
+	// Possible reasons for this condition to be True are:
+	//
+	// * "Ready"
+	//
+	// Possible reasons for this condition to be False are:
+	//
+	// * "EndpointPickerNotHealthy"
+	//
+	// Possible reasons for this condition to be Unknown are:
+	//
+	// * "Pending"
+	//
+	PoolConditionReady InferencePoolConditionType = "Ready"
+
+	// PoolReasonReady is the desired state. The pool and its components are initialized and ready for traffic.
+	PoolReasonReady InferencePoolConditionReason = "Ready"
+
+	// PoolReasonEPPNotHealthy is used when the endpoint picker (EPP) has not yet passed health checks, or has started failing them.
+	PoolReasonEPPNotHealthy InferencePoolConditionReason = "EndpointPickerNotHealthy"
+
+	// PoolReasonPending is the initial state, and indicates that the controller has not yet reconciled this pool.
+	PoolReasonPending InferencePoolConditionReason = "Pending"
+)
+
+func init() {
+	SchemeBuilder.Register(new(InferencePool), new(InferencePoolList)) // make both kinds known to SchemeBuilder
+}
diff --git a/api/v1alpha2/zz_generated.deepcopy.go b/api/v1alpha2/zz_generated.deepcopy.go
new file mode 100644
index 000000000..9b685969a
--- /dev/null
+++ b/api/v1alpha2/zz_generated.deepcopy.go
@@ -0,0 +1,384 @@
+//go:build !ignore_autogenerated
+
+/*
+Copyright 2024 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+// Code generated by controller-gen. DO NOT EDIT.
+
+package v1alpha2
+
+import (
+	"k8s.io/apimachinery/pkg/apis/meta/v1"
+	runtime "k8s.io/apimachinery/pkg/runtime"
+)
+
+// DeepCopyInto is an autogenerated deepcopy function that copies the receiver into out, allocating a new Extension for a non-nil ExtensionRef. in must be non-nil.
+func (in *EndpointPickerConfig) DeepCopyInto(out *EndpointPickerConfig) {
+	*out = *in
+	if in.ExtensionRef != nil {
+		in, out := &in.ExtensionRef, &out.ExtensionRef
+		*out = new(Extension)
+		(*in).DeepCopyInto(*out)
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function that returns a newly allocated copy of the EndpointPickerConfig, or nil for a nil receiver.
+func (in *EndpointPickerConfig) DeepCopy() *EndpointPickerConfig {
+	if in == nil {
+		return nil
+	}
+	out := new(EndpointPickerConfig)
+	in.DeepCopyInto(out)
+	return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function that copies the receiver into out, deep-copying the embedded ExtensionReference and ExtensionConnection. in must be non-nil.
+func (in *Extension) DeepCopyInto(out *Extension) {
+	*out = *in
+	in.ExtensionReference.DeepCopyInto(&out.ExtensionReference)
+	in.ExtensionConnection.DeepCopyInto(&out.ExtensionConnection)
+}
+
+// DeepCopy is an autogenerated deepcopy function that returns a newly allocated copy of the Extension, or nil for a nil receiver.
+func (in *Extension) DeepCopy() *Extension {
+	if in == nil {
+		return nil
+	}
+	out := new(Extension)
+	in.DeepCopyInto(out)
+	return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function that copies the receiver into out, allocating a new ExtensionFailureMode for a non-nil FailureMode. in must be non-nil.
+func (in *ExtensionConnection) DeepCopyInto(out *ExtensionConnection) {
+	*out = *in
+	if in.FailureMode != nil {
+		in, out := &in.FailureMode, &out.FailureMode
+		*out = new(ExtensionFailureMode)
+		**out = **in
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function that returns a newly allocated copy of the ExtensionConnection, or nil for a nil receiver.
+func (in *ExtensionConnection) DeepCopy() *ExtensionConnection {
+	if in == nil {
+		return nil
+	}
+	out := new(ExtensionConnection)
+	in.DeepCopyInto(out)
+	return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function that copies the receiver into out, allocating fresh values for the non-nil Group, Kind and PortNumber pointers. in must be non-nil.
+func (in *ExtensionReference) DeepCopyInto(out *ExtensionReference) {
+	*out = *in
+	if in.Group != nil {
+		in, out := &in.Group, &out.Group
+		*out = new(string)
+		**out = **in
+	}
+	if in.Kind != nil {
+		in, out := &in.Kind, &out.Kind
+		*out = new(string)
+		**out = **in
+	}
+	if in.PortNumber != nil {
+		in, out := &in.PortNumber, &out.PortNumber
+		*out = new(int32)
+		**out = **in
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function that returns a newly allocated copy of the ExtensionReference, or nil for a nil receiver.
+func (in *ExtensionReference) DeepCopy() *ExtensionReference {
+	if in == nil {
+		return nil
+	}
+	out := new(ExtensionReference)
+	in.DeepCopyInto(out)
+	return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function that copies the receiver into out, deep-copying ObjectMeta, Spec and Status. in must be non-nil.
+func (in *InferenceModel) DeepCopyInto(out *InferenceModel) {
+	*out = *in
+	out.TypeMeta = in.TypeMeta
+	in.ObjectMeta.DeepCopyInto(&out.ObjectMeta)
+	in.Spec.DeepCopyInto(&out.Spec)
+	in.Status.DeepCopyInto(&out.Status)
+}
+
+// DeepCopy is an autogenerated deepcopy function that returns a newly allocated copy of the InferenceModel, or nil for a nil receiver.
+func (in *InferenceModel) DeepCopy() *InferenceModel {
+	if in == nil {
+		return nil
+	}
+	out := new(InferenceModel)
+	in.DeepCopyInto(out)
+	return out
+}
+
+// DeepCopyObject is an autogenerated deepcopy function that returns the receiver's DeepCopy as a runtime.Object, or a nil runtime.Object when that copy is nil.
+func (in *InferenceModel) DeepCopyObject() runtime.Object {
+	if c := in.DeepCopy(); c != nil {
+		return c
+	}
+	return nil
+}
+
+// DeepCopyInto is an autogenerated deepcopy function that copies the receiver into out, deep-copying ListMeta and every element of a non-nil Items slice. in must be non-nil.
+func (in *InferenceModelList) DeepCopyInto(out *InferenceModelList) {
+	*out = *in
+	out.TypeMeta = in.TypeMeta
+	in.ListMeta.DeepCopyInto(&out.ListMeta)
+	if in.Items != nil {
+		in, out := &in.Items, &out.Items
+		*out = make([]InferenceModel, len(*in))
+		for i := range *in {
+			(*in)[i].DeepCopyInto(&(*out)[i])
+		}
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function that returns a newly allocated copy of the InferenceModelList, or nil for a nil receiver.
+func (in *InferenceModelList) DeepCopy() *InferenceModelList {
+	if in == nil {
+		return nil
+	}
+	out := new(InferenceModelList)
+	in.DeepCopyInto(out)
+	return out
+}
+
+// DeepCopyObject is an autogenerated deepcopy function that returns the receiver's DeepCopy as a runtime.Object, or a nil runtime.Object when that copy is nil.
+func (in *InferenceModelList) DeepCopyObject() runtime.Object {
+	if c := in.DeepCopy(); c != nil {
+		return c
+	}
+	return nil
+}
+
+// DeepCopyInto is an autogenerated deepcopy function that copies the receiver into out, duplicating a non-nil Criticality pointer and TargetModels slice; PoolRef is copied by value. in must be non-nil.
+func (in *InferenceModelSpec) DeepCopyInto(out *InferenceModelSpec) {
+	*out = *in
+	if in.Criticality != nil {
+		in, out := &in.Criticality, &out.Criticality
+		*out = new(Criticality)
+		**out = **in
+	}
+	if in.TargetModels != nil {
+		in, out := &in.TargetModels, &out.TargetModels
+		*out = make([]TargetModel, len(*in))
+		for i := range *in {
+			(*in)[i].DeepCopyInto(&(*out)[i])
+		}
+	}
+	out.PoolRef = in.PoolRef
+}
+
+// DeepCopy is an autogenerated deepcopy function that returns a newly allocated copy of the InferenceModelSpec, or nil for a nil receiver.
+func (in *InferenceModelSpec) DeepCopy() *InferenceModelSpec {
+	if in == nil {
+		return nil
+	}
+	out := new(InferenceModelSpec)
+	in.DeepCopyInto(out)
+	return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function that copies the receiver into out, deep-copying every element of a non-nil Conditions slice. in must be non-nil.
+func (in *InferenceModelStatus) DeepCopyInto(out *InferenceModelStatus) {
+	*out = *in
+	if in.Conditions != nil {
+		in, out := &in.Conditions, &out.Conditions
+		*out = make([]v1.Condition, len(*in))
+		for i := range *in {
+			(*in)[i].DeepCopyInto(&(*out)[i])
+		}
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function that returns a newly allocated copy of the InferenceModelStatus, or nil for a nil receiver.
+func (in *InferenceModelStatus) DeepCopy() *InferenceModelStatus {
+	if in == nil {
+		return nil
+	}
+	out := new(InferenceModelStatus)
+	in.DeepCopyInto(out)
+	return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function that copies the receiver into out, deep-copying ObjectMeta, Spec and Status. in must be non-nil.
+func (in *InferencePool) DeepCopyInto(out *InferencePool) {
+	*out = *in
+	out.TypeMeta = in.TypeMeta
+	in.ObjectMeta.DeepCopyInto(&out.ObjectMeta)
+	in.Spec.DeepCopyInto(&out.Spec)
+	in.Status.DeepCopyInto(&out.Status)
+}
+
+// DeepCopy is an autogenerated deepcopy function that returns a newly allocated copy of the InferencePool, or nil for a nil receiver.
+func (in *InferencePool) DeepCopy() *InferencePool {
+	if in == nil {
+		return nil
+	}
+	out := new(InferencePool)
+	in.DeepCopyInto(out)
+	return out
+}
+
+// DeepCopyObject is an autogenerated deepcopy function that returns the receiver's DeepCopy as a runtime.Object, or a nil runtime.Object when that copy is nil.
+func (in *InferencePool) DeepCopyObject() runtime.Object {
+	if c := in.DeepCopy(); c != nil {
+		return c
+	}
+	return nil
+}
+
+// DeepCopyInto is an autogenerated deepcopy function that copies the receiver into out, deep-copying ListMeta and every element of a non-nil Items slice. in must be non-nil.
+func (in *InferencePoolList) DeepCopyInto(out *InferencePoolList) {
+	*out = *in
+	out.TypeMeta = in.TypeMeta
+	in.ListMeta.DeepCopyInto(&out.ListMeta)
+	if in.Items != nil {
+		in, out := &in.Items, &out.Items
+		*out = make([]InferencePool, len(*in))
+		for i := range *in {
+			(*in)[i].DeepCopyInto(&(*out)[i])
+		}
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function that returns a newly allocated copy of the InferencePoolList, or nil for a nil receiver.
+func (in *InferencePoolList) DeepCopy() *InferencePoolList {
+	if in == nil {
+		return nil
+	}
+	out := new(InferencePoolList)
+	in.DeepCopyInto(out)
+	return out
+}
+
+// DeepCopyObject is an autogenerated deepcopy function that returns the receiver's DeepCopy as a runtime.Object, or a nil runtime.Object when that copy is nil.
+func (in *InferencePoolList) DeepCopyObject() runtime.Object {
+	if c := in.DeepCopy(); c != nil {
+		return c
+	}
+	return nil
+}
+
+// DeepCopyInto is an autogenerated deepcopy function that copies the receiver into out, rebuilding a non-nil Selector map and deep-copying the embedded EndpointPickerConfig. in must be non-nil.
+func (in *InferencePoolSpec) DeepCopyInto(out *InferencePoolSpec) {
+	*out = *in
+	if in.Selector != nil {
+		in, out := &in.Selector, &out.Selector
+		*out = make(map[LabelKey]LabelValue, len(*in))
+		for key, val := range *in {
+			(*out)[key] = val
+		}
+	}
+	in.EndpointPickerConfig.DeepCopyInto(&out.EndpointPickerConfig)
+}
+
+// DeepCopy is an autogenerated deepcopy function that returns a newly allocated copy of the InferencePoolSpec, or nil for a nil receiver.
+func (in *InferencePoolSpec) DeepCopy() *InferencePoolSpec {
+	if in == nil {
+		return nil
+	}
+	out := new(InferencePoolSpec)
+	in.DeepCopyInto(out)
+	return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function that copies the receiver into out, deep-copying every element of a non-nil Parents slice. in must be non-nil.
+func (in *InferencePoolStatus) DeepCopyInto(out *InferencePoolStatus) {
+	*out = *in
+	if in.Parents != nil {
+		in, out := &in.Parents, &out.Parents
+		*out = make([]PoolStatus, len(*in))
+		for i := range *in {
+			(*in)[i].DeepCopyInto(&(*out)[i])
+		}
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function that returns a newly allocated copy of the InferencePoolStatus, or nil for a nil receiver.
+func (in *InferencePoolStatus) DeepCopy() *InferencePoolStatus {
+	if in == nil {
+		return nil
+	}
+	out := new(InferencePoolStatus)
+	in.DeepCopyInto(out)
+	return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function that copies the receiver into out with a plain struct assignment. in must be non-nil.
+func (in *PoolObjectReference) DeepCopyInto(out *PoolObjectReference) {
+	*out = *in
+}
+
+// DeepCopy is an autogenerated deepcopy function that returns a newly allocated copy of the PoolObjectReference, or nil for a nil receiver.
+func (in *PoolObjectReference) DeepCopy() *PoolObjectReference {
+	if in == nil {
+		return nil
+	}
+	out := new(PoolObjectReference)
+	in.DeepCopyInto(out)
+	return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function that copies the receiver into out, assigning GatewayRef by value and deep-copying every element of a non-nil Conditions slice. in must be non-nil.
+func (in *PoolStatus) DeepCopyInto(out *PoolStatus) {
+	*out = *in
+	out.GatewayRef = in.GatewayRef
+	if in.Conditions != nil {
+		in, out := &in.Conditions, &out.Conditions
+		*out = make([]v1.Condition, len(*in))
+		for i := range *in {
+			(*in)[i].DeepCopyInto(&(*out)[i])
+		}
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function that returns a newly allocated copy of the PoolStatus, or nil for a nil receiver.
+func (in *PoolStatus) DeepCopy() *PoolStatus {
+	if in == nil {
+		return nil
+	}
+	out := new(PoolStatus)
+	in.DeepCopyInto(out)
+	return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function that copies the receiver into out, allocating a new int32 for a non-nil Weight. in must be non-nil.
+func (in *TargetModel) DeepCopyInto(out *TargetModel) {
+	*out = *in
+	if in.Weight != nil {
+		in, out := &in.Weight, &out.Weight
+		*out = new(int32)
+		**out = **in
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function that returns a newly allocated copy of the TargetModel, or nil for a nil receiver.
+func (in *TargetModel) DeepCopy() *TargetModel {
+	if in == nil {
+		return nil
+	}
+	out := new(TargetModel)
+	in.DeepCopyInto(out)
+	return out
+}
diff --git a/client-go/applyconfiguration/api/v1alpha2/endpointpickerconfig.go b/client-go/applyconfiguration/api/v1alpha2/endpointpickerconfig.go
new file mode 100644
index 000000000..007b88703
--- /dev/null
+++ b/client-go/applyconfiguration/api/v1alpha2/endpointpickerconfig.go
@@ -0,0 +1,38 @@
+/*
+Copyright 2024 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+// Code generated by applyconfiguration-gen. DO NOT EDIT.
+
+package v1alpha2
+
+// EndpointPickerConfigApplyConfiguration represents a declarative configuration of the EndpointPickerConfig type for use
+// with apply. ExtensionRef is a pointer tagged omitempty, so it is left out of the JSON encoding when nil.
+type EndpointPickerConfigApplyConfiguration struct {
+	ExtensionRef *ExtensionApplyConfiguration `json:"extensionRef,omitempty"`
+}
+
+// EndpointPickerConfig constructs an empty declarative configuration of the EndpointPickerConfig type for use with
+// apply.
+func EndpointPickerConfig() *EndpointPickerConfigApplyConfiguration {
+	return &EndpointPickerConfigApplyConfiguration{}
+}
+
+// WithExtensionRef sets the ExtensionRef field in the declarative configuration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the ExtensionRef field is set to the value of the last call.
+func (b *EndpointPickerConfigApplyConfiguration) WithExtensionRef(value *ExtensionApplyConfiguration) *EndpointPickerConfigApplyConfiguration {
+	b.ExtensionRef = value // stores the pointer as given; the pointed-to configuration is not copied
+	return b
+}
diff --git a/client-go/applyconfiguration/api/v1alpha2/extension.go b/client-go/applyconfiguration/api/v1alpha2/extension.go
new file mode 100644
index 000000000..b38026135
--- /dev/null
+++ b/client-go/applyconfiguration/api/v1alpha2/extension.go
@@ -0,0 +1,75 @@
+/*
+Copyright 2024 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+// Code generated by applyconfiguration-gen. DO NOT EDIT.
+
+package v1alpha2
+
+import (
+	apiv1alpha2 "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha2"
+)
+
+// ExtensionApplyConfiguration represents a declarative configuration of the Extension type for use
+// with apply. It embeds the reference and connection configurations, both tagged as inline in JSON.
+type ExtensionApplyConfiguration struct {
+	ExtensionReferenceApplyConfiguration  `json:",inline"`
+	ExtensionConnectionApplyConfiguration `json:",inline"`
+}
+
+// Extension constructs an empty declarative configuration of the Extension type for use with
+// apply.
+func Extension() *ExtensionApplyConfiguration {
+	return &ExtensionApplyConfiguration{}
+}
+
+// WithGroup sets the Group field of the embedded ExtensionReferenceApplyConfiguration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the Group field is set to the value of the last call.
+func (b *ExtensionApplyConfiguration) WithGroup(value string) *ExtensionApplyConfiguration {
+	b.ExtensionReferenceApplyConfiguration.Group = &value // points at this call's own copy of value
+	return b
+}
+
+// WithKind sets the Kind field of the embedded ExtensionReferenceApplyConfiguration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the Kind field is set to the value of the last call.
+func (b *ExtensionApplyConfiguration) WithKind(value string) *ExtensionApplyConfiguration {
+	b.ExtensionReferenceApplyConfiguration.Kind = &value // points at this call's own copy of value
+	return b
+}
+
+// WithName sets the Name field of the embedded ExtensionReferenceApplyConfiguration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the Name field is set to the value of the last call.
+func (b *ExtensionApplyConfiguration) WithName(value string) *ExtensionApplyConfiguration {
+	b.ExtensionReferenceApplyConfiguration.Name = &value // points at this call's own copy of value
+	return b
+}
+
+// WithPortNumber sets the PortNumber field of the embedded ExtensionReferenceApplyConfiguration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the PortNumber field is set to the value of the last call.
+func (b *ExtensionApplyConfiguration) WithPortNumber(value int32) *ExtensionApplyConfiguration {
+	b.ExtensionReferenceApplyConfiguration.PortNumber = &value // points at this call's own copy of value
+	return b
+}
+
+// WithFailureMode sets the FailureMode field of the embedded ExtensionConnectionApplyConfiguration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the FailureMode field is set to the value of the last call.
+func (b *ExtensionApplyConfiguration) WithFailureMode(value apiv1alpha2.ExtensionFailureMode) *ExtensionApplyConfiguration {
+	b.ExtensionConnectionApplyConfiguration.FailureMode = &value // points at this call's own copy of value
+	return b
+}
diff --git a/client-go/applyconfiguration/api/v1alpha2/extensionconnection.go b/client-go/applyconfiguration/api/v1alpha2/extensionconnection.go
new file mode 100644
index 000000000..2a59b8303
--- /dev/null
+++ b/client-go/applyconfiguration/api/v1alpha2/extensionconnection.go
@@ -0,0 +1,42 @@
+/*
+Copyright 2024 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+// Code generated by applyconfiguration-gen. DO NOT EDIT.
+
+package v1alpha2
+
+import (
+	apiv1alpha2 "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha2"
+)
+
+// ExtensionConnectionApplyConfiguration represents a declarative configuration of the ExtensionConnection type for use
+// with apply. FailureMode is a pointer tagged omitempty, so it is left out of the JSON encoding when nil.
+type ExtensionConnectionApplyConfiguration struct {
+	FailureMode *apiv1alpha2.ExtensionFailureMode `json:"failureMode,omitempty"`
+}
+
+// ExtensionConnection constructs an empty declarative configuration of the ExtensionConnection type for use with
+// apply.
+func ExtensionConnection() *ExtensionConnectionApplyConfiguration {
+	return &ExtensionConnectionApplyConfiguration{}
+}
+
+// WithFailureMode sets the FailureMode field in the declarative configuration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the FailureMode field is set to the value of the last call.
+func (b *ExtensionConnectionApplyConfiguration) WithFailureMode(value apiv1alpha2.ExtensionFailureMode) *ExtensionConnectionApplyConfiguration {
+	b.FailureMode = &value // points at this call's own copy of value
+	return b
+}
diff --git a/client-go/applyconfiguration/api/v1alpha2/extensionreference.go b/client-go/applyconfiguration/api/v1alpha2/extensionreference.go
new file mode 100644
index 000000000..71034710d
--- /dev/null
+++ b/client-go/applyconfiguration/api/v1alpha2/extensionreference.go
@@ -0,0 +1,65 @@
+/*
+Copyright 2024 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+// Code generated by applyconfiguration-gen. DO NOT EDIT.
+
+package v1alpha2
+
+// ExtensionReferenceApplyConfiguration represents a declarative configuration of the ExtensionReference type for use
+// with apply. Every field is a pointer tagged omitempty; note that PortNumber uses the JSON key "targetPortNumber".
+type ExtensionReferenceApplyConfiguration struct {
+	Group      *string `json:"group,omitempty"`
+	Kind       *string `json:"kind,omitempty"`
+	Name       *string `json:"name,omitempty"`
+	PortNumber *int32  `json:"targetPortNumber,omitempty"`
+}
+
+// ExtensionReference constructs an empty declarative configuration of the ExtensionReference type for use with
+// apply.
+func ExtensionReference() *ExtensionReferenceApplyConfiguration {
+	return &ExtensionReferenceApplyConfiguration{}
+}
+
+// WithGroup sets the Group field in the declarative configuration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the Group field is set to the value of the last call.
+func (b *ExtensionReferenceApplyConfiguration) WithGroup(value string) *ExtensionReferenceApplyConfiguration {
+	b.Group = &value // points at this call's own copy of value
+	return b
+}
+
+// WithKind sets the Kind field in the declarative configuration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the Kind field is set to the value of the last call.
+func (b *ExtensionReferenceApplyConfiguration) WithKind(value string) *ExtensionReferenceApplyConfiguration {
+	b.Kind = &value // points at this call's own copy of value
+	return b
+}
+
+// WithName sets the Name field in the declarative configuration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the Name field is set to the value of the last call.
+func (b *ExtensionReferenceApplyConfiguration) WithName(value string) *ExtensionReferenceApplyConfiguration {
+	b.Name = &value // points at this call's own copy of value
+	return b
+}
+
+// WithPortNumber sets the PortNumber field in the declarative configuration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the PortNumber field is set to the value of the last call.
+func (b *ExtensionReferenceApplyConfiguration) WithPortNumber(value int32) *ExtensionReferenceApplyConfiguration {
+	b.PortNumber = &value // points at this call's own copy of value
+	return b
+}
diff --git a/client-go/applyconfiguration/api/v1alpha2/inferencemodel.go b/client-go/applyconfiguration/api/v1alpha2/inferencemodel.go
new file mode 100644
index 000000000..1fbfe106f
--- /dev/null
+++ b/client-go/applyconfiguration/api/v1alpha2/inferencemodel.go
@@ -0,0 +1,224 @@
+/*
+Copyright 2024 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+// Code generated by applyconfiguration-gen. DO NOT EDIT.
+
+package v1alpha2
+
+import (
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	types "k8s.io/apimachinery/pkg/types"
+	v1 "k8s.io/client-go/applyconfigurations/meta/v1"
+)
+
+// InferenceModelApplyConfiguration represents a declarative configuration of the InferenceModel type for use
+// with apply. Type metadata is embedded inline; object metadata is embedded by pointer under the "metadata" key.
+type InferenceModelApplyConfiguration struct {
+	v1.TypeMetaApplyConfiguration    `json:",inline"`
+	*v1.ObjectMetaApplyConfiguration `json:"metadata,omitempty"`
+	Spec                             *InferenceModelSpecApplyConfiguration   `json:"spec,omitempty"`
+	Status                           *InferenceModelStatusApplyConfiguration `json:"status,omitempty"`
+}
+
+// InferenceModel constructs a declarative configuration of the InferenceModel type for use with
+// apply, pre-populated with the given name and namespace plus the InferenceModel kind and API version.
+func InferenceModel(name, namespace string) *InferenceModelApplyConfiguration {
+	b := &InferenceModelApplyConfiguration{}
+	b.WithName(name)
+	b.WithNamespace(namespace)
+	b.WithKind("InferenceModel")
+	b.WithAPIVersion("inference.networking.x-k8s.io/v1alpha2")
+	return b
+}
+
+// WithKind sets the Kind field of the embedded TypeMetaApplyConfiguration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the Kind field is set to the value of the last call.
+func (b *InferenceModelApplyConfiguration) WithKind(value string) *InferenceModelApplyConfiguration {
+	b.TypeMetaApplyConfiguration.Kind = &value // points at this call's own copy of value
+	return b
+}
+
+// WithAPIVersion sets the APIVersion field of the embedded TypeMetaApplyConfiguration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the APIVersion field is set to the value of the last call.
+func (b *InferenceModelApplyConfiguration) WithAPIVersion(value string) *InferenceModelApplyConfiguration {
+	b.TypeMetaApplyConfiguration.APIVersion = &value // points at this call's own copy of value
+	return b
+}
+
+// WithName sets the Name field of the embedded object metadata to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the Name field is set to the value of the last call.
+func (b *InferenceModelApplyConfiguration) WithName(value string) *InferenceModelApplyConfiguration {
+	b.ensureObjectMetaApplyConfigurationExists() // the embedded metadata pointer may still be nil
+	b.ObjectMetaApplyConfiguration.Name = &value
+	return b
+}
+
+// WithGenerateName sets the GenerateName field of the embedded object metadata to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the GenerateName field is set to the value of the last call.
+func (b *InferenceModelApplyConfiguration) WithGenerateName(value string) *InferenceModelApplyConfiguration {
+	b.ensureObjectMetaApplyConfigurationExists() // the embedded metadata pointer may still be nil
+	b.ObjectMetaApplyConfiguration.GenerateName = &value
+	return b
+}
+
+// WithNamespace sets the Namespace field of the embedded object metadata to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the Namespace field is set to the value of the last call.
+func (b *InferenceModelApplyConfiguration) WithNamespace(value string) *InferenceModelApplyConfiguration {
+	b.ensureObjectMetaApplyConfigurationExists() // the embedded metadata pointer may still be nil
+	b.ObjectMetaApplyConfiguration.Namespace = &value
+	return b
+}
+
+// WithUID sets the UID field of the embedded object metadata to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the UID field is set to the value of the last call.
+func (b *InferenceModelApplyConfiguration) WithUID(value types.UID) *InferenceModelApplyConfiguration {
+	b.ensureObjectMetaApplyConfigurationExists() // the embedded metadata pointer may still be nil
+	b.ObjectMetaApplyConfiguration.UID = &value
+	return b
+}
+
+// WithResourceVersion sets the ResourceVersion field of the embedded object metadata to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the ResourceVersion field is set to the value of the last call.
+func (b *InferenceModelApplyConfiguration) WithResourceVersion(value string) *InferenceModelApplyConfiguration {
+	b.ensureObjectMetaApplyConfigurationExists() // the embedded metadata pointer may still be nil
+	b.ObjectMetaApplyConfiguration.ResourceVersion = &value
+	return b
+}
+
+// WithGeneration sets the Generation field of the embedded object metadata to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the Generation field is set to the value of the last call.
+func (b *InferenceModelApplyConfiguration) WithGeneration(value int64) *InferenceModelApplyConfiguration {
+	b.ensureObjectMetaApplyConfigurationExists() // the embedded metadata pointer may still be nil
+	b.ObjectMetaApplyConfiguration.Generation = &value
+	return b
+}
+
+// WithCreationTimestamp sets the CreationTimestamp field of the embedded object metadata to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the CreationTimestamp field is set to the value of the last call.
+func (b *InferenceModelApplyConfiguration) WithCreationTimestamp(value metav1.Time) *InferenceModelApplyConfiguration {
+	b.ensureObjectMetaApplyConfigurationExists() // the embedded metadata pointer may still be nil
+	b.ObjectMetaApplyConfiguration.CreationTimestamp = &value
+	return b
+}
+
+// WithDeletionTimestamp sets the DeletionTimestamp field of the embedded object metadata to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the DeletionTimestamp field is set to the value of the last call.
+func (b *InferenceModelApplyConfiguration) WithDeletionTimestamp(value metav1.Time) *InferenceModelApplyConfiguration {
+	b.ensureObjectMetaApplyConfigurationExists() // the embedded metadata pointer may still be nil
+	b.ObjectMetaApplyConfiguration.DeletionTimestamp = &value
+	return b
+}
+
+// WithDeletionGracePeriodSeconds sets the DeletionGracePeriodSeconds field of the embedded object metadata to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the DeletionGracePeriodSeconds field is set to the value of the last call.
+func (b *InferenceModelApplyConfiguration) WithDeletionGracePeriodSeconds(value int64) *InferenceModelApplyConfiguration {
+	b.ensureObjectMetaApplyConfigurationExists() // the embedded metadata pointer may still be nil
+	b.ObjectMetaApplyConfiguration.DeletionGracePeriodSeconds = &value
+	return b
+}
+
+// WithLabels puts the entries into the Labels field in the declarative configuration
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the entries provided by each call will be put on the Labels field,
+// overwriting any existing map entries in the Labels field that have the same key.
+func (b *InferenceModelApplyConfiguration) WithLabels(entries map[string]string) *InferenceModelApplyConfiguration {
+	b.ensureObjectMetaApplyConfigurationExists()
+	if b.ObjectMetaApplyConfiguration.Labels == nil && len(entries) > 0 { // Labels stays nil when there is nothing to add
+		b.ObjectMetaApplyConfiguration.Labels = make(map[string]string, len(entries))
+	}
+	for k, v := range entries {
+		b.ObjectMetaApplyConfiguration.Labels[k] = v
+	}
+	return b
+}
+
+// WithAnnotations puts the entries into the Annotations field in the declarative configuration
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the entries provided by each call will be put on the Annotations field,
+// overwriting any existing map entries in the Annotations field that have the same key.
+func (b *InferenceModelApplyConfiguration) WithAnnotations(entries map[string]string) *InferenceModelApplyConfiguration {
+	b.ensureObjectMetaApplyConfigurationExists()
+	if b.ObjectMetaApplyConfiguration.Annotations == nil && len(entries) > 0 { // Annotations stays nil when there is nothing to add
+		b.ObjectMetaApplyConfiguration.Annotations = make(map[string]string, len(entries))
+	}
+	for k, v := range entries {
+		b.ObjectMetaApplyConfiguration.Annotations[k] = v
+	}
+	return b
+}
+
+// WithOwnerReferences adds the given values to the OwnerReferences field in the declarative configuration
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, values provided by each call will be appended to the OwnerReferences field. It panics on a nil value.
+func (b *InferenceModelApplyConfiguration) WithOwnerReferences(values ...*v1.OwnerReferenceApplyConfiguration) *InferenceModelApplyConfiguration {
+	b.ensureObjectMetaApplyConfigurationExists()
+	for i := range values {
+		if values[i] == nil {
+			panic("nil value passed to WithOwnerReferences")
+		}
+		b.ObjectMetaApplyConfiguration.OwnerReferences = append(b.ObjectMetaApplyConfiguration.OwnerReferences, *values[i]) // appends a copy of the pointee
+	}
+	return b
+}
+
+// WithFinalizers adds the given values to the Finalizers field in the declarative configuration
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, values provided by each call will be appended to the Finalizers field.
+func (b *InferenceModelApplyConfiguration) WithFinalizers(values ...string) *InferenceModelApplyConfiguration {
+	b.ensureObjectMetaApplyConfigurationExists()
+	for i := range values {
+		b.ObjectMetaApplyConfiguration.Finalizers = append(b.ObjectMetaApplyConfiguration.Finalizers, values[i])
+	}
+	return b
+}
+
+func (b *InferenceModelApplyConfiguration) ensureObjectMetaApplyConfigurationExists() { // lazily allocates the embedded object metadata
+	if b.ObjectMetaApplyConfiguration == nil { // embedded by pointer, so it is nil until first use
+		b.ObjectMetaApplyConfiguration = &v1.ObjectMetaApplyConfiguration{}
+	}
+}
+
+// WithSpec sets the Spec field in the declarative configuration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the Spec field is set to the value of the last call.
+func (b *InferenceModelApplyConfiguration) WithSpec(value *InferenceModelSpecApplyConfiguration) *InferenceModelApplyConfiguration {
+	b.Spec = value // stores the pointer as given; the pointed-to configuration is not copied
+	return b
+}
+
+// WithStatus sets the Status field in the declarative configuration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the Status field is set to the value of the last call.
+func (b *InferenceModelApplyConfiguration) WithStatus(value *InferenceModelStatusApplyConfiguration) *InferenceModelApplyConfiguration {
+	b.Status = value // stores the pointer as given; the pointed-to configuration is not copied
+	return b
+}
+
+// GetName retrieves the Name pointer held in the embedded object metadata, allocating that metadata first when it is nil.
+func (b *InferenceModelApplyConfiguration) GetName() *string {
+	b.ensureObjectMetaApplyConfigurationExists() // avoids dereferencing a nil embedded pointer below
+	return b.ObjectMetaApplyConfiguration.Name
+}
diff --git a/client-go/applyconfiguration/api/v1alpha2/inferencemodelspec.go b/client-go/applyconfiguration/api/v1alpha2/inferencemodelspec.go
new file mode 100644
index 000000000..438ccd48d
--- /dev/null
+++ b/client-go/applyconfiguration/api/v1alpha2/inferencemodelspec.go
@@ -0,0 +1,74 @@
+/*
+Copyright 2024 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+// Code generated by applyconfiguration-gen. DO NOT EDIT.
+
+package v1alpha2
+
+import (
+	apiv1alpha2 "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha2"
+)
+
+// InferenceModelSpecApplyConfiguration represents a declarative configuration of the InferenceModelSpec type for use
+// with apply. Every field is tagged omitempty; TargetModels holds values, the other fields are pointers.
+type InferenceModelSpecApplyConfiguration struct {
+	ModelName    *string                                `json:"modelName,omitempty"`
+	Criticality  *apiv1alpha2.Criticality               `json:"criticality,omitempty"`
+	TargetModels []TargetModelApplyConfiguration        `json:"targetModels,omitempty"`
+	PoolRef      *PoolObjectReferenceApplyConfiguration `json:"poolRef,omitempty"`
+}
+
+// InferenceModelSpec constructs an empty declarative configuration of the InferenceModelSpec type for use with
+// apply.
+func InferenceModelSpec() *InferenceModelSpecApplyConfiguration {
+	return &InferenceModelSpecApplyConfiguration{}
+}
+
+// WithModelName sets the ModelName field in the declarative configuration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the ModelName field is set to the value of the last call.
+func (b *InferenceModelSpecApplyConfiguration) WithModelName(value string) *InferenceModelSpecApplyConfiguration {
+	b.ModelName = &value // points at this call's own copy of value
+	return b
+}
+
+// WithCriticality sets the Criticality field in the declarative configuration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the Criticality field is set to the value of the last call.
+func (b *InferenceModelSpecApplyConfiguration) WithCriticality(value apiv1alpha2.Criticality) *InferenceModelSpecApplyConfiguration {
+	b.Criticality = &value // points at this call's own copy of value
+	return b
+}
+
+// WithTargetModels adds the given values to the TargetModels field in the declarative configuration
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, values provided by each call will be appended to the TargetModels field. It panics on a nil value.
+func (b *InferenceModelSpecApplyConfiguration) WithTargetModels(values ...*TargetModelApplyConfiguration) *InferenceModelSpecApplyConfiguration {
+	for i := range values {
+		if values[i] == nil {
+			panic("nil value passed to WithTargetModels")
+		}
+		b.TargetModels = append(b.TargetModels, *values[i]) // appends a copy of the pointee
+	}
+	return b
+}
+
+// WithPoolRef sets the PoolRef field in the declarative configuration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the PoolRef field is set to the value of the last call.
+func (b *InferenceModelSpecApplyConfiguration) WithPoolRef(value *PoolObjectReferenceApplyConfiguration) *InferenceModelSpecApplyConfiguration {
+	b.PoolRef = value // stores the pointer as given; the pointed-to configuration is not copied
+	return b
+}
diff --git a/client-go/applyconfiguration/api/v1alpha2/inferencemodelstatus.go b/client-go/applyconfiguration/api/v1alpha2/inferencemodelstatus.go
new file mode 100644
index 000000000..e8142efe5
--- /dev/null
+++ b/client-go/applyconfiguration/api/v1alpha2/inferencemodelstatus.go
@@ -0,0 +1,47 @@
+/*
+Copyright 2024 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+// Code generated by applyconfiguration-gen. DO NOT EDIT.
+
+package v1alpha2
+
+import (
+	v1 "k8s.io/client-go/applyconfigurations/meta/v1"
+)
+
+// InferenceModelStatusApplyConfiguration represents a declarative configuration of the InferenceModelStatus type for use
+// with apply. Conditions is tagged omitempty, so it is left out of the JSON encoding when empty.
+type InferenceModelStatusApplyConfiguration struct {
+	Conditions []v1.ConditionApplyConfiguration `json:"conditions,omitempty"`
+}
+
+// InferenceModelStatus constructs an empty declarative configuration of the InferenceModelStatus type for use with
+// apply.
+func InferenceModelStatus() *InferenceModelStatusApplyConfiguration {
+	return &InferenceModelStatusApplyConfiguration{}
+}
+
+// WithConditions adds the given values to the Conditions field in the declarative configuration
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, values provided by each call will be appended to the Conditions field. It panics on a nil value.
+func (b *InferenceModelStatusApplyConfiguration) WithConditions(values ...*v1.ConditionApplyConfiguration) *InferenceModelStatusApplyConfiguration {
+	for i := range values {
+		if values[i] == nil {
+			panic("nil value passed to WithConditions")
+		}
+		b.Conditions = append(b.Conditions, *values[i]) // appends a copy of the pointee
+	}
+	return b
+}
diff --git a/client-go/applyconfiguration/api/v1alpha2/inferencepool.go b/client-go/applyconfiguration/api/v1alpha2/inferencepool.go
new file mode 100644
index 000000000..cd725cb6a
--- /dev/null
+++ b/client-go/applyconfiguration/api/v1alpha2/inferencepool.go
@@ -0,0 +1,224 @@
+/*
+Copyright 2024 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+// Code generated by applyconfiguration-gen. DO NOT EDIT.
+
+package v1alpha2
+
+import (
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	types "k8s.io/apimachinery/pkg/types"
+	v1 "k8s.io/client-go/applyconfigurations/meta/v1"
+)
+
+// InferencePoolApplyConfiguration represents a declarative configuration of the InferencePool type for use
+// with apply.
+type InferencePoolApplyConfiguration struct {
+	v1.TypeMetaApplyConfiguration    `json:",inline"`
+	*v1.ObjectMetaApplyConfiguration `json:"metadata,omitempty"`
+	Spec                             *InferencePoolSpecApplyConfiguration   `json:"spec,omitempty"`
+	Status                           *InferencePoolStatusApplyConfiguration `json:"status,omitempty"`
+}
+
+// InferencePool constructs a declarative configuration of the InferencePool type for use with
+// apply.
+func InferencePool(name, namespace string) *InferencePoolApplyConfiguration {
+	b := &InferencePoolApplyConfiguration{}
+	b.WithName(name)
+	b.WithNamespace(namespace)
+	b.WithKind("InferencePool")
+	b.WithAPIVersion("inference.networking.x-k8s.io/v1alpha2")
+	return b
+}
+
+// WithKind sets the Kind field in the declarative configuration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the Kind field is set to the value of the last call.
+func (b *InferencePoolApplyConfiguration) WithKind(value string) *InferencePoolApplyConfiguration {
+	b.TypeMetaApplyConfiguration.Kind = &value
+	return b
+}
+
+// WithAPIVersion sets the APIVersion field in the declarative configuration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the APIVersion field is set to the value of the last call.
+func (b *InferencePoolApplyConfiguration) WithAPIVersion(value string) *InferencePoolApplyConfiguration {
+	b.TypeMetaApplyConfiguration.APIVersion = &value
+	return b
+}
+
+// WithName sets the Name field in the declarative configuration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the Name field is set to the value of the last call.
+func (b *InferencePoolApplyConfiguration) WithName(value string) *InferencePoolApplyConfiguration {
+	b.ensureObjectMetaApplyConfigurationExists()
+	b.ObjectMetaApplyConfiguration.Name = &value
+	return b
+}
+
+// WithGenerateName sets the GenerateName field in the declarative configuration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the GenerateName field is set to the value of the last call.
+func (b *InferencePoolApplyConfiguration) WithGenerateName(value string) *InferencePoolApplyConfiguration {
+	b.ensureObjectMetaApplyConfigurationExists()
+	b.ObjectMetaApplyConfiguration.GenerateName = &value
+	return b
+}
+
+// WithNamespace sets the Namespace field in the declarative configuration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the Namespace field is set to the value of the last call.
+func (b *InferencePoolApplyConfiguration) WithNamespace(value string) *InferencePoolApplyConfiguration {
+	b.ensureObjectMetaApplyConfigurationExists()
+	b.ObjectMetaApplyConfiguration.Namespace = &value
+	return b
+}
+
+// WithUID sets the UID field in the declarative configuration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the UID field is set to the value of the last call.
+func (b *InferencePoolApplyConfiguration) WithUID(value types.UID) *InferencePoolApplyConfiguration {
+	b.ensureObjectMetaApplyConfigurationExists()
+	b.ObjectMetaApplyConfiguration.UID = &value
+	return b
+}
+
+// WithResourceVersion sets the ResourceVersion field in the declarative configuration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the ResourceVersion field is set to the value of the last call.
+func (b *InferencePoolApplyConfiguration) WithResourceVersion(value string) *InferencePoolApplyConfiguration {
+	b.ensureObjectMetaApplyConfigurationExists()
+	b.ObjectMetaApplyConfiguration.ResourceVersion = &value
+	return b
+}
+
+// WithGeneration sets the Generation field in the declarative configuration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the Generation field is set to the value of the last call.
+func (b *InferencePoolApplyConfiguration) WithGeneration(value int64) *InferencePoolApplyConfiguration {
+	b.ensureObjectMetaApplyConfigurationExists()
+	b.ObjectMetaApplyConfiguration.Generation = &value
+	return b
+}
+
+// WithCreationTimestamp sets the CreationTimestamp field in the declarative configuration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the CreationTimestamp field is set to the value of the last call.
+func (b *InferencePoolApplyConfiguration) WithCreationTimestamp(value metav1.Time) *InferencePoolApplyConfiguration {
+	b.ensureObjectMetaApplyConfigurationExists()
+	b.ObjectMetaApplyConfiguration.CreationTimestamp = &value
+	return b
+}
+
+// WithDeletionTimestamp sets the DeletionTimestamp field in the declarative configuration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the DeletionTimestamp field is set to the value of the last call.
+func (b *InferencePoolApplyConfiguration) WithDeletionTimestamp(value metav1.Time) *InferencePoolApplyConfiguration {
+	b.ensureObjectMetaApplyConfigurationExists()
+	b.ObjectMetaApplyConfiguration.DeletionTimestamp = &value
+	return b
+}
+
+// WithDeletionGracePeriodSeconds sets the DeletionGracePeriodSeconds field in the declarative configuration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the DeletionGracePeriodSeconds field is set to the value of the last call.
+func (b *InferencePoolApplyConfiguration) WithDeletionGracePeriodSeconds(value int64) *InferencePoolApplyConfiguration {
+	b.ensureObjectMetaApplyConfigurationExists()
+	b.ObjectMetaApplyConfiguration.DeletionGracePeriodSeconds = &value
+	return b
+}
+
+// WithLabels puts the entries into the Labels field in the declarative configuration
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the entries provided by each call will be put on the Labels field,
+// overwriting any existing map entries in the Labels field with the same key.
+func (b *InferencePoolApplyConfiguration) WithLabels(entries map[string]string) *InferencePoolApplyConfiguration {
+	b.ensureObjectMetaApplyConfigurationExists()
+	if b.ObjectMetaApplyConfiguration.Labels == nil && len(entries) > 0 {
+		b.ObjectMetaApplyConfiguration.Labels = make(map[string]string, len(entries))
+	}
+	for k, v := range entries {
+		b.ObjectMetaApplyConfiguration.Labels[k] = v
+	}
+	return b
+}
+
+// WithAnnotations puts the entries into the Annotations field in the declarative configuration
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the entries provided by each call will be put on the Annotations field,
+// overwriting any existing map entries in the Annotations field with the same key.
+func (b *InferencePoolApplyConfiguration) WithAnnotations(entries map[string]string) *InferencePoolApplyConfiguration {
+	b.ensureObjectMetaApplyConfigurationExists()
+	if b.ObjectMetaApplyConfiguration.Annotations == nil && len(entries) > 0 {
+		b.ObjectMetaApplyConfiguration.Annotations = make(map[string]string, len(entries))
+	}
+	for k, v := range entries {
+		b.ObjectMetaApplyConfiguration.Annotations[k] = v
+	}
+	return b
+}
+
+// WithOwnerReferences adds the given values to the OwnerReferences field in the declarative configuration
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, values provided by each call will be appended to the OwnerReferences field.
+func (b *InferencePoolApplyConfiguration) WithOwnerReferences(values ...*v1.OwnerReferenceApplyConfiguration) *InferencePoolApplyConfiguration {
+	b.ensureObjectMetaApplyConfigurationExists()
+	for i := range values {
+		if values[i] == nil {
+			panic("nil value passed to WithOwnerReferences")
+		}
+		b.ObjectMetaApplyConfiguration.OwnerReferences = append(b.ObjectMetaApplyConfiguration.OwnerReferences, *values[i])
+	}
+	return b
+}
+
+// WithFinalizers adds the given values to the Finalizers field in the declarative configuration
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, values provided by each call will be appended to the Finalizers field.
+func (b *InferencePoolApplyConfiguration) WithFinalizers(values ...string) *InferencePoolApplyConfiguration {
+	b.ensureObjectMetaApplyConfigurationExists()
+	for i := range values {
+		b.ObjectMetaApplyConfiguration.Finalizers = append(b.ObjectMetaApplyConfiguration.Finalizers, values[i])
+	}
+	return b
+}
+
+func (b *InferencePoolApplyConfiguration) ensureObjectMetaApplyConfigurationExists() {
+	if b.ObjectMetaApplyConfiguration == nil {
+		b.ObjectMetaApplyConfiguration = &v1.ObjectMetaApplyConfiguration{}
+	}
+}
+
+// WithSpec sets the Spec field in the declarative configuration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the Spec field is set to the value of the last call.
+func (b *InferencePoolApplyConfiguration) WithSpec(value *InferencePoolSpecApplyConfiguration) *InferencePoolApplyConfiguration {
+	b.Spec = value
+	return b
+}
+
+// WithStatus sets the Status field in the declarative configuration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the Status field is set to the value of the last call.
+func (b *InferencePoolApplyConfiguration) WithStatus(value *InferencePoolStatusApplyConfiguration) *InferencePoolApplyConfiguration {
+	b.Status = value
+	return b
+}
+
+// GetName retrieves the value of the Name field in the declarative configuration.
+func (b *InferencePoolApplyConfiguration) GetName() *string {
+	b.ensureObjectMetaApplyConfigurationExists()
+	return b.ObjectMetaApplyConfiguration.Name
+}
diff --git a/client-go/applyconfiguration/api/v1alpha2/inferencepoolspec.go b/client-go/applyconfiguration/api/v1alpha2/inferencepoolspec.go
new file mode 100644
index 000000000..e4d5a97d7
--- /dev/null
+++ b/client-go/applyconfiguration/api/v1alpha2/inferencepoolspec.go
@@ -0,0 +1,66 @@
+/*
+Copyright 2024 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+// Code generated by applyconfiguration-gen. DO NOT EDIT.
+
+package v1alpha2
+
+import (
+	apiv1alpha2 "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha2"
+)
+
+// InferencePoolSpecApplyConfiguration represents a declarative configuration of the InferencePoolSpec type for use
+// with apply.
+type InferencePoolSpecApplyConfiguration struct {
+	Selector                               map[apiv1alpha2.LabelKey]apiv1alpha2.LabelValue `json:"selector,omitempty"`
+	TargetPortNumber                       *int32                                          `json:"targetPortNumber,omitempty"`
+	EndpointPickerConfigApplyConfiguration `json:",inline"`
+}
+
+// InferencePoolSpec constructs a declarative configuration of the InferencePoolSpec type for use with
+// apply.
+func InferencePoolSpec() *InferencePoolSpecApplyConfiguration {
+	return &InferencePoolSpecApplyConfiguration{}
+}
+
+// WithSelector puts the entries into the Selector field in the declarative configuration
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the entries provided by each call will be put on the Selector field,
+// overwriting any existing map entries in the Selector field with the same key.
+func (b *InferencePoolSpecApplyConfiguration) WithSelector(entries map[apiv1alpha2.LabelKey]apiv1alpha2.LabelValue) *InferencePoolSpecApplyConfiguration {
+	if b.Selector == nil && len(entries) > 0 {
+		b.Selector = make(map[apiv1alpha2.LabelKey]apiv1alpha2.LabelValue, len(entries))
+	}
+	for k, v := range entries {
+		b.Selector[k] = v
+	}
+	return b
+}
+
+// WithTargetPortNumber sets the TargetPortNumber field in the declarative configuration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the TargetPortNumber field is set to the value of the last call.
+func (b *InferencePoolSpecApplyConfiguration) WithTargetPortNumber(value int32) *InferencePoolSpecApplyConfiguration {
+	b.TargetPortNumber = &value
+	return b
+}
+
+// WithExtensionRef sets the ExtensionRef field in the declarative configuration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the ExtensionRef field is set to the value of the last call.
+func (b *InferencePoolSpecApplyConfiguration) WithExtensionRef(value *ExtensionApplyConfiguration) *InferencePoolSpecApplyConfiguration {
+	b.EndpointPickerConfigApplyConfiguration.ExtensionRef = value
+	return b
+}
diff --git a/client-go/applyconfiguration/api/v1alpha2/inferencepoolstatus.go b/client-go/applyconfiguration/api/v1alpha2/inferencepoolstatus.go
new file mode 100644
index 000000000..9587dabe8
--- /dev/null
+++ b/client-go/applyconfiguration/api/v1alpha2/inferencepoolstatus.go
@@ -0,0 +1,43 @@
+/*
+Copyright 2024 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+// Code generated by applyconfiguration-gen. DO NOT EDIT.
+
+package v1alpha2
+
+// InferencePoolStatusApplyConfiguration represents a declarative configuration of the InferencePoolStatus type for use
+// with apply.
+type InferencePoolStatusApplyConfiguration struct {
+	Parents []PoolStatusApplyConfiguration `json:"parent,omitempty"`
+}
+
+// InferencePoolStatus constructs a declarative configuration of the InferencePoolStatus type for use with
+// apply.
+func InferencePoolStatus() *InferencePoolStatusApplyConfiguration {
+	return &InferencePoolStatusApplyConfiguration{}
+}
+
+// WithParents adds the given values to the Parents field in the declarative configuration
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, values provided by each call will be appended to the Parents field.
+func (b *InferencePoolStatusApplyConfiguration) WithParents(values ...*PoolStatusApplyConfiguration) *InferencePoolStatusApplyConfiguration {
+	for i := range values {
+		if values[i] == nil {
+			panic("nil value passed to WithParents")
+		}
+		b.Parents = append(b.Parents, *values[i])
+	}
+	return b
+}
diff --git a/client-go/applyconfiguration/api/v1alpha2/poolobjectreference.go b/client-go/applyconfiguration/api/v1alpha2/poolobjectreference.go
new file mode 100644
index 000000000..cc88c950a
--- /dev/null
+++ b/client-go/applyconfiguration/api/v1alpha2/poolobjectreference.go
@@ -0,0 +1,56 @@
+/*
+Copyright 2024 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+// Code generated by applyconfiguration-gen. DO NOT EDIT.
+
+package v1alpha2
+
+// PoolObjectReferenceApplyConfiguration represents a declarative configuration of the PoolObjectReference type for use
+// with apply.
+type PoolObjectReferenceApplyConfiguration struct {
+	Group *string `json:"group,omitempty"`
+	Kind  *string `json:"kind,omitempty"`
+	Name  *string `json:"name,omitempty"`
+}
+
+// PoolObjectReference constructs a declarative configuration of the PoolObjectReference type for use with
+// apply.
+func PoolObjectReference() *PoolObjectReferenceApplyConfiguration {
+	return &PoolObjectReferenceApplyConfiguration{}
+}
+
+// WithGroup sets the Group field in the declarative configuration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the Group field is set to the value of the last call.
+func (b *PoolObjectReferenceApplyConfiguration) WithGroup(value string) *PoolObjectReferenceApplyConfiguration {
+	b.Group = &value
+	return b
+}
+
+// WithKind sets the Kind field in the declarative configuration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the Kind field is set to the value of the last call.
+func (b *PoolObjectReferenceApplyConfiguration) WithKind(value string) *PoolObjectReferenceApplyConfiguration {
+	b.Kind = &value
+	return b
+}
+
+// WithName sets the Name field in the declarative configuration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the Name field is set to the value of the last call.
+func (b *PoolObjectReferenceApplyConfiguration) WithName(value string) *PoolObjectReferenceApplyConfiguration {
+	b.Name = &value
+	return b
+}
diff --git a/client-go/applyconfiguration/api/v1alpha2/poolstatus.go b/client-go/applyconfiguration/api/v1alpha2/poolstatus.go
new file mode 100644
index 000000000..bff299358
--- /dev/null
+++ b/client-go/applyconfiguration/api/v1alpha2/poolstatus.go
@@ -0,0 +1,57 @@
+/*
+Copyright 2024 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+// Code generated by applyconfiguration-gen. DO NOT EDIT.
+
+package v1alpha2
+
+import (
+	v1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/client-go/applyconfigurations/meta/v1"
+)
+
+// PoolStatusApplyConfiguration represents a declarative configuration of the PoolStatus type for use
+// with apply.
+type PoolStatusApplyConfiguration struct {
+	GatewayRef *v1.ObjectReference                  `json:"parentRef,omitempty"`
+	Conditions []metav1.ConditionApplyConfiguration `json:"conditions,omitempty"`
+}
+
+// PoolStatus constructs a declarative configuration of the PoolStatus type for use with
+// apply.
+func PoolStatus() *PoolStatusApplyConfiguration {
+	return &PoolStatusApplyConfiguration{}
+}
+
+// WithGatewayRef sets the GatewayRef field in the declarative configuration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the GatewayRef field is set to the value of the last call.
+func (b *PoolStatusApplyConfiguration) WithGatewayRef(value v1.ObjectReference) *PoolStatusApplyConfiguration {
+	b.GatewayRef = &value
+	return b
+}
+
+// WithConditions adds the given values to the Conditions field in the declarative configuration
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, values provided by each call will be appended to the Conditions field.
+func (b *PoolStatusApplyConfiguration) WithConditions(values ...*metav1.ConditionApplyConfiguration) *PoolStatusApplyConfiguration {
+	for i := range values {
+		if values[i] == nil {
+			panic("nil value passed to WithConditions")
+		}
+		b.Conditions = append(b.Conditions, *values[i])
+	}
+	return b
+}
diff --git a/client-go/applyconfiguration/api/v1alpha2/targetmodel.go b/client-go/applyconfiguration/api/v1alpha2/targetmodel.go
new file mode 100644
index 000000000..4ed9b4bcb
--- /dev/null
+++ b/client-go/applyconfiguration/api/v1alpha2/targetmodel.go
@@ -0,0 +1,47 @@
+/*
+Copyright 2024 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+// Code generated by applyconfiguration-gen. DO NOT EDIT.
+
+package v1alpha2
+
+// TargetModelApplyConfiguration represents a declarative configuration of the TargetModel type for use
+// with apply.
+type TargetModelApplyConfiguration struct {
+	Name   *string `json:"name,omitempty"`
+	Weight *int32  `json:"weight,omitempty"`
+}
+
+// TargetModel constructs a declarative configuration of the TargetModel type for use with
+// apply.
+func TargetModel() *TargetModelApplyConfiguration {
+	return &TargetModelApplyConfiguration{}
+}
+
+// WithName sets the Name field in the declarative configuration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the Name field is set to the value of the last call.
+func (b *TargetModelApplyConfiguration) WithName(value string) *TargetModelApplyConfiguration {
+	b.Name = &value
+	return b
+}
+
+// WithWeight sets the Weight field in the declarative configuration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the Weight field is set to the value of the last call.
+func (b *TargetModelApplyConfiguration) WithWeight(value int32) *TargetModelApplyConfiguration {
+	b.Weight = &value
+	return b
+}
diff --git a/client-go/applyconfiguration/utils.go b/client-go/applyconfiguration/utils.go
index 677fa6e37..eacc9c439 100644
--- a/client-go/applyconfiguration/utils.go
+++ b/client-go/applyconfiguration/utils.go
@@ -22,7 +22,9 @@ import (
 	schema "k8s.io/apimachinery/pkg/runtime/schema"
 	testing "k8s.io/client-go/testing"
 	v1alpha1 "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha1"
+	v1alpha2 "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha2"
 	apiv1alpha1 "sigs.k8s.io/gateway-api-inference-extension/client-go/applyconfiguration/api/v1alpha1"
+	apiv1alpha2 "sigs.k8s.io/gateway-api-inference-extension/client-go/applyconfiguration/api/v1alpha2"
 	internal "sigs.k8s.io/gateway-api-inference-extension/client-go/applyconfiguration/internal"
 )
 
@@ -56,6 +58,34 @@ func ForKind(kind schema.GroupVersionKind) interface{} {
 	case v1alpha1.SchemeGroupVersion.WithKind("TargetModel"):
 		return &apiv1alpha1.TargetModelApplyConfiguration{}
 
+		// Group=inference.networking.x-k8s.io, Version=v1alpha2
+	case v1alpha2.SchemeGroupVersion.WithKind("EndpointPickerConfig"):
+		return &apiv1alpha2.EndpointPickerConfigApplyConfiguration{}
+	case v1alpha2.SchemeGroupVersion.WithKind("Extension"):
+		return &apiv1alpha2.ExtensionApplyConfiguration{}
+	case v1alpha2.SchemeGroupVersion.WithKind("ExtensionConnection"):
+		return &apiv1alpha2.ExtensionConnectionApplyConfiguration{}
+	case v1alpha2.SchemeGroupVersion.WithKind("ExtensionReference"):
+		return &apiv1alpha2.ExtensionReferenceApplyConfiguration{}
+	case v1alpha2.SchemeGroupVersion.WithKind("InferenceModel"):
+		return &apiv1alpha2.InferenceModelApplyConfiguration{}
+	case v1alpha2.SchemeGroupVersion.WithKind("InferenceModelSpec"):
+		return &apiv1alpha2.InferenceModelSpecApplyConfiguration{}
+	case v1alpha2.SchemeGroupVersion.WithKind("InferenceModelStatus"):
+		return &apiv1alpha2.InferenceModelStatusApplyConfiguration{}
+	case v1alpha2.SchemeGroupVersion.WithKind("InferencePool"):
+		return &apiv1alpha2.InferencePoolApplyConfiguration{}
+	case v1alpha2.SchemeGroupVersion.WithKind("InferencePoolSpec"):
+		return &apiv1alpha2.InferencePoolSpecApplyConfiguration{}
+	case v1alpha2.SchemeGroupVersion.WithKind("InferencePoolStatus"):
+		return &apiv1alpha2.InferencePoolStatusApplyConfiguration{}
+	case v1alpha2.SchemeGroupVersion.WithKind("PoolObjectReference"):
+		return &apiv1alpha2.PoolObjectReferenceApplyConfiguration{}
+	case v1alpha2.SchemeGroupVersion.WithKind("PoolStatus"):
+		return &apiv1alpha2.PoolStatusApplyConfiguration{}
+	case v1alpha2.SchemeGroupVersion.WithKind("TargetModel"):
+		return &apiv1alpha2.TargetModelApplyConfiguration{}
+
 	}
 	return nil
 }
diff --git a/client-go/clientset/versioned/clientset.go b/client-go/clientset/versioned/clientset.go
index b7ebc1d83..4266285a7 100644
--- a/client-go/clientset/versioned/clientset.go
+++ b/client-go/clientset/versioned/clientset.go
@@ -25,17 +25,20 @@ import (
 	rest "k8s.io/client-go/rest"
 	flowcontrol "k8s.io/client-go/util/flowcontrol"
 	inferencev1alpha1 "sigs.k8s.io/gateway-api-inference-extension/client-go/clientset/versioned/typed/api/v1alpha1"
+	inferencev1alpha2 "sigs.k8s.io/gateway-api-inference-extension/client-go/clientset/versioned/typed/api/v1alpha2"
 )
 
 type Interface interface {
 	Discovery() discovery.DiscoveryInterface
 	InferenceV1alpha1() inferencev1alpha1.InferenceV1alpha1Interface
+	InferenceV1alpha2() inferencev1alpha2.InferenceV1alpha2Interface
 }
 
 // Clientset contains the clients for groups.
 type Clientset struct {
 	*discovery.DiscoveryClient
 	inferenceV1alpha1 *inferencev1alpha1.InferenceV1alpha1Client
+	inferenceV1alpha2 *inferencev1alpha2.InferenceV1alpha2Client
 }
 
 // InferenceV1alpha1 retrieves the InferenceV1alpha1Client
@@ -43,6 +46,11 @@ func (c *Clientset) InferenceV1alpha1() inferencev1alpha1.InferenceV1alpha1Inter
 	return c.inferenceV1alpha1
 }
 
+// InferenceV1alpha2 retrieves the InferenceV1alpha2Client
+func (c *Clientset) InferenceV1alpha2() inferencev1alpha2.InferenceV1alpha2Interface {
+	return c.inferenceV1alpha2
+}
+
 // Discovery retrieves the DiscoveryClient
 func (c *Clientset) Discovery() discovery.DiscoveryInterface {
 	if c == nil {
@@ -91,6 +99,10 @@ func NewForConfigAndClient(c *rest.Config, httpClient *http.Client) (*Clientset,
 	if err != nil {
 		return nil, err
 	}
+	cs.inferenceV1alpha2, err = inferencev1alpha2.NewForConfigAndClient(&configShallowCopy, httpClient)
+	if err != nil {
+		return nil, err
+	}
 
 	cs.DiscoveryClient, err = discovery.NewDiscoveryClientForConfigAndClient(&configShallowCopy, httpClient)
 	if err != nil {
@@ -113,6 +125,7 @@ func NewForConfigOrDie(c *rest.Config) *Clientset {
 func New(c rest.Interface) *Clientset {
 	var cs Clientset
 	cs.inferenceV1alpha1 = inferencev1alpha1.New(c)
+	cs.inferenceV1alpha2 = inferencev1alpha2.New(c)
 
 	cs.DiscoveryClient = discovery.NewDiscoveryClient(c)
 	return &cs
diff --git a/client-go/clientset/versioned/fake/clientset_generated.go b/client-go/clientset/versioned/fake/clientset_generated.go
index 1e54db319..f4f33032d 100644
--- a/client-go/clientset/versioned/fake/clientset_generated.go
+++ b/client-go/clientset/versioned/fake/clientset_generated.go
@@ -27,6 +27,8 @@ import (
 	clientset "sigs.k8s.io/gateway-api-inference-extension/client-go/clientset/versioned"
 	inferencev1alpha1 "sigs.k8s.io/gateway-api-inference-extension/client-go/clientset/versioned/typed/api/v1alpha1"
 	fakeinferencev1alpha1 "sigs.k8s.io/gateway-api-inference-extension/client-go/clientset/versioned/typed/api/v1alpha1/fake"
+	inferencev1alpha2 "sigs.k8s.io/gateway-api-inference-extension/client-go/clientset/versioned/typed/api/v1alpha2"
+	fakeinferencev1alpha2 "sigs.k8s.io/gateway-api-inference-extension/client-go/clientset/versioned/typed/api/v1alpha2/fake"
 )
 
 // NewSimpleClientset returns a clientset that will respond with the provided objects.
@@ -119,3 +121,8 @@ var (
 func (c *Clientset) InferenceV1alpha1() inferencev1alpha1.InferenceV1alpha1Interface {
 	return &fakeinferencev1alpha1.FakeInferenceV1alpha1{Fake: &c.Fake}
 }
+
+// InferenceV1alpha2 retrieves the InferenceV1alpha2Client
+func (c *Clientset) InferenceV1alpha2() inferencev1alpha2.InferenceV1alpha2Interface {
+	return &fakeinferencev1alpha2.FakeInferenceV1alpha2{Fake: &c.Fake}
+}
diff --git a/client-go/clientset/versioned/fake/register.go b/client-go/clientset/versioned/fake/register.go
index b72a8ce30..bc8e69035 100644
--- a/client-go/clientset/versioned/fake/register.go
+++ b/client-go/clientset/versioned/fake/register.go
@@ -24,6 +24,7 @@ import (
 	serializer "k8s.io/apimachinery/pkg/runtime/serializer"
 	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
 	inferencev1alpha1 "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha1"
+	inferencev1alpha2 "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha2"
 )
 
 var scheme = runtime.NewScheme()
@@ -31,6 +32,7 @@ var codecs = serializer.NewCodecFactory(scheme)
 
 var localSchemeBuilder = runtime.SchemeBuilder{
 	inferencev1alpha1.AddToScheme,
+	inferencev1alpha2.AddToScheme,
 }
 
 // AddToScheme adds all types of this clientset into the given scheme. This allows composition
diff --git a/client-go/clientset/versioned/scheme/register.go b/client-go/clientset/versioned/scheme/register.go
index c4c061589..5727d404f 100644
--- a/client-go/clientset/versioned/scheme/register.go
+++ b/client-go/clientset/versioned/scheme/register.go
@@ -24,6 +24,7 @@ import (
 	serializer "k8s.io/apimachinery/pkg/runtime/serializer"
 	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
 	inferencev1alpha1 "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha1"
+	inferencev1alpha2 "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha2"
 )
 
 var Scheme = runtime.NewScheme()
@@ -31,6 +32,7 @@ var Codecs = serializer.NewCodecFactory(Scheme)
 var ParameterCodec = runtime.NewParameterCodec(Scheme)
 var localSchemeBuilder = runtime.SchemeBuilder{
 	inferencev1alpha1.AddToScheme,
+	inferencev1alpha2.AddToScheme,
 }
 
 // AddToScheme adds all types of this clientset into the given scheme. This allows composition
diff --git a/client-go/clientset/versioned/typed/api/v1alpha2/api_client.go b/client-go/clientset/versioned/typed/api/v1alpha2/api_client.go
new file mode 100644
index 000000000..b011ca928
--- /dev/null
+++ b/client-go/clientset/versioned/typed/api/v1alpha2/api_client.go
@@ -0,0 +1,111 @@
+/*
+Copyright 2024 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+// Code generated by client-gen. DO NOT EDIT.
+
+package v1alpha2
+
+import (
+	http "net/http"
+
+	rest "k8s.io/client-go/rest"
+	apiv1alpha2 "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha2"
+	scheme "sigs.k8s.io/gateway-api-inference-extension/client-go/clientset/versioned/scheme"
+)
+
+type InferenceV1alpha2Interface interface {
+	RESTClient() rest.Interface
+	InferenceModelsGetter
+	InferencePoolsGetter
+}
+
+// InferenceV1alpha2Client is used to interact with features provided by the inference.networking.x-k8s.io group.
+type InferenceV1alpha2Client struct {
+	restClient rest.Interface
+}
+
+func (c *InferenceV1alpha2Client) InferenceModels(namespace string) InferenceModelInterface {
+	return newInferenceModels(c, namespace)
+}
+
+func (c *InferenceV1alpha2Client) InferencePools(namespace string) InferencePoolInterface {
+	return newInferencePools(c, namespace)
+}
+
+// NewForConfig creates a new InferenceV1alpha2Client for the given config.
+// NewForConfig is equivalent to NewForConfigAndClient(c, httpClient),
+// where httpClient was generated with rest.HTTPClientFor(c).
+func NewForConfig(c *rest.Config) (*InferenceV1alpha2Client, error) {
+	config := *c
+	if err := setConfigDefaults(&config); err != nil {
+		return nil, err
+	}
+	httpClient, err := rest.HTTPClientFor(&config)
+	if err != nil {
+		return nil, err
+	}
+	return NewForConfigAndClient(&config, httpClient)
+}
+
+// NewForConfigAndClient creates a new InferenceV1alpha2Client for the given config and http client.
+// Note the http client provided takes precedence over the configured transport values.
+func NewForConfigAndClient(c *rest.Config, h *http.Client) (*InferenceV1alpha2Client, error) {
+	config := *c
+	if err := setConfigDefaults(&config); err != nil {
+		return nil, err
+	}
+	client, err := rest.RESTClientForConfigAndClient(&config, h)
+	if err != nil {
+		return nil, err
+	}
+	return &InferenceV1alpha2Client{client}, nil
+}
+
+// NewForConfigOrDie creates a new InferenceV1alpha2Client for the given config and
+// panics if there is an error in the config.
+func NewForConfigOrDie(c *rest.Config) *InferenceV1alpha2Client {
+	client, err := NewForConfig(c)
+	if err != nil {
+		panic(err)
+	}
+	return client
+}
+
+// New creates a new InferenceV1alpha2Client for the given RESTClient.
+func New(c rest.Interface) *InferenceV1alpha2Client {
+	return &InferenceV1alpha2Client{c}
+}
+
+func setConfigDefaults(config *rest.Config) error {
+	gv := apiv1alpha2.SchemeGroupVersion
+	config.GroupVersion = &gv
+	config.APIPath = "/apis"
+	config.NegotiatedSerializer = rest.CodecFactoryForGeneratedClient(scheme.Scheme, scheme.Codecs).WithoutConversion()
+
+	if config.UserAgent == "" {
+		config.UserAgent = rest.DefaultKubernetesUserAgent()
+	}
+
+	return nil
+}
+
+// RESTClient returns a RESTClient that is used to communicate
+// with API server by this client implementation.
+func (c *InferenceV1alpha2Client) RESTClient() rest.Interface {
+	if c == nil {
+		return nil
+	}
+	return c.restClient
+}
diff --git a/client-go/clientset/versioned/typed/api/v1alpha2/doc.go b/client-go/clientset/versioned/typed/api/v1alpha2/doc.go
new file mode 100644
index 000000000..2bcba220f
--- /dev/null
+++ b/client-go/clientset/versioned/typed/api/v1alpha2/doc.go
@@ -0,0 +1,19 @@
+/*
+Copyright 2024 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+// Code generated by client-gen. DO NOT EDIT.
+
+// Package v1alpha2 has the automatically generated typed clients.
+package v1alpha2
diff --git a/client-go/clientset/versioned/typed/api/v1alpha2/fake/doc.go b/client-go/clientset/versioned/typed/api/v1alpha2/fake/doc.go
new file mode 100644
index 000000000..fbfccbb91
--- /dev/null
+++ b/client-go/clientset/versioned/typed/api/v1alpha2/fake/doc.go
@@ -0,0 +1,19 @@
+/*
+Copyright 2024 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+// Code generated by client-gen. DO NOT EDIT.
+
+// Package fake has the automatically generated clients.
+package fake
diff --git a/client-go/clientset/versioned/typed/api/v1alpha2/fake/fake_api_client.go b/client-go/clientset/versioned/typed/api/v1alpha2/fake/fake_api_client.go
new file mode 100644
index 000000000..0296608cf
--- /dev/null
+++ b/client-go/clientset/versioned/typed/api/v1alpha2/fake/fake_api_client.go
@@ -0,0 +1,43 @@
+/*
+Copyright 2024 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+// Code generated by client-gen. DO NOT EDIT.
+
+package fake
+
+import (
+	rest "k8s.io/client-go/rest"
+	testing "k8s.io/client-go/testing"
+	v1alpha2 "sigs.k8s.io/gateway-api-inference-extension/client-go/clientset/versioned/typed/api/v1alpha2"
+)
+
+type FakeInferenceV1alpha2 struct {
+	*testing.Fake
+}
+
+func (c *FakeInferenceV1alpha2) InferenceModels(namespace string) v1alpha2.InferenceModelInterface {
+	return newFakeInferenceModels(c, namespace)
+}
+
+func (c *FakeInferenceV1alpha2) InferencePools(namespace string) v1alpha2.InferencePoolInterface {
+	return newFakeInferencePools(c, namespace)
+}
+
+// RESTClient returns a RESTClient that is used to communicate
+// with API server by this client implementation.
+func (c *FakeInferenceV1alpha2) RESTClient() rest.Interface {
+	var ret *rest.RESTClient
+	return ret
+}
diff --git a/client-go/clientset/versioned/typed/api/v1alpha2/fake/fake_inferencemodel.go b/client-go/clientset/versioned/typed/api/v1alpha2/fake/fake_inferencemodel.go
new file mode 100644
index 000000000..2492a5573
--- /dev/null
+++ b/client-go/clientset/versioned/typed/api/v1alpha2/fake/fake_inferencemodel.go
@@ -0,0 +1,52 @@
+/*
+Copyright 2024 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+// Code generated by client-gen. DO NOT EDIT.
+
+package fake
+
+import (
+	gentype "k8s.io/client-go/gentype"
+	v1alpha2 "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha2"
+	apiv1alpha2 "sigs.k8s.io/gateway-api-inference-extension/client-go/applyconfiguration/api/v1alpha2"
+	typedapiv1alpha2 "sigs.k8s.io/gateway-api-inference-extension/client-go/clientset/versioned/typed/api/v1alpha2"
+)
+
+// fakeInferenceModels implements InferenceModelInterface
+type fakeInferenceModels struct {
+	*gentype.FakeClientWithListAndApply[*v1alpha2.InferenceModel, *v1alpha2.InferenceModelList, *apiv1alpha2.InferenceModelApplyConfiguration]
+	Fake *FakeInferenceV1alpha2
+}
+
+func newFakeInferenceModels(fake *FakeInferenceV1alpha2, namespace string) typedapiv1alpha2.InferenceModelInterface {
+	return &fakeInferenceModels{
+		gentype.NewFakeClientWithListAndApply[*v1alpha2.InferenceModel, *v1alpha2.InferenceModelList, *apiv1alpha2.InferenceModelApplyConfiguration](
+			fake.Fake,
+			namespace,
+			v1alpha2.SchemeGroupVersion.WithResource("inferencemodels"),
+			v1alpha2.SchemeGroupVersion.WithKind("InferenceModel"),
+			func() *v1alpha2.InferenceModel { return &v1alpha2.InferenceModel{} },
+			func() *v1alpha2.InferenceModelList { return &v1alpha2.InferenceModelList{} },
+			func(dst, src *v1alpha2.InferenceModelList) { dst.ListMeta = src.ListMeta },
+			func(list *v1alpha2.InferenceModelList) []*v1alpha2.InferenceModel {
+				return gentype.ToPointerSlice(list.Items)
+			},
+			func(list *v1alpha2.InferenceModelList, items []*v1alpha2.InferenceModel) {
+				list.Items = gentype.FromPointerSlice(items)
+			},
+		),
+		fake,
+	}
+}
diff --git a/client-go/clientset/versioned/typed/api/v1alpha2/fake/fake_inferencepool.go b/client-go/clientset/versioned/typed/api/v1alpha2/fake/fake_inferencepool.go
new file mode 100644
index 000000000..64b087dd0
--- /dev/null
+++ b/client-go/clientset/versioned/typed/api/v1alpha2/fake/fake_inferencepool.go
@@ -0,0 +1,52 @@
+/*
+Copyright 2024 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+// Code generated by client-gen. DO NOT EDIT.
+
+package fake
+
+import (
+	gentype "k8s.io/client-go/gentype"
+	v1alpha2 "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha2"
+	apiv1alpha2 "sigs.k8s.io/gateway-api-inference-extension/client-go/applyconfiguration/api/v1alpha2"
+	typedapiv1alpha2 "sigs.k8s.io/gateway-api-inference-extension/client-go/clientset/versioned/typed/api/v1alpha2"
+)
+
+// fakeInferencePools implements InferencePoolInterface
+type fakeInferencePools struct {
+	*gentype.FakeClientWithListAndApply[*v1alpha2.InferencePool, *v1alpha2.InferencePoolList, *apiv1alpha2.InferencePoolApplyConfiguration]
+	Fake *FakeInferenceV1alpha2
+}
+
+func newFakeInferencePools(fake *FakeInferenceV1alpha2, namespace string) typedapiv1alpha2.InferencePoolInterface {
+	return &fakeInferencePools{
+		gentype.NewFakeClientWithListAndApply[*v1alpha2.InferencePool, *v1alpha2.InferencePoolList, *apiv1alpha2.InferencePoolApplyConfiguration](
+			fake.Fake,
+			namespace,
+			v1alpha2.SchemeGroupVersion.WithResource("inferencepools"),
+			v1alpha2.SchemeGroupVersion.WithKind("InferencePool"),
+			func() *v1alpha2.InferencePool { return &v1alpha2.InferencePool{} },
+			func() *v1alpha2.InferencePoolList { return &v1alpha2.InferencePoolList{} },
+			func(dst, src *v1alpha2.InferencePoolList) { dst.ListMeta = src.ListMeta },
+			func(list *v1alpha2.InferencePoolList) []*v1alpha2.InferencePool {
+				return gentype.ToPointerSlice(list.Items)
+			},
+			func(list *v1alpha2.InferencePoolList, items []*v1alpha2.InferencePool) {
+				list.Items = gentype.FromPointerSlice(items)
+			},
+		),
+		fake,
+	}
+}
diff --git a/client-go/clientset/versioned/typed/api/v1alpha2/generated_expansion.go b/client-go/clientset/versioned/typed/api/v1alpha2/generated_expansion.go
new file mode 100644
index 000000000..399789d8c
--- /dev/null
+++ b/client-go/clientset/versioned/typed/api/v1alpha2/generated_expansion.go
@@ -0,0 +1,22 @@
+/*
+Copyright 2024 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+// Code generated by client-gen. DO NOT EDIT.
+
+package v1alpha2
+
+type InferenceModelExpansion interface{}
+
+type InferencePoolExpansion interface{}
diff --git a/client-go/clientset/versioned/typed/api/v1alpha2/inferencemodel.go b/client-go/clientset/versioned/typed/api/v1alpha2/inferencemodel.go
new file mode 100644
index 000000000..ee0d92c1c
--- /dev/null
+++ b/client-go/clientset/versioned/typed/api/v1alpha2/inferencemodel.go
@@ -0,0 +1,73 @@
+/*
+Copyright 2024 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+// Code generated by client-gen. DO NOT EDIT.
+
+package v1alpha2
+
+import (
+	context "context"
+
+	v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	types "k8s.io/apimachinery/pkg/types"
+	watch "k8s.io/apimachinery/pkg/watch"
+	gentype "k8s.io/client-go/gentype"
+	apiv1alpha2 "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha2"
+	applyconfigurationapiv1alpha2 "sigs.k8s.io/gateway-api-inference-extension/client-go/applyconfiguration/api/v1alpha2"
+	scheme "sigs.k8s.io/gateway-api-inference-extension/client-go/clientset/versioned/scheme"
+)
+
+// InferenceModelsGetter has a method to return an InferenceModelInterface.
+// A group's client should implement this interface.
+type InferenceModelsGetter interface {
+	InferenceModels(namespace string) InferenceModelInterface
+}
+
+// InferenceModelInterface has methods to work with InferenceModel resources.
+type InferenceModelInterface interface {
+	Create(ctx context.Context, inferenceModel *apiv1alpha2.InferenceModel, opts v1.CreateOptions) (*apiv1alpha2.InferenceModel, error)
+	Update(ctx context.Context, inferenceModel *apiv1alpha2.InferenceModel, opts v1.UpdateOptions) (*apiv1alpha2.InferenceModel, error)
+	// Add a +genclient:noStatus comment above the type to avoid generating UpdateStatus().
+	UpdateStatus(ctx context.Context, inferenceModel *apiv1alpha2.InferenceModel, opts v1.UpdateOptions) (*apiv1alpha2.InferenceModel, error)
+	Delete(ctx context.Context, name string, opts v1.DeleteOptions) error
+	DeleteCollection(ctx context.Context, opts v1.DeleteOptions, listOpts v1.ListOptions) error
+	Get(ctx context.Context, name string, opts v1.GetOptions) (*apiv1alpha2.InferenceModel, error)
+	List(ctx context.Context, opts v1.ListOptions) (*apiv1alpha2.InferenceModelList, error)
+	Watch(ctx context.Context, opts v1.ListOptions) (watch.Interface, error)
+	Patch(ctx context.Context, name string, pt types.PatchType, data []byte, opts v1.PatchOptions, subresources ...string) (result *apiv1alpha2.InferenceModel, err error)
+	Apply(ctx context.Context, inferenceModel *applyconfigurationapiv1alpha2.InferenceModelApplyConfiguration, opts v1.ApplyOptions) (result *apiv1alpha2.InferenceModel, err error)
+	// Add a +genclient:noStatus comment above the type to avoid generating ApplyStatus().
+	ApplyStatus(ctx context.Context, inferenceModel *applyconfigurationapiv1alpha2.InferenceModelApplyConfiguration, opts v1.ApplyOptions) (result *apiv1alpha2.InferenceModel, err error)
+	InferenceModelExpansion
+}
+
+// inferenceModels implements InferenceModelInterface
+type inferenceModels struct {
+	*gentype.ClientWithListAndApply[*apiv1alpha2.InferenceModel, *apiv1alpha2.InferenceModelList, *applyconfigurationapiv1alpha2.InferenceModelApplyConfiguration]
+}
+
+// newInferenceModels returns an inferenceModels client for the given namespace.
+func newInferenceModels(c *InferenceV1alpha2Client, namespace string) *inferenceModels {
+	return &inferenceModels{
+		gentype.NewClientWithListAndApply[*apiv1alpha2.InferenceModel, *apiv1alpha2.InferenceModelList, *applyconfigurationapiv1alpha2.InferenceModelApplyConfiguration](
+			"inferencemodels",
+			c.RESTClient(),
+			scheme.ParameterCodec,
+			namespace,
+			func() *apiv1alpha2.InferenceModel { return &apiv1alpha2.InferenceModel{} },
+			func() *apiv1alpha2.InferenceModelList { return &apiv1alpha2.InferenceModelList{} },
+		),
+	}
+}
diff --git a/client-go/clientset/versioned/typed/api/v1alpha2/inferencepool.go b/client-go/clientset/versioned/typed/api/v1alpha2/inferencepool.go
new file mode 100644
index 000000000..8482451ee
--- /dev/null
+++ b/client-go/clientset/versioned/typed/api/v1alpha2/inferencepool.go
@@ -0,0 +1,73 @@
+/*
+Copyright 2024 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+// Code generated by client-gen. DO NOT EDIT.
+
+package v1alpha2
+
+import (
+	context "context"
+
+	v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	types "k8s.io/apimachinery/pkg/types"
+	watch "k8s.io/apimachinery/pkg/watch"
+	gentype "k8s.io/client-go/gentype"
+	apiv1alpha2 "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha2"
+	applyconfigurationapiv1alpha2 "sigs.k8s.io/gateway-api-inference-extension/client-go/applyconfiguration/api/v1alpha2"
+	scheme "sigs.k8s.io/gateway-api-inference-extension/client-go/clientset/versioned/scheme"
+)
+
+// InferencePoolsGetter has a method to return an InferencePoolInterface.
+// A group's client should implement this interface.
+type InferencePoolsGetter interface {
+	InferencePools(namespace string) InferencePoolInterface
+}
+
+// InferencePoolInterface has methods to work with InferencePool resources.
+type InferencePoolInterface interface {
+	Create(ctx context.Context, inferencePool *apiv1alpha2.InferencePool, opts v1.CreateOptions) (*apiv1alpha2.InferencePool, error)
+	Update(ctx context.Context, inferencePool *apiv1alpha2.InferencePool, opts v1.UpdateOptions) (*apiv1alpha2.InferencePool, error)
+	// Add a +genclient:noStatus comment above the type to avoid generating UpdateStatus().
+	UpdateStatus(ctx context.Context, inferencePool *apiv1alpha2.InferencePool, opts v1.UpdateOptions) (*apiv1alpha2.InferencePool, error)
+	Delete(ctx context.Context, name string, opts v1.DeleteOptions) error
+	DeleteCollection(ctx context.Context, opts v1.DeleteOptions, listOpts v1.ListOptions) error
+	Get(ctx context.Context, name string, opts v1.GetOptions) (*apiv1alpha2.InferencePool, error)
+	List(ctx context.Context, opts v1.ListOptions) (*apiv1alpha2.InferencePoolList, error)
+	Watch(ctx context.Context, opts v1.ListOptions) (watch.Interface, error)
+	Patch(ctx context.Context, name string, pt types.PatchType, data []byte, opts v1.PatchOptions, subresources ...string) (result *apiv1alpha2.InferencePool, err error)
+	Apply(ctx context.Context, inferencePool *applyconfigurationapiv1alpha2.InferencePoolApplyConfiguration, opts v1.ApplyOptions) (result *apiv1alpha2.InferencePool, err error)
+	// Add a +genclient:noStatus comment above the type to avoid generating ApplyStatus().
+	ApplyStatus(ctx context.Context, inferencePool *applyconfigurationapiv1alpha2.InferencePoolApplyConfiguration, opts v1.ApplyOptions) (result *apiv1alpha2.InferencePool, err error)
+	InferencePoolExpansion
+}
+
+// inferencePools implements InferencePoolInterface
+type inferencePools struct {
+	*gentype.ClientWithListAndApply[*apiv1alpha2.InferencePool, *apiv1alpha2.InferencePoolList, *applyconfigurationapiv1alpha2.InferencePoolApplyConfiguration]
+}
+
+// newInferencePools returns an inferencePools client for the given namespace.
+func newInferencePools(c *InferenceV1alpha2Client, namespace string) *inferencePools {
+	return &inferencePools{
+		gentype.NewClientWithListAndApply[*apiv1alpha2.InferencePool, *apiv1alpha2.InferencePoolList, *applyconfigurationapiv1alpha2.InferencePoolApplyConfiguration](
+			"inferencepools",
+			c.RESTClient(),
+			scheme.ParameterCodec,
+			namespace,
+			func() *apiv1alpha2.InferencePool { return &apiv1alpha2.InferencePool{} },
+			func() *apiv1alpha2.InferencePoolList { return &apiv1alpha2.InferencePoolList{} },
+		),
+	}
+}
diff --git a/client-go/informers/externalversions/api/interface.go b/client-go/informers/externalversions/api/interface.go
index fbf5ba092..210b89f84 100644
--- a/client-go/informers/externalversions/api/interface.go
+++ b/client-go/informers/externalversions/api/interface.go
@@ -19,6 +19,7 @@ package api
 
 import (
 	v1alpha1 "sigs.k8s.io/gateway-api-inference-extension/client-go/informers/externalversions/api/v1alpha1"
+	v1alpha2 "sigs.k8s.io/gateway-api-inference-extension/client-go/informers/externalversions/api/v1alpha2"
 	internalinterfaces "sigs.k8s.io/gateway-api-inference-extension/client-go/informers/externalversions/internalinterfaces"
 )
 
@@ -26,6 +27,8 @@ import (
 type Interface interface {
 	// V1alpha1 provides access to shared informers for resources in V1alpha1.
 	V1alpha1() v1alpha1.Interface
+	// V1alpha2 provides access to shared informers for resources in V1alpha2.
+	V1alpha2() v1alpha2.Interface
 }
 
 type group struct {
@@ -43,3 +46,8 @@ func New(f internalinterfaces.SharedInformerFactory, namespace string, tweakList
 func (g *group) V1alpha1() v1alpha1.Interface {
 	return v1alpha1.New(g.factory, g.namespace, g.tweakListOptions)
 }
+
+// V1alpha2 returns a new v1alpha2.Interface.
+func (g *group) V1alpha2() v1alpha2.Interface {
+	return v1alpha2.New(g.factory, g.namespace, g.tweakListOptions)
+}
diff --git a/client-go/informers/externalversions/api/v1alpha2/inferencemodel.go b/client-go/informers/externalversions/api/v1alpha2/inferencemodel.go
new file mode 100644
index 000000000..74f640d15
--- /dev/null
+++ b/client-go/informers/externalversions/api/v1alpha2/inferencemodel.go
@@ -0,0 +1,89 @@
+/*
+Copyright 2024 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+// Code generated by informer-gen. DO NOT EDIT.
+
+package v1alpha2
+
+import (
+	context "context"
+	time "time"
+
+	v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	runtime "k8s.io/apimachinery/pkg/runtime"
+	watch "k8s.io/apimachinery/pkg/watch"
+	cache "k8s.io/client-go/tools/cache"
+	gatewayapiinferenceextensionapiv1alpha2 "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha2"
+	versioned "sigs.k8s.io/gateway-api-inference-extension/client-go/clientset/versioned"
+	internalinterfaces "sigs.k8s.io/gateway-api-inference-extension/client-go/informers/externalversions/internalinterfaces"
+	apiv1alpha2 "sigs.k8s.io/gateway-api-inference-extension/client-go/listers/api/v1alpha2"
+)
+
+// InferenceModelInformer provides access to a shared informer and lister for
+// InferenceModels.
+type InferenceModelInformer interface {
+	Informer() cache.SharedIndexInformer
+	Lister() apiv1alpha2.InferenceModelLister
+}
+
+type inferenceModelInformer struct {
+	factory          internalinterfaces.SharedInformerFactory
+	tweakListOptions internalinterfaces.TweakListOptionsFunc
+	namespace        string
+}
+
+// NewInferenceModelInformer constructs a new informer for InferenceModel type.
+// Always prefer using an informer factory to get a shared informer instead of getting an independent
+// one. This reduces memory footprint and number of connections to the server.
+func NewInferenceModelInformer(client versioned.Interface, namespace string, resyncPeriod time.Duration, indexers cache.Indexers) cache.SharedIndexInformer {
+	return NewFilteredInferenceModelInformer(client, namespace, resyncPeriod, indexers, nil)
+}
+
+// NewFilteredInferenceModelInformer constructs a new informer for InferenceModel type.
+// Always prefer using an informer factory to get a shared informer instead of getting an independent
+// one. This reduces memory footprint and number of connections to the server.
+func NewFilteredInferenceModelInformer(client versioned.Interface, namespace string, resyncPeriod time.Duration, indexers cache.Indexers, tweakListOptions internalinterfaces.TweakListOptionsFunc) cache.SharedIndexInformer {
+	return cache.NewSharedIndexInformer(
+		&cache.ListWatch{
+			ListFunc: func(options v1.ListOptions) (runtime.Object, error) {
+				if tweakListOptions != nil {
+					tweakListOptions(&options)
+				}
+				return client.InferenceV1alpha2().InferenceModels(namespace).List(context.TODO(), options)
+			},
+			WatchFunc: func(options v1.ListOptions) (watch.Interface, error) {
+				if tweakListOptions != nil {
+					tweakListOptions(&options)
+				}
+				return client.InferenceV1alpha2().InferenceModels(namespace).Watch(context.TODO(), options)
+			},
+		},
+		&gatewayapiinferenceextensionapiv1alpha2.InferenceModel{},
+		resyncPeriod,
+		indexers,
+	)
+}
+
+func (f *inferenceModelInformer) defaultInformer(client versioned.Interface, resyncPeriod time.Duration) cache.SharedIndexInformer {
+	return NewFilteredInferenceModelInformer(client, f.namespace, resyncPeriod, cache.Indexers{cache.NamespaceIndex: cache.MetaNamespaceIndexFunc}, f.tweakListOptions)
+}
+
+func (f *inferenceModelInformer) Informer() cache.SharedIndexInformer {
+	return f.factory.InformerFor(&gatewayapiinferenceextensionapiv1alpha2.InferenceModel{}, f.defaultInformer)
+}
+
+func (f *inferenceModelInformer) Lister() apiv1alpha2.InferenceModelLister {
+	return apiv1alpha2.NewInferenceModelLister(f.Informer().GetIndexer())
+}
diff --git a/client-go/informers/externalversions/api/v1alpha2/inferencepool.go b/client-go/informers/externalversions/api/v1alpha2/inferencepool.go
new file mode 100644
index 000000000..d04591dd9
--- /dev/null
+++ b/client-go/informers/externalversions/api/v1alpha2/inferencepool.go
@@ -0,0 +1,89 @@
+/*
+Copyright 2024 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+// Code generated by informer-gen. DO NOT EDIT.
+
+package v1alpha2
+
+import (
+	context "context"
+	time "time"
+
+	v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	runtime "k8s.io/apimachinery/pkg/runtime"
+	watch "k8s.io/apimachinery/pkg/watch"
+	cache "k8s.io/client-go/tools/cache"
+	gatewayapiinferenceextensionapiv1alpha2 "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha2"
+	versioned "sigs.k8s.io/gateway-api-inference-extension/client-go/clientset/versioned"
+	internalinterfaces "sigs.k8s.io/gateway-api-inference-extension/client-go/informers/externalversions/internalinterfaces"
+	apiv1alpha2 "sigs.k8s.io/gateway-api-inference-extension/client-go/listers/api/v1alpha2"
+)
+
+// InferencePoolInformer provides access to a shared informer and lister for
+// InferencePools.
+type InferencePoolInformer interface {
+	Informer() cache.SharedIndexInformer
+	Lister() apiv1alpha2.InferencePoolLister
+}
+
+type inferencePoolInformer struct {
+	factory          internalinterfaces.SharedInformerFactory
+	tweakListOptions internalinterfaces.TweakListOptionsFunc
+	namespace        string
+}
+
+// NewInferencePoolInformer constructs a new informer for InferencePool type.
+// Always prefer using an informer factory to get a shared informer instead of getting an independent
+// one. This reduces memory footprint and number of connections to the server.
+func NewInferencePoolInformer(client versioned.Interface, namespace string, resyncPeriod time.Duration, indexers cache.Indexers) cache.SharedIndexInformer {
+	return NewFilteredInferencePoolInformer(client, namespace, resyncPeriod, indexers, nil)
+}
+
+// NewFilteredInferencePoolInformer constructs a new informer for InferencePool type.
+// Always prefer using an informer factory to get a shared informer instead of getting an independent
+// one. This reduces memory footprint and number of connections to the server.
+func NewFilteredInferencePoolInformer(client versioned.Interface, namespace string, resyncPeriod time.Duration, indexers cache.Indexers, tweakListOptions internalinterfaces.TweakListOptionsFunc) cache.SharedIndexInformer {
+	return cache.NewSharedIndexInformer(
+		&cache.ListWatch{
+			ListFunc: func(options v1.ListOptions) (runtime.Object, error) {
+				if tweakListOptions != nil {
+					tweakListOptions(&options)
+				}
+				return client.InferenceV1alpha2().InferencePools(namespace).List(context.TODO(), options)
+			},
+			WatchFunc: func(options v1.ListOptions) (watch.Interface, error) {
+				if tweakListOptions != nil {
+					tweakListOptions(&options)
+				}
+				return client.InferenceV1alpha2().InferencePools(namespace).Watch(context.TODO(), options)
+			},
+		},
+		&gatewayapiinferenceextensionapiv1alpha2.InferencePool{},
+		resyncPeriod,
+		indexers,
+	)
+}
+
+func (f *inferencePoolInformer) defaultInformer(client versioned.Interface, resyncPeriod time.Duration) cache.SharedIndexInformer {
+	return NewFilteredInferencePoolInformer(client, f.namespace, resyncPeriod, cache.Indexers{cache.NamespaceIndex: cache.MetaNamespaceIndexFunc}, f.tweakListOptions)
+}
+
+func (f *inferencePoolInformer) Informer() cache.SharedIndexInformer {
+	return f.factory.InformerFor(&gatewayapiinferenceextensionapiv1alpha2.InferencePool{}, f.defaultInformer)
+}
+
+func (f *inferencePoolInformer) Lister() apiv1alpha2.InferencePoolLister {
+	return apiv1alpha2.NewInferencePoolLister(f.Informer().GetIndexer())
+}
diff --git a/client-go/informers/externalversions/api/v1alpha2/interface.go b/client-go/informers/externalversions/api/v1alpha2/interface.go
new file mode 100644
index 000000000..9e5c4d9c2
--- /dev/null
+++ b/client-go/informers/externalversions/api/v1alpha2/interface.go
@@ -0,0 +1,51 @@
+/*
+Copyright 2024 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+// Code generated by informer-gen. DO NOT EDIT.
+
+package v1alpha2
+
+import (
+	internalinterfaces "sigs.k8s.io/gateway-api-inference-extension/client-go/informers/externalversions/internalinterfaces"
+)
+
+// Interface provides access to all the informers in this group version.
+type Interface interface {
+	// InferenceModels returns a InferenceModelInformer.
+	InferenceModels() InferenceModelInformer
+	// InferencePools returns a InferencePoolInformer.
+	InferencePools() InferencePoolInformer
+}
+
+type version struct {
+	factory          internalinterfaces.SharedInformerFactory
+	namespace        string
+	tweakListOptions internalinterfaces.TweakListOptionsFunc
+}
+
+// New returns a new Interface.
+func New(f internalinterfaces.SharedInformerFactory, namespace string, tweakListOptions internalinterfaces.TweakListOptionsFunc) Interface {
+	return &version{factory: f, namespace: namespace, tweakListOptions: tweakListOptions}
+}
+
+// InferenceModels returns a InferenceModelInformer.
+func (v *version) InferenceModels() InferenceModelInformer {
+	return &inferenceModelInformer{factory: v.factory, namespace: v.namespace, tweakListOptions: v.tweakListOptions}
+}
+
+// InferencePools returns a InferencePoolInformer.
+func (v *version) InferencePools() InferencePoolInformer {
+	return &inferencePoolInformer{factory: v.factory, namespace: v.namespace, tweakListOptions: v.tweakListOptions}
+}
diff --git a/client-go/informers/externalversions/generic.go b/client-go/informers/externalversions/generic.go
index 672998f55..9f363d884 100644
--- a/client-go/informers/externalversions/generic.go
+++ b/client-go/informers/externalversions/generic.go
@@ -23,6 +23,7 @@ import (
 	schema "k8s.io/apimachinery/pkg/runtime/schema"
 	cache "k8s.io/client-go/tools/cache"
 	v1alpha1 "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha1"
+	v1alpha2 "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha2"
 )
 
 // GenericInformer is type of SharedIndexInformer which will locate and delegate to other
@@ -57,6 +58,12 @@ func (f *sharedInformerFactory) ForResource(resource schema.GroupVersionResource
 	case v1alpha1.SchemeGroupVersion.WithResource("inferencepools"):
 		return &genericInformer{resource: resource.GroupResource(), informer: f.Inference().V1alpha1().InferencePools().Informer()}, nil
 
+		// Group=inference.networking.x-k8s.io, Version=v1alpha2
+	case v1alpha2.SchemeGroupVersion.WithResource("inferencemodels"):
+		return &genericInformer{resource: resource.GroupResource(), informer: f.Inference().V1alpha2().InferenceModels().Informer()}, nil
+	case v1alpha2.SchemeGroupVersion.WithResource("inferencepools"):
+		return &genericInformer{resource: resource.GroupResource(), informer: f.Inference().V1alpha2().InferencePools().Informer()}, nil
+
 	}
 
 	return nil, fmt.Errorf("no informer found for %v", resource)
diff --git a/client-go/listers/api/v1alpha2/expansion_generated.go b/client-go/listers/api/v1alpha2/expansion_generated.go
new file mode 100644
index 000000000..204c375b3
--- /dev/null
+++ b/client-go/listers/api/v1alpha2/expansion_generated.go
@@ -0,0 +1,34 @@
+/*
+Copyright 2024 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+// Code generated by lister-gen. DO NOT EDIT.
+
+package v1alpha2
+
+// InferenceModelListerExpansion allows custom methods to be added to
+// InferenceModelLister.
+type InferenceModelListerExpansion interface{}
+
+// InferenceModelNamespaceListerExpansion allows custom methods to be added to
+// InferenceModelNamespaceLister.
+type InferenceModelNamespaceListerExpansion interface{}
+
+// InferencePoolListerExpansion allows custom methods to be added to
+// InferencePoolLister.
+type InferencePoolListerExpansion interface{}
+
+// InferencePoolNamespaceListerExpansion allows custom methods to be added to
+// InferencePoolNamespaceLister.
+type InferencePoolNamespaceListerExpansion interface{}
diff --git a/client-go/listers/api/v1alpha2/inferencemodel.go b/client-go/listers/api/v1alpha2/inferencemodel.go
new file mode 100644
index 000000000..ce83b85ff
--- /dev/null
+++ b/client-go/listers/api/v1alpha2/inferencemodel.go
@@ -0,0 +1,69 @@
+/*
+Copyright 2024 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+// Code generated by lister-gen. DO NOT EDIT.
+
+package v1alpha2
+
+import (
+	labels "k8s.io/apimachinery/pkg/labels"
+	listers "k8s.io/client-go/listers"
+	cache "k8s.io/client-go/tools/cache"
+	apiv1alpha2 "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha2"
+)
+
+// InferenceModelLister helps list InferenceModels.
+// All objects returned here must be treated as read-only.
+type InferenceModelLister interface {
+	// List lists all InferenceModels in the indexer.
+	// Objects returned here must be treated as read-only.
+	List(selector labels.Selector) (ret []*apiv1alpha2.InferenceModel, err error)
+	// InferenceModels returns an object that can list and get InferenceModels.
+	InferenceModels(namespace string) InferenceModelNamespaceLister
+	InferenceModelListerExpansion
+}
+
+// inferenceModelLister implements the InferenceModelLister interface.
+type inferenceModelLister struct {
+	listers.ResourceIndexer[*apiv1alpha2.InferenceModel]
+}
+
+// NewInferenceModelLister returns a new InferenceModelLister.
+func NewInferenceModelLister(indexer cache.Indexer) InferenceModelLister {
+	return &inferenceModelLister{listers.New[*apiv1alpha2.InferenceModel](indexer, apiv1alpha2.Resource("inferencemodel"))}
+}
+
+// InferenceModels returns an object that can list and get InferenceModels.
+func (s *inferenceModelLister) InferenceModels(namespace string) InferenceModelNamespaceLister {
+	return inferenceModelNamespaceLister{listers.NewNamespaced[*apiv1alpha2.InferenceModel](s.ResourceIndexer, namespace)}
+}
+
+// InferenceModelNamespaceLister helps list and get InferenceModels.
+// All objects returned here must be treated as read-only.
+type InferenceModelNamespaceLister interface {
+	// List lists all InferenceModels in the indexer for a given namespace.
+	// Objects returned here must be treated as read-only.
+	List(selector labels.Selector) (ret []*apiv1alpha2.InferenceModel, err error)
+	// Get retrieves the InferenceModel from the indexer for a given namespace and name.
+	// Objects returned here must be treated as read-only.
+	Get(name string) (*apiv1alpha2.InferenceModel, error)
+	InferenceModelNamespaceListerExpansion
+}
+
+// inferenceModelNamespaceLister implements the InferenceModelNamespaceLister
+// interface.
+type inferenceModelNamespaceLister struct {
+	listers.ResourceIndexer[*apiv1alpha2.InferenceModel]
+}
diff --git a/client-go/listers/api/v1alpha2/inferencepool.go b/client-go/listers/api/v1alpha2/inferencepool.go
new file mode 100644
index 000000000..c7e49a1e0
--- /dev/null
+++ b/client-go/listers/api/v1alpha2/inferencepool.go
@@ -0,0 +1,69 @@
+/*
+Copyright 2024 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+// Code generated by lister-gen. DO NOT EDIT.
+
+package v1alpha2
+
+import (
+	labels "k8s.io/apimachinery/pkg/labels"
+	listers "k8s.io/client-go/listers"
+	cache "k8s.io/client-go/tools/cache"
+	apiv1alpha2 "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha2"
+)
+
+// InferencePoolLister helps list InferencePools.
+// All objects returned here must be treated as read-only.
+type InferencePoolLister interface {
+	// List lists all InferencePools in the indexer.
+	// Objects returned here must be treated as read-only.
+	List(selector labels.Selector) (ret []*apiv1alpha2.InferencePool, err error)
+	// InferencePools returns an object that can list and get InferencePools.
+	InferencePools(namespace string) InferencePoolNamespaceLister
+	InferencePoolListerExpansion
+}
+
+// inferencePoolLister implements the InferencePoolLister interface.
+type inferencePoolLister struct {
+	listers.ResourceIndexer[*apiv1alpha2.InferencePool]
+}
+
+// NewInferencePoolLister returns a new InferencePoolLister.
+func NewInferencePoolLister(indexer cache.Indexer) InferencePoolLister {
+	return &inferencePoolLister{listers.New[*apiv1alpha2.InferencePool](indexer, apiv1alpha2.Resource("inferencepool"))}
+}
+
+// InferencePools returns an object that can list and get InferencePools.
+func (s *inferencePoolLister) InferencePools(namespace string) InferencePoolNamespaceLister {
+	return inferencePoolNamespaceLister{listers.NewNamespaced[*apiv1alpha2.InferencePool](s.ResourceIndexer, namespace)}
+}
+
+// InferencePoolNamespaceLister helps list and get InferencePools.
+// All objects returned here must be treated as read-only.
+type InferencePoolNamespaceLister interface {
+	// List lists all InferencePools in the indexer for a given namespace.
+	// Objects returned here must be treated as read-only.
+	List(selector labels.Selector) (ret []*apiv1alpha2.InferencePool, err error)
+	// Get retrieves the InferencePool from the indexer for a given namespace and name.
+	// Objects returned here must be treated as read-only.
+	Get(name string) (*apiv1alpha2.InferencePool, error)
+	InferencePoolNamespaceListerExpansion
+}
+
+// inferencePoolNamespaceLister implements the InferencePoolNamespaceLister
+// interface.
+type inferencePoolNamespaceLister struct {
+	listers.ResourceIndexer[*apiv1alpha2.InferencePool]
+}
diff --git a/cmd/epp/main.go b/cmd/epp/main.go
index 1f76cfabe..dd47fa27e 100644
--- a/cmd/epp/main.go
+++ b/cmd/epp/main.go
@@ -40,6 +40,7 @@ import (
 	"sigs.k8s.io/controller-runtime/pkg/manager"
 	"sigs.k8s.io/controller-runtime/pkg/metrics/filters"
 	"sigs.k8s.io/gateway-api-inference-extension/api/v1alpha1"
+	"sigs.k8s.io/gateway-api-inference-extension/api/v1alpha2"
 	"sigs.k8s.io/gateway-api-inference-extension/internal/runnable"
 	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend"
 	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend/vllm"
@@ -104,6 +105,8 @@ var (
 func init() {
 	utilruntime.Must(clientgoscheme.AddToScheme(scheme))
 	utilruntime.Must(v1alpha1.AddToScheme(scheme))
+	utilruntime.Must(v1alpha2.AddToScheme(scheme))
+
 }
 
 func main() {
diff --git a/config/crd/bases/inference.networking.x-k8s.io_inferencemodels.yaml b/config/crd/bases/inference.networking.x-k8s.io_inferencemodels.yaml
index bca196059..09258c204 100644
--- a/config/crd/bases/inference.networking.x-k8s.io_inferencemodels.yaml
+++ b/config/crd/bases/inference.networking.x-k8s.io_inferencemodels.yaml
@@ -235,6 +235,230 @@ spec:
             type: object
         type: object
     served: true
+    storage: false
+    subresources:
+      status: {}
+  - name: v1alpha2
+    schema:
+      openAPIV3Schema:
+        description: InferenceModel is the Schema for the InferenceModels API.
+        properties:
+          apiVersion:
+            description: |-
+              APIVersion defines the versioned schema of this representation of an object.
+              Servers should convert recognized schemas to the latest internal value, and
+              may reject unrecognized values.
+              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
+            type: string
+          kind:
+            description: |-
+              Kind is a string value representing the REST resource this object represents.
+              Servers may infer this from the endpoint the client submits requests to.
+              Cannot be updated.
+              In CamelCase.
+              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+            type: string
+          metadata:
+            type: object
+          spec:
+            description: |-
+              InferenceModelSpec represents the desired state of a specific model use case. This resource is
+              managed by the "Inference Workload Owner" persona.
+
+              The Inference Workload Owner persona is someone that trains, verifies, and
+              leverages a large language model from a model frontend, drives the lifecycle
+              and rollout of new versions of those models, and defines the specific
+              performance and latency goals for the model. These workloads are
+              expected to operate within an InferencePool sharing compute capacity with other
+              InferenceModels, defined by the Inference Platform Admin.
+
+              InferenceModel's modelName (not the ObjectMeta name) is unique for a given InferencePool,
+              if the name is reused, an error will be shown on the status of a
+              InferenceModel that attempted to reuse. The oldest InferenceModel, based on
+              creation timestamp, will be selected to remain valid. In the event of a race
+              condition, one will be selected at random.
+            properties:
+              criticality:
+                description: |-
+                  Criticality defines how important it is to serve the model compared to other models referencing the same pool.
+                  Criticality impacts how traffic is handled in resource constrained situations. It handles this by
+                  queuing or rejecting requests of lower criticality. InferenceModels of an equivalent Criticality will
+                  fairly share resources over throughput of tokens. In the future, the metric used to calculate fairness,
+                  and the proportionality of fairness will be configurable.
+
+                  Default values for this field will not be set, to allow for future additions of new fields that may 'one of' with this field.
+                  Any implementations that may consume this field may treat an unset value as the 'Standard' range.
+                enum:
+                - Critical
+                - Standard
+                - Sheddable
+                type: string
+              modelName:
+                description: |-
+                  ModelName is the name of the model as it will be set in the "model" parameter for an incoming request.
+                  ModelNames must be unique for a referencing InferencePool
+                  (names can be reused for a different pool in the same cluster).
+                  The modelName with the oldest creation timestamp is retained, and the incoming
+                  InferenceModel has its Ready status set to false with a corresponding reason.
+                  In the rare case of a race condition, one Model will be selected randomly to be considered valid, and the other rejected.
+                  Names can be reserved without an underlying model configured in the pool.
+                  This can be done by specifying a target model and setting the weight to zero,
+                  an error will be returned specifying that no valid target model is found.
+                maxLength: 256
+                type: string
+              poolRef:
+                description: PoolRef is a reference to the inference pool, the pool
+                  must exist in the same namespace.
+                properties:
+                  group:
+                    default: inference.networking.x-k8s.io
+                    description: Group is the group of the referent.
+                    maxLength: 253
+                    pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$
+                    type: string
+                  kind:
+                    default: InferencePool
+                    description: Kind is kind of the referent. For example "InferencePool".
+                    maxLength: 63
+                    minLength: 1
+                    pattern: ^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$
+                    type: string
+                  name:
+                    description: Name is the name of the referent.
+                    maxLength: 253
+                    minLength: 1
+                    type: string
+                required:
+                - name
+                type: object
+              targetModels:
+                description: |-
+                  TargetModels allow multiple versions of a model for traffic splitting.
+                  If not specified, the target model name is defaulted to the modelName parameter.
+                  modelName is often in reference to a LoRA adapter.
+                items:
+                  description: |-
+                    TargetModel represents a deployed model or a LoRA adapter. The
+                    Name field is expected to match the name of the LoRA adapter
+                    (or base model) as it is registered within the model server. Inference
+                    Gateway assumes that the model exists on the model server and it's the
+                    responsibility of the user to validate a correct match. Should a model fail
+                    to exist at request time, the error is processed by the Inference Gateway
+                    and emitted on the appropriate InferenceModel object.
+                  properties:
+                    name:
+                      description: Name is the name of the adapter or base model,
+                        as expected by the ModelServer.
+                      maxLength: 253
+                      type: string
+                    weight:
+                      description: |-
+                        Weight is used to determine the proportion of traffic that should be
+                        sent to this model when multiple target models are specified.
+
+                        Weight defines the proportion of requests forwarded to the specified
+                        model. This is computed as weight/(sum of all weights in this
+                        TargetModels list). For non-zero values, there may be some epsilon from
+                        the exact proportion defined here depending on the precision an
+                        implementation supports. Weight is not a percentage and the sum of
+                        weights does not need to equal 100.
+
+                        If a weight is set for any targetModel, it must be set for all targetModels.
+                        Conversely weights are optional, so long as ALL targetModels do not specify a weight.
+                      format: int32
+                      maximum: 1000000
+                      minimum: 0
+                      type: integer
+                  required:
+                  - name
+                  type: object
+                maxItems: 10
+                type: array
+                x-kubernetes-validations:
+                - message: Weights should be set for all models, or none of the models.
+                  rule: self.all(model, has(model.weight)) || self.all(model, !has(model.weight))
+            required:
+            - modelName
+            - poolRef
+            type: object
+          status:
+            description: InferenceModelStatus defines the observed state of InferenceModel
+            properties:
+              conditions:
+                default:
+                - lastTransitionTime: "1970-01-01T00:00:00Z"
+                  message: Waiting for controller
+                  reason: Pending
+                  status: Unknown
+                  type: Ready
+                description: |-
+                  Conditions track the state of the InferenceModel.
+
+                  Known condition types are:
+
+                  * "Accepted"
+                items:
+                  description: Condition contains details for one aspect of the current
+                    state of this API Resource.
+                  properties:
+                    lastTransitionTime:
+                      description: |-
+                        lastTransitionTime is the last time the condition transitioned from one status to another.
+                        This should be when the underlying condition changed.  If that is not known, then using the time when the API field changed is acceptable.
+                      format: date-time
+                      type: string
+                    message:
+                      description: |-
+                        message is a human readable message indicating details about the transition.
+                        This may be an empty string.
+                      maxLength: 32768
+                      type: string
+                    observedGeneration:
+                      description: |-
+                        observedGeneration represents the .metadata.generation that the condition was set based upon.
+                        For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date
+                        with respect to the current state of the instance.
+                      format: int64
+                      minimum: 0
+                      type: integer
+                    reason:
+                      description: |-
+                        reason contains a programmatic identifier indicating the reason for the condition's last transition.
+                        Producers of specific condition types may define expected values and meanings for this field,
+                        and whether the values are considered a guaranteed API.
+                        The value should be a CamelCase string.
+                        This field may not be empty.
+                      maxLength: 1024
+                      minLength: 1
+                      pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
+                      type: string
+                    status:
+                      description: status of the condition, one of True, False, Unknown.
+                      enum:
+                      - "True"
+                      - "False"
+                      - Unknown
+                      type: string
+                    type:
+                      description: type of condition in CamelCase or in foo.example.com/CamelCase.
+                      maxLength: 316
+                      pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
+                      type: string
+                  required:
+                  - lastTransitionTime
+                  - message
+                  - reason
+                  - status
+                  - type
+                  type: object
+                maxItems: 8
+                type: array
+                x-kubernetes-list-map-keys:
+                - type
+                x-kubernetes-list-type: map
+            type: object
+        type: object
+    served: true
     storage: true
     subresources:
       status: {}
diff --git a/config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml b/config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml
index 9e6473b9e..918e95cb7 100644
--- a/config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml
+++ b/config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml
@@ -201,6 +201,258 @@ spec:
             type: object
         type: object
     served: true
+    storage: false
+    subresources:
+      status: {}
+  - name: v1alpha2
+    schema:
+      openAPIV3Schema:
+        description: InferencePool is the Schema for the InferencePools API.
+        properties:
+          apiVersion:
+            description: |-
+              APIVersion defines the versioned schema of this representation of an object.
+              Servers should convert recognized schemas to the latest internal value, and
+              may reject unrecognized values.
+              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
+            type: string
+          kind:
+            description: |-
+              Kind is a string value representing the REST resource this object represents.
+              Servers may infer this from the endpoint the client submits requests to.
+              Cannot be updated.
+              In CamelCase.
+              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+            type: string
+          metadata:
+            type: object
+          spec:
+            description: InferencePoolSpec defines the desired state of InferencePool
+            properties:
+              extensionRef:
+                description: Extension configures an endpoint picker as an extension
+                  service.
+                properties:
+                  failureMode:
+                    default: FailClose
+                    description: |-
+                      Configures how the gateway handles the case when the extension is not responsive.
+                      Defaults to FailClose.
+                    enum:
+                    - FailOpen
+                    - FailClose
+                    type: string
+                  group:
+                    default: ""
+                    description: |-
+                      Group is the group of the referent.
+                      When unspecified or empty string, core API group is inferred.
+                    type: string
+                  kind:
+                    default: Service
+                    description: |-
+                      Kind is the Kubernetes resource kind of the referent. For example
+                      "Service".
+
+                      Defaults to "Service" when not specified.
+
+                      ExternalName services can refer to CNAME DNS records that may live
+                      outside of the cluster and as such are difficult to reason about in
+                      terms of conformance. They also may not be safe to forward to (see
+                      CVE-2021-25740 for more information). Implementations MUST NOT
+                      support ExternalName Services.
+                    type: string
+                  name:
+                    description: Name is the name of the referent.
+                    type: string
+                  targetPortNumber:
+                    description: |-
+                      The port number on the service running the extension. When unspecified, implementations SHOULD infer a
+                      default value of 9002 when the Kind is Service.
+                    format: int32
+                    maximum: 65535
+                    minimum: 1
+                    type: integer
+                required:
+                - name
+                type: object
+              selector:
+                additionalProperties:
+                  description: |-
+                    LabelValue is the value of a label. This is used for validation
+                    of maps. This matches the Kubernetes label validation rules:
+                    * must be 63 characters or less (can be empty),
+                    * unless empty, must begin and end with an alphanumeric character ([a-z0-9A-Z]),
+                    * could contain dashes (-), underscores (_), dots (.), and alphanumerics between.
+
+                    Valid values include:
+
+                    * MyValue
+                    * my.name
+                    * 123-my-value
+                  maxLength: 63
+                  minLength: 0
+                  pattern: ^(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])?$
+                  type: string
+                description: |-
+                  Selector defines a map of labels to watch model server pods
+                  that should be included in the InferencePool.
+                  In some cases, implementations may translate this field to a Service selector, so this matches the simple
+                  map used for Service selectors instead of the full Kubernetes LabelSelector type.
+                type: object
+              targetPortNumber:
+                description: |-
+                  TargetPortNumber defines the port number to access the selected model servers.
+                  The number must be in the range 1 to 65535.
+                format: int32
+                maximum: 65535
+                minimum: 1
+                type: integer
+            required:
+            - extensionRef
+            - selector
+            - targetPortNumber
+            type: object
+          status:
+            description: InferencePoolStatus defines the observed state of InferencePool
+            properties:
+              parent:
+                description: |-
+                  Parents is a list of parent resources (usually Gateways) that are
+                  associated with the InferencePool, and the status of the InferencePool with respect to
+                  each parent.
+
+                  A maximum of 32 Gateways will be represented in this list. An empty list
+                  means the InferencePool has not been attached to any Gateway.
+                items:
+                  description: PoolStatus defines the observed state of InferencePool
+                    from a gateway.
+                  properties:
+                    conditions:
+                      default:
+                      - lastTransitionTime: "1970-01-01T00:00:00Z"
+                        message: Waiting for controller
+                        reason: Pending
+                        status: Unknown
+                        type: Ready
+                      description: |-
+                        Conditions track the state of the InferencePool.
+
+                        Known condition types are:
+
+                        * "Ready"
+                      items:
+                        description: Condition contains details for one aspect of
+                          the current state of this API Resource.
+                        properties:
+                          lastTransitionTime:
+                            description: |-
+                              lastTransitionTime is the last time the condition transitioned from one status to another.
+                              This should be when the underlying condition changed.  If that is not known, then using the time when the API field changed is acceptable.
+                            format: date-time
+                            type: string
+                          message:
+                            description: |-
+                              message is a human readable message indicating details about the transition.
+                              This may be an empty string.
+                            maxLength: 32768
+                            type: string
+                          observedGeneration:
+                            description: |-
+                              observedGeneration represents the .metadata.generation that the condition was set based upon.
+                              For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date
+                              with respect to the current state of the instance.
+                            format: int64
+                            minimum: 0
+                            type: integer
+                          reason:
+                            description: |-
+                              reason contains a programmatic identifier indicating the reason for the condition's last transition.
+                              Producers of specific condition types may define expected values and meanings for this field,
+                              and whether the values are considered a guaranteed API.
+                              The value should be a CamelCase string.
+                              This field may not be empty.
+                            maxLength: 1024
+                            minLength: 1
+                            pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
+                            type: string
+                          status:
+                            description: status of the condition, one of True, False,
+                              Unknown.
+                            enum:
+                            - "True"
+                            - "False"
+                            - Unknown
+                            type: string
+                          type:
+                            description: type of condition in CamelCase or in foo.example.com/CamelCase.
+                            maxLength: 316
+                            pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
+                            type: string
+                        required:
+                        - lastTransitionTime
+                        - message
+                        - reason
+                        - status
+                        - type
+                        type: object
+                      maxItems: 8
+                      type: array
+                      x-kubernetes-list-map-keys:
+                      - type
+                      x-kubernetes-list-type: map
+                    parentRef:
+                      description: GatewayRef indicates the gateway that observed
+                        the state of the InferencePool.
+                      properties:
+                        apiVersion:
+                          description: API version of the referent.
+                          type: string
+                        fieldPath:
+                          description: |-
+                            If referring to a piece of an object instead of an entire object, this string
+                            should contain a valid JSON/Go field access statement, such as desiredState.manifest.containers[2].
+                            For example, if the object reference is to a container within a pod, this would take on a value like:
+                            "spec.containers{name}" (where "name" refers to the name of the container that triggered
+                            the event) or if no container name is specified "spec.containers[2]" (container with
+                            index 2 in this pod). This syntax is chosen only to have some well-defined way of
+                            referencing a part of an object.
+                          type: string
+                        kind:
+                          description: |-
+                            Kind of the referent.
+                            More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+                          type: string
+                        name:
+                          description: |-
+                            Name of the referent.
+                            More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
+                          type: string
+                        namespace:
+                          description: |-
+                            Namespace of the referent.
+                            More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/
+                          type: string
+                        resourceVersion:
+                          description: |-
+                            Specific resourceVersion to which this reference is made, if any.
+                            More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency
+                          type: string
+                        uid:
+                          description: |-
+                            UID of the referent.
+                            More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids
+                          type: string
+                      type: object
+                      x-kubernetes-map-type: atomic
+                  required:
+                  - parentRef
+                  type: object
+                maxItems: 32
+                type: array
+            type: object
+        type: object
+    served: true
     storage: true
     subresources:
       status: {}
diff --git a/pkg/epp/backend/fake.go b/pkg/epp/backend/fake.go
index e81b38177..06f14f696 100644
--- a/pkg/epp/backend/fake.go
+++ b/pkg/epp/backend/fake.go
@@ -21,7 +21,7 @@ import (
 
 	"k8s.io/apimachinery/pkg/types"
 	"sigs.k8s.io/controller-runtime/pkg/log"
-	"sigs.k8s.io/gateway-api-inference-extension/api/v1alpha1"
+	"sigs.k8s.io/gateway-api-inference-extension/api/v1alpha2"
 	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore"
 	logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging"
 )
@@ -40,9 +40,9 @@ func (f *FakePodMetricsClient) FetchMetrics(ctx context.Context, existing *datas
 }
 
 type FakeDataStore struct {
-	Res map[string]*v1alpha1.InferenceModel
+	Res map[string]*v1alpha2.InferenceModel
 }
 
-func (fds *FakeDataStore) FetchModelData(modelName string) (returnModel *v1alpha1.InferenceModel) {
+func (fds *FakeDataStore) FetchModelData(modelName string) (returnModel *v1alpha2.InferenceModel) {
 	return fds.Res[modelName]
 }
diff --git a/pkg/epp/controller/inferencemodel_reconciler.go b/pkg/epp/controller/inferencemodel_reconciler.go
index 99a1eb26e..00358740c 100644
--- a/pkg/epp/controller/inferencemodel_reconciler.go
+++ b/pkg/epp/controller/inferencemodel_reconciler.go
@@ -27,7 +27,7 @@ import (
 	ctrl "sigs.k8s.io/controller-runtime"
 	"sigs.k8s.io/controller-runtime/pkg/client"
 	"sigs.k8s.io/controller-runtime/pkg/log"
-	"sigs.k8s.io/gateway-api-inference-extension/api/v1alpha1"
+	"sigs.k8s.io/gateway-api-inference-extension/api/v1alpha2"
 	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore"
 	logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging"
 )
@@ -49,7 +49,7 @@ func (c *InferenceModelReconciler) Reconcile(ctx context.Context, req ctrl.Reque
 	loggerDefault := logger.V(logutil.DEFAULT)
 	loggerDefault.Info("Reconciling InferenceModel", "name", req.NamespacedName)
 
-	infModel := &v1alpha1.InferenceModel{}
+	infModel := &v1alpha2.InferenceModel{}
 	if err := c.Get(ctx, req.NamespacedName, infModel); err != nil {
 		if errors.IsNotFound(err) {
 			loggerDefault.Info("InferenceModel not found. Removing from datastore since object must be deleted", "name", req.NamespacedName)
@@ -68,7 +68,7 @@ func (c *InferenceModelReconciler) Reconcile(ctx context.Context, req ctrl.Reque
 	return ctrl.Result{}, nil
 }
 
-func (c *InferenceModelReconciler) updateDatastore(logger logr.Logger, infModel *v1alpha1.InferenceModel) {
+func (c *InferenceModelReconciler) updateDatastore(logger logr.Logger, infModel *v1alpha2.InferenceModel) {
 	loggerDefault := logger.V(logutil.DEFAULT)
 
 	if infModel.Spec.PoolRef.Name == c.PoolNamespacedName.Name {
@@ -84,6 +84,6 @@ func (c *InferenceModelReconciler) updateDatastore(logger logr.Logger, infModel
 
 func (c *InferenceModelReconciler) SetupWithManager(mgr ctrl.Manager) error {
 	return ctrl.NewControllerManagedBy(mgr).
-		For(&v1alpha1.InferenceModel{}).
+		For(&v1alpha2.InferenceModel{}).
 		Complete(c)
 }
diff --git a/pkg/epp/controller/inferencemodel_reconciler_test.go b/pkg/epp/controller/inferencemodel_reconciler_test.go
index cf94b168f..cea7bf427 100644
--- a/pkg/epp/controller/inferencemodel_reconciler_test.go
+++ b/pkg/epp/controller/inferencemodel_reconciler_test.go
@@ -28,34 +28,34 @@ import (
 
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/types"
-	"sigs.k8s.io/gateway-api-inference-extension/api/v1alpha1"
+	"sigs.k8s.io/gateway-api-inference-extension/api/v1alpha2"
 	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore"
 	logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging"
 )
 
 var (
-	infModel1 = &v1alpha1.InferenceModel{
-		Spec: v1alpha1.InferenceModelSpec{
+	infModel1 = &v1alpha2.InferenceModel{
+		Spec: v1alpha2.InferenceModelSpec{
 			ModelName: "fake model1",
-			PoolRef:   v1alpha1.PoolObjectReference{Name: "test-pool"},
+			PoolRef:   v1alpha2.PoolObjectReference{Name: "test-pool"},
 		},
 		ObjectMeta: metav1.ObjectMeta{
 			Name: "test-service",
 		},
 	}
-	infModel1Modified = &v1alpha1.InferenceModel{
-		Spec: v1alpha1.InferenceModelSpec{
+	infModel1Modified = &v1alpha2.InferenceModel{
+		Spec: v1alpha2.InferenceModelSpec{
 			ModelName: "fake model1",
-			PoolRef:   v1alpha1.PoolObjectReference{Name: "test-poolio"},
+			PoolRef:   v1alpha2.PoolObjectReference{Name: "test-poolio"},
 		},
 		ObjectMeta: metav1.ObjectMeta{
 			Name: "test-service",
 		},
 	}
-	infModel2 = &v1alpha1.InferenceModel{
-		Spec: v1alpha1.InferenceModelSpec{
+	infModel2 = &v1alpha2.InferenceModel{
+		Spec: v1alpha2.InferenceModelSpec{
 			ModelName: "fake model",
-			PoolRef:   v1alpha1.PoolObjectReference{Name: "test-pool"},
+			PoolRef:   v1alpha2.PoolObjectReference{Name: "test-pool"},
 		},
 		ObjectMeta: metav1.ObjectMeta{
 			Name: "test-service-2",
@@ -69,14 +69,14 @@ func TestUpdateDatastore_InferenceModelReconciler(t *testing.T) {
 	tests := []struct {
 		name                string
 		datastore           datastore.Datastore
-		incomingService     *v1alpha1.InferenceModel
+		incomingService     *v1alpha2.InferenceModel
 		wantInferenceModels *sync.Map
 	}{
 		{
 			name: "No Services registered; valid, new service incoming.",
-			datastore: datastore.NewFakeDatastore(nil, nil, &v1alpha1.InferencePool{
-				Spec: v1alpha1.InferencePoolSpec{
-					Selector: map[v1alpha1.LabelKey]v1alpha1.LabelValue{"app": "vllm"},
+			datastore: datastore.NewFakeDatastore(nil, nil, &v1alpha2.InferencePool{
+				Spec: v1alpha2.InferencePoolSpec{
+					Selector: map[v1alpha2.LabelKey]v1alpha2.LabelValue{"app": "vllm"},
 				},
 				ObjectMeta: metav1.ObjectMeta{
 					Name:            "test-pool",
@@ -89,9 +89,9 @@ func TestUpdateDatastore_InferenceModelReconciler(t *testing.T) {
 		},
 		{
 			name: "Removing existing service.",
-			datastore: datastore.NewFakeDatastore(nil, populateServiceMap(infModel1), &v1alpha1.InferencePool{
-				Spec: v1alpha1.InferencePoolSpec{
-					Selector: map[v1alpha1.LabelKey]v1alpha1.LabelValue{"app": "vllm"},
+			datastore: datastore.NewFakeDatastore(nil, populateServiceMap(infModel1), &v1alpha2.InferencePool{
+				Spec: v1alpha2.InferencePoolSpec{
+					Selector: map[v1alpha2.LabelKey]v1alpha2.LabelValue{"app": "vllm"},
 				},
 				ObjectMeta: metav1.ObjectMeta{
 					Name:            "test-pool",
@@ -103,19 +103,19 @@ func TestUpdateDatastore_InferenceModelReconciler(t *testing.T) {
 		},
 		{
 			name: "Unrelated service, do nothing.",
-			datastore: datastore.NewFakeDatastore(nil, populateServiceMap(infModel1), &v1alpha1.InferencePool{
-				Spec: v1alpha1.InferencePoolSpec{
-					Selector: map[v1alpha1.LabelKey]v1alpha1.LabelValue{"app": "vllm"},
+			datastore: datastore.NewFakeDatastore(nil, populateServiceMap(infModel1), &v1alpha2.InferencePool{
+				Spec: v1alpha2.InferencePoolSpec{
+					Selector: map[v1alpha2.LabelKey]v1alpha2.LabelValue{"app": "vllm"},
 				},
 				ObjectMeta: metav1.ObjectMeta{
 					Name:            "test-pool",
 					ResourceVersion: "Old and boring",
 				},
 			}),
-			incomingService: &v1alpha1.InferenceModel{
-				Spec: v1alpha1.InferenceModelSpec{
+			incomingService: &v1alpha2.InferenceModel{
+				Spec: v1alpha2.InferenceModelSpec{
 					ModelName: "fake model",
-					PoolRef:   v1alpha1.PoolObjectReference{Name: "test-poolio"},
+					PoolRef:   v1alpha2.PoolObjectReference{Name: "test-poolio"},
 				},
 				ObjectMeta: metav1.ObjectMeta{
 					Name: "unrelated-service",
@@ -125,9 +125,9 @@ func TestUpdateDatastore_InferenceModelReconciler(t *testing.T) {
 		},
 		{
 			name: "Add to existing",
-			datastore: datastore.NewFakeDatastore(nil, populateServiceMap(infModel1), &v1alpha1.InferencePool{
-				Spec: v1alpha1.InferencePoolSpec{
-					Selector: map[v1alpha1.LabelKey]v1alpha1.LabelValue{"app": "vllm"},
+			datastore: datastore.NewFakeDatastore(nil, populateServiceMap(infModel1), &v1alpha2.InferencePool{
+				Spec: v1alpha2.InferencePoolSpec{
+					Selector: map[v1alpha2.LabelKey]v1alpha2.LabelValue{"app": "vllm"},
 				},
 				ObjectMeta: metav1.ObjectMeta{
 					Name:            "test-pool",
@@ -164,13 +164,13 @@ func TestUpdateDatastore_InferenceModelReconciler(t *testing.T) {
 func TestReconcile_ResourceNotFound(t *testing.T) {
 	// Set up the scheme.
 	scheme := runtime.NewScheme()
-	_ = v1alpha1.AddToScheme(scheme)
+	_ = v1alpha2.AddToScheme(scheme)
 
 	// Create a fake client with no InferenceModel objects.
 	fakeClient := fake.NewClientBuilder().WithScheme(scheme).Build()
 
 	// Create a minimal datastore.
-	datastore := datastore.NewFakeDatastore(nil, nil, &v1alpha1.InferencePool{
+	datastore := datastore.NewFakeDatastore(nil, nil, &v1alpha2.InferencePool{
 		ObjectMeta: metav1.ObjectMeta{Name: "test-pool"},
 	})
 
@@ -201,20 +201,20 @@ func TestReconcile_ResourceNotFound(t *testing.T) {
 func TestReconcile_ModelMarkedForDeletion(t *testing.T) {
 	// Set up the scheme.
 	scheme := runtime.NewScheme()
-	_ = v1alpha1.AddToScheme(scheme)
+	_ = v1alpha2.AddToScheme(scheme)
 
 	// Create an InferenceModel object.
 	now := metav1.Now()
-	existingModel := &v1alpha1.InferenceModel{
+	existingModel := &v1alpha2.InferenceModel{
 		ObjectMeta: metav1.ObjectMeta{
 			Name:              "existing-model",
 			Namespace:         "default",
 			DeletionTimestamp: &now,
 			Finalizers:        []string{"finalizer"},
 		},
-		Spec: v1alpha1.InferenceModelSpec{
+		Spec: v1alpha2.InferenceModelSpec{
 			ModelName: "fake-model",
-			PoolRef:   v1alpha1.PoolObjectReference{Name: "test-pool"},
+			PoolRef:   v1alpha2.PoolObjectReference{Name: "test-pool"},
 		},
 	}
 
@@ -222,7 +222,7 @@ func TestReconcile_ModelMarkedForDeletion(t *testing.T) {
 	fakeClient := fake.NewClientBuilder().WithScheme(scheme).WithObjects(existingModel).Build()
 
 	// Create a minimal datastore.
-	datastore := datastore.NewFakeDatastore(nil, nil, &v1alpha1.InferencePool{
+	datastore := datastore.NewFakeDatastore(nil, nil, &v1alpha2.InferencePool{
 		ObjectMeta: metav1.ObjectMeta{Name: "test-pool"},
 	})
 
@@ -258,17 +258,17 @@ func TestReconcile_ModelMarkedForDeletion(t *testing.T) {
 func TestReconcile_ResourceExists(t *testing.T) {
 	// Set up the scheme.
 	scheme := runtime.NewScheme()
-	_ = v1alpha1.AddToScheme(scheme)
+	_ = v1alpha2.AddToScheme(scheme)
 
 	// Create an InferenceModel object.
-	existingModel := &v1alpha1.InferenceModel{
+	existingModel := &v1alpha2.InferenceModel{
 		ObjectMeta: metav1.ObjectMeta{
 			Name:      "existing-model",
 			Namespace: "default",
 		},
-		Spec: v1alpha1.InferenceModelSpec{
+		Spec: v1alpha2.InferenceModelSpec{
 			ModelName: "fake-model",
-			PoolRef:   v1alpha1.PoolObjectReference{Name: "test-pool"},
+			PoolRef:   v1alpha2.PoolObjectReference{Name: "test-pool"},
 		},
 	}
 
@@ -276,7 +276,7 @@ func TestReconcile_ResourceExists(t *testing.T) {
 	fakeClient := fake.NewClientBuilder().WithScheme(scheme).WithObjects(existingModel).Build()
 
 	// Create a minimal datastore.
-	datastore := datastore.NewFakeDatastore(nil, nil, &v1alpha1.InferencePool{
+	datastore := datastore.NewFakeDatastore(nil, nil, &v1alpha2.InferencePool{
 		ObjectMeta: metav1.ObjectMeta{Name: "test-pool"},
 	})
 
@@ -309,7 +309,7 @@ func TestReconcile_ResourceExists(t *testing.T) {
 	}
 }
 
-func populateServiceMap(services ...*v1alpha1.InferenceModel) *sync.Map {
+func populateServiceMap(services ...*v1alpha2.InferenceModel) *sync.Map {
 	returnVal := &sync.Map{}
 
 	for _, service := range services {
diff --git a/pkg/epp/controller/inferencepool_reconciler.go b/pkg/epp/controller/inferencepool_reconciler.go
index f2c56991c..baf3332b5 100644
--- a/pkg/epp/controller/inferencepool_reconciler.go
+++ b/pkg/epp/controller/inferencepool_reconciler.go
@@ -27,7 +27,7 @@ import (
 	ctrl "sigs.k8s.io/controller-runtime"
 	"sigs.k8s.io/controller-runtime/pkg/client"
 	"sigs.k8s.io/controller-runtime/pkg/log"
-	"sigs.k8s.io/gateway-api-inference-extension/api/v1alpha1"
+	"sigs.k8s.io/gateway-api-inference-extension/api/v1alpha2"
 	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore"
 	logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging"
 )
@@ -52,7 +52,7 @@ func (c *InferencePoolReconciler) Reconcile(ctx context.Context, req ctrl.Reques
 	loggerDefault := logger.V(logutil.DEFAULT)
 	loggerDefault.Info("Reconciling InferencePool", "name", req.NamespacedName)
 
-	serverPool := &v1alpha1.InferencePool{}
+	serverPool := &v1alpha2.InferencePool{}
 
 	if err := c.Get(ctx, req.NamespacedName, serverPool); err != nil {
 		if errors.IsNotFound(err) {
@@ -73,7 +73,7 @@ func (c *InferencePoolReconciler) Reconcile(ctx context.Context, req ctrl.Reques
 	return ctrl.Result{}, nil
 }
 
-func (c *InferencePoolReconciler) updateDatastore(ctx context.Context, newPool *v1alpha1.InferencePool) {
+func (c *InferencePoolReconciler) updateDatastore(ctx context.Context, newPool *v1alpha2.InferencePool) {
 	logger := log.FromContext(ctx)
 	oldPool, err := c.Datastore.PoolGet()
 	c.Datastore.PoolSet(newPool)
@@ -91,6 +91,6 @@ func (c *InferencePoolReconciler) updateDatastore(ctx context.Context, newPool *
 
 func (c *InferencePoolReconciler) SetupWithManager(mgr ctrl.Manager) error {
 	return ctrl.NewControllerManagedBy(mgr).
-		For(&v1alpha1.InferencePool{}).
+		For(&v1alpha2.InferencePool{}).
 		Complete(c)
 }
diff --git a/pkg/epp/controller/inferencepool_reconciler_test.go b/pkg/epp/controller/inferencepool_reconciler_test.go
index 6263fa165..a96406f04 100644
--- a/pkg/epp/controller/inferencepool_reconciler_test.go
+++ b/pkg/epp/controller/inferencepool_reconciler_test.go
@@ -30,7 +30,7 @@ import (
 	ctrl "sigs.k8s.io/controller-runtime"
 	"sigs.k8s.io/controller-runtime/pkg/client"
 	"sigs.k8s.io/controller-runtime/pkg/client/fake"
-	"sigs.k8s.io/gateway-api-inference-extension/api/v1alpha1"
+	"sigs.k8s.io/gateway-api-inference-extension/api/v1alpha2"
 	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore"
 	utiltesting "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/testing"
 )
@@ -38,17 +38,17 @@ import (
 var (
 	selector_v1 = map[string]string{"app": "vllm_v1"}
 	selector_v2 = map[string]string{"app": "vllm_v2"}
-	pool1       = &v1alpha1.InferencePool{
+	pool1       = &v1alpha2.InferencePool{
 		ObjectMeta: metav1.ObjectMeta{
 			Name:      "pool1",
 			Namespace: "pool1-ns",
 		},
-		Spec: v1alpha1.InferencePoolSpec{
-			Selector:         map[v1alpha1.LabelKey]v1alpha1.LabelValue{"app": "vllm_v1"},
+		Spec: v1alpha2.InferencePoolSpec{
+			Selector:         map[v1alpha2.LabelKey]v1alpha2.LabelValue{"app": "vllm_v1"},
 			TargetPortNumber: 8080,
 		},
 	}
-	pool2 = &v1alpha1.InferencePool{
+	pool2 = &v1alpha2.InferencePool{
 		ObjectMeta: metav1.ObjectMeta{
 			Name:      "pool2",
 			Namespace: "pool2-ns",
@@ -74,7 +74,7 @@ func TestReconcile_InferencePoolReconciler(t *testing.T) {
 	// Set up the scheme.
 	scheme := runtime.NewScheme()
 	_ = clientgoscheme.AddToScheme(scheme)
-	_ = v1alpha1.AddToScheme(scheme)
+	_ = v1alpha2.AddToScheme(scheme)
 
 	// Create a fake client with the pool and the pods.
 	initialObjects := []client.Object{pool1, pool2}
@@ -111,11 +111,11 @@ func TestReconcile_InferencePoolReconciler(t *testing.T) {
 	}
 
 	// Step 3: update the pool selector to include more pods
-	newPool1 := &v1alpha1.InferencePool{}
+	newPool1 := &v1alpha2.InferencePool{}
 	if err := fakeClient.Get(ctx, req.NamespacedName, newPool1); err != nil {
 		t.Errorf("Unexpected pool get error: %v", err)
 	}
-	newPool1.Spec.Selector = map[v1alpha1.LabelKey]v1alpha1.LabelValue{"app": "vllm_v2"}
+	newPool1.Spec.Selector = map[v1alpha2.LabelKey]v1alpha2.LabelValue{"app": "vllm_v2"}
 	if err := fakeClient.Update(ctx, newPool1, &client.UpdateOptions{}); err != nil {
 		t.Errorf("Unexpected pool update error: %v", err)
 	}
@@ -157,7 +157,7 @@ func TestReconcile_InferencePoolReconciler(t *testing.T) {
 	}
 }
 
-func diffPool(datastore datastore.Datastore, wantPool *v1alpha1.InferencePool, wantPods []string) string {
+func diffPool(datastore datastore.Datastore, wantPool *v1alpha2.InferencePool, wantPods []string) string {
 	gotPool, _ := datastore.PoolGet()
 	if diff := cmp.Diff(wantPool, gotPool); diff != "" {
 		return diff
diff --git a/pkg/epp/controller/pod_reconciler_test.go b/pkg/epp/controller/pod_reconciler_test.go
index b3869113c..8a39dbabd 100644
--- a/pkg/epp/controller/pod_reconciler_test.go
+++ b/pkg/epp/controller/pod_reconciler_test.go
@@ -31,7 +31,7 @@ import (
 	ctrl "sigs.k8s.io/controller-runtime"
 	"sigs.k8s.io/controller-runtime/pkg/client"
 	"sigs.k8s.io/controller-runtime/pkg/client/fake"
-	"sigs.k8s.io/gateway-api-inference-extension/api/v1alpha1"
+	"sigs.k8s.io/gateway-api-inference-extension/api/v1alpha2"
 	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore"
 )
 
@@ -53,10 +53,10 @@ func TestUpdateDatastore_PodReconciler(t *testing.T) {
 	}{
 		{
 			name: "Add new pod",
-			datastore: datastore.NewFakeDatastore(populateMap(basePod1, basePod2), nil, &v1alpha1.InferencePool{
-				Spec: v1alpha1.InferencePoolSpec{
+			datastore: datastore.NewFakeDatastore(populateMap(basePod1, basePod2), nil, &v1alpha2.InferencePool{
+				Spec: v1alpha2.InferencePoolSpec{
 					TargetPortNumber: int32(8000),
-					Selector: map[v1alpha1.LabelKey]v1alpha1.LabelValue{
+					Selector: map[v1alpha2.LabelKey]v1alpha2.LabelValue{
 						"some-key": "some-val",
 					},
 				},
@@ -82,10 +82,10 @@ func TestUpdateDatastore_PodReconciler(t *testing.T) {
 		},
 		{
 			name: "Update pod1 address",
-			datastore: datastore.NewFakeDatastore(populateMap(basePod1, basePod2), nil, &v1alpha1.InferencePool{
-				Spec: v1alpha1.InferencePoolSpec{
+			datastore: datastore.NewFakeDatastore(populateMap(basePod1, basePod2), nil, &v1alpha2.InferencePool{
+				Spec: v1alpha2.InferencePoolSpec{
 					TargetPortNumber: int32(8000),
-					Selector: map[v1alpha1.LabelKey]v1alpha1.LabelValue{
+					Selector: map[v1alpha2.LabelKey]v1alpha2.LabelValue{
 						"some-key": "some-val",
 					},
 				},
@@ -111,10 +111,10 @@ func TestUpdateDatastore_PodReconciler(t *testing.T) {
 		},
 		{
 			name: "Delete pod with DeletionTimestamp",
-			datastore: datastore.NewFakeDatastore(populateMap(basePod1, basePod2), nil, &v1alpha1.InferencePool{
-				Spec: v1alpha1.InferencePoolSpec{
+			datastore: datastore.NewFakeDatastore(populateMap(basePod1, basePod2), nil, &v1alpha2.InferencePool{
+				Spec: v1alpha2.InferencePoolSpec{
 					TargetPortNumber: int32(8000),
-					Selector: map[v1alpha1.LabelKey]v1alpha1.LabelValue{
+					Selector: map[v1alpha2.LabelKey]v1alpha2.LabelValue{
 						"some-key": "some-val",
 					},
 				},
@@ -141,10 +141,10 @@ func TestUpdateDatastore_PodReconciler(t *testing.T) {
 		},
 		{
 			name: "Delete notfound pod",
-			datastore: datastore.NewFakeDatastore(populateMap(basePod1, basePod2), nil, &v1alpha1.InferencePool{
-				Spec: v1alpha1.InferencePoolSpec{
+			datastore: datastore.NewFakeDatastore(populateMap(basePod1, basePod2), nil, &v1alpha2.InferencePool{
+				Spec: v1alpha2.InferencePoolSpec{
 					TargetPortNumber: int32(8000),
-					Selector: map[v1alpha1.LabelKey]v1alpha1.LabelValue{
+					Selector: map[v1alpha2.LabelKey]v1alpha2.LabelValue{
 						"some-key": "some-val",
 					},
 				},
@@ -154,10 +154,10 @@ func TestUpdateDatastore_PodReconciler(t *testing.T) {
 		},
 		{
 			name: "New pod, not ready, valid selector",
-			datastore: datastore.NewFakeDatastore(populateMap(basePod1, basePod2), nil, &v1alpha1.InferencePool{
-				Spec: v1alpha1.InferencePoolSpec{
+			datastore: datastore.NewFakeDatastore(populateMap(basePod1, basePod2), nil, &v1alpha2.InferencePool{
+				Spec: v1alpha2.InferencePoolSpec{
 					TargetPortNumber: int32(8000),
-					Selector: map[v1alpha1.LabelKey]v1alpha1.LabelValue{
+					Selector: map[v1alpha2.LabelKey]v1alpha2.LabelValue{
 						"some-key": "some-val",
 					},
 				},
@@ -182,10 +182,10 @@ func TestUpdateDatastore_PodReconciler(t *testing.T) {
 		},
 		{
 			name: "Remove pod that does not match selector",
-			datastore: datastore.NewFakeDatastore(populateMap(basePod1, basePod2), nil, &v1alpha1.InferencePool{
-				Spec: v1alpha1.InferencePoolSpec{
+			datastore: datastore.NewFakeDatastore(populateMap(basePod1, basePod2), nil, &v1alpha2.InferencePool{
+				Spec: v1alpha2.InferencePoolSpec{
 					TargetPortNumber: int32(8000),
-					Selector: map[v1alpha1.LabelKey]v1alpha1.LabelValue{
+					Selector: map[v1alpha2.LabelKey]v1alpha2.LabelValue{
 						"some-key": "some-val",
 					},
 				},
@@ -210,10 +210,10 @@ func TestUpdateDatastore_PodReconciler(t *testing.T) {
 		},
 		{
 			name: "Remove pod that is not ready",
-			datastore: datastore.NewFakeDatastore(populateMap(basePod1, basePod2), nil, &v1alpha1.InferencePool{
-				Spec: v1alpha1.InferencePoolSpec{
+			datastore: datastore.NewFakeDatastore(populateMap(basePod1, basePod2), nil, &v1alpha2.InferencePool{
+				Spec: v1alpha2.InferencePoolSpec{
 					TargetPortNumber: int32(8000),
-					Selector: map[v1alpha1.LabelKey]v1alpha1.LabelValue{
+					Selector: map[v1alpha2.LabelKey]v1alpha2.LabelValue{
 						"some-key": "some-val",
 					},
 				},
diff --git a/pkg/epp/datastore/datastore.go b/pkg/epp/datastore/datastore.go
index eecea59ce..c5bbddcfd 100644
--- a/pkg/epp/datastore/datastore.go
+++ b/pkg/epp/datastore/datastore.go
@@ -28,21 +28,21 @@ import (
 	"k8s.io/apimachinery/pkg/types"
 	"sigs.k8s.io/controller-runtime/pkg/client"
 	"sigs.k8s.io/controller-runtime/pkg/log"
-	"sigs.k8s.io/gateway-api-inference-extension/api/v1alpha1"
+	"sigs.k8s.io/gateway-api-inference-extension/api/v1alpha2"
 	logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging"
 )
 
 // The datastore is a local cache of relevant data for the given InferencePool (currently all pulled from k8s-api)
 type Datastore interface {
 	// InferencePool operations
-	PoolSet(pool *v1alpha1.InferencePool)
-	PoolGet() (*v1alpha1.InferencePool, error)
+	PoolSet(pool *v1alpha2.InferencePool)
+	PoolGet() (*v1alpha2.InferencePool, error)
 	PoolHasSynced() bool
 	PoolLabelsMatch(podLabels map[string]string) bool
 
 	// InferenceModel operations
-	ModelSet(infModel *v1alpha1.InferenceModel)
-	ModelGet(modelName string) (*v1alpha1.InferenceModel, bool)
+	ModelSet(infModel *v1alpha2.InferenceModel)
+	ModelGet(modelName string) (*v1alpha2.InferenceModel, bool)
 	ModelDelete(modelName string)
 
 	// PodMetrics operations
@@ -69,7 +69,7 @@ func NewDatastore() Datastore {
 }
 
 // Used for test only
-func NewFakeDatastore(pods, models *sync.Map, pool *v1alpha1.InferencePool) Datastore {
+func NewFakeDatastore(pods, models *sync.Map, pool *v1alpha2.InferencePool) Datastore {
 	store := NewDatastore()
 	if pods != nil {
 		store.(*datastore).pods = pods
@@ -86,7 +86,7 @@ func NewFakeDatastore(pods, models *sync.Map, pool *v1alpha1.InferencePool) Data
 type datastore struct {
 	// poolMu is used to synchronize access to the inferencePool.
 	poolMu sync.RWMutex
-	pool   *v1alpha1.InferencePool
+	pool   *v1alpha2.InferencePool
 	models *sync.Map
 	// key: types.NamespacedName, value: *PodMetrics
 	pods *sync.Map
@@ -101,13 +101,13 @@ func (ds *datastore) Clear() {
 }
 
 // /// InferencePool APIs ///
-func (ds *datastore) PoolSet(pool *v1alpha1.InferencePool) {
+func (ds *datastore) PoolSet(pool *v1alpha2.InferencePool) {
 	ds.poolMu.Lock()
 	defer ds.poolMu.Unlock()
 	ds.pool = pool
 }
 
-func (ds *datastore) PoolGet() (*v1alpha1.InferencePool, error) {
+func (ds *datastore) PoolGet() (*v1alpha2.InferencePool, error) {
 	ds.poolMu.RLock()
 	defer ds.poolMu.RUnlock()
 	if !ds.PoolHasSynced() {
@@ -129,14 +129,14 @@ func (ds *datastore) PoolLabelsMatch(podLabels map[string]string) bool {
 }
 
 // /// InferenceModel APIs ///
-func (ds *datastore) ModelSet(infModel *v1alpha1.InferenceModel) {
+func (ds *datastore) ModelSet(infModel *v1alpha2.InferenceModel) {
 	ds.models.Store(infModel.Spec.ModelName, infModel)
 }
 
-func (ds *datastore) ModelGet(modelName string) (*v1alpha1.InferenceModel, bool) {
+func (ds *datastore) ModelGet(modelName string) (*v1alpha2.InferenceModel, bool) {
 	infModel, ok := ds.models.Load(modelName)
 	if ok {
-		return infModel.(*v1alpha1.InferenceModel), true
+		return infModel.(*v1alpha2.InferenceModel), true
 	}
 	return nil, false
 }
@@ -243,11 +243,11 @@ func (ds *datastore) PodDeleteAll() {
 	ds.pods.Clear()
 }
 
-func selectorFromInferencePoolSelector(selector map[v1alpha1.LabelKey]v1alpha1.LabelValue) labels.Selector {
+func selectorFromInferencePoolSelector(selector map[v1alpha2.LabelKey]v1alpha2.LabelValue) labels.Selector {
 	return labels.SelectorFromSet(stripLabelKeyAliasFromLabelMap(selector))
 }
 
-func stripLabelKeyAliasFromLabelMap(labels map[v1alpha1.LabelKey]v1alpha1.LabelValue) map[string]string {
+func stripLabelKeyAliasFromLabelMap(labels map[v1alpha2.LabelKey]v1alpha2.LabelValue) map[string]string {
 	outMap := make(map[string]string)
 	for k, v := range labels {
 		outMap[string(k)] = string(v)
@@ -255,7 +255,7 @@ func stripLabelKeyAliasFromLabelMap(labels map[v1alpha1.LabelKey]v1alpha1.LabelV
 	return outMap
 }
 
-func RandomWeightedDraw(logger logr.Logger, model *v1alpha1.InferenceModel, seed int64) string {
+func RandomWeightedDraw(logger logr.Logger, model *v1alpha2.InferenceModel, seed int64) string {
 	var weights int32
 
 	source := rand.NewSource(rand.Int63())
@@ -277,8 +277,8 @@ func RandomWeightedDraw(logger logr.Logger, model *v1alpha1.InferenceModel, seed
 	return ""
 }
 
-func IsCritical(model *v1alpha1.InferenceModel) bool {
-	if model.Spec.Criticality != nil && *model.Spec.Criticality == v1alpha1.Critical {
+func IsCritical(model *v1alpha2.InferenceModel) bool {
+	if model.Spec.Criticality != nil && *model.Spec.Criticality == v1alpha2.Critical {
 		return true
 	}
 	return false
diff --git a/pkg/epp/datastore/datastore_test.go b/pkg/epp/datastore/datastore_test.go
index bd5c50209..2af365413 100644
--- a/pkg/epp/datastore/datastore_test.go
+++ b/pkg/epp/datastore/datastore_test.go
@@ -20,19 +20,19 @@ import (
 	"testing"
 
 	v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
-	"sigs.k8s.io/gateway-api-inference-extension/api/v1alpha1"
+	"sigs.k8s.io/gateway-api-inference-extension/api/v1alpha2"
 	logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging"
 )
 
 func TestHasSynced(t *testing.T) {
 	tests := []struct {
 		name          string
-		inferencePool *v1alpha1.InferencePool
+		inferencePool *v1alpha2.InferencePool
 		hasSynced     bool
 	}{
 		{
 			name: "Ready when InferencePool exists in data store",
-			inferencePool: &v1alpha1.InferencePool{
+			inferencePool: &v1alpha2.InferencePool{
 				ObjectMeta: v1.ObjectMeta{
 					Name:      "test-pool",
 					Namespace: "default",
@@ -66,14 +66,14 @@ func TestRandomWeightedDraw(t *testing.T) {
 	logger := logutil.NewTestLogger()
 	tests := []struct {
 		name  string
-		model *v1alpha1.InferenceModel
+		model *v1alpha2.InferenceModel
 		want  string
 	}{
 		{
 			name: "'random' distribution",
-			model: &v1alpha1.InferenceModel{
-				Spec: v1alpha1.InferenceModelSpec{
-					TargetModels: []v1alpha1.TargetModel{
+			model: &v1alpha2.InferenceModel{
+				Spec: v1alpha2.InferenceModelSpec{
+					TargetModels: []v1alpha2.TargetModel{
 						{
 							Name:   "canary",
 							Weight: pointer(50),
@@ -89,9 +89,9 @@ func TestRandomWeightedDraw(t *testing.T) {
 		},
 		{
 			name: "'random' distribution",
-			model: &v1alpha1.InferenceModel{
-				Spec: v1alpha1.InferenceModelSpec{
-					TargetModels: []v1alpha1.TargetModel{
+			model: &v1alpha2.InferenceModel{
+				Spec: v1alpha2.InferenceModelSpec{
+					TargetModels: []v1alpha2.TargetModel{
 						{
 							Name:   "canary",
 							Weight: pointer(25),
@@ -111,9 +111,9 @@ func TestRandomWeightedDraw(t *testing.T) {
 		},
 		{
 			name: "'random' distribution",
-			model: &v1alpha1.InferenceModel{
-				Spec: v1alpha1.InferenceModelSpec{
-					TargetModels: []v1alpha1.TargetModel{
+			model: &v1alpha2.InferenceModel{
+				Spec: v1alpha2.InferenceModelSpec{
+					TargetModels: []v1alpha2.TargetModel{
 						{
 							Name:   "canary",
 							Weight: pointer(20),
diff --git a/pkg/epp/test/benchmark/benchmark.go b/pkg/epp/test/benchmark/benchmark.go
index 10987b47f..677834806 100644
--- a/pkg/epp/test/benchmark/benchmark.go
+++ b/pkg/epp/test/benchmark/benchmark.go
@@ -31,7 +31,7 @@ import (
 	"google.golang.org/protobuf/proto"
 	"sigs.k8s.io/controller-runtime/pkg/log"
 	"sigs.k8s.io/controller-runtime/pkg/log/zap"
-	"sigs.k8s.io/gateway-api-inference-extension/api/v1alpha1"
+	"sigs.k8s.io/gateway-api-inference-extension/api/v1alpha2"
 	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore"
 	runserver "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/server"
 	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/test"
@@ -108,12 +108,12 @@ func generateRequestFunc(logger logr.Logger) func(mtd *desc.MethodDescriptor, ca
 	}
 }
 
-func fakeModels() map[string]*v1alpha1.InferenceModel {
-	models := map[string]*v1alpha1.InferenceModel{}
+func fakeModels() map[string]*v1alpha2.InferenceModel {
+	models := map[string]*v1alpha2.InferenceModel{}
 	for i := range *numFakePods {
 		for j := range *numModelsPerPod {
 			m := modelName(i*(*numModelsPerPod) + j)
-			models[m] = &v1alpha1.InferenceModel{Spec: v1alpha1.InferenceModelSpec{ModelName: m}}
+			models[m] = &v1alpha2.InferenceModel{Spec: v1alpha2.InferenceModelSpec{ModelName: m}}
 		}
 	}
 
diff --git a/pkg/epp/test/utils.go b/pkg/epp/test/utils.go
index c44d7147f..6a75ed2ff 100644
--- a/pkg/epp/test/utils.go
+++ b/pkg/epp/test/utils.go
@@ -29,7 +29,7 @@ import (
 	"google.golang.org/grpc/reflection"
 	"k8s.io/apimachinery/pkg/types"
 	"sigs.k8s.io/controller-runtime/pkg/log"
-	"sigs.k8s.io/gateway-api-inference-extension/api/v1alpha1"
+	"sigs.k8s.io/gateway-api-inference-extension/api/v1alpha2"
 	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend"
 	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore"
 	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/handlers"
@@ -43,7 +43,7 @@ func StartExtProc(
 	port int,
 	refreshPodsInterval, refreshMetricsInterval, refreshPrometheusMetricsInterval time.Duration,
 	pods []*datastore.PodMetrics,
-	models map[string]*v1alpha1.InferenceModel,
+	models map[string]*v1alpha2.InferenceModel,
 ) *grpc.Server {
 	logger := log.FromContext(ctx)
 	pms := make(map[types.NamespacedName]*datastore.PodMetrics)
diff --git a/test/e2e/e2e_suite_test.go b/test/e2e/e2e_suite_test.go
index c43427752..14ee738f3 100644
--- a/test/e2e/e2e_suite_test.go
+++ b/test/e2e/e2e_suite_test.go
@@ -38,7 +38,7 @@ import (
 	clientgoscheme "k8s.io/client-go/kubernetes/scheme"
 	"sigs.k8s.io/controller-runtime/pkg/client"
 	"sigs.k8s.io/controller-runtime/pkg/client/config"
-	infextv1a1 "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha1"
+	infextv1a2 "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha2"
 	testutils "sigs.k8s.io/gateway-api-inference-extension/test/utils"
 )
 
@@ -136,7 +136,7 @@ func setupSuite() {
 	err = apiextv1.AddToScheme(scheme)
 	gomega.ExpectWithOffset(1, err).NotTo(gomega.HaveOccurred())
 
-	err = infextv1a1.AddToScheme(scheme)
+	err = infextv1a2.AddToScheme(scheme)
 	gomega.ExpectWithOffset(1, err).NotTo(gomega.HaveOccurred())
 
 	cli, err = client.New(cfg, client.Options{Scheme: scheme})
diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go
index 087097a77..8cd73d327 100644
--- a/test/e2e/e2e_test.go
+++ b/test/e2e/e2e_test.go
@@ -26,7 +26,7 @@ import (
 	"github.com/onsi/gomega"
 	"k8s.io/apimachinery/pkg/types"
 	"k8s.io/utils/ptr"
-	infextv1a1 "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha1"
+	"sigs.k8s.io/gateway-api-inference-extension/api/v1alpha2"
 	testutils "sigs.k8s.io/gateway-api-inference-extension/test/utils"
 )
 
@@ -95,8 +95,8 @@ var _ = ginkgo.Describe("InferencePool", func() {
 })
 
 // newInferenceModel creates an InferenceModel in the given namespace for testutils.
-func newInferenceModel(ns string) *infextv1a1.InferenceModel {
-	targets := []infextv1a1.TargetModel{
+func newInferenceModel(ns string) *v1alpha2.InferenceModel {
+	targets := []v1alpha2.TargetModel{
 		{
 			Name:   modelName + "-0",
 			Weight: ptr.To(int32(50)),
@@ -107,7 +107,7 @@ func newInferenceModel(ns string) *infextv1a1.InferenceModel {
 		},
 	}
 	return testutils.MakeModelWrapper("inferencemodel-sample", ns).
-		SetCriticality(infextv1a1.Critical).
+		SetCriticality(v1alpha2.Critical).
 		SetModelName(modelName).
 		SetPoolRef(modelServerName).
 		SetTargetModels(targets).
diff --git a/test/integration/hermetic_test.go b/test/integration/hermetic_test.go
index 91bc71c6c..85c49913a 100644
--- a/test/integration/hermetic_test.go
+++ b/test/integration/hermetic_test.go
@@ -46,7 +46,7 @@ import (
 	ctrl "sigs.k8s.io/controller-runtime"
 	k8sclient "sigs.k8s.io/controller-runtime/pkg/client"
 	"sigs.k8s.io/controller-runtime/pkg/envtest"
-	"sigs.k8s.io/gateway-api-inference-extension/api/v1alpha1"
+	"sigs.k8s.io/gateway-api-inference-extension/api/v1alpha2"
 	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend"
 	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore"
 	runserver "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/server"
@@ -407,7 +407,7 @@ func BeforeSuit(t *testing.T) func() {
 	}
 
 	utilruntime.Must(clientgoscheme.AddToScheme(scheme))
-	utilruntime.Must(v1alpha1.AddToScheme(scheme))
+	utilruntime.Must(v1alpha2.AddToScheme(scheme))
 
 	k8sClient, err = k8sclient.New(cfg, k8sclient.Options{Scheme: scheme})
 	if err != nil {
@@ -450,7 +450,7 @@ func BeforeSuit(t *testing.T) func() {
 	}
 
 	for _, doc := range docs {
-		inferenceModel := &v1alpha1.InferenceModel{}
+		inferenceModel := &v1alpha2.InferenceModel{}
 		if err = yaml.Unmarshal(doc, inferenceModel); err != nil {
 			logutil.Fatal(logger, err, "Can't unmarshal object", "document", doc)
 		}
@@ -462,7 +462,7 @@ func BeforeSuit(t *testing.T) func() {
 		}
 	}
 	for _, doc := range docs {
-		inferencePool := &v1alpha1.InferencePool{}
+		inferencePool := &v1alpha2.InferencePool{}
 		if err = yaml.Unmarshal(doc, inferencePool); err != nil {
 			logutil.Fatal(logger, err, "Can't unmarshal object", "document", doc)
 		}
diff --git a/test/utils/utils.go b/test/utils/utils.go
index 777eadd8b..1ec0fbaae 100644
--- a/test/utils/utils.go
+++ b/test/utils/utils.go
@@ -36,7 +36,7 @@ import (
 	"k8s.io/client-go/rest"
 	"k8s.io/client-go/tools/remotecommand"
 	"sigs.k8s.io/controller-runtime/pkg/client"
-	infextv1a1 "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha1"
+	"sigs.k8s.io/gateway-api-inference-extension/api/v1alpha2"
 )
 
 // DeleteClusterResources deletes all cluster-scoped objects the tests typically create.
@@ -106,11 +106,11 @@ func DeleteNamespacedResources(ctx context.Context, cli client.Client, ns string
 	if err != nil && !apierrors.IsNotFound(err) {
 		return err
 	}
-	err = cli.DeleteAllOf(ctx, &infextv1a1.InferencePool{}, client.InNamespace(ns), client.PropagationPolicy(metav1.DeletePropagationForeground))
+	err = cli.DeleteAllOf(ctx, &v1alpha2.InferencePool{}, client.InNamespace(ns), client.PropagationPolicy(metav1.DeletePropagationForeground))
 	if err != nil && !apierrors.IsNotFound(err) {
 		return err
 	}
-	err = cli.DeleteAllOf(ctx, &infextv1a1.InferenceModel{}, client.InNamespace(ns), client.PropagationPolicy(metav1.DeletePropagationForeground))
+	err = cli.DeleteAllOf(ctx, &v1alpha2.InferenceModel{}, client.InNamespace(ns), client.PropagationPolicy(metav1.DeletePropagationForeground))
 	if err != nil && !apierrors.IsNotFound(err) {
 		return err
 	}
@@ -132,7 +132,7 @@ func DeleteInferenceModelResources(ctx context.Context, cli client.Client, ns st
 	if ns == "" {
 		return nil
 	}
-	err := cli.DeleteAllOf(ctx, &infextv1a1.InferenceModel{}, client.InNamespace(ns), client.PropagationPolicy(metav1.DeletePropagationForeground))
+	err := cli.DeleteAllOf(ctx, &v1alpha2.InferenceModel{}, client.InNamespace(ns), client.PropagationPolicy(metav1.DeletePropagationForeground))
 	if err != nil && !apierrors.IsNotFound(err) {
 		return err
 	}
diff --git a/test/utils/wrappers.go b/test/utils/wrappers.go
index 668a5adca..3280cb117 100644
--- a/test/utils/wrappers.go
+++ b/test/utils/wrappers.go
@@ -18,25 +18,25 @@ package utils
 
 import (
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
-	infextv1a1 "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha1"
+	"sigs.k8s.io/gateway-api-inference-extension/api/v1alpha2"
 )
 
 // InferenceModelWrapper wraps an InferenceModel.
 type InferenceModelWrapper struct {
-	infextv1a1.InferenceModel
+	v1alpha2.InferenceModel
 }
 
 // MakeModelWrapper creates a wrapper for an MakeModelWrapper.
 func MakeModelWrapper(name, ns string) *InferenceModelWrapper {
 	return &InferenceModelWrapper{
-		infextv1a1.InferenceModel{
+		v1alpha2.InferenceModel{
 			ObjectMeta: metav1.ObjectMeta{
 				Name:      name,
 				Namespace: ns,
 			},
-			Spec: infextv1a1.InferenceModelSpec{
+			Spec: v1alpha2.InferenceModelSpec{
 				ModelName: "",
-				PoolRef:   infextv1a1.PoolObjectReference{},
+				PoolRef:   v1alpha2.PoolObjectReference{},
 			},
 		},
 	}
@@ -49,7 +49,7 @@ func (m *InferenceModelWrapper) SetModelName(name string) *InferenceModelWrapper
 }
 
 // SetCriticality sets the value of the inferenceModel.spec.criticality.
-func (m *InferenceModelWrapper) SetCriticality(level infextv1a1.Criticality) *InferenceModelWrapper {
+func (m *InferenceModelWrapper) SetCriticality(level v1alpha2.Criticality) *InferenceModelWrapper {
 	m.Spec.Criticality = &level
 	return m
 }
@@ -57,8 +57,8 @@ func (m *InferenceModelWrapper) SetCriticality(level infextv1a1.Criticality) *In
 // SetPoolRef sets the value of the inferenceModel.spec.poolRef using defaults
 // for group/kind and name as the PoolObjectReference name.
 func (m *InferenceModelWrapper) SetPoolRef(name string) *InferenceModelWrapper {
-	ref := infextv1a1.PoolObjectReference{
-		Group: infextv1a1.GroupVersion.Group,
+	ref := v1alpha2.PoolObjectReference{
+		Group: v1alpha2.GroupVersion.Group,
 		Kind:  "inferencepools",
 		Name:  name,
 	}
@@ -67,12 +67,12 @@ func (m *InferenceModelWrapper) SetPoolRef(name string) *InferenceModelWrapper {
 }
 
 // SetTargetModels sets the value of the inferenceModel.spec.targetModels.
-func (m *InferenceModelWrapper) SetTargetModels(models []infextv1a1.TargetModel) *InferenceModelWrapper {
+func (m *InferenceModelWrapper) SetTargetModels(models []v1alpha2.TargetModel) *InferenceModelWrapper {
 	m.Spec.TargetModels = models
 	return m
 }
 
 // Obj returns the inner InferenceModel.
-func (m *InferenceModelWrapper) Obj() *infextv1a1.InferenceModel {
+func (m *InferenceModelWrapper) Obj() *v1alpha2.InferenceModel {
 	return &m.InferenceModel
 }