diff --git a/api/v1alpha1/inferencemodel_types.go b/api/v1alpha1/inferencemodel_types.go
index 3414b797..766ecfef 100644
--- a/api/v1alpha1/inferencemodel_types.go
+++ b/api/v1alpha1/inferencemodel_types.go
@@ -65,7 +65,7 @@ type InferenceModelSpec struct {
 	// Reference to the inference pool, the pool must exist in the same namespace.
 	//
 	// +kubebuilder:validation:Required
-	PoolRef *PoolObjectReference `json:"poolRef,omitempty"`
+	PoolRef PoolObjectReference `json:"poolRef"`
 }
 
 // PoolObjectReference identifies an API object within the namespace of the
diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go
index fd866b35..4f17fbd0 100644
--- a/api/v1alpha1/zz_generated.deepcopy.go
+++ b/api/v1alpha1/zz_generated.deepcopy.go
@@ -97,11 +97,7 @@ func (in *InferenceModelSpec) DeepCopyInto(out *InferenceModelSpec) {
 		*out = make([]TargetModel, len(*in))
 		copy(*out, *in)
 	}
-	if in.PoolRef != nil {
-		in, out := &in.PoolRef, &out.PoolRef
-		*out = new(PoolObjectReference)
-		**out = **in
-	}
+	out.PoolRef = in.PoolRef
 }
 
 // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferenceModelSpec.
diff --git a/config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml b/config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml
index 2c1202c4..17135c67 100644
--- a/config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml
+++ b/config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml
@@ -67,7 +67,7 @@ spec:
             type: object
           targetPortNumber:
             description: |-
-              TargetPort is the port number that the model servers within the pool expect
+              TargetPortNumber is the port number that the model servers within the pool expect
               to recieve traffic from.
               This maps to the TargetPort in: https://pkg.go.dev/k8s.io/api/core/v1#ServicePort
             format: int32
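The three hunks above move `PoolRef` from a pointer to a plain value: a required field no longer needs nil-handling, the generated deepcopy collapses to a simple assignment, and `omitempty` is dropped so the field always serializes. A minimal sketch of building the updated spec, using the import path and fixture names that appear in the test hunks later in this diff:

```go
package main

import (
	"fmt"

	"inference.networking.x-k8s.io/llm-instance-gateway/api/v1alpha1"
)

func main() {
	// PoolRef is now a value: the zero PoolObjectReference always exists,
	// and "required" is enforced by CRD validation instead of pointer
	// nil-ness.
	spec := v1alpha1.InferenceModelSpec{
		ModelName: "fake model1",
		PoolRef:   v1alpha1.PoolObjectReference{Name: "test-pool"},
	}
	fmt.Println(spec.PoolRef.Name) // no nil check needed
}
```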
diff --git a/config/crd/bases/inference.networking.x-k8s.io_llmserverpools.yaml b/config/crd/bases/inference.networking.x-k8s.io_llmserverpools.yaml
deleted file mode 100644
index 3ef34c19..00000000
--- a/config/crd/bases/inference.networking.x-k8s.io_llmserverpools.yaml
+++ /dev/null
@@ -1,127 +0,0 @@
----
-apiVersion: apiextensions.k8s.io/v1
-kind: CustomResourceDefinition
-metadata:
-  annotations:
-    controller-gen.kubebuilder.io/version: v0.16.1
-  name: llmserverpools.inference.networking.x-k8s.io
-spec:
-  group: inference.networking.x-k8s.io
-  names:
-    kind: InferencePool
-    listKind: InferencePoolList
-    plural: llmserverpools
-    singular: llmserverpool
-  scope: Namespaced
-  versions:
-  - name: v1alpha1
-    schema:
-      openAPIV3Schema:
-        description: InferencePool is the Schema for the llmserverpools API
-        properties:
-          apiVersion:
-            description: |-
-              APIVersion defines the versioned schema of this representation of an object.
-              Servers should convert recognized schemas to the latest internal value, and
-              may reject unrecognized values.
-              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
-            type: string
-          kind:
-            description: |-
-              Kind is a string value representing the REST resource this object represents.
-              Servers may infer this from the endpoint the client submits requests to.
-              Cannot be updated.
-              In CamelCase.
-              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
-            type: string
-          metadata:
-            type: object
-          spec:
-            description: InferencePoolSpec defines the desired state of InferencePool
-            properties:
-              modelServerSelector:
-                additionalProperties:
-                  type: string
-                description: |-
-                  ModelServerSelector uses a map of label to watch model server pods
-                  that should be included in the InferencePool. ModelServers should not
-                  be with any other Service or InferencePool, that behavior is not supported
-                  and will result in sub-optimal utilization.
-                  Due to this selector being translated to a service a simple map is used instead
-                  of: https://pkg.go.dev/k8s.io/apimachinery/pkg/apis/meta/v1#LabelSelector
-                  To avoid footshoot errors when the https://pkg.go.dev/k8s.io/apimachinery/pkg/apis/meta/v1#LabelSelectorAsMap would be used.
-                type: object
-              targetPort:
-                description: |-
-                  TargetPort is the port number that the model servers within the pool expect
-                  to recieve traffic from.
-                  This maps to the TargetPort in: https://pkg.go.dev/k8s.io/api/core/v1#ServicePort
-                format: int32
-                type: integer
-            type: object
-          status:
-            description: InferencePoolStatus defines the observed state of InferencePool
-            properties:
-              conditions:
-                description: Conditions track the state of the InferencePool.
-                items:
-                  description: Condition contains details for one aspect of the current
-                    state of this API Resource.
-                  properties:
-                    lastTransitionTime:
-                      description: |-
-                        lastTransitionTime is the last time the condition transitioned from one status to another.
-                        This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable.
-                      format: date-time
-                      type: string
-                    message:
-                      description: |-
-                        message is a human readable message indicating details about the transition.
-                        This may be an empty string.
-                      maxLength: 32768
-                      type: string
-                    observedGeneration:
-                      description: |-
-                        observedGeneration represents the .metadata.generation that the condition was set based upon.
-                        For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date
-                        with respect to the current state of the instance.
-                      format: int64
-                      minimum: 0
-                      type: integer
-                    reason:
-                      description: |-
-                        reason contains a programmatic identifier indicating the reason for the condition's last transition.
-                        Producers of specific condition types may define expected values and meanings for this field,
-                        and whether the values are considered a guaranteed API.
-                        The value should be a CamelCase string.
-                        This field may not be empty.
-                      maxLength: 1024
-                      minLength: 1
-                      pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
-                      type: string
-                    status:
-                      description: status of the condition, one of True, False, Unknown.
-                      enum:
-                      - "True"
-                      - "False"
-                      - Unknown
-                      type: string
-                    type:
-                      description: type of condition in CamelCase or in foo.example.com/CamelCase.
-                      maxLength: 316
-                      pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
-                      type: string
-                  required:
-                  - lastTransitionTime
-                  - message
-                  - reason
-                  - status
-                  - type
-                  type: object
-                type: array
-            type: object
-        type: object
-    served: true
-    storage: true
-    subresources:
-      status: {}
diff --git a/config/crd/bases/inference.networking.x-k8s.io_llmservices.yaml b/config/crd/bases/inference.networking.x-k8s.io_llmservices.yaml
deleted file mode 100644
index 390affa8..00000000
--- a/config/crd/bases/inference.networking.x-k8s.io_llmservices.yaml
+++ /dev/null
@@ -1,239 +0,0 @@
----
-apiVersion: apiextensions.k8s.io/v1
-kind: CustomResourceDefinition
-metadata:
-  annotations:
-    controller-gen.kubebuilder.io/version: v0.16.1
-  name: inferencemodels.inference.networking.x-k8s.io
-spec:
-  group: inference.networking.x-k8s.io
-  names:
-    kind: InferenceModel
-    listKind: InferenceModelList
-    plural: inferencemodels
-    singular: inferencemodel
-  scope: Namespaced
-  versions:
-  - name: v1alpha1
-    schema:
-      openAPIV3Schema:
-        description: InferenceModel is the Schema for the inferencemodels API
-        properties:
-          apiVersion:
-            description: |-
-              APIVersion defines the versioned schema of this representation of an object.
-              Servers should convert recognized schemas to the latest internal value, and
-              may reject unrecognized values.
-              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
-            type: string
-          kind:
-            description: |-
-              Kind is a string value representing the REST resource this object represents.
-              Servers may infer this from the endpoint the client submits requests to.
-              Cannot be updated.
-              In CamelCase.
-              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
-            type: string
-          metadata:
-            type: object
-          spec:
-            description: |-
-              InferenceModel represents a set of LLM services that are multiplexed onto one
-              or more InferencePools. This resource is managed by the "LLM Service Owner"
-              persona. The Service Owner persona is: a team that trains, verifies, and
-              leverages a large language model from a model frontend, drives the lifecycle
-              and rollout of new versions of those models, and defines the specific
-              performance and latency goals for the model. These services are
-              expected to operate within a InferencePool sharing compute capacity with other
-              InferenceModels, defined by the Inference Platform Admin. We allow a user who
-              has multiple InferenceModels across multiple pools (with the same config) to
-              specify the configuration exactly once, and deploy to many pools
-              simultaneously. Enabling a simpler config and single source of truth
-              for a given user. InferenceModel names are unique for a given InferencePool,
-              if the name is reused, an error will be shown on the status of a
-              InferenceModel that attempted to reuse. The oldest InferenceModel, based on
-              creation timestamp, will be selected to remain valid. In the event of a race
-              condition, one will be selected at random.
-            properties:
-              models:
-                description: |-
-                  Model defines the distinct services.
-                  Model can be in 2 priority classes, Critical and Noncritical.
-                  Priority class is implicitly set to Critical by specifying an Objective.
-                  Otherwise the Model is considered Noncritical.
-                items:
-                  description: |-
-                    Model defines the policies for routing the traffic of a use case, this includes performance objectives
-                    and traffic splitting between different versions of the model.
-                  properties:
-                    name:
-                      description: |-
-                        The name of the model as the users set in the "model" parameter in the requests.
-                        The name should be unique among the services that reference the same backend pool.
-                        This is the parameter that will be used to match the request with. In the future, we may
-                        allow to match on other request parameters. The other approach to support matching on
-                        on other request parameters is to use a different ModelName per HTTPFilter.
-                        Names can be reserved without implementing an actual model in the pool.
-                        This can be done by specifying a target model and setting the weight to zero,
-                        an error will be returned specifying that no valid target model is found.
-                      type: string
-                    objective:
-                      description: |-
-                        Optional
-                        LLM Services with an objective have higher priority than services without.
-                        IMPORTANT: By specifying an objective, this places the InferenceModel in a higher priority class than InferenceModels without a defined priority class.
-                        In the face of resource-scarcity. Higher priority requests will be preserved, and lower priority class requests will be rejected.
-                      properties:
-                        desiredAveragePerOutputTokenLatencyAtP95OverMultipleRequests:
-                          description: |-
-                            The AverageLatencyPerOutputToken is calculated as the e2e request latency divided by output token
-                            length. Note that this is different from what is known as TPOT (time per output token) which only
-                            takes decode time into account.
-                            The P95 is calculated over a fixed time window defined at the operator level.
-                          format: int64
-                          type: integer
-                      type: object
-                    targetModels:
-                      description: |-
-                        Optional.
-                        Allow multiple versions of a model for traffic splitting.
-                        If not specified, the target model name is defaulted to the modelName parameter.
-                        modelName is often in reference to a LoRA adapter.
-                      items:
-                        description: |-
-                          TargetModel represents a deployed model or a LoRA adapter. The
-                          Name field is expected to match the name of the LoRA adapter
-                          (or base model) as it is registered within the model server. Inference
-                          Gateway assumes that the model exists on the model server and is the
-                          responsibility of the user to validate a correct match. Should a model fail
-                          to exist at request time, the error is processed by the Instance Gateway,
-                          and then emitted on the appropriate InferenceModel object.
-                        properties:
-                          name:
-                            description: The name of the adapter as expected by the
-                              ModelServer.
-                            type: string
-                          weight:
-                            description: |-
-                              Weight is used to determine the percentage of traffic that should be
-                              sent to this target model when multiple versions of the model are specified.
-                            type: integer
-                        type: object
-                      type: array
-                  type: object
-                type: array
-              poolRef:
-                description: PoolRef are references to the backend pools that the
-                  InferenceModel registers to.
-                items:
-                  description: ObjectReference contains enough information to let
-                    you inspect or modify the referred object.
-                  properties:
-                    apiVersion:
-                      description: API version of the referent.
-                      type: string
-                    fieldPath:
-                      description: |-
-                        If referring to a piece of an object instead of an entire object, this string
-                        should contain a valid JSON/Go field access statement, such as desiredState.manifest.containers[2].
-                        For example, if the object reference is to a container within a pod, this would take on a value like:
-                        "spec.containers{name}" (where "name" refers to the name of the container that triggered
-                        the event) or if no container name is specified "spec.containers[2]" (container with
-                        index 2 in this pod). This syntax is chosen only to have some well-defined way of
-                        referencing a part of an object.
-                      type: string
-                    kind:
-                      description: |-
-                        Kind of the referent.
-                        More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
-                      type: string
-                    name:
-                      description: |-
-                        Name of the referent.
-                        More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
-                      type: string
-                    namespace:
-                      description: |-
-                        Namespace of the referent.
-                        More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/
-                      type: string
-                    resourceVersion:
-                      description: |-
-                        Specific resourceVersion to which this reference is made, if any.
-                        More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency
-                      type: string
-                    uid:
-                      description: |-
-                        UID of the referent.
-                        More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids
-                      type: string
-                  type: object
-                  x-kubernetes-map-type: atomic
-                type: array
-            type: object
-          status:
-            description: InferenceModelStatus defines the observed state of InferenceModel
-            properties:
-              conditions:
-                description: Conditions track the state of the InferencePool.
-                items:
-                  description: Condition contains details for one aspect of the current
-                    state of this API Resource.
-                  properties:
-                    lastTransitionTime:
-                      description: |-
-                        lastTransitionTime is the last time the condition transitioned from one status to another.
-                        This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable.
-                      format: date-time
-                      type: string
-                    message:
-                      description: |-
-                        message is a human readable message indicating details about the transition.
-                        This may be an empty string.
-                      maxLength: 32768
-                      type: string
-                    observedGeneration:
-                      description: |-
-                        observedGeneration represents the .metadata.generation that the condition was set based upon.
-                        For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date
-                        with respect to the current state of the instance.
-                      format: int64
-                      minimum: 0
-                      type: integer
-                    reason:
-                      description: |-
-                        reason contains a programmatic identifier indicating the reason for the condition's last transition.
-                        Producers of specific condition types may define expected values and meanings for this field,
-                        and whether the values are considered a guaranteed API.
-                        The value should be a CamelCase string.
-                        This field may not be empty.
-                      maxLength: 1024
-                      minLength: 1
-                      pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
-                      type: string
-                    status:
-                      description: status of the condition, one of True, False, Unknown.
-                      enum:
-                      - "True"
-                      - "False"
-                      - Unknown
-                      type: string
-                    type:
-                      description: type of condition in CamelCase or in foo.example.com/CamelCase.
-                      maxLength: 316
-                      pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
-                      type: string
-                  required:
-                  - lastTransitionTime
-                  - message
-                  - reason
-                  - status
-                  - type
-                  type: object
-                type: array
-            type: object
-        type: object
-    served: true
-    storage: true
-    subresources:
-      status: {}
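The deleted `llmservices.yaml` schema above (recreated under the `inferencemodels` name) describes weight-based traffic splitting across `targetModels`. Assuming the Go types mirror that schema — a `TargetModels` slice on the spec and a `TargetModel` with `Name` and `Weight` — a split between a base model and a canary adapter might be declared like this sketch (the adapter names are hypothetical):

```go
package main

import (
	"fmt"

	"inference.networking.x-k8s.io/llm-instance-gateway/api/v1alpha1"
)

func main() {
	// Weights are relative shares, not strict percentages: 90/10 here sends
	// roughly nine of every ten requests to the base model.
	model := v1alpha1.InferenceModel{
		Spec: v1alpha1.InferenceModelSpec{
			ModelName: "fake model1",
			PoolRef:   v1alpha1.PoolObjectReference{Name: "test-pool"},
			TargetModels: []v1alpha1.TargetModel{
				{Name: "base-model", Weight: 90},     // hypothetical base model
				{Name: "canary-adapter", Weight: 10}, // hypothetical LoRA adapter
			},
		},
	}
	fmt.Println(len(model.Spec.TargetModels))
}
```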
diff --git a/config/crd/kustomization.yaml b/config/crd/kustomization.yaml
index 9f3d8c42..60bac40b 100644
--- a/config/crd/kustomization.yaml
+++ b/config/crd/kustomization.yaml
@@ -2,7 +2,7 @@
 # since it depends on service name and namespace that are out of this kustomize package.
 # It should be run by config/default
 resources:
-- bases/inference.networking.x-k8s.io_llmserverpools.yaml
+- bases/inference.networking.x-k8s.io_inferencepools.yaml
 - bases/inference.networking.x-k8s.io_inferencemodels.yaml
 # +kubebuilder:scaffold:crdkustomizeresource
 
@@ -13,7 +13,7 @@ patches:
 
 # [CERTMANAGER] To enable cert-manager, uncomment all the sections with [CERTMANAGER] prefix.
 # patches here are for enabling the CA injection for each CRD
-#- path: patches/cainjection_in_llmserverpools.yaml
+#- path: patches/cainjection_in_inferencepools.yaml
 #- path: patches/cainjection_in_inferencemodels.yaml
 # +kubebuilder:scaffold:crdkustomizecainjectionpatch
 
diff --git a/config/rbac/llmservice_editor_role.yaml b/config/rbac/inferencemodel_editor_role.yaml
similarity index 100%
rename from config/rbac/llmservice_editor_role.yaml
rename to config/rbac/inferencemodel_editor_role.yaml
diff --git a/config/rbac/llmservice_viewer_role.yaml b/config/rbac/inferencemodel_viewer_role.yaml
similarity index 100%
rename from config/rbac/llmservice_viewer_role.yaml
rename to config/rbac/inferencemodel_viewer_role.yaml
diff --git a/config/rbac/llmserverpool_editor_role.yaml b/config/rbac/inferencepool_editor_role.yaml
similarity index 74%
rename from config/rbac/llmserverpool_editor_role.yaml
rename to config/rbac/inferencepool_editor_role.yaml
index 54139d22..cc1f7c35 100644
--- a/config/rbac/llmserverpool_editor_role.yaml
+++ b/config/rbac/inferencepool_editor_role.yaml
@@ -1,16 +1,16 @@
-# permissions for end users to edit llmserverpools.
+# permissions for end users to edit inferencepools.
 apiVersion: rbac.authorization.k8s.io/v1
 kind: ClusterRole
 metadata:
   labels:
     app.kubernetes.io/name: api
     app.kubernetes.io/managed-by: kustomize
-  name: llmserverpool-editor-role
+  name: inferencepool-editor-role
 rules:
 - apiGroups:
   - inference.networking.x-k8s.io
   resources:
-  - llmserverpools
+  - inferencepools
   verbs:
   - create
   - delete
@@ -22,6 +22,6 @@ rules:
 - apiGroups:
   - inference.networking.x-k8s.io
   resources:
-  - llmserverpools/status
+  - inferencepools/status
   verbs:
   - get
diff --git a/config/rbac/llmserverpool_viewer_role.yaml b/config/rbac/inferencepool_viewer_role.yaml
similarity index 71%
rename from config/rbac/llmserverpool_viewer_role.yaml
rename to config/rbac/inferencepool_viewer_role.yaml
index c3355ba2..828e0022 100644
--- a/config/rbac/llmserverpool_viewer_role.yaml
+++ b/config/rbac/inferencepool_viewer_role.yaml
@@ -1,16 +1,16 @@
-# permissions for end users to view llmserverpools.
+# permissions for end users to view inferencepools.
 apiVersion: rbac.authorization.k8s.io/v1
 kind: ClusterRole
 metadata:
   labels:
     app.kubernetes.io/name: api
     app.kubernetes.io/managed-by: kustomize
-  name: llmserverpool-viewer-role
+  name: inferencepool-viewer-role
 rules:
 - apiGroups:
   - inference.networking.x-k8s.io
   resources:
-  - llmserverpools
+  - inferencepools
   verbs:
   - get
   - list
@@ -18,6 +18,6 @@ rules:
 - apiGroups:
   - inference.networking.x-k8s.io
   resources:
-  - llmserverpools/status
+  - inferencepools/status
   verbs:
   - get
diff --git a/config/rbac/kustomization.yaml b/config/rbac/kustomization.yaml
index 45cadfd7..c3a52137 100644
--- a/config/rbac/kustomization.yaml
+++ b/config/rbac/kustomization.yaml
@@ -24,6 +24,6 @@ resources:
 # if you do not want those helpers be installed with your Project.
 - inferencemodel_editor_role.yaml
 - inferencemodel_viewer_role.yaml
-- llmserverpool_editor_role.yaml
-- llmserverpool_viewer_role.yaml
+- inferencepool_editor_role.yaml
+- inferencepool_viewer_role.yaml
 
diff --git a/config/samples/gateway_v1alpha1_llmservice.yaml b/config/samples/gateway_v1alpha1_inferencemodel.yaml
similarity index 100%
rename from config/samples/gateway_v1alpha1_llmservice.yaml
rename to config/samples/gateway_v1alpha1_inferencemodel.yaml
diff --git a/config/samples/gateway_v1alpha1_llmserverpool.yaml b/config/samples/gateway_v1alpha1_inferencepool.yaml
similarity index 89%
rename from config/samples/gateway_v1alpha1_llmserverpool.yaml
rename to config/samples/gateway_v1alpha1_inferencepool.yaml
index b734e2bd..54046489 100644
--- a/config/samples/gateway_v1alpha1_llmserverpool.yaml
+++ b/config/samples/gateway_v1alpha1_inferencepool.yaml
@@ -4,7 +4,7 @@ metadata:
   labels:
     app.kubernetes.io/name: api
     app.kubernetes.io/managed-by: kustomize
-  name: llmserverpool-sample
+  name: inferencepool-sample
 spec:
   serviceRefs:
   - gemini-jetstream-tpu-v5e-service
diff --git a/config/samples/kustomization.yaml b/config/samples/kustomization.yaml
index e6def51a..e4b9f2e8 100644
--- a/config/samples/kustomization.yaml
+++ b/config/samples/kustomization.yaml
@@ -1,5 +1,5 @@
 ## Append samples of your project ##
 resources:
-- gateway_v1alpha1_llmserverpool.yaml
+- gateway_v1alpha1_inferencepool.yaml
 - gateway_v1alpha1_inferencemodel.yaml
 # +kubebuilder:scaffold:manifestskustomizesamples
diff --git a/pkg/ext-proc/backend/datastore.go b/pkg/ext-proc/backend/datastore.go
index 2291b9ec..b6d46f43 100644
--- a/pkg/ext-proc/backend/datastore.go
+++ b/pkg/ext-proc/backend/datastore.go
@@ -76,7 +76,7 @@ func (s *K8sDatastore) FetchModelData(modelName string) (returnModel *v1alpha1.I
 }
 
 func RandomWeightedDraw(model *v1alpha1.InferenceModel, seed int64) string {
-	weights := 0
+	var weights int32
 
 	source := rand.NewSource(rand.Int63())
 	if seed > 0 {
@@ -87,7 +87,7 @@ func RandomWeightedDraw(model *v1alpha1.InferenceModel, seed int64) string {
 		weights += model.Weight
 	}
 	klog.V(3).Infof("Weights for Model(%v) total to: %v", model.Name, weights)
-	randomVal := r.Intn(weights)
+	randomVal := r.Int31n(weights)
 	for _, model := range model.Spec.TargetModels {
 		if randomVal < model.Weight {
 			return model.Name
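The `datastore.go` hunks switch the accumulator and the draw to `int32` so they line up with the `Weight` field's type, replacing `r.Intn` with `r.Int31n`. A self-contained sketch of the same cumulative-draw technique, using plain local types rather than the package's own:

```go
package main

import (
	"fmt"
	"math/rand"
)

type targetModel struct {
	Name   string
	Weight int32
}

// weightedDraw sums the int32 weights, draws one value in [0, total) with
// Int31n, and returns the first model whose weight bucket contains the draw.
// Int31n panics on a non-positive total, so weights must sum above zero.
func weightedDraw(models []targetModel, seed int64) string {
	var total int32
	for _, m := range models {
		total += m.Weight
	}
	r := rand.New(rand.NewSource(seed))
	v := r.Int31n(total)
	for _, m := range models {
		if v < m.Weight {
			return m.Name
		}
		v -= m.Weight // move to the next bucket
	}
	return ""
}

func main() {
	models := []targetModel{{"v1", 60}, {"v1.1", 40}}
	fmt.Println(weightedDraw(models, 420))
}
```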
"v1.1", }, } - var seedVal int64 - seedVal = 420 + var seedVal int64 = 420 for _, test := range tests { t.Run(test.name, func(t *testing.T) { for range 10000 { diff --git a/pkg/ext-proc/backend/endpointslice_reconciler.go b/pkg/ext-proc/backend/endpointslice_reconciler.go index 3beb0105..f99ff61f 100644 --- a/pkg/ext-proc/backend/endpointslice_reconciler.go +++ b/pkg/ext-proc/backend/endpointslice_reconciler.go @@ -47,13 +47,18 @@ func (c *EndpointSliceReconciler) Reconcile(ctx context.Context, req ctrl.Reques return ctrl.Result{}, nil } -func (c *EndpointSliceReconciler) updateDatastore(slice *discoveryv1.EndpointSlice, inferencePool *v1alpha1.InferencePool) { +func (c *EndpointSliceReconciler) updateDatastore( + slice *discoveryv1.EndpointSlice, + inferencePool *v1alpha1.InferencePool) { podMap := make(map[Pod]bool) for _, endpoint := range slice.Endpoints { klog.V(4).Infof("Zone: %v \n endpoint: %+v \n", c.Zone, endpoint) if c.validPod(endpoint) { - pod := Pod{Name: *&endpoint.TargetRef.Name, Address: endpoint.Addresses[0] + ":" + fmt.Sprint(inferencePool.Spec.TargetPort)} + pod := Pod{ + Name: endpoint.TargetRef.Name, + Address: endpoint.Addresses[0] + ":" + fmt.Sprint(inferencePool.Spec.TargetPortNumber), + } podMap[pod] = true c.Datastore.pods.Store(pod, true) } @@ -93,12 +98,14 @@ func (c *EndpointSliceReconciler) SetupWithManager(mgr ctrl.Manager) error { } return ctrl.NewControllerManagedBy(mgr). - For(&discoveryv1.EndpointSlice{}, builder.WithPredicates(predicate.NewPredicateFuncs(inferencePoolAvailable), predicate.NewPredicateFuncs(ownsEndPointSlice))). + For(&discoveryv1.EndpointSlice{}, + builder.WithPredicates(predicate.NewPredicateFuncs(inferencePoolAvailable), + predicate.NewPredicateFuncs(ownsEndPointSlice))). Complete(c) } func (c *EndpointSliceReconciler) validPod(endpoint discoveryv1.Endpoint) bool { validZone := c.Zone == "" || c.Zone != "" && *endpoint.Zone == c.Zone - return validZone && *endpoint.Conditions.Ready == true + return validZone && *endpoint.Conditions.Ready } diff --git a/pkg/ext-proc/backend/endpointslice_reconcilier_test.go b/pkg/ext-proc/backend/endpointslice_reconcilier_test.go index 0199905f..16bcd8c2 100644 --- a/pkg/ext-proc/backend/endpointslice_reconcilier_test.go +++ b/pkg/ext-proc/backend/endpointslice_reconcilier_test.go @@ -28,7 +28,7 @@ func TestUpdateDatastore_EndpointSliceReconciler(t *testing.T) { pods: populateMap(basePod1, basePod2), inferencePool: &v1alpha1.InferencePool{ Spec: v1alpha1.InferencePoolSpec{ - TargetPort: int32(8000), + TargetPortNumber: int32(8000), }, }, }, @@ -74,7 +74,7 @@ func TestUpdateDatastore_EndpointSliceReconciler(t *testing.T) { pods: populateMap(basePod1, basePod2), inferencePool: &v1alpha1.InferencePool{ Spec: v1alpha1.InferencePoolSpec{ - TargetPort: int32(8000), + TargetPortNumber: int32(8000), }, }, }, @@ -120,7 +120,7 @@ func TestUpdateDatastore_EndpointSliceReconciler(t *testing.T) { pods: populateMap(basePod1, basePod2), inferencePool: &v1alpha1.InferencePool{ Spec: v1alpha1.InferencePoolSpec{ - TargetPort: int32(8000), + TargetPortNumber: int32(8000), }, }, }, @@ -167,7 +167,9 @@ func TestUpdateDatastore_EndpointSliceReconciler(t *testing.T) { endpointSliceReconciler.updateDatastore(test.incomingSlice, test.datastore.inferencePool) if mapsEqual(endpointSliceReconciler.Datastore.pods, test.wantPods) { - t.Errorf("Unexpected output pod mismatch. \n Got %v \n Want: %v \n", endpointSliceReconciler.Datastore.pods, test.wantPods) + t.Errorf("Unexpected output pod mismatch. 
diff --git a/pkg/ext-proc/backend/endpointslice_reconcilier_test.go b/pkg/ext-proc/backend/endpointslice_reconcilier_test.go
index 0199905f..16bcd8c2 100644
--- a/pkg/ext-proc/backend/endpointslice_reconcilier_test.go
+++ b/pkg/ext-proc/backend/endpointslice_reconcilier_test.go
@@ -28,7 +28,7 @@ func TestUpdateDatastore_EndpointSliceReconciler(t *testing.T) {
 			pods: populateMap(basePod1, basePod2),
 			inferencePool: &v1alpha1.InferencePool{
 				Spec: v1alpha1.InferencePoolSpec{
-					TargetPort: int32(8000),
+					TargetPortNumber: int32(8000),
 				},
 			},
 		},
@@ -74,7 +74,7 @@
 			pods: populateMap(basePod1, basePod2),
 			inferencePool: &v1alpha1.InferencePool{
 				Spec: v1alpha1.InferencePoolSpec{
-					TargetPort: int32(8000),
+					TargetPortNumber: int32(8000),
 				},
 			},
 		},
@@ -120,7 +120,7 @@
 			pods: populateMap(basePod1, basePod2),
 			inferencePool: &v1alpha1.InferencePool{
 				Spec: v1alpha1.InferencePoolSpec{
-					TargetPort: int32(8000),
+					TargetPortNumber: int32(8000),
 				},
 			},
 		},
@@ -167,7 +167,9 @@
 			endpointSliceReconciler.updateDatastore(test.incomingSlice, test.datastore.inferencePool)
 
 			if mapsEqual(endpointSliceReconciler.Datastore.pods, test.wantPods) {
-				t.Errorf("Unexpected output pod mismatch. \n Got %v \n Want: %v \n", endpointSliceReconciler.Datastore.pods, test.wantPods)
+				t.Errorf("Unexpected output pod mismatch. \n Got %v \n Want: %v \n",
+					endpointSliceReconciler.Datastore.pods,
+					test.wantPods)
 			}
 		})
 	}
diff --git a/pkg/ext-proc/backend/inferencemodel_reconciler_test.go b/pkg/ext-proc/backend/inferencemodel_reconciler_test.go
index a616c899..9f1ef6ed 100644
--- a/pkg/ext-proc/backend/inferencemodel_reconciler_test.go
+++ b/pkg/ext-proc/backend/inferencemodel_reconciler_test.go
@@ -12,7 +12,7 @@ var (
 	service1 = &v1alpha1.InferenceModel{
 		Spec: v1alpha1.InferenceModelSpec{
 			ModelName: "fake model1",
-			PoolRef:   &v1alpha1.PoolObjectReference{Name: "test-pool"},
+			PoolRef:   v1alpha1.PoolObjectReference{Name: "test-pool"},
 		},
 		ObjectMeta: metav1.ObjectMeta{
 			Name: "test-service",
@@ -21,7 +21,7 @@
 	service1Modified = &v1alpha1.InferenceModel{
 		Spec: v1alpha1.InferenceModelSpec{
 			ModelName: "fake model1",
-			PoolRef:   &v1alpha1.PoolObjectReference{Name: "test-poolio"},
+			PoolRef:   v1alpha1.PoolObjectReference{Name: "test-poolio"},
 		},
 		ObjectMeta: metav1.ObjectMeta{
 			Name: "test-service",
@@ -30,7 +30,7 @@
 	service2 = &v1alpha1.InferenceModel{
 		Spec: v1alpha1.InferenceModelSpec{
 			ModelName: "fake model",
-			PoolRef:   &v1alpha1.PoolObjectReference{Name: "test-pool"},
+			PoolRef:   v1alpha1.PoolObjectReference{Name: "test-pool"},
 		},
 		ObjectMeta: metav1.ObjectMeta{
 			Name: "test-service-2",
@@ -50,7 +50,7 @@ func TestUpdateDatastore_InferenceModelReconciler(t *testing.T) {
 			datastore: &K8sDatastore{
 				inferencePool: &v1alpha1.InferencePool{
 					Spec: v1alpha1.InferencePoolSpec{
-						Selector: map[v1alpha1.LabelString]v1alpha1.LabelString{"app": "vllm"},
+						Selector: map[v1alpha1.LabelKey]v1alpha1.LabelValue{"app": "vllm"},
 					},
 					ObjectMeta: metav1.ObjectMeta{
 						Name: "test-pool",
@@ -67,7 +67,7 @@
 			datastore: &K8sDatastore{
 				inferencePool: &v1alpha1.InferencePool{
 					Spec: v1alpha1.InferencePoolSpec{
-						Selector: map[v1alpha1.LabelString]v1alpha1.LabelString{"app": "vllm"},
+						Selector: map[v1alpha1.LabelKey]v1alpha1.LabelValue{"app": "vllm"},
 					},
 					ObjectMeta: metav1.ObjectMeta{
 						Name: "test-pool",
@@ -84,7 +84,7 @@
 			datastore: &K8sDatastore{
 				inferencePool: &v1alpha1.InferencePool{
 					Spec: v1alpha1.InferencePoolSpec{
-						Selector: map[v1alpha1.LabelString]v1alpha1.LabelString{"app": "vllm"},
+						Selector: map[v1alpha1.LabelKey]v1alpha1.LabelValue{"app": "vllm"},
 					},
 					ObjectMeta: metav1.ObjectMeta{
 						Name: "test-pool",
@@ -96,7 +96,7 @@
 			incomingService: &v1alpha1.InferenceModel{
 				Spec: v1alpha1.InferenceModelSpec{
 					ModelName: "fake model",
-					PoolRef:   &v1alpha1.PoolObjectReference{Name: "test-poolio"},
+					PoolRef:   v1alpha1.PoolObjectReference{Name: "test-poolio"},
 				},
 				ObjectMeta: metav1.ObjectMeta{
 					Name: "unrelated-service",
@@ -109,7 +109,7 @@
 			datastore: &K8sDatastore{
 				inferencePool: &v1alpha1.InferencePool{
 					Spec: v1alpha1.InferencePoolSpec{
-						Selector: map[v1alpha1.LabelString]v1alpha1.LabelString{"app": "vllm"},
+						Selector: map[v1alpha1.LabelKey]v1alpha1.LabelValue{"app": "vllm"},
 					},
 					ObjectMeta: metav1.ObjectMeta{
 						Name: "test-pool",
@@ -124,7 +124,10 @@
 	}
 	for _, test := range tests {
 		t.Run(test.name, func(t *testing.T) {
-			InferenceModelReconciler := &InferenceModelReconciler{Datastore: test.datastore, ServerPoolName: test.datastore.inferencePool.Name}
+			InferenceModelReconciler := &InferenceModelReconciler{
+				Datastore:      test.datastore,
+				ServerPoolName: test.datastore.inferencePool.Name,
+			}
 			InferenceModelReconciler.updateDatastore(test.incomingService)
 
 			if ok := mapsEqual(InferenceModelReconciler.Datastore.InferenceModels, test.wantInferenceModels); !ok {
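The test updates above also reflect the selector map's move from a single `LabelString` type to distinct `LabelKey` and `LabelValue` types, which lets the compiler catch swapped keys and values and lets each side carry its own validation markers. A sketch with stand-in local types (the conversion helper is hypothetical, shown only to illustrate crossing back to a plain string map at the boundary):

```go
package main

import "fmt"

// LabelKey and LabelValue mirror the split seen in the hunks: distinct types
// mean a map that swaps keys and values fails to compile.
type LabelKey string
type LabelValue string

// toServiceSelector converts the typed map to the plain map[string]string
// shape that pod-label comparisons ultimately need.
func toServiceSelector(in map[LabelKey]LabelValue) map[string]string {
	out := make(map[string]string, len(in))
	for k, v := range in {
		out[string(k)] = string(v)
	}
	return out
}

func main() {
	selector := map[LabelKey]LabelValue{"app": "vllm"}
	fmt.Println(toServiceSelector(selector)) // map[app:vllm]
}
```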
diff --git a/pkg/ext-proc/backend/inferencepool_reconciler.go b/pkg/ext-proc/backend/inferencepool_reconciler.go
index b53912ff..662b6f41 100644
--- a/pkg/ext-proc/backend/inferencepool_reconciler.go
+++ b/pkg/ext-proc/backend/inferencepool_reconciler.go
@@ -12,10 +12,6 @@ import (
 	ctrl "sigs.k8s.io/controller-runtime"
 )
 
-const (
-	reconcilerNamePrefix = "instance-gateway-"
-)
-
 // InferencePoolReconciler utilizes the controller runtime to reconcile Instance Gateway resources
 // This implementation is just used for reading & maintaining data sync. The Gateway implementation
 // will have the proper controller that will create/manage objects on behalf of the server pool.
@@ -47,7 +43,8 @@
 }
 
 func (c *InferencePoolReconciler) updateDatastore(serverPool *v1alpha1.InferencePool) {
-	if c.Datastore.inferencePool == nil || serverPool.ObjectMeta.ResourceVersion != c.Datastore.inferencePool.ObjectMeta.ResourceVersion {
+	if c.Datastore.inferencePool == nil ||
+		serverPool.ObjectMeta.ResourceVersion != c.Datastore.inferencePool.ObjectMeta.ResourceVersion {
 		c.Datastore.setInferencePool(serverPool)
 	}
 }
diff --git a/pkg/ext-proc/backend/vllm/metrics.go b/pkg/ext-proc/backend/vllm/metrics.go
index f074741e..27a29d9a 100644
--- a/pkg/ext-proc/backend/vllm/metrics.go
+++ b/pkg/ext-proc/backend/vllm/metrics.go
@@ -35,7 +35,11 @@ type PodMetricsClientImpl struct {
 }
 
 // FetchMetrics fetches metrics from a given pod.
-func (p *PodMetricsClientImpl) FetchMetrics(ctx context.Context, pod backend.Pod, existing *backend.PodMetrics) (*backend.PodMetrics, error) {
+func (p *PodMetricsClientImpl) FetchMetrics(
+	ctx context.Context,
+	pod backend.Pod,
+	existing *backend.PodMetrics,
+) (*backend.PodMetrics, error) {
 	// Currently the metrics endpoint is hard-coded, which works with vLLM.
 	// TODO(https://github.com/kubernetes-sigs/llm-instance-gateway/issues/16): Consume this from InferencePool config.
 	url := fmt.Sprintf("http://%s/metrics", pod.Address)
@@ -66,7 +70,10 @@ func (p *PodMetricsClientImpl) FetchMetrics(ctx context.Context, pod backend.Pod
 // promToPodMetrics updates internal pod metrics with scraped prometheus metrics.
 // A combined error is returned if errors occur in one or more metric processing.
 // it returns a new PodMetrics pointer which can be used to atomically update the pod metrics map.
-func promToPodMetrics(metricFamilies map[string]*dto.MetricFamily, existing *backend.PodMetrics) (*backend.PodMetrics, error) {
+func promToPodMetrics(
+	metricFamilies map[string]*dto.MetricFamily,
+	existing *backend.PodMetrics,
+) (*backend.PodMetrics, error) {
 	var errs error
 	updated := existing.Clone()
 	runningQueueSize, _, err := getLatestMetric(metricFamilies, RunningQueueSizeMetricName)
diff --git a/pkg/ext-proc/backend/vllm/metrics_test.go b/pkg/ext-proc/backend/vllm/metrics_test.go
index f6ac403f..6121fa11 100644
--- a/pkg/ext-proc/backend/vllm/metrics_test.go
+++ b/pkg/ext-proc/backend/vllm/metrics_test.go
@@ -178,7 +178,7 @@ func TestPromToPodMetrics(t *testing.T) {
 				Value: proto.String("lora3,lora4"),
 			},
 			{
-				Name:  proto.String(LoraRequestInfoRunningAdaptersMetricName),
+				Name:  proto.String(LoraRequestInfoMaxAdaptersMetricName),
 				Value: proto.String("2a"),
 			},
 		},
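`FetchMetrics` and `promToPodMetrics` consume a `map[string]*dto.MetricFamily`. One plausible way to produce that map from a scraped `/metrics` body is the Prometheus text parser; the metric name below is a stand-in for whatever the model server actually exports:

```go
package main

import (
	"fmt"
	"strings"

	dto "github.com/prometheus/client_model/go"
	"github.com/prometheus/common/expfmt"
)

func main() {
	// A tiny Prometheus text-format payload standing in for a pod's
	// /metrics response.
	body := `# TYPE vllm:num_requests_running gauge
vllm:num_requests_running 4
`
	var parser expfmt.TextParser
	// TextToMetricFamilies yields map[string]*dto.MetricFamily, the shape
	// promToPodMetrics takes as input.
	families, err := parser.TextToMetricFamilies(strings.NewReader(body))
	if err != nil {
		panic(err)
	}
	mf := families["vllm:num_requests_running"]
	value := mf.GetMetric()[0].GetGauge().GetValue()
	fmt.Println("running queue size:", value)
}
```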