diff --git a/api/v1alpha1/groupversion_info.go b/api/v1alpha1/groupversion_info.go
index 1b8782a0b..7ff9c3990 100644
--- a/api/v1alpha1/groupversion_info.go
+++ b/api/v1alpha1/groupversion_info.go
@@ -16,7 +16,7 @@ limitations under the License.
 
 // Package v1alpha1 contains API Schema definitions for the gateway v1alpha1 API group
 // +kubebuilder:object:generate=true
-// +groupName=gateway.inference.networking.x-k8s.io
+// +groupName=inference.networking.x-k8s.io
 package v1alpha1
 
 import (
diff --git a/config/crd/bases/gateway.inference.k8s.io_llmserverpools.yaml b/config/crd/bases/gateway.inference.k8s.io_llmserverpools.yaml
deleted file mode 100644
index 56bb3ff02..000000000
--- a/config/crd/bases/gateway.inference.k8s.io_llmserverpools.yaml
+++ /dev/null
@@ -1,158 +0,0 @@
----
-apiVersion: apiextensions.k8s.io/v1
-kind: CustomResourceDefinition
-metadata:
-  annotations:
-    controller-gen.kubebuilder.io/version: v0.16.1
-  name: llmserverpools.gateway.inference.networking.x-k8s.io
-spec:
-  group: gateway.inference.networking.x-k8s.io
-  names:
-    kind: LLMServerPool
-    listKind: LLMServerPoolList
-    plural: llmserverpools
-    singular: llmserverpool
-  scope: Namespaced
-  versions:
-  - name: v1alpha1
-    schema:
-      openAPIV3Schema:
-        description: LLMServerPool is the Schema for the llmserverpools API
-        properties:
-          apiVersion:
-            description: |-
-              APIVersion defines the versioned schema of this representation of an object.
-              Servers should convert recognized schemas to the latest internal value, and
-              may reject unrecognized values.
-              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
-            type: string
-          kind:
-            description: |-
-              Kind is a string value representing the REST resource this object represents.
-              Servers may infer this from the endpoint the client submits requests to.
-              Cannot be updated.
-              In CamelCase.
-              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
-            type: string
-          metadata:
-            type: object
-          spec:
-            description: LLMServerPoolSpec defines the desired state of LLMServerPool
-            properties:
-              modelServerSelector:
-                description: |-
-                  ModelServerSelector uses label selection to watch model server pods
-                  that should be included in the LLMServerPool. ModelServers should not
-                  be with any other Service or LLMServerPool, that behavior is not supported
-                  and will result in sub-optimal utilization.
-                properties:
-                  matchExpressions:
-                    description: matchExpressions is a list of label selector requirements.
-                      The requirements are ANDed.
-                    items:
-                      description: |-
-                        A label selector requirement is a selector that contains values, a key, and an operator that
-                        relates the key and values.
-                      properties:
-                        key:
-                          description: key is the label key that the selector applies
-                            to.
-                          type: string
-                        operator:
-                          description: |-
-                            operator represents a key's relationship to a set of values.
-                            Valid operators are In, NotIn, Exists and DoesNotExist.
-                          type: string
-                        values:
-                          description: |-
-                            values is an array of string values. If the operator is In or NotIn,
-                            the values array must be non-empty. If the operator is Exists or DoesNotExist,
-                            the values array must be empty. This array is replaced during a strategic
-                            merge patch.
-                          items:
-                            type: string
-                          type: array
-                          x-kubernetes-list-type: atomic
-                      required:
-                      - key
-                      - operator
-                      type: object
-                    type: array
-                    x-kubernetes-list-type: atomic
-                  matchLabels:
-                    additionalProperties:
-                      type: string
-                    description: |-
-                      matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels
-                      map is equivalent to an element of matchExpressions, whose key field is "key", the
-                      operator is "In", and the values array contains only "value". The requirements are ANDed.
-                    type: object
-                type: object
-                x-kubernetes-map-type: atomic
-            type: object
-          status:
-            description: LLMServerPoolStatus defines the observed state of LLMServerPool
-            properties:
-              conditions:
-                description: Conditions track the state of the LLMServerPool.
-                items:
-                  description: Condition contains details for one aspect of the current
-                    state of this API Resource.
-                  properties:
-                    lastTransitionTime:
-                      description: |-
-                        lastTransitionTime is the last time the condition transitioned from one status to another.
-                        This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable.
-                      format: date-time
-                      type: string
-                    message:
-                      description: |-
-                        message is a human readable message indicating details about the transition.
-                        This may be an empty string.
-                      maxLength: 32768
-                      type: string
-                    observedGeneration:
-                      description: |-
-                        observedGeneration represents the .metadata.generation that the condition was set based upon.
-                        For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date
-                        with respect to the current state of the instance.
-                      format: int64
-                      minimum: 0
-                      type: integer
-                    reason:
-                      description: |-
-                        reason contains a programmatic identifier indicating the reason for the condition's last transition.
-                        Producers of specific condition types may define expected values and meanings for this field,
-                        and whether the values are considered a guaranteed API.
-                        The value should be a CamelCase string.
-                        This field may not be empty.
-                      maxLength: 1024
-                      minLength: 1
-                      pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
-                      type: string
-                    status:
-                      description: status of the condition, one of True, False, Unknown.
-                      enum:
-                      - "True"
-                      - "False"
-                      - Unknown
-                      type: string
-                    type:
-                      description: type of condition in CamelCase or in foo.example.com/CamelCase.
-                      maxLength: 316
-                      pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
-                      type: string
-                  required:
-                  - lastTransitionTime
-                  - message
-                  - reason
-                  - status
-                  - type
-                  type: object
-                type: array
-            type: object
-        type: object
-    served: true
-    storage: true
-    subresources:
-      status: {}
diff --git a/config/crd/bases/gateway.inference.networking.x-k8s.io_llmservices.yaml b/config/crd/bases/gateway.inference.networking.x-k8s.io_llmservices.yaml
deleted file mode 100644
index b9a1a80ee..000000000
--- a/config/crd/bases/gateway.inference.networking.x-k8s.io_llmservices.yaml
+++ /dev/null
@@ -1,239 +0,0 @@
----
-apiVersion: apiextensions.k8s.io/v1
-kind: CustomResourceDefinition
-metadata:
-  annotations:
-    controller-gen.kubebuilder.io/version: v0.16.1
-  name: llmservices.gateway.inference.networking.x-k8s.io
-spec:
-  group: gateway.inference.networking.x-k8s.io
-  names:
-    kind: LLMService
-    listKind: LLMServiceList
-    plural: llmservices
-    singular: llmservice
-  scope: Namespaced
-  versions:
-  - name: v1alpha1
-    schema:
-      openAPIV3Schema:
-        description: LLMService is the Schema for the llmservices API
-        properties:
-          apiVersion:
-            description: |-
-              APIVersion defines the versioned schema of this representation of an object.
-              Servers should convert recognized schemas to the latest internal value, and
-              may reject unrecognized values.
-              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
-            type: string
-          kind:
-            description: |-
-              Kind is a string value representing the REST resource this object represents.
-              Servers may infer this from the endpoint the client submits requests to.
-              Cannot be updated.
-              In CamelCase.
-              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
-            type: string
-          metadata:
-            type: object
-          spec:
-            description: |-
-              LLMService represents a set of LLM services that are multiplexed onto one
-              or more LLMServerPools. This resource is managed by the "LLM Service Owner"
-              persona. The Service Owner persona is: a team that trains, verifies, and
-              leverages a large language model from a model frontend, drives the lifecycle
-              and rollout of new versions of those models, and defines the specific
-              performance and latency goals for the model. These services are
-              expected to operate within a LLMServerPool sharing compute capacity with other
-              LLMServices, defined by the Inference Platform Admin. We allow a user who
-              has multiple LLMServices across multiple pools (with the same config) to
-              specify the configuration exactly once, and deploy to many pools
-              simultaneously. Enabling a simpler config and single source of truth
-              for a given user. LLMService names are unique for a given LLMServerPool,
-              if the name is reused, an error will be shown on the status of a
-              LLMService that attempted to reuse. The oldest LLMService, based on
-              creation timestamp, will be selected to remain valid. In the event of a race
-              condition, one will be selected at random.
-            properties:
-              models:
-                description: |-
-                  Model defines the distinct services.
-                  Model can be in 2 priority classes, Critical and Noncritical.
-                  Priority class is implicitly set to Critical by specifying an Objective.
-                  Otherwise the Model is considered Noncritical.
-                items:
-                  description: |-
-                    Model defines the policies for routing the traffic of a use case, this includes performance objectives
-                    and traffic splitting between different versions of the model.
-                  properties:
-                    name:
-                      description: |-
-                        The name of the model as the users set in the "model" parameter in the requests.
-                        The name should be unique among the services that reference the same backend pool.
-                        This is the parameter that will be used to match the request with. In the future, we may
-                        allow to match on other request parameters. The other approach to support matching on
-                        on other request parameters is to use a different ModelName per HTTPFilter.
-                        Names can be reserved without implementing an actual model in the pool.
-                        This can be done by specifying a target model and setting the weight to zero,
-                        an error will be returned specifying that no valid target model is found.
-                      type: string
-                    objective:
-                      description: |-
-                        Optional
-                        LLM Services with an objective have higher priority than services without.
-                        IMPORTANT: By specifying an objective, this places the LLMService in a higher priority class than LLMServices without a defined priority class.
-                        In the face of resource-scarcity. Higher priority requests will be preserved, and lower priority class requests will be rejected.
-                      properties:
-                        desiredAveragePerOutputTokenLatencyAtP95OverMultipleRequests:
-                          description: |-
-                            The AverageLatencyPerOutputToken is calculated as the e2e request latency divided by output token
-                            length. Note that this is different from what is known as TPOT (time per output token) which only
-                            takes decode time into account.
-                            The P95 is calculated over a fixed time window defined at the operator level.
-                          format: int64
-                          type: integer
-                      type: object
-                    targetModels:
-                      description: |-
-                        Optional.
-                        Allow multiple versions of a model for traffic splitting.
-                        If not specified, the target model name is defaulted to the modelName parameter.
-                        modelName is often in reference to a LoRA adapter.
-                      items:
-                        description: |-
-                          TargetModel represents a deployed model or a LoRA adapter. The
-                          Name field is expected to match the name of the LoRA adapter
-                          (or base model) as it is registered within the model server. Inference
-                          Gateway assumes that the model exists on the model server and is the
-                          responsibility of the user to validate a correct match. Should a model fail
-                          to exist at request time, the error is processed by the Instance Gateway,
-                          and then emitted on the appropriate LLMService object.
-                        properties:
-                          name:
-                            description: The name of the adapter as expected by the
-                              ModelServer.
-                            type: string
-                          weight:
-                            description: |-
-                              Weight is used to determine the percentage of traffic that should be
-                              sent to this target model when multiple versions of the model are specified.
-                            type: integer
-                        type: object
-                      type: array
-                  type: object
-                type: array
-              poolRef:
-                description: PoolRef are references to the backend pools that the
-                  LLMService registers to.
-                items:
-                  description: ObjectReference contains enough information to let
-                    you inspect or modify the referred object.
-                  properties:
-                    apiVersion:
-                      description: API version of the referent.
-                      type: string
-                    fieldPath:
-                      description: |-
-                        If referring to a piece of an object instead of an entire object, this string
-                        should contain a valid JSON/Go field access statement, such as desiredState.manifest.containers[2].
-                        For example, if the object reference is to a container within a pod, this would take on a value like:
-                        "spec.containers{name}" (where "name" refers to the name of the container that triggered
-                        the event) or if no container name is specified "spec.containers[2]" (container with
-                        index 2 in this pod). This syntax is chosen only to have some well-defined way of
-                        referencing a part of an object.
-                      type: string
-                    kind:
-                      description: |-
-                        Kind of the referent.
-                        More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
-                      type: string
-                    name:
-                      description: |-
-                        Name of the referent.
-                        More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
-                      type: string
-                    namespace:
-                      description: |-
-                        Namespace of the referent.
-                        More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/
-                      type: string
-                    resourceVersion:
-                      description: |-
-                        Specific resourceVersion to which this reference is made, if any.
-                        More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency
-                      type: string
-                    uid:
-                      description: |-
-                        UID of the referent.
-                        More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids
-                      type: string
-                  type: object
-                  x-kubernetes-map-type: atomic
-                type: array
-            type: object
-          status:
-            description: LLMServiceStatus defines the observed state of LLMService
-            properties:
-              conditions:
-                description: Conditions track the state of the LLMServerPool.
-                items:
-                  description: Condition contains details for one aspect of the current
-                    state of this API Resource.
-                  properties:
-                    lastTransitionTime:
-                      description: |-
-                        lastTransitionTime is the last time the condition transitioned from one status to another.
-                        This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable.
-                      format: date-time
-                      type: string
-                    message:
-                      description: |-
-                        message is a human readable message indicating details about the transition.
-                        This may be an empty string.
-                      maxLength: 32768
-                      type: string
-                    observedGeneration:
-                      description: |-
-                        observedGeneration represents the .metadata.generation that the condition was set based upon.
-                        For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date
-                        with respect to the current state of the instance.
-                      format: int64
-                      minimum: 0
-                      type: integer
-                    reason:
-                      description: |-
-                        reason contains a programmatic identifier indicating the reason for the condition's last transition.
-                        Producers of specific condition types may define expected values and meanings for this field,
-                        and whether the values are considered a guaranteed API.
-                        The value should be a CamelCase string.
-                        This field may not be empty.
-                      maxLength: 1024
-                      minLength: 1
-                      pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
-                      type: string
-                    status:
-                      description: status of the condition, one of True, False, Unknown.
-                      enum:
-                      - "True"
-                      - "False"
-                      - Unknown
-                      type: string
-                    type:
-                      description: type of condition in CamelCase or in foo.example.com/CamelCase.
-                      maxLength: 316
-                      pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
-                      type: string
-                  required:
-                  - lastTransitionTime
-                  - message
-                  - reason
-                  - status
-                  - type
-                  type: object
-                type: array
-            type: object
-        type: object
-    served: true
-    storage: true
-    subresources:
-      status: {}
diff --git a/config/crd/bases/gateway.inference.networking.x-k8s.io_llmserverpools.yaml b/config/crd/bases/inference.networking.x-k8s.io_llmserverpools.yaml
similarity index 98%
rename from config/crd/bases/gateway.inference.networking.x-k8s.io_llmserverpools.yaml
rename to config/crd/bases/inference.networking.x-k8s.io_llmserverpools.yaml
index 02ac00ac0..eb4d6c0a1 100644
--- a/config/crd/bases/gateway.inference.networking.x-k8s.io_llmserverpools.yaml
+++ b/config/crd/bases/inference.networking.x-k8s.io_llmserverpools.yaml
@@ -4,9 +4,9 @@ kind: CustomResourceDefinition
 metadata:
   annotations:
     controller-gen.kubebuilder.io/version: v0.16.1
-  name: llmserverpools.gateway.inference.networking.x-k8s.io
+  name: llmserverpools.inference.networking.x-k8s.io
 spec:
-  group: gateway.inference.networking.x-k8s.io
+  group: inference.networking.x-k8s.io
   names:
     kind: LLMServerPool
     listKind: LLMServerPoolList
diff --git a/config/crd/bases/gateway.inference.k8s.io_llmservices.yaml b/config/crd/bases/inference.networking.x-k8s.io_llmservices.yaml
similarity index 99%
rename from config/crd/bases/gateway.inference.k8s.io_llmservices.yaml
rename to config/crd/bases/inference.networking.x-k8s.io_llmservices.yaml
index b9a1a80ee..73e87bee3 100644
--- a/config/crd/bases/gateway.inference.k8s.io_llmservices.yaml
+++ b/config/crd/bases/inference.networking.x-k8s.io_llmservices.yaml
@@ -4,9 +4,9 @@ kind: CustomResourceDefinition
 metadata:
   annotations:
     controller-gen.kubebuilder.io/version: v0.16.1
-  name: llmservices.gateway.inference.networking.x-k8s.io
+  name: llmservices.inference.networking.x-k8s.io
 spec:
-  group: gateway.inference.networking.x-k8s.io
+  group: inference.networking.x-k8s.io
   names:
     kind: LLMService
     listKind: LLMServiceList
diff --git a/config/crd/kustomization.yaml b/config/crd/kustomization.yaml
index 2a0bcfcbb..76a4574e8 100644
--- a/config/crd/kustomization.yaml
+++ b/config/crd/kustomization.yaml
@@ -2,8 +2,8 @@
 # since it depends on service name and namespace that are out of this kustomize package.
 # It should be run by config/default
 resources:
-- bases/gateway.inference.networking.x-k8s.io_llmserverpools.yaml
-- bases/gateway.inference.networking.x-k8s.io_llmservices.yaml
+- bases/inference.networking.x-k8s.io_llmserverpools.yaml
+- bases/inference.networking.x-k8s.io_llmservices.yaml
 # +kubebuilder:scaffold:crdkustomizeresource
 
 patches:
diff --git a/config/rbac/llmserverpool_editor_role.yaml b/config/rbac/llmserverpool_editor_role.yaml
index ef8ad5c14..54139d22d 100644
--- a/config/rbac/llmserverpool_editor_role.yaml
+++ b/config/rbac/llmserverpool_editor_role.yaml
@@ -8,7 +8,7 @@ metadata:
   name: llmserverpool-editor-role
 rules:
 - apiGroups:
-  - gateway.inference.networking.x-k8s.io
+  - inference.networking.x-k8s.io
   resources:
   - llmserverpools
   verbs:
@@ -20,7 +20,7 @@ rules:
   - update
   - watch
 - apiGroups:
-  - gateway.inference.networking.x-k8s.io
+  - inference.networking.x-k8s.io
   resources:
   - llmserverpools/status
   verbs:
diff --git a/config/rbac/llmserverpool_viewer_role.yaml b/config/rbac/llmserverpool_viewer_role.yaml
index 35a4ade89..c3355ba27 100644
--- a/config/rbac/llmserverpool_viewer_role.yaml
+++ b/config/rbac/llmserverpool_viewer_role.yaml
@@ -8,7 +8,7 @@ metadata:
   name: llmserverpool-viewer-role
 rules:
 - apiGroups:
-  - gateway.inference.networking.x-k8s.io
+  - inference.networking.x-k8s.io
   resources:
   - llmserverpools
   verbs:
@@ -16,7 +16,7 @@ rules:
   - list
   - watch
 - apiGroups:
-  - gateway.inference.networking.x-k8s.io
+  - inference.networking.x-k8s.io
   resources:
   - llmserverpools/status
   verbs:
diff --git a/config/rbac/llmservice_editor_role.yaml b/config/rbac/llmservice_editor_role.yaml
index 092151884..300216acc 100644
--- a/config/rbac/llmservice_editor_role.yaml
+++ b/config/rbac/llmservice_editor_role.yaml
@@ -8,7 +8,7 @@ metadata:
   name: llmservice-editor-role
 rules:
 - apiGroups:
-  - gateway.inference.networking.x-k8s.io
+  - inference.networking.x-k8s.io
   resources:
   - llmservices
   verbs:
@@ -20,7 +20,7 @@ rules:
   - update
   - watch
 - apiGroups:
-  - gateway.inference.networking.x-k8s.io
+  - inference.networking.x-k8s.io
   resources:
   - llmservices/status
   verbs:
diff --git a/config/rbac/llmservice_viewer_role.yaml b/config/rbac/llmservice_viewer_role.yaml
index 9c26d7c8b..cfdec182f 100644
--- a/config/rbac/llmservice_viewer_role.yaml
+++ b/config/rbac/llmservice_viewer_role.yaml
@@ -8,7 +8,7 @@ metadata:
   name: llmservice-viewer-role
 rules:
 - apiGroups:
-  - gateway.inference.networking.x-k8s.io
+  - inference.networking.x-k8s.io
   resources:
   - llmservices
   verbs:
@@ -16,7 +16,7 @@ rules:
   - list
   - watch
 - apiGroups:
-  - gateway.inference.networking.x-k8s.io
+  - inference.networking.x-k8s.io
   resources:
   - llmservices/status
   verbs:
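
After this rename, the CRDs and RBAC rules address the API under the inference.networking.x-k8s.io group rather than gateway.inference.networking.x-k8s.io, so the apiVersion used in manifests changes accordingly. A minimal sketch of an LLMServerPool against the renamed group, based on the modelServerSelector schema in the CRD above (the metadata name and labels are hypothetical placeholders):

apiVersion: inference.networking.x-k8s.io/v1alpha1
kind: LLMServerPool
metadata:
  name: example-pool                  # hypothetical name
spec:
  modelServerSelector:                # standard label selector, per the CRD schema above
    matchLabels:
      app: example-model-server       # hypothetical label; must match the model server pods

And a corresponding LLMService sketch registering one model against that pool, using the models and poolRef fields from the llmservices CRD (again with hypothetical names and a hypothetical adapter):

apiVersion: inference.networking.x-k8s.io/v1alpha1
kind: LLMService
metadata:
  name: example-llm-service           # hypothetical name
spec:
  poolRef:
  - kind: LLMServerPool
    name: example-pool                # references the pool sketched above
  models:
  - name: my-model                    # the name clients send in the "model" request parameter
    targetModels:
    - name: my-model-lora-v1          # hypothetical adapter name as registered in the model server
      weight: 100

Because the group itself changes, objects created under the old gateway.inference.networking.x-k8s.io group are not migrated automatically and would need to be re-created under the new group, and any RBAC rules outside this repository that reference the old group need the same apiGroups update as the roles patched above.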