Skip to content

Commit b68029a

Browse files
committed
Merge branch 'main' into 124
2 parents 1798358 + 6c22d92 commit b68029a

19 files changed

+431
-191
lines changed

.golangci.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@ linters:
1414
- dupword
1515
- durationcheck
1616
- fatcontext
17-
- gci
1817
- ginkgolinter
1918
- gocritic
2019
- govet

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ require (
1818
github.com/stretchr/testify v1.10.0
1919
go.uber.org/multierr v1.11.0
2020
google.golang.org/grpc v1.70.0
21-
google.golang.org/protobuf v1.36.4
21+
google.golang.org/protobuf v1.36.5
2222
k8s.io/api v0.32.1
2323
k8s.io/apiextensions-apiserver v0.32.1
2424
k8s.io/apimachinery v0.32.1

go.sum

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -329,8 +329,8 @@ google.golang.org/genproto/googleapis/rpc v0.0.0-20241202173237-19429a94021a h1:
329329
google.golang.org/genproto/googleapis/rpc v0.0.0-20241202173237-19429a94021a/go.mod h1:5uTbfoYQed2U9p3KIj2/Zzm02PYhndfdmML0qC3q3FU=
330330
google.golang.org/grpc v1.70.0 h1:pWFv03aZoHzlRKHWicjsZytKAiYCtNS0dHbXnIdq7jQ=
331331
google.golang.org/grpc v1.70.0/go.mod h1:ofIJqVKDXx/JiXrwr2IG4/zwdH9txy3IlF40RmcJSQw=
332-
google.golang.org/protobuf v1.36.4 h1:6A3ZDJHn/eNqc1i+IdefRzy/9PokBTPvcqMySR7NNIM=
333-
google.golang.org/protobuf v1.36.4/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE=
332+
google.golang.org/protobuf v1.36.5 h1:tPhr+woSbjfYvY6/GPufUoYizxw1cF/yFoxJ2fmpwlM=
333+
google.golang.org/protobuf v1.36.5/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE=
334334
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
335335
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
336336
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=

pkg/README.md

Lines changed: 1 addition & 94 deletions
Original file line numberDiff line numberDiff line change
@@ -1,96 +1,3 @@
11
## Quickstart
22

3-
This quickstart guide is intended for engineers familiar with k8s and model servers (vLLM in this instance). The goal of this guide is to get a first, single InferencePool up and running!
4-
5-
### Requirements
6-
- Envoy Gateway [v1.2.1](https://gateway.envoyproxy.io/docs/install/install-yaml/#install-with-yaml) or higher
7-
- A cluster with:
8-
- Support for Services of type `LoadBalancer`. (This can be validated by ensuring your Envoy Gateway is up and running). For example, with Kind,
9-
you can follow [these steps](https://kind.sigs.k8s.io/docs/user/loadbalancer).
10-
- 3 GPUs to run the sample model server. Adjust the number of replicas in `./manifests/vllm/deployment.yaml` as needed.
11-
12-
### Steps
13-
14-
1. **Deploy Sample Model Server**
15-
16-
Create a Hugging Face secret to download the model [meta-llama/Llama-2-7b-hf](https://huggingface.co/meta-llama/Llama-2-7b-hf). Ensure that the token grants access to this model.
17-
Deploy a sample vLLM deployment with the proper protocol to work with the LLM Instance Gateway.
18-
```bash
19-
kubectl create secret generic hf-token --from-literal=token=$HF_TOKEN # Your Hugging Face Token with access to Llama2
20-
kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/pkg/manifests/vllm/deployment.yaml
21-
```
22-
23-
1. **Install the Inference Extension CRDs:**
24-
25-
```sh
26-
kubectl apply -k https://github.com/kubernetes-sigs/gateway-api-inference-extension/config/crd
27-
```
28-
29-
1. **Deploy InferenceModel**
30-
31-
Deploy the sample InferenceModel which is configured to load balance traffic between the `tweet-summary-0` and `tweet-summary-1`
32-
[LoRA adapters](https://docs.vllm.ai/en/latest/features/lora.html) of the sample model server.
33-
```bash
34-
kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/pkg/manifests/inferencemodel.yaml
35-
```
36-
37-
1. **Update Envoy Gateway Config to enable Patch Policy**
38-
39-
Our custom LLM Gateway ext-proc is patched into the existing envoy gateway via `EnvoyPatchPolicy`. To enable this feature, we must extend the Envoy Gateway config map. To do this, simply run:
40-
```bash
41-
kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/pkg/manifests/gateway/enable_patch_policy.yaml
42-
kubectl rollout restart deployment envoy-gateway -n envoy-gateway-system
43-
```
44-
Additionally, if you would like to enable the admin interface, you can uncomment the admin lines and run this again.
45-
46-
1. **Deploy Gateway**
47-
48-
```bash
49-
kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/pkg/manifests/gateway/gateway.yaml
50-
```
51-
> **_NOTE:_** This file couples together the gateway infra and the HTTPRoute infra for a convenient, quick startup. Creating additional/different InferencePools on the same gateway will require an additional set of: `Backend`, `HTTPRoute`, the resources included in the `./manifests/gateway/ext-proc.yaml` file, and an additional `./manifests/gateway/patch_policy.yaml` file. ***Should you choose to experiment, familiarity with xDS and Envoy is very useful.***
52-
53-
Confirm that the Gateway was assigned an IP address and reports a `Programmed=True` status:
54-
```bash
55-
$ kubectl get gateway inference-gateway
56-
NAME CLASS ADDRESS PROGRAMMED AGE
57-
inference-gateway inference-gateway <MY_ADDRESS> True 22s
58-
```
59-
60-
1. **Deploy the Inference Extension and InferencePool**
61-
62-
```bash
63-
kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/pkg/manifests/ext_proc.yaml
64-
```
65-
66-
1. **Deploy Envoy Gateway Custom Policies**
67-
68-
```bash
69-
kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/pkg/manifests/gateway/extension_policy.yaml
70-
kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/pkg/manifests/gateway/patch_policy.yaml
71-
```
72-
> **_NOTE:_** This is also per InferencePool, and will need to be configured to support the new pool should you wish to experiment further.
73-
74-
1. **OPTIONALLY**: Apply Traffic Policy
75-
76-
For high-traffic benchmarking you can apply this manifest to avoid any defaults that can cause timeouts/errors.
77-
78-
```bash
79-
kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/pkg/manifests/gateway/traffic_policy.yaml
80-
```
81-
82-
1. **Try it out**
83-
84-
Wait until the gateway is ready.
85-
86-
```bash
87-
IP=$(kubectl get gateway/inference-gateway -o jsonpath='{.status.addresses[0].value}')
88-
PORT=8081
89-
90-
curl -i ${IP}:${PORT}/v1/completions -H 'Content-Type: application/json' -d '{
91-
"model": "tweet-summary",
92-
"prompt": "Write as if you were a critic: San Francisco",
93-
"max_tokens": 100,
94-
"temperature": 0
95-
}'
96-
```
3+
Please refer to our Getting started guide here: https://gateway-api-inference-extension.sigs.k8s.io/guides/

pkg/ext-proc/backend/inferencemodel_reconciler.go

Lines changed: 20 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import (
55

66
"inference.networking.x-k8s.io/gateway-api-inference-extension/api/v1alpha1"
77
logutil "inference.networking.x-k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging"
8+
"k8s.io/apimachinery/pkg/api/errors"
89
"k8s.io/apimachinery/pkg/runtime"
910
"k8s.io/apimachinery/pkg/types"
1011
"k8s.io/client-go/tools/record"
@@ -25,32 +26,37 @@ func (c *InferenceModelReconciler) Reconcile(ctx context.Context, req ctrl.Reque
2526
if req.Namespace != c.PoolNamespacedName.Namespace {
2627
return ctrl.Result{}, nil
2728
}
28-
klog.V(1).Infof("reconciling InferenceModel %v", req.NamespacedName)
29-
30-
service := &v1alpha1.InferenceModel{}
31-
if err := c.Get(ctx, req.NamespacedName, service); err != nil {
32-
klog.Error(err, "unable to get InferencePool")
29+
klog.V(1).Infof("Reconciling InferenceModel %v", req.NamespacedName)
30+
31+
infModel := &v1alpha1.InferenceModel{}
32+
if err := c.Get(ctx, req.NamespacedName, infModel); err != nil {
33+
if errors.IsNotFound(err) {
34+
klog.V(1).Infof("InferenceModel %v not found. Removing from datastore since object must be deleted", req.NamespacedName)
35+
c.Datastore.InferenceModels.Delete(infModel.Spec.ModelName)
36+
return ctrl.Result{}, nil
37+
}
38+
klog.Error(err, "Unable to get InferenceModel")
3339
return ctrl.Result{}, err
3440
}
3541

36-
c.updateDatastore(service)
42+
c.updateDatastore(infModel)
3743
return ctrl.Result{}, nil
3844
}
3945

40-
func (c *InferenceModelReconciler) SetupWithManager(mgr ctrl.Manager) error {
41-
return ctrl.NewControllerManagedBy(mgr).
42-
For(&v1alpha1.InferenceModel{}).
43-
Complete(c)
44-
}
45-
4646
func (c *InferenceModelReconciler) updateDatastore(infModel *v1alpha1.InferenceModel) {
4747
if infModel.Spec.PoolRef.Name == c.PoolNamespacedName.Name {
4848
klog.V(1).Infof("Incoming pool ref %v, server pool name: %v", infModel.Spec.PoolRef, c.PoolNamespacedName.Name)
49-
klog.V(1).Infof("Adding/Updating inference model: %v", infModel.Spec.ModelName)
49+
klog.V(1).Infof("Adding/Updating InferenceModel: %v", infModel.Spec.ModelName)
5050
c.Datastore.InferenceModels.Store(infModel.Spec.ModelName, infModel)
5151
return
5252
}
53-
klog.V(logutil.DEFAULT).Infof("Removing/Not adding inference model: %v", infModel.Spec.ModelName)
53+
klog.V(logutil.DEFAULT).Infof("Removing/Not adding InferenceModel: %v", infModel.Spec.ModelName)
5454
// If we get here, the model is not relevant to this pool; remove it.
5555
c.Datastore.InferenceModels.Delete(infModel.Spec.ModelName)
5656
}
57+
58+
func (c *InferenceModelReconciler) SetupWithManager(mgr ctrl.Manager) error {
59+
return ctrl.NewControllerManagedBy(mgr).
60+
For(&v1alpha1.InferenceModel{}).
61+
Complete(c)
62+
}

0 commit comments

Comments
 (0)