
Commit 91fc723

Merge branch 'kubernetes-sigs:main' into main
2 parents 10b05ee + ddc3d69 commit 91fc723

28 files changed: +553 additions, -315 deletions

.github/ISSUE_TEMPLATE/blank_issue.md (+8)

@@ -0,0 +1,8 @@
+---
+name: Blank Issue
+about: Create a new issue from scratch
+title: ''
+labels: needs-triage
+assignees: ''
+
+---

.github/ISSUE_TEMPLATE/bug_request.md (+3, -1)

@@ -1,7 +1,9 @@
 ---
 name: Bug Report
 about: Report a bug you encountered
-labels: kind/bug
+title: ''
+labels: kind/bug, needs-triage
+assignees: ''

 ---

.github/ISSUE_TEMPLATE/config.yml (+1)

@@ -0,0 +1 @@
+blank_issues_enabled: false

.github/ISSUE_TEMPLATE/feature_request.md (+1, -2)

@@ -2,7 +2,7 @@
 name: Feature request
 about: Suggest an idea for this project
 title: ''
-labels: ''
+labels: needs-triage
 assignees: ''

 ---
@@ -12,4 +12,3 @@ assignees: ''
 **What would you like to be added**:

 **Why is this needed**:
-

.github/ISSUE_TEMPLATE/new-release.md (+1)

@@ -4,6 +4,7 @@ about: Propose a new release
 title: Release v0.x.0
 labels: ''
 assignees: ''
+
 ---

 - [Introduction](#introduction)

Makefile (+5, -1)

@@ -123,8 +123,12 @@ vet: ## Run go vet against code.
 # test: manifests generate fmt vet envtest image-build ## Run tests.
 #     KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir $(LOCALBIN) -p path)" go test $$(go list ./... | grep -v /e2e) -race -coverprofile cover.out

+.PHONY: test-unit
+test-unit: ## Run unit tests.
+    KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir $(LOCALBIN) -p path)" go test ./pkg/... -race -coverprofile cover.out
+
 .PHONY: test-integration
-test-integration: ## Run tests.
+test-integration: ## Run integration tests.
     KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir $(LOCALBIN) -p path)" go test ./test/integration/epp/... -race -coverprofile cover.out

 .PHONY: test-e2e

README.md (+50, -1)

@@ -2,7 +2,56 @@
 [![Go Reference](https://pkg.go.dev/badge/sigs.k8s.io/gateway-api-inference-extension.svg)](https://pkg.go.dev/sigs.k8s.io/gateway-api-inference-extension)
 [![License](https://img.shields.io/github/license/kubernetes-sigs/gateway-api-inference-extension)](/LICENSE)

-# Gateway API Inference Extension
+# Gateway API Inference Extension (GIE)
+
+This project offers tools for AI Inference, enabling developers to build [Inference Gateways].
+
+[Inference Gateways]:#concepts-and-definitions
+
+## Concepts and Definitions
+
+The following are some key industry terms that are important to understand for
+this project:
+
+- **Model**: A generative AI model that has learned patterns from data and is
+  used for inference. Models vary in size and architecture, from smaller
+  domain-specific models to massive multi-billion parameter neural networks that
+  are optimized for diverse language tasks.
+- **Inference**: The process of running a generative AI model, such as a large
+  language model, diffusion model etc, to generate text, embeddings, or other
+  outputs from input data.
+- **Model server**: A service (in our case, containerized) responsible for
+  receiving inference requests and returning predictions from a model.
+- **Accelerator**: specialized hardware, such as Graphics Processing Units
+  (GPUs) that can be attached to Kubernetes nodes to speed up computations,
+  particularly for training and inference tasks.
+
+And the following are more specific terms to this project:
+
+- **Scheduler**: Makes decisions about which endpoint is optimal (best cost /
+  best performance) for an inference request based on `Metrics and Capabilities`
+  from [Model Serving](/docs/proposals/003-model-server-protocol/README.md).
+- **Metrics and Capabilities**: Data provided by model serving platforms about
+  performance, availability and capabilities to optimize routing. Includes
+  things like [Prefix Cache] status or [LoRA Adapters] availability.
+- **Endpoint Selector**: A `Scheduler` combined with `Metrics and Capabilities`
+  systems is often referred to together as an [Endpoint Selection Extension]
+  (this is also sometimes referred to as an "endpoint picker", or "EPP").
+- **Inference Gateway**: A proxy/load-balancer which has been coupled with a
+  `Endpoint Selector`. It provides optimized routing and load balancing for
+  serving Kubernetes self-hosted generative Artificial Intelligence (AI)
+  workloads. It simplifies the deployment, management, and observability of AI
+  inference workloads.
+
+For deeper insights and more advanced concepts, refer to our [proposals](/docs/proposals).
+
+[Inference]:https://www.digitalocean.com/community/tutorials/llm-inference-optimization
+[Gateway API]:https://github.com/kubernetes-sigs/gateway-api
+[Prefix Cache]:https://docs.vllm.ai/en/stable/design/v1/prefix_caching.html
+[LoRA Adapters]:https://docs.vllm.ai/en/stable/features/lora.html
+[Endpoint Selection Extension]:https://gateway-api-inference-extension.sigs.k8s.io/#endpoint-selection-extension
+
+## Technical Overview

 This extension upgrades an [ext-proc](https://www.envoyproxy.io/docs/envoy/latest/configuration/http/http_filters/ext_proc_filter)-capable proxy or gateway - such as Envoy Gateway, kGateway, or the GKE Gateway - to become an **inference gateway** - supporting inference platform teams self-hosting large language models on Kubernetes. This integration makes it easy to expose and control access to your local [OpenAI-compatible chat completion endpoints](https://platform.openai.com/docs/api-reference/chat) to other workloads on or off cluster, or to integrate your self-hosted models alongside model-as-a-service providers in a higher level **AI Gateway** like LiteLLM, Solo AI Gateway, or Apigee.

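To make the `Scheduler` and `Endpoint Selector` terms introduced in the new Concepts section concrete, here is a minimal, hypothetical Go sketch of metrics-based endpoint selection. The type, fields, addresses, and the lowest-queue-depth heuristic are illustrative assumptions only, not the project's actual EPP scheduling algorithm.

```go
package main

import "fmt"

// endpointMetrics is a hypothetical, simplified stand-in for the "Metrics and
// Capabilities" data an Endpoint Selector consumes (the real EPP uses richer
// signals, e.g. prefix-cache status and LoRA adapter availability).
type endpointMetrics struct {
	address      string
	queueDepth   int
	kvCacheUsage float64
}

// pickEndpoint chooses the endpoint with the shortest request queue, breaking
// ties on lower KV-cache utilization. Illustrative heuristic only.
func pickEndpoint(eps []endpointMetrics) (string, error) {
	if len(eps) == 0 {
		return "", fmt.Errorf("no endpoints available")
	}
	best := eps[0]
	for _, ep := range eps[1:] {
		if ep.queueDepth < best.queueDepth ||
			(ep.queueDepth == best.queueDepth && ep.kvCacheUsage < best.kvCacheUsage) {
			best = ep
		}
	}
	return best.address, nil
}

func main() {
	eps := []endpointMetrics{
		{address: "10.0.0.1:8000", queueDepth: 4, kvCacheUsage: 0.9},
		{address: "10.0.0.2:8000", queueDepth: 1, kvCacheUsage: 0.5},
	}
	addr, err := pickEndpoint(eps)
	if err != nil {
		panic(err)
	}
	fmt.Println("routing to", addr) // routing to 10.0.0.2:8000
}
```
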
docs/proposals/README.md (+5)

@@ -0,0 +1,5 @@
+# Proposals Best Practices
+
+
+## Naming
+The directory of the proposal should lead with a 4-digit PR number (will move to 5,6,... should our PR count get that high), followed by kebab-cased title. The PR number is not known until the PR is cut, so development can use a placeholder, ex. XXXX-my-proposal. PR number is used b/c it is unique & chronological, allowing the default ordering of proposals to follow the timeline of development.

pkg/epp/backend/metrics/pod_metrics.go (+5, -6)

@@ -41,9 +41,8 @@ type podMetrics struct {
     ds       Datastore
     interval time.Duration

-    parentCtx context.Context
-    once      sync.Once // ensure the StartRefreshLoop is only called once.
-    done      chan struct{}
+    once sync.Once // ensure the StartRefreshLoop is only called once.
+    done chan struct{}

     logger logr.Logger
 }
@@ -79,8 +78,8 @@ func toInternalPod(in *corev1.Pod) *Pod {
 }

 // start starts a goroutine exactly once to periodically update metrics. The goroutine will be
-// stopped either when stop() is called, or the parentCtx is cancelled.
-func (pm *podMetrics) startRefreshLoop() {
+// stopped either when stop() is called, or the given ctx is cancelled.
+func (pm *podMetrics) startRefreshLoop(ctx context.Context) {
     pm.once.Do(func() {
         go func() {
             pm.logger.V(logutil.DEFAULT).Info("Starting refresher", "pod", pm.GetPod())
@@ -90,7 +89,7 @@
             select {
             case <-pm.done:
                 return
-            case <-pm.parentCtx.Done():
+            case <-ctx.Done():
                 return
             case <-ticker.C: // refresh metrics periodically
                 if err := pm.refreshMetrics(); err != nil {

pkg/epp/backend/metrics/types.go (+7, -8)

@@ -43,18 +43,17 @@ type PodMetricsFactory struct {
 func (f *PodMetricsFactory) NewPodMetrics(parentCtx context.Context, in *corev1.Pod, ds Datastore) PodMetrics {
     pod := toInternalPod(in)
     pm := &podMetrics{
-        pmc:       f.pmc,
-        ds:        ds,
-        interval:  f.refreshMetricsInterval,
-        parentCtx: parentCtx,
-        once:      sync.Once{},
-        done:      make(chan struct{}),
-        logger:    log.FromContext(parentCtx).WithValues("pod", pod.NamespacedName),
+        pmc:      f.pmc,
+        ds:       ds,
+        interval: f.refreshMetricsInterval,
+        once:     sync.Once{},
+        done:     make(chan struct{}),
+        logger:   log.FromContext(parentCtx).WithValues("pod", pod.NamespacedName),
     }
     pm.pod.Store(pod)
     pm.metrics.Store(newMetrics())

-    pm.startRefreshLoop()
+    pm.startRefreshLoop(parentCtx)
     return pm
 }

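The pod_metrics.go and types.go changes above drop the stored parentCtx field and instead pass the context into startRefreshLoop. A self-contained sketch of that pattern, with hypothetical names rather than the project's types, looks like this:

```go
package main

import (
	"context"
	"fmt"
	"sync"
	"time"
)

// refresher is a stripped-down stand-in for podMetrics: it keeps the stop
// channel and the sync.Once, but no longer stores a context in the struct.
type refresher struct {
	once sync.Once
	done chan struct{}
}

// start launches the refresh loop exactly once; the loop exits when either
// stop() closes done or the caller's ctx is cancelled.
func (r *refresher) start(ctx context.Context, interval time.Duration) {
	r.once.Do(func() {
		go func() {
			ticker := time.NewTicker(interval)
			defer ticker.Stop()
			for {
				select {
				case <-r.done:
					return
				case <-ctx.Done():
					return
				case <-ticker.C:
					fmt.Println("refreshing metrics")
				}
			}
		}()
	})
}

func (r *refresher) stop() { close(r.done) }

func main() {
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	r := &refresher{done: make(chan struct{})}
	r.start(ctx, 100*time.Millisecond)
	time.Sleep(350 * time.Millisecond)
	r.stop() // or cancel(): either signal terminates the loop
}
```
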
pkg/epp/controller/pod_reconciler.go (+22)

@@ -26,7 +26,9 @@ import (
     "k8s.io/client-go/tools/record"
     ctrl "sigs.k8s.io/controller-runtime"
     "sigs.k8s.io/controller-runtime/pkg/client"
+    "sigs.k8s.io/controller-runtime/pkg/event"
     "sigs.k8s.io/controller-runtime/pkg/log"
+    "sigs.k8s.io/controller-runtime/pkg/predicate"
     "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore"
     logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging"
     podutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/pod"
@@ -63,8 +65,28 @@ func (c *PodReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.R
 }

 func (c *PodReconciler) SetupWithManager(mgr ctrl.Manager) error {
+    filter := predicate.Funcs{
+        CreateFunc: func(ce event.CreateEvent) bool {
+            pod := ce.Object.(*corev1.Pod)
+            return c.Datastore.PoolLabelsMatch(pod.GetLabels())
+        },
+        UpdateFunc: func(ue event.UpdateEvent) bool {
+            oldPod := ue.ObjectOld.(*corev1.Pod)
+            newPod := ue.ObjectNew.(*corev1.Pod)
+            return c.Datastore.PoolLabelsMatch(oldPod.GetLabels()) || c.Datastore.PoolLabelsMatch(newPod.GetLabels())
+        },
+        DeleteFunc: func(de event.DeleteEvent) bool {
+            pod := de.Object.(*corev1.Pod)
+            return c.Datastore.PoolLabelsMatch(pod.GetLabels())
+        },
+        GenericFunc: func(ge event.GenericEvent) bool {
+            pod := ge.Object.(*corev1.Pod)
+            return c.Datastore.PoolLabelsMatch(pod.GetLabels())
+        },
+    }
     return ctrl.NewControllerManagedBy(mgr).
         For(&corev1.Pod{}).
+        WithEventFilter(filter).
         Complete(c)
 }

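The reconciler change above installs an event filter so that only Pod events whose labels match the InferencePool selector reach Reconcile. A minimal standalone sketch of how such a predicate behaves, assuming an illustrative app=vllm selector in place of Datastore.PoolLabelsMatch:

```go
package main

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"sigs.k8s.io/controller-runtime/pkg/event"
	"sigs.k8s.io/controller-runtime/pkg/predicate"
)

func main() {
	// Stand-in for Datastore.PoolLabelsMatch: assume the pool selects pods
	// labeled app=vllm (an illustrative label, not a project default).
	poolLabelsMatch := func(labels map[string]string) bool {
		return labels["app"] == "vllm"
	}

	filter := predicate.Funcs{
		CreateFunc: func(ce event.CreateEvent) bool {
			return poolLabelsMatch(ce.Object.GetLabels())
		},
	}

	matching := &corev1.Pod{ObjectMeta: metav1.ObjectMeta{
		Name: "vllm-0", Labels: map[string]string{"app": "vllm"},
	}}
	unrelated := &corev1.Pod{ObjectMeta: metav1.ObjectMeta{
		Name: "web-0", Labels: map[string]string{"app": "web"},
	}}

	fmt.Println(filter.Create(event.CreateEvent{Object: matching}))  // true: event reaches Reconcile
	fmt.Println(filter.Create(event.CreateEvent{Object: unrelated})) // false: event is filtered out
}
```
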
pkg/epp/datastore/datastore.go (+3)

@@ -150,6 +150,9 @@ func (ds *datastore) PoolHasSynced() bool {
 func (ds *datastore) PoolLabelsMatch(podLabels map[string]string) bool {
     ds.poolAndModelsMu.RLock()
     defer ds.poolAndModelsMu.RUnlock()
+    if ds.pool == nil {
+        return false
+    }
     poolSelector := selectorFromInferencePoolSelector(ds.pool.Spec.Selector)
     podSet := labels.Set(podLabels)
     return poolSelector.Matches(podSet)

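The datastore change above makes PoolLabelsMatch return false while no InferencePool has been observed, instead of dereferencing a nil ds.pool. A small sketch of that guarded label-matching shape, using an assumed app=vllm selector rather than a real InferencePool spec:

```go
package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/labels"
)

// poolLabelsMatch mirrors the guarded shape of datastore.PoolLabelsMatch: with
// no pool selector available yet, nothing matches; otherwise the selector decides.
func poolLabelsMatch(poolSelector labels.Selector, podLabels map[string]string) bool {
	if poolSelector == nil { // stands in for "no InferencePool synced yet"
		return false
	}
	return poolSelector.Matches(labels.Set(podLabels))
}

func main() {
	sel := labels.SelectorFromSet(labels.Set{"app": "vllm"}) // assumed selector
	fmt.Println(poolLabelsMatch(nil, map[string]string{"app": "vllm"}))                   // false: no pool yet
	fmt.Println(poolLabelsMatch(sel, map[string]string{"app": "vllm", "hash": "abc123"})) // true
	fmt.Println(poolLabelsMatch(sel, map[string]string{"app": "web"}))                    // false
}
```
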
pkg/epp/datastore/datastore_test.go (+91)

@@ -355,3 +355,94 @@ func TestMetrics(t *testing.T) {
         })
     }
 }
+
+func TestPods(t *testing.T) {
+    updatedPod := &corev1.Pod{
+        ObjectMeta: metav1.ObjectMeta{
+            Name: "pod1",
+        },
+        Spec: corev1.PodSpec{
+            NodeName: "node-1",
+        },
+    }
+    tests := []struct {
+        name         string
+        op           func(ctx context.Context, ds Datastore)
+        existingPods []*corev1.Pod
+        wantPods     []*corev1.Pod
+    }{
+        {
+            name:         "Add new pod, no existing pods, should add",
+            existingPods: []*corev1.Pod{},
+            wantPods:     []*corev1.Pod{pod1},
+            op: func(ctx context.Context, ds Datastore) {
+                ds.PodUpdateOrAddIfNotExist(pod1)
+            },
+        },
+        {
+            name:         "Add new pod, with existing pods, should add",
+            existingPods: []*corev1.Pod{pod1},
+            wantPods:     []*corev1.Pod{pod1, pod2},
+            op: func(ctx context.Context, ds Datastore) {
+                ds.PodUpdateOrAddIfNotExist(pod2)
+            },
+        },
+        {
+            name:         "Update existing pod, new field, should update",
+            existingPods: []*corev1.Pod{pod1},
+            wantPods:     []*corev1.Pod{updatedPod},
+            op: func(ctx context.Context, ds Datastore) {
+                ds.PodUpdateOrAddIfNotExist(updatedPod)
+            },
+        },
+        {
+            name:         "Update existing pod, no new fields, should not update",
+            existingPods: []*corev1.Pod{pod1},
+            wantPods:     []*corev1.Pod{pod1},
+            op: func(ctx context.Context, ds Datastore) {
+                incoming := &corev1.Pod{
+                    ObjectMeta: metav1.ObjectMeta{
+                        Name:      "pod1",
+                        Namespace: "default",
+                    },
+                }
+                ds.PodUpdateOrAddIfNotExist(incoming)
+            },
+        },
+        {
+            name:     "Delete the pod",
+            wantPods: []*corev1.Pod{pod1},
+            op: func(ctx context.Context, ds Datastore) {
+                ds.PodDelete(pod2NamespacedName)
+            },
+        },
+        {
+            name:         "Delete the pod that doesn't exist",
+            existingPods: []*corev1.Pod{pod1},
+            wantPods:     []*corev1.Pod{pod1},
+            op: func(ctx context.Context, ds Datastore) {
+                ds.PodDelete(pod2NamespacedName)
+            },
+        },
+    }
+    for _, test := range tests {
+        t.Run(test.name, func(t *testing.T) {
+            ctx := context.Background()
+            pmf := backendmetrics.NewPodMetricsFactory(&backendmetrics.FakePodMetricsClient{}, time.Second)
+            ds := NewDatastore(t.Context(), pmf)
+            for _, pod := range test.existingPods {
+                ds.PodUpdateOrAddIfNotExist(pod)
+            }
+
+            test.op(ctx, ds)
+            var gotPods []*corev1.Pod
+            for _, pm := range ds.PodGetAll() {
+                pod := &corev1.Pod{ObjectMeta: metav1.ObjectMeta{Name: pm.GetPod().NamespacedName.Name, Namespace: pm.GetPod().NamespacedName.Namespace}, Status: corev1.PodStatus{PodIP: pm.GetPod().Address}}
+                gotPods = append(gotPods, pod)
+            }
+            if !cmp.Equal(gotPods, test.wantPods, cmpopts.SortSlices(func(a, b *corev1.Pod) bool { return a.Name < b.Name })) {
+                t.Logf("got (%v) != want (%v);", gotPods, test.wantPods)
+            }
+        })
+    }
+}

pkg/epp/handlers/request.go (+3)

@@ -138,6 +138,9 @@ func (s *StreamingServer) HandleRequestHeaders(ctx context.Context, reqCtx *Requ
     // The above PR will address endpoint admission, but currently any request without a body will be
     // routed to a random upstream pod.
     pod := GetRandomPod(s.datastore)
+    if pod == nil {
+        return errutil.Error{Code: errutil.Internal, Msg: "no pods available in datastore"}
+    }
     pool, err := s.datastore.PoolGet()
     if err != nil {
         return err

pkg/epp/handlers/server.go (+3)

@@ -449,6 +449,9 @@ func RandomWeightedDraw(logger logr.Logger, model *v1alpha2.InferenceModel, seed

 func GetRandomPod(ds datastore.Datastore) *backendmetrics.Pod {
     pods := ds.PodGetAll()
+    if len(pods) == 0 {
+        return nil
+    }
     number := rand.Intn(len(pods))
     pod := pods[number]
     return pod.GetPod()

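The request.go and server.go changes above close the same gap from both sides: GetRandomPod now returns nil when the datastore holds no pods, and the header handler turns that nil into an Internal error rather than letting rand.Intn panic on a zero-length list. A minimal sketch of the guard, with a plain string slice standing in for the pod list:

```go
package main

import (
	"fmt"
	"math/rand"
)

// pickRandom mirrors the guarded shape of GetRandomPod: rand.Intn panics when
// its argument is <= 0, so an empty list must be handled before the draw.
func pickRandom(pods []string) (string, bool) {
	if len(pods) == 0 {
		return "", false
	}
	return pods[rand.Intn(len(pods))], true
}

func main() {
	if _, ok := pickRandom(nil); !ok {
		fmt.Println("no pods available in datastore") // caller maps this to an Internal error
	}
	if pod, ok := pickRandom([]string{"pod-a", "pod-b"}); ok {
		fmt.Println("selected", pod)
	}
}
```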