kubernetes-sigs
diff --git a/‎pkg/ext-proc/backend/datastore.go
+12 b/‎pkg/ext-proc/backend/datastore.go
+12
diff --git a/‎pkg/ext-proc/backend/filterconfig_reconciler.go
+35 b/‎pkg/ext-proc/backend/filterconfig_reconciler.go
+35
diff --git a/‎pkg/ext-proc/main.go
+11-1 b/‎pkg/ext-proc/main.go
+11-1
diff --git a/‎pkg/ext-proc/scheduling/algorithms/topk.go
+52 b/‎pkg/ext-proc/scheduling/algorithms/topk.go
+52
diff --git a/‎pkg/ext-proc/scheduling/algorithms/topk_test.go
+49 b/‎pkg/ext-proc/scheduling/algorithms/topk_test.go
+49
diff --git a/‎pkg/ext-proc/scheduling/filter.go
+22-13 b/‎pkg/ext-proc/scheduling/filter.go
+22-13
diff --git a/‎pkg/ext-proc/scheduling/filter_test.go
+5-5 b/‎pkg/ext-proc/scheduling/filter_test.go
+5-5
@@ -29,10 +29,22 @@ type K8sDatastore struct {
 	inferencePool   *v1alpha1.InferencePool
 	InferenceModels *sync.Map
 	pods            *sync.Map
+
+	filterConfigMap *corev1.ConfigMap
 }
 
 type K8sDatastoreOption func(*K8sDatastore)
 
+func (ds *K8sDatastore) GetFilterConfigMap() *corev1.ConfigMap {
+	return ds.filterConfigMap
+}
+
+func WithFilterConfigMap(filterConfigMap *corev1.ConfigMap) K8sDatastoreOption {
+	return func(store *K8sDatastore) {
+		store.filterConfigMap = filterConfigMap
+	}
+}
+
 // WithPods can be used in tests to override the pods.
 func WithPods(pods []*PodMetrics) K8sDatastoreOption {
 	return func(store *K8sDatastore) {
 
@@ -0,0 +1,35 @@
+package backend
+
+import (
+	"context"
+
+	corev1 "k8s.io/api/core/v1"
+	"k8s.io/klog/v2"
+	ctrl "sigs.k8s.io/controller-runtime"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+)
+
+type FilterConfigReconciler struct {
+	client.Client
+	Datastore *K8sDatastore
+}
+
+func (c *FilterConfigReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
+	if req.NamespacedName.Name != "filter-config" || req.NamespacedName.Namespace != "default" {
+		return ctrl.Result{}, nil
+	}
+	cm := &corev1.ConfigMap{}
+	if err := c.Get(ctx, req.NamespacedName, cm); err != nil {
+		klog.Errorf("unable to get ConfigMap, err: %v", err)
+		return ctrl.Result{}, err
+	}
+	klog.Infof("updating filter config to: %++v", cm.Data)
+	c.Datastore.filterConfigMap = cm.DeepCopy()
+	return ctrl.Result{}, nil
+}
+
+func (c *FilterConfigReconciler) SetupWithManager(mgr ctrl.Manager) error {
+	return ctrl.NewControllerManagedBy(mgr).
+		For(&corev1.ConfigMap{}).
+		Complete(c)
+}
@@ -146,6 +146,13 @@ func main() {
 		klog.Error(err, "Error setting up EndpointSliceReconciler")
 	}
 
+	if err := (&backend.FilterConfigReconciler{
+		Datastore: datastore,
+		Client:    mgr.GetClient(),
+	}).SetupWithManager(mgr); err != nil {
+		klog.Error(err, "Error setting up EndpointSliceReconciler")
+	}
+
 	errChan := make(chan error)
 	go func() {
 		if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil {
@@ -160,11 +167,14 @@ func main() {
 	if err := pp.Init(*refreshPodsInterval, *refreshMetricsInterval); err != nil {
 		klog.Fatalf("failed to initialize: %v", err)
 	}
+
+	orchestrator := scheduling.NewFilterOrchestrator(datastore)
+
 	extProcPb.RegisterExternalProcessorServer(
 		s,
 		handlers.NewServer(
 			pp,
-			scheduling.NewScheduler(pp),
+			scheduling.NewScheduler(pp, scheduling.WithOrchestrator(orchestrator)),
 			*targetPodHeader,
 			datastore))
 	healthPb.RegisterHealthServer(s, &healthServer{})
 
@@ -0,0 +1,52 @@
+package algorithms
+
+import "container/heap"
+
+type TopK[T any] interface {
+	TopK(elems []T, k int) []T
+}
+
+type HeapTopKImpl[T any] struct {
+	cmp    func(a, b T) bool
+	sorted []T
+}
+
+func NewHeapTopK[T any](cmp func(a, b T) bool) TopK[T] {
+	return &HeapTopKImpl[T]{
+		cmp: cmp,
+	}
+}
+
+func (h *HeapTopKImpl[T]) TopK(elems []T, k int) []T {
+	if k <= 0 {
+		return []T{}
+	}
+
+	if k >= len(elems) {
+		return elems
+	}
+
+	h.sorted = []T{}
+	heap.Init(h)
+	for _, e := range elems {
+		heap.Push(h, e)
+		if h.Len() > k {
+			heap.Pop(h)
+		}
+	}
+	return h.sorted
+}
+
+func (h *HeapTopKImpl[T]) Len() int           { return len(h.sorted) }
+func (h *HeapTopKImpl[T]) Less(i, j int) bool { return h.cmp(h.sorted[i], h.sorted[j]) }
+func (h *HeapTopKImpl[T]) Swap(i, j int)      { h.sorted[i], h.sorted[j] = h.sorted[j], h.sorted[i] }
+
+func (h *HeapTopKImpl[T]) Push(x any) {
+	h.sorted = append(h.sorted, x.(T))
+}
+
+func (h *HeapTopKImpl[T]) Pop() any {
+	pop := h.sorted[len(h.sorted)-1]
+	h.sorted = h.sorted[:len(h.sorted)-1]
+	return pop
+}
@@ -0,0 +1,49 @@
+package algorithms
+
+import (
+	"reflect"
+	"testing"
+)
+
+func TestMaxTopK(t *testing.T) {
+	tests := []struct {
+		elems []int
+		k     int
+		want  []int
+	}{
+		{[]int{1, 2, 3, 4, 5}, 3, []int{3, 4, 5}},
+		{[]int{5, 4, 3, 2, 1}, 2, []int{4, 5}},
+		{[]int{1, 3, 5, 7, 9}, 0, []int{}},
+		{[]int{}, 3, []int{}},
+		{[]int{10}, 1, []int{10}},
+		{[]int{1, 2, 3}, 5, []int{1, 2, 3}},
+	}
+
+	for _, tt := range tests {
+		h := &HeapTopKImpl[int]{cmp: func(a, b int) bool { return a < b }}
+		got := h.TopK(tt.elems, tt.k)
+		if !reflect.DeepEqual(got, tt.want) {
+			t.Errorf("TopK(%v, %d) = %v, want %v", tt.elems, tt.k, got, tt.want)
+		}
+	}
+}
+
+func TestMinTopK(t *testing.T) {
+	tests := []struct {
+		elems []int
+		k     int
+		want  []int
+	}{
+		{[]int{1, 2, 3, 4, 5}, 3, []int{3, 1, 2}},
+		{[]int{5, 4, 3, 2, 1}, 2, []int{2, 1}},
+		{[]int{1, 2, 3}, 5, []int{1, 2, 3}},
+	}
+
+	for _, tt := range tests {
+		h := &HeapTopKImpl[int]{cmp: func(a, b int) bool { return a > b }}
+		got := h.TopK(tt.elems, tt.k)
+		if !reflect.DeepEqual(got, tt.want) {
+			t.Errorf("TopK(%v, %d) = %v, want %v", tt.elems, tt.k, got, tt.want)
+		}
+	}
+}
@@ -4,43 +4,46 @@ import (
 	"errors"
 	"math"
 
+	"google.golang.org/grpc/codes"
+	"google.golang.org/grpc/status"
 	"inference.networking.x-k8s.io/gateway-api-inference-extension/pkg/ext-proc/backend"
+
 	klog "k8s.io/klog/v2"
 )
 
-type Filter interface {
+type FilterChain interface {
 	Name() string
 	Filter(req *LLMRequest, pods []*backend.PodMetrics) ([]*backend.PodMetrics, error)
 }
 
-// filter applies current filterFunc, and then recursively applies next filters depending success or
+// filterChainImpl applies current filterFunc, and then recursively applies next filters depending success or
 // failure of the current filterFunc.
 // It can be used to construct a flow chart algorithm.
-type filter struct {
+type filterChainImpl struct {
 	name   string
-	filter filterFunc
+	filter filter
 	// nextOnSuccess filter will be applied after successfully applying the current filter.
 	// The filtered results will be passed to the next filter.
-	nextOnSuccess *filter
+	nextOnSuccess *filterChainImpl
 	// nextOnFailure filter will be applied if current filter fails.
 	// The original input will be passed to the next filter.
-	nextOnFailure *filter
+	nextOnFailure *filterChainImpl
 	// nextOnSuccessOrFailure is a convenience field to configure the next filter regardless of the
 	// success or failure of the current filter.
 	// NOTE: When using nextOnSuccessOrFailure, both nextOnSuccess and nextOnFailure SHOULD be nil.
 	// However if that's not the case, nextOnSuccess and nextOnFailure will be used, instead of
 	// nextOnSuccessOrFailure,  in the success and failure scenarios, respectively.
-	nextOnSuccessOrFailure *filter
+	nextOnSuccessOrFailure *filterChainImpl
 }
 
-func (f *filter) Name() string {
+func (f *filterChainImpl) Name() string {
 	if f == nil {
 		return "nil"
 	}
 	return f.name
 }
 
-func (f *filter) Filter(req *LLMRequest, pods []*backend.PodMetrics) ([]*backend.PodMetrics, error) {
+func (f *filterChainImpl) Filter(req *LLMRequest, pods []*backend.PodMetrics) ([]*backend.PodMetrics, error) {
 	klog.V(3).Infof("Running filter %q on request %v with %v pods", f.name, req, len(pods))
 
 	filtered, err := f.filter(req, pods)
@@ -71,11 +74,11 @@ func (f *filter) Filter(req *LLMRequest, pods []*backend.PodMetrics) ([]*backend
 	}
 }
 
-// filterFunc filters a set of input pods to a subset.
-type filterFunc func(req *LLMRequest, pods []*backend.PodMetrics) ([]*backend.PodMetrics, error)
+// filter filters a set of input pods to a subset.
+type filter func(req *LLMRequest, pods []*backend.PodMetrics) ([]*backend.PodMetrics, error)
 
-// toFilterFunc is a helper function to convert a per pod filter func to the FilterFunc.
-func toFilterFunc(pp podPredicate) filterFunc {
+// toFilter is a helper function to convert a per pod filter func to the FilterFunc.
+func toFilter(pp podPredicate) filter {
 	return func(req *LLMRequest, pods []*backend.PodMetrics) ([]*backend.PodMetrics, error) {
 		filtered := []*backend.PodMetrics{}
 		for _, pod := range pods {
@@ -152,6 +155,12 @@ func leastKVCacheFilterFunc(req *LLMRequest, pods []*backend.PodMetrics) ([]*bac
 	return filtered, nil
 }
 
+func dropRequestFilterFunc(req *LLMRequest, pods []*backend.PodMetrics) ([]*backend.PodMetrics, error) {
+	klog.Infof("Dropping request %v", req)
+	return []*backend.PodMetrics{}, status.Errorf(
+		codes.ResourceExhausted, "dropping request due to limited backend resources")
+}
+
 // podPredicate is a filter function to check whether a pod is desired.
 type podPredicate func(req *LLMRequest, pod *backend.PodMetrics) bool
 
 
@@ -15,11 +15,11 @@ func TestFilter(t *testing.T) {
 		input  []*backend.PodMetrics
 		output []*backend.PodMetrics
 		err    bool
-		filter *filter
+		filter *filterChainImpl
 	}{
 		{
 			name: "simple filter without successor, failure",
-			filter: &filter{filter: func(req *LLMRequest, pods []*backend.PodMetrics) ([]*backend.PodMetrics, error) {
+			filter: &filterChainImpl{filter: func(req *LLMRequest, pods []*backend.PodMetrics) ([]*backend.PodMetrics, error) {
 				return nil, errors.New("filter error")
 			}},
 			err: true,
@@ -216,7 +216,7 @@ func TestFilter(t *testing.T) {
 func TestFilterFunc(t *testing.T) {
 	tests := []struct {
 		name   string
-		f      filterFunc
+		f      filter
 		req    *LLMRequest
 		input  []*backend.PodMetrics
 		output []*backend.PodMetrics
@@ -302,7 +302,7 @@ func TestFilterFunc(t *testing.T) {
 		},
 		{
 			name: "noQueueAndLessThanKVCacheThresholdPredicate",
-			f:    toFilterFunc(noQueueAndLessThanKVCacheThresholdPredicate(0, 0.8)),
+			f:    toFilter(noQueueAndLessThanKVCacheThresholdPredicate(0, 0.8)),
 			input: []*backend.PodMetrics{
 				{
 					// This pod should be returned.
@@ -337,7 +337,7 @@ func TestFilterFunc(t *testing.T) {
 		},
 		{
 			name: "low LoRA cost",
-			f:    toFilterFunc(lowLoRACostPredicate),
+			f:    toFilter(lowLoRACostPredicate),
 			req: &LLMRequest{
 				Model:               "model",
 				ResolvedTargetModel: "model",