Skip to content

Commit 2612ead

Browse files
committed
changes for multilora
1 parent c1cbfa1 commit 2612ead

File tree

2 files changed

+2
-38
lines changed

2 files changed

+2
-38
lines changed

pkg/epp/scheduling/filter.go

Lines changed: 1 addition & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -20,14 +20,10 @@ import (
2020
"errors"
2121
"math"
2222

23-
<<<<<<< HEAD:pkg/epp/scheduling/filter.go
2423
"github.com/go-logr/logr"
25-
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore"
26-
logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging"
27-
=======
2824
klog "k8s.io/klog/v2"
25+
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore"
2926
"sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/backend"
30-
>>>>>>> 78c0740 (changes for multilora):pkg/ext-proc/scheduling/filter.go
3127
)
3228

3329
type Filter interface {
@@ -66,14 +62,8 @@ func (f *filter) Name() string {
6662
return f.name
6763
}
6864

69-
<<<<<<< HEAD:pkg/epp/scheduling/filter.go
70-
func (f *filter) Filter(logger logr.Logger, req *LLMRequest, pods []*datastore.PodMetrics) ([]*datastore.PodMetrics, error) {
71-
loggerTrace := logger.V(logutil.TRACE)
72-
loggerTrace.Info("Running a filter", "name", f.Name(), "podCount", len(pods))
73-
=======
7465
func (f *filter) Filter(req *LLMRequest, pods []*backend.PodMetrics) ([]*backend.PodMetrics, error) {
7566
klog.InfoS("Running a filter", "name", f.Name(), "request", req, "podCount", len(pods))
76-
>>>>>>> 78c0740 (changes for multilora):pkg/ext-proc/scheduling/filter.go
7767

7868
filtered, err := f.filter(logger, req, pods)
7969

@@ -86,12 +76,8 @@ func (f *filter) Filter(req *LLMRequest, pods []*backend.PodMetrics) ([]*backend
8676
if f.nextOnSuccess != nil {
8777
next = f.nextOnSuccess
8878
}
89-
<<<<<<< HEAD:pkg/epp/scheduling/filter.go
90-
loggerTrace.Info("Filter succeeded", "filter", f.Name(), "next", next.Name(), "filteredPodCount", len(filtered))
91-
=======
9279
klog.InfoS("Filter succeeded", "filter", f.Name(), "next", next.Name(), "filteredPodCount", len(filtered))
9380

94-
>>>>>>> 78c0740 (changes for multilora):pkg/ext-proc/scheduling/filter.go
9581
// On success, pass the filtered result to the next filter.
9682
return next.Filter(logger, req, filtered)
9783
} else {
@@ -102,11 +88,7 @@ func (f *filter) Filter(req *LLMRequest, pods []*backend.PodMetrics) ([]*backend
10288
if f.nextOnFailure != nil {
10389
next = f.nextOnFailure
10490
}
105-
<<<<<<< HEAD:pkg/epp/scheduling/filter.go
106-
loggerTrace.Info("Filter failed", "filter", f.Name(), "next", next.Name())
107-
=======
10891
klog.InfoS("Filter failed", "filter", f.Name(), "next", next.Name())
109-
>>>>>>> 78c0740 (changes for multilora):pkg/ext-proc/scheduling/filter.go
11092
// On failure, pass the initial set of pods to the next filter.
11193
return next.Filter(logger, req, pods)
11294
}
@@ -229,13 +211,8 @@ func loRAAffinityPredicate(req *LLMRequest, pod *datastore.PodMetrics) bool {
229211
}
230212

231213
// canAcceptNewLoraPredicate is a filter function to check whether a pod has room to load the adapter.
232-
<<<<<<< HEAD:pkg/epp/scheduling/filter.go
233-
func canAcceptNewLoraPredicate(req *LLMRequest, pod *datastore.PodMetrics) bool {
234-
return len(pod.ActiveModels) < pod.MaxActiveModels
235-
=======
236214
func canAcceptNewLoraPredicate(req *LLMRequest, pod *backend.PodMetrics) bool {
237215
return len(pod.ActiveModels) < maxLoRACost
238-
>>>>>>> 78c0740 (changes for multilora):pkg/ext-proc/scheduling/filter.go
239216
}
240217

241218
func criticalRequestPredicate(req *LLMRequest, pod *datastore.PodMetrics) bool {

pkg/epp/scheduling/scheduler.go

Lines changed: 1 addition & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -18,12 +18,11 @@ limitations under the License.
1818
package scheduling
1919

2020
import (
21-
"context"
2221
"fmt"
2322
"math/rand"
2423

2524
"github.com/go-logr/logr"
26-
"sigs.k8s.io/controller-runtime/pkg/log"
25+
klog "k8s.io/klog/v2"
2726
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore"
2827
errutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/error"
2928
logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging"
@@ -122,26 +121,14 @@ type Scheduler struct {
122121
}
123122

124123
// Schedule finds the target pod based on metrics and the requested lora adapter.
125-
<<<<<<< HEAD:pkg/epp/scheduling/scheduler.go
126-
func (s *Scheduler) Schedule(ctx context.Context, req *LLMRequest) (targetPod datastore.PodMetrics, err error) {
127-
logger := log.FromContext(ctx).WithValues("request", req)
128-
podMetrics := s.datastore.PodGetAll()
129-
logger.V(logutil.VERBOSE).Info("Scheduling a request", "metrics", podMetrics)
130-
pods, err := s.filter.Filter(logger, req, podMetrics)
131-
=======
132124
func (s *Scheduler) Schedule(req *LLMRequest) (targetPod backend.Pod, err error) {
133125
klog.InfoS("Scheduling a request", "request", req, "metrics", s.podMetricsProvider.AllPodMetrics())
134126
pods, err := s.filter.Filter(req, s.podMetricsProvider.AllPodMetrics())
135-
>>>>>>> 78c0740 (changes for multilora):pkg/ext-proc/scheduling/scheduler.go
136127
if err != nil || len(pods) == 0 {
137128
return datastore.PodMetrics{}, fmt.Errorf(
138129
"failed to apply filter, resulted %v pods, this should never happen: %w", len(pods), err)
139130
}
140-
<<<<<<< HEAD:pkg/epp/scheduling/scheduler.go
141-
logger.V(logutil.VERBOSE).Info("Selecting a random pod from the candidates", "candidatePods", pods)
142-
=======
143131
klog.InfoS("Selecting a random pod from the candidates", "candidatePods", pods)
144-
>>>>>>> 78c0740 (changes for multilora):pkg/ext-proc/scheduling/scheduler.go
145132
i := rand.Intn(len(pods))
146133
return *pods[i], nil
147134
}

0 commit comments

Comments
 (0)