|
| 1 | +/* |
| 2 | +Copyright 2025 The Kubernetes Authors. |
| 3 | +
|
| 4 | +Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | +you may not use this file except in compliance with the License. |
| 6 | +You may obtain a copy of the License at |
| 7 | +
|
| 8 | + http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | +
|
| 10 | +Unless required by applicable law or agreed to in writing, software |
| 11 | +distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | +See the License for the specific language governing permissions and |
| 14 | +limitations under the License. |
| 15 | +*/ |
| 16 | +package scorers |
| 17 | + |
| 18 | +import ( |
| 19 | + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/config" |
| 20 | + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/plugins" |
| 21 | + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/types" |
| 22 | +) |
| 23 | + |
// LoadBasedScorer is a scorer plugin that rates pods by the number of requests
// waiting in their queue. It has no fields, so its zero value is ready to use.
type LoadBasedScorer struct{}
| 25 | + |
| 26 | +func NewLoadBasedScorer() plugins.Scorer { |
| 27 | + return LoadBasedScorer{} |
| 28 | +} |
| 29 | + |
| 30 | +func (s LoadBasedScorer) Name() string { |
| 31 | + return "load based scorer" |
| 32 | +} |
| 33 | + |
| 34 | +// Score scores the given pod in range of 0-1 |
| 35 | +// Currently metrics contains number of requests waiting in the queue, there is no information about number of requests |
| 36 | +// that can be processed in the given pod immediately. |
| 37 | +// Pod with empty waiting requests queue is scored with 0.5 |
| 38 | +// Pod with requests in the queue will get score between 0.5 and 0. |
| 39 | +// Score 0 will get pod with number of requests in the queue equal to the threshold used in load-based filter (QueueingThresholdLoRA) |
| 40 | +// In future pods with additional capacity will get score higher than 0.5 |
| 41 | +func (s LoadBasedScorer) Score(ctx *types.SchedulingContext, pods []types.Pod) map[types.Pod]float64 { |
| 42 | + scoredPods := make(map[types.Pod]float64) |
| 43 | + |
| 44 | + for _, pod := range pods { |
| 45 | + waitingRequests := float64(pod.GetMetrics().WaitingQueueSize) |
| 46 | + |
| 47 | + if waitingRequests == 0 { |
| 48 | + scoredPods[pod] = 0.5 |
| 49 | + } else { |
| 50 | + scoredPods[pod] = 0.5 * (1.0 - (waitingRequests / float64(config.Conf.QueueingThresholdLoRA))) |
| 51 | + } |
| 52 | + } |
| 53 | + |
| 54 | + return scoredPods |
| 55 | +} |
0 commit comments