Skip to content

Commit 634c1fe

Browse files
committed
- implemented kvcache-aware-scorer
- added configuration Signed-off-by: Maroon Ayoub <[email protected]>
1 parent 327171b commit 634c1fe

25 files changed

+348
-65
lines changed

Dockerfile

+21-2
Original file line numberDiff line numberDiff line change
@@ -3,28 +3,47 @@ FROM quay.io/projectquay/golang:1.24 AS builder
33
ARG TARGETOS
44
ARG TARGETARCH
55

6-
# ENV GOPROXY=https://goproxy.io,direct
6+
# Install build tools
7+
RUN dnf install -y gcc-c++ libstdc++ libstdc++-devel && dnf clean all
78

89
WORKDIR /workspace
10+
11+
## NeuralMagic internal repos pull config
12+
ARG GIT_NM_USER
13+
ARG NM_TOKEN
14+
### use git token
15+
# Writes GitHub credentials from the GIT_NM_USER / NM_TOKEN build args to ~/.netrc so `go mod download` can fetch private modules.
# NOTE(review): the file is removed by a separate, later RUN, so the token presumably still exists in this
# builder-stage layer (and build args may show up in image history) — confirm, and consider a BuildKit secret mount.
RUN echo -e "machine github.com\n\tlogin ${GIT_NM_USER}\n\tpassword ${NM_TOKEN}" >> ~/.netrc
16+
ENV GOPRIVATE=github.com/neuralmagic
17+
ENV GIT_TERMINAL_PROMPT=1
18+
919
# Copy the Go Modules manifests
1020
COPY go.mod go.mod
1121
COPY go.sum go.sum
1222
# cache deps before building and copying source so that we don't need to re-download as much
1323
# and so that source changes don't invalidate our downloaded layer
1424
RUN go mod download
25+
RUN rm -rf ~/.netrc # remove git token
1526

1627
# Copy the go source
1728
COPY cmd ./cmd
1829
COPY pkg ./pkg
1930
COPY internal ./internal
2031
COPY api ./api
2132

33+
# HuggingFace tokenizer bindings
34+
RUN mkdir -p lib
35+
RUN curl -L https://github.com/daulet/tokenizers/releases/download/v1.20.2/libtokenizers.${TARGETOS}-${TARGETARCH}.tar.gz | tar -xz -C lib
36+
RUN ranlib lib/*.a
37+
2238
# Build
2339
# GOARCH has no default value, so that the binary is built according to the host where the command
2440
# was called. For example, if we call make image-build in a local env running on Apple Silicon (M1),
2541
# the docker BUILDPLATFORM arg will be linux/arm64, whereas for Apple x86 it will be linux/amd64. Therefore,
2642
# by leaving it empty we can ensure that the container and the binary shipped in it will have the same platform.
27-
RUN CGO_ENABLED=0 GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH} go build -o bin/epp cmd/epp/main.go cmd/epp/health.go
43+
ENV CGO_ENABLED=1
44+
ENV GOOS=${TARGETOS:-linux}
45+
ENV GOARCH=${TARGETARCH}
46+
RUN go build -o bin/epp -ldflags="-extldflags '-L$(pwd)/lib'" cmd/epp/main.go cmd/epp/health.go
2847

2948
# Use distroless as minimal base image to package the manager binary
3049
# Refer to https://github.com/GoogleContainerTools/distroless for more details

Makefile

+6-1
Original file line numberDiff line numberDiff line change
@@ -489,7 +489,12 @@ buildah-build: check-builder load-version-json ## Build and push image (multi-ar
489489
.PHONY: image-build
490490
image-build: check-container-tool load-version-json ## Build container image using $(CONTAINER_TOOL)
491491
@printf "\033[33;1m==== Building container image $(IMG) ====\033[0m\n"
492-
$(CONTAINER_TOOL) build --build-arg TARGETOS=$(TARGETOS) --build-arg TARGETARCH=$(TARGETARCH) -t $(IMG) .
492+
$(CONTAINER_TOOL) build --platform=$(TARGETOS)/$(TARGETARCH) \
493+
--build-arg TARGETOS=$(TARGETOS) \
494+
--build-arg TARGETARCH=$(TARGETARCH) \
495+
--build-arg GIT_NM_USER=$(GIT_NM_USER)\
496+
--build-arg NM_TOKEN=$(NM_TOKEN) \
497+
-t $(IMG) .
493498

494499
.PHONY: image-push
495500
image-push: check-container-tool load-version-json ## Push container image $(IMG) to registry

README.md

+17
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,23 @@
66

77
This project offers tools for AI Inference, enabling developers to build [Inference Gateways].
88

9+
---
10+
## Temporary Fork Configuration
11+
12+
To enable KVCacheAwareScorer, the following env vars must be configured:
13+
```
14+
export ENABLE_KVCACHE_AWARE_SCORER=true
15+
export KVCACHE_AWARE_SCORER_WEIGHT=1.0
16+
export KVCACHE_INDEXER_REDIS_ADDR=<redis-service>
17+
export HF_TOKEN=<HuggingFace Token that has access to the vLLM models>
18+
```
19+
20+
To enable LoadAwareScorer, the following env vars must be configured:
21+
```
22+
export ENABLE_LOAD_AWARE_SCORER=true
23+
export LOAD_AWARE_SCORER_WEIGHT=1.0
24+
```
25+
---
926
[Inference Gateways]:#concepts-and-definitions
1027

1128
## Concepts and Definitions

cmd/epp/main.go

-1
Original file line numberDiff line numberDiff line change
@@ -314,5 +314,4 @@ func verifyMetricMapping(mapping backendmetrics.MetricMapping, logger logr.Logge
314314
if mapping.LoraRequestInfo == nil {
315315
logger.Info("Not scraping metric: LoraRequestInfo")
316316
}
317-
318317
}

go.mod

+13-6
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,15 @@
11
module sigs.k8s.io/gateway-api-inference-extension
22

3-
go 1.24.0
3+
go 1.24.1
4+
5+
toolchain go1.24.2
46

57
require (
68
github.com/elastic/crd-ref-docs v0.1.0
79
github.com/envoyproxy/go-control-plane/envoy v1.32.4
810
github.com/go-logr/logr v1.4.2
911
github.com/google/go-cmp v0.7.0
12+
github.com/neuralmagic/llm-d-kv-cache-manager v0.0.0-20250430102735-86595011431d
1013
github.com/onsi/ginkgo/v2 v2.23.4
1114
github.com/onsi/gomega v1.37.0
1215
github.com/prometheus/client_golang v1.22.0
@@ -41,7 +44,9 @@ require (
4144
github.com/cenkalti/backoff/v4 v4.3.0 // indirect
4245
github.com/cespare/xxhash/v2 v2.3.0 // indirect
4346
github.com/cncf/xds/go v0.0.0-20241223141626-cff3c89139a3 // indirect
47+
github.com/daulet/tokenizers v1.20.2 // indirect
4448
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
49+
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect
4550
github.com/emicklei/go-restful/v3 v3.11.0 // indirect
4651
github.com/envoyproxy/protoc-gen-validate v1.2.1 // indirect
4752
github.com/evanphx/json-patch/v5 v5.9.11 // indirect
@@ -69,6 +74,7 @@ require (
6974
github.com/google/uuid v1.6.0 // indirect
7075
github.com/gorilla/websocket v1.5.0 // indirect
7176
github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0 // indirect
77+
github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect
7278
github.com/huandu/xstrings v1.3.3 // indirect
7379
github.com/imdario/mergo v0.3.11 // indirect
7480
github.com/inconshreveable/mousetrap v1.1.0 // indirect
@@ -90,6 +96,7 @@ require (
9096
github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 // indirect
9197
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
9298
github.com/prometheus/procfs v0.15.1 // indirect
99+
github.com/redis/go-redis/v9 v9.7.3 // indirect
93100
github.com/spf13/cobra v1.8.1 // indirect
94101
github.com/spf13/pflag v1.0.5 // indirect
95102
github.com/stoewer/go-strcase v1.3.0 // indirect
@@ -104,15 +111,15 @@ require (
104111
go.opentelemetry.io/otel/trace v1.34.0 // indirect
105112
go.opentelemetry.io/proto/otlp v1.3.1 // indirect
106113
go.uber.org/automaxprocs v1.6.0 // indirect
107-
golang.org/x/crypto v0.36.0 // indirect
114+
golang.org/x/crypto v0.37.0 // indirect
108115
golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 // indirect
109116
golang.org/x/mod v0.24.0 // indirect
110-
golang.org/x/net v0.38.0 // indirect
117+
golang.org/x/net v0.39.0 // indirect
111118
golang.org/x/oauth2 v0.27.0 // indirect
112-
golang.org/x/sync v0.12.0 // indirect
119+
golang.org/x/sync v0.13.0 // indirect
113120
golang.org/x/sys v0.32.0 // indirect
114-
golang.org/x/term v0.30.0 // indirect
115-
golang.org/x/text v0.23.0 // indirect
121+
golang.org/x/term v0.31.0 // indirect
122+
golang.org/x/text v0.24.0 // indirect
116123
golang.org/x/time v0.7.0 // indirect
117124
golang.org/x/tools v0.31.0 // indirect
118125
golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 // indirect

go.sum

+24-12
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,10 @@ github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
1616
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
1717
github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM=
1818
github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ=
19+
github.com/bsm/ginkgo/v2 v2.12.0 h1:Ny8MWAHyOepLGlLKYmXG4IEkioBysk6GpaRTLC8zwWs=
20+
github.com/bsm/ginkgo/v2 v2.12.0/go.mod h1:SwYbGRRDovPVboqFv0tPTcG1sN61LM1Z4ARdbAV9g4c=
21+
github.com/bsm/gomega v1.27.10 h1:yeMWxP2pV2fG3FgAODIY8EiRE3dy0aeFYt4l7wh6yKA=
22+
github.com/bsm/gomega v1.27.10/go.mod h1:JyEr/xRbxbtgWNi8tIEVPUYZ5Dzef52k01W3YH0H+O0=
1923
github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8=
2024
github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE=
2125
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
@@ -24,10 +28,14 @@ github.com/cncf/xds/go v0.0.0-20241223141626-cff3c89139a3 h1:boJj011Hh+874zpIySe
2428
github.com/cncf/xds/go v0.0.0-20241223141626-cff3c89139a3/go.mod h1:W+zGtBO5Y1IgJhy4+A9GOqVhqLpfZi+vwmdNXUehLA8=
2529
github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
2630
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
31+
github.com/daulet/tokenizers v1.20.2 h1:tlq/vIOiBTKDPets3596aFvmJYLn3XI6LFKq4q9LKhQ=
32+
github.com/daulet/tokenizers v1.20.2/go.mod h1:tGnMdZthXdcWY6DGD07IygpwJqiPvG85FQUnhs/wSCs=
2733
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
2834
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
2935
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM=
3036
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
37+
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/rVNCu3HqELle0jiPLLBs70cWOduZpkS1E78=
38+
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc=
3139
github.com/elastic/crd-ref-docs v0.1.0 h1:Cr5kz89QB3Iuuj7dhAfLMApCrChEGAaIBTxGk/xuRKw=
3240
github.com/elastic/crd-ref-docs v0.1.0/go.mod h1:X83mMBdJt05heJUYiS3T0yJ/JkCuliuhSUNav5Gjo/U=
3341
github.com/emicklei/go-restful/v3 v3.11.0 h1:rAQeMHw1c7zTmncogyy8VvRZwtkmkZ4FxERmMY4rD+g=
@@ -100,6 +108,8 @@ github.com/gorilla/websocket v1.5.0 h1:PPwGk2jz7EePpoHN/+ClbZu8SPxiqlu12wZP/3sWm
100108
github.com/gorilla/websocket v1.5.0/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
101109
github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0 h1:bkypFPDjIYGfCYD5mRBvpqxfYX1YCS1PXdKYWi8FsN0=
102110
github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0/go.mod h1:P+Lt/0by1T8bfcF3z737NnSbmxQAppXMRziHUxPOC8k=
111+
github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k=
112+
github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM=
103113
github.com/huandu/xstrings v1.3.3 h1:/Gcsuc1x8JVbJ9/rlye4xZnVAbEkGauT8lbebqcQws4=
104114
github.com/huandu/xstrings v1.3.3/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE=
105115
github.com/imdario/mergo v0.3.11 h1:3tnifQM4i+fbajXKBHXWEH+KvNHqojZ778UH75j3bGA=
@@ -147,6 +157,8 @@ github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq
147157
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
148158
github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f h1:y5//uYreIhSUg3J1GEMiLbxo1LJaP8RfCpH6pymGZus=
149159
github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f/go.mod h1:ZdcZmHo+o7JKHSa8/e818NopupXU1YMK5fe1lsApnBw=
160+
github.com/neuralmagic/llm-d-kv-cache-manager v0.0.0-20250430102735-86595011431d h1:6YSxvAG4ve5jy0nTLs509OMU5fuiQ3JNQdZxqiu8PgQ=
161+
github.com/neuralmagic/llm-d-kv-cache-manager v0.0.0-20250430102735-86595011431d/go.mod h1:VB+KcEemkO1ZKpz/hgUPQMU9oSLv2uCLW6y6c+r8jkQ=
150162
github.com/nxadm/tail v1.4.8 h1:nPr65rt6Y5JFSKQO7qToXr7pePgD6Gwiw05lkbyAQTE=
151163
github.com/nxadm/tail v1.4.8/go.mod h1:+ncqLTQzXmGhMZNUePPaPqPvBxHAIsmXswZKocGu+AU=
152164
github.com/onsi/ginkgo v1.16.5 h1:8xi0RTUf59SOSfEtZMvwTvXYMzG4gV23XVHOZiXNtnE=
@@ -172,6 +184,8 @@ github.com/prometheus/common v0.63.0 h1:YR/EIY1o3mEFP/kZCD7iDMnLPlGyuU2Gb3HIcXnA
172184
github.com/prometheus/common v0.63.0/go.mod h1:VVFF/fBIoToEnWRVkYoXEkq3R3paCoxG9PXP74SnV18=
173185
github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc=
174186
github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk=
187+
github.com/redis/go-redis/v9 v9.7.3 h1:YpPyAayJV+XErNsatSElgRZZVCwXX9QzkKYNvO7x0wM=
188+
github.com/redis/go-redis/v9 v9.7.3/go.mod h1:bGUrSggJ9X9GUmZpZNEOQKaANxSGgOEBRltRTZHSvrA=
175189
github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII=
176190
github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o=
177191
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
@@ -226,8 +240,8 @@ go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E=
226240
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
227241
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
228242
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
229-
golang.org/x/crypto v0.36.0 h1:AnAEvhDddvBdpY+uR+MyHmuZzzNqXSe/GvuDeob5L34=
230-
golang.org/x/crypto v0.36.0/go.mod h1:Y4J0ReaxCR1IMaabaSMugxJES1EpwhBHhv2bDHklZvc=
243+
golang.org/x/crypto v0.37.0 h1:kJNSjF/Xp7kU0iB2Z+9viTPMW4EqqsrywMXLJOOsXSE=
244+
golang.org/x/crypto v0.37.0/go.mod h1:vg+k43peMZ0pUMhYmVAWysMK35e6ioLh3wB8ZCAfbVc=
231245
golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 h1:2dVuKD2vS7b0QIHQbpyTISPd0LeHDbnYEryqj5Q1ug8=
232246
golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56/go.mod h1:M4RDyNAINzryxdtnbRXRL/OHtkFuWGRjvuhBJpk2IlY=
233247
golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
@@ -238,31 +252,29 @@ golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn
238252
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
239253
golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
240254
golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
241-
golang.org/x/net v0.38.0 h1:vRMAPTMaeGqVhG5QyLJHqNDwecKTomGeqbnfZyKlBI8=
242-
golang.org/x/net v0.38.0/go.mod h1:ivrbrMbzFq5J41QOQh0siUuly180yBYtLp+CKbEaFx8=
243-
golang.org/x/oauth2 v0.25.0 h1:CY4y7XT9v0cRI9oupztF8AgiIu99L/ksR/Xp/6jrZ70=
244-
golang.org/x/oauth2 v0.25.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI=
255+
golang.org/x/net v0.39.0 h1:ZCu7HMWDxpXpaiKdhzIfaltL9Lp31x/3fCP11bc6/fY=
256+
golang.org/x/net v0.39.0/go.mod h1:X7NRbYVEA+ewNkCNyJ513WmMdQ3BineSwVtN2zD/d+E=
245257
golang.org/x/oauth2 v0.27.0 h1:da9Vo7/tDv5RH/7nZDz1eMGS/q1Vv1N/7FCrBhI9I3M=
246258
golang.org/x/oauth2 v0.27.0/go.mod h1:onh5ek6nERTohokkhCD/y2cV4Do3fxFHFuAejCkRWT8=
247259
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
248260
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
249261
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
250-
golang.org/x/sync v0.12.0 h1:MHc5BpPuC30uJk597Ri8TV3CNZcTLu6B6z4lJy+g6Jw=
251-
golang.org/x/sync v0.12.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
262+
golang.org/x/sync v0.13.0 h1:AauUjRAJ9OSnvULf/ARrrVywoJDy0YS2AwQ98I37610=
263+
golang.org/x/sync v0.13.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
252264
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
253265
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
254266
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
255267
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
256268
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
257269
golang.org/x/sys v0.32.0 h1:s77OFDvIQeibCmezSnk/q6iAfkdiQaJi4VzroCFrN20=
258270
golang.org/x/sys v0.32.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
259-
golang.org/x/term v0.30.0 h1:PQ39fJZ+mfadBm0y5WlL4vlM7Sx1Hgf13sMIY2+QS9Y=
260-
golang.org/x/term v0.30.0/go.mod h1:NYYFdzHoI5wRh/h5tDMdMqCqPJZEuNqVR5xJLd/n67g=
271+
golang.org/x/term v0.31.0 h1:erwDkOK1Msy6offm1mOgvspSkslFnIGsFnxOKoufg3o=
272+
golang.org/x/term v0.31.0/go.mod h1:R4BeIy7D95HzImkxGkTW1UQTtP54tio2RyHz7PwK0aw=
261273
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
262274
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
263275
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
264-
golang.org/x/text v0.23.0 h1:D71I7dUrlY+VX0gQShAThNGHFxZ13dGLBHQLVl1mJlY=
265-
golang.org/x/text v0.23.0/go.mod h1:/BLNzu4aZCJ1+kcD0DNRotWKage4q2rGVAg4o22unh4=
276+
golang.org/x/text v0.24.0 h1:dd5Bzh4yt5KYA8f9CJHCP4FB4D51c2c6JvN37xJJkJ0=
277+
golang.org/x/text v0.24.0/go.mod h1:L8rBsPeo2pSS+xqN0d5u2ikmjtmoJbDBT1b7nHvFCdU=
266278
golang.org/x/time v0.7.0 h1:ntUhktv3OPE6TgYxXWv9vKvUSJyIFJlyohwbkEwPrKQ=
267279
golang.org/x/time v0.7.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM=
268280
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=

pkg/epp/backend/metrics/fake.go

+2
Original file line numberDiff line numberDiff line change
@@ -40,9 +40,11 @@ func (fpm *FakePodMetrics) String() string {
4040
func (fpm *FakePodMetrics) GetPod() *Pod {
4141
return fpm.Pod
4242
}
43+
4344
func (fpm *FakePodMetrics) GetMetrics() *Metrics {
4445
return fpm.Metrics
4546
}
47+
4648
func (fpm *FakePodMetrics) UpdatePod(pod *corev1.Pod) {
4749
fpm.Pod = toInternalPod(pod)
4850
}

pkg/epp/backend/metrics/metrics.go

-1
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,6 @@ func (p *PodMetricsClientImpl) FetchMetrics(
4747
existing *Metrics,
4848
port int32,
4949
) (*Metrics, error) {
50-
5150
// Currently the metrics endpoint is hard-coded, which works with vLLM.
5251
// TODO(https://github.com/kubernetes-sigs/gateway-api-inference-extension/issues/16): Consume this from InferencePool config.
5352
url := "http://" + pod.Address + ":" + strconv.Itoa(int(port)) + "/metrics"

pkg/epp/backend/metrics/metrics_test.go

-3
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,6 @@ func makeMetricFamily(name string, metrics ...*dto.Metric) *dto.MetricFamily {
5858
// --- Tests ---
5959

6060
func TestGetMetric(t *testing.T) {
61-
6261
metricFamilies := map[string]*dto.MetricFamily{
6362
"metric1": makeMetricFamily("metric1",
6463
makeMetric(map[string]string{"label1": "value1"}, 1.0, 1000),
@@ -166,7 +165,6 @@ func TestGetMetric(t *testing.T) {
166165

167166
for _, tt := range tests {
168167
t.Run(tt.name, func(t *testing.T) {
169-
170168
gotMetric, err := p.getMetric(metricFamilies, tt.spec)
171169

172170
if tt.wantError {
@@ -240,7 +238,6 @@ func TestLabelsMatch(t *testing.T) {
240238
}
241239

242240
func TestGetLatestLoraMetric(t *testing.T) {
243-
244241
testCases := []struct {
245242
name string
246243
metricFamilies map[string]*dto.MetricFamily

pkg/epp/backend/metrics/pod_metrics_test.go

+2
Original file line numberDiff line numberDiff line change
@@ -88,10 +88,12 @@ type fakeDataStore struct{}
8888
func (f *fakeDataStore) PoolGet() (*v1alpha2.InferencePool, error) {
8989
return &v1alpha2.InferencePool{Spec: v1alpha2.InferencePoolSpec{TargetPortNumber: 8000}}, nil
9090
}
91+
9192
func (f *fakeDataStore) PodGetAll() []PodMetrics {
9293
// Not implemented.
9394
return nil
9495
}
96+
9597
func (f *fakeDataStore) PodList(func(PodMetrics) bool) []PodMetrics {
9698
// Not implemented.
9799
return nil

pkg/epp/controller/inferencemodel_reconciler_test.go

-1
Original file line numberDiff line numberDiff line change
@@ -227,7 +227,6 @@ func TestInferenceModelReconciler(t *testing.T) {
227227
if diff := diffStore(ds, diffStoreParams{wantPool: pool, wantModels: test.wantModels}); diff != "" {
228228
t.Errorf("Unexpected diff (+got/-want): %s", diff)
229229
}
230-
231230
})
232231
}
233232
}

pkg/epp/datastore/datastore.go

+1-3
Original file line numberDiff line numberDiff line change
@@ -38,9 +38,7 @@ const (
3838
ModelNameIndexKey = "spec.modelName"
3939
)
4040

41-
var (
42-
errPoolNotSynced = errors.New("InferencePool is not initialized in data store")
43-
)
41+
var errPoolNotSynced = errors.New("InferencePool is not initialized in data store")
4442

4543
// Datastore is a local cache of relevant data for the given InferencePool (currently all pulled from the k8s API).
4644
type Datastore interface {

pkg/epp/datastore/datastore_test.go

-2
Original file line numberDiff line numberDiff line change
@@ -204,7 +204,6 @@ func TestModel(t *testing.T) {
204204
existing := ds.ModelDelete(types.NamespacedName{Name: model1ts.Name, Namespace: model1ts.Namespace})
205205
got := ds.ModelGet(tsModel)
206206
return existing != nil && got == nil
207-
208207
},
209208
wantOpResult: true,
210209
wantModels: []*v1alpha2.InferenceModel{model2chat},
@@ -226,7 +225,6 @@ func TestModel(t *testing.T) {
226225
if diff := testutil.DiffModelLists(test.wantModels, ds.ModelGetAll()); diff != "" {
227226
t.Errorf("Unexpected models diff: %s", diff)
228227
}
229-
230228
})
231229
}
232230
}

pkg/epp/handlers/request.go

+7
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@ import (
3131
logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging"
3232
)
3333

34+
const emptyPrompt = ""
35+
3436
// HandleRequestBody always returns the requestContext even in the error case, as the request context is used in error handling.
3537
func (s *StreamingServer) HandleRequestBody(
3638
ctx context.Context,
@@ -68,6 +70,7 @@ func (s *StreamingServer) HandleRequestBody(
6870
Headers: reqCtx.RequestHeaders,
6971
ResolvedTargetModel: modelName,
7072
Critical: modelObj.Spec.Criticality != nil && *modelObj.Spec.Criticality == v1alpha2.Critical,
73+
Prompt: emptyPrompt,
7174
}
7275
logger.V(logutil.DEBUG).Info("LLM request assembled", "request", llmReq)
7376

@@ -76,6 +79,10 @@ func (s *StreamingServer) HandleRequestBody(
7679
if llmReq.Model != llmReq.ResolvedTargetModel {
7780
requestBodyMap["model"] = llmReq.ResolvedTargetModel
7881
}
82+
// Extract prompt from the request body.
83+
if prompt, ok := requestBodyMap["prompt"].(string); ok {
84+
llmReq.Prompt = prompt
85+
}
7986

8087
requestBodyBytes, err = json.Marshal(requestBodyMap)
8188
if err != nil {

0 commit comments

Comments
 (0)