diff --git a/Dockerfile b/Dockerfile index 5d6f08a50..4adc82e4c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,7 +4,7 @@ ARG BUILDER_IMAGE=golang:1.23-alpine ARG BASE_IMAGE=gcr.io/distroless/base-debian10 ## Multistage build -FROM ${BUILDER_IMAGE} as builder +FROM ${BUILDER_IMAGE} AS builder ENV CGO_ENABLED=0 ENV GOOS=linux ENV GOARCH=amd64 @@ -19,13 +19,13 @@ COPY cmd ./cmd COPY pkg ./pkg COPY internal ./internal COPY api ./api -WORKDIR /src/cmd/ext-proc -RUN go build -o /ext-proc +WORKDIR /src/cmd/epp +RUN go build -o /epp ## Multistage deploy FROM ${BASE_IMAGE} WORKDIR / -COPY --from=builder /ext-proc /ext-proc +COPY --from=builder /epp /epp -ENTRYPOINT ["/ext-proc"] \ No newline at end of file +ENTRYPOINT ["/epp"] diff --git a/cmd/ext-proc/health.go b/cmd/epp/health.go similarity index 91% rename from cmd/ext-proc/health.go rename to cmd/epp/health.go index 26a58df8c..335c0849c 100644 --- a/cmd/ext-proc/health.go +++ b/cmd/epp/health.go @@ -23,8 +23,8 @@ import ( "google.golang.org/grpc/codes" healthPb "google.golang.org/grpc/health/grpc_health_v1" "google.golang.org/grpc/status" - "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/datastore" - logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore" + logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging" ) type healthServer struct { diff --git a/cmd/ext-proc/main.go b/cmd/epp/main.go similarity index 95% rename from cmd/ext-proc/main.go rename to cmd/epp/main.go index 047a1fa7a..a189984b0 100644 --- a/cmd/ext-proc/main.go +++ b/cmd/epp/main.go @@ -41,12 +41,12 @@ import ( "sigs.k8s.io/controller-runtime/pkg/metrics/filters" "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha1" "sigs.k8s.io/gateway-api-inference-extension/internal/runnable" - "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/backend" - "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/backend/vllm" - 
"sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/datastore" - "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/metrics" - runserver "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/server" - "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend/vllm" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/metrics" + runserver "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/server" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging" ) const ( diff --git a/docs/dev.md b/docs/dev.md index 2af396688..d223ed6ab 100644 --- a/docs/dev.md +++ b/docs/dev.md @@ -37,7 +37,7 @@ const( ) ``` -The guidelines are written in the context of a k8s controller. Our [ext-proc](../pkg/ext-proc/) does more things such as handling requests and scraping metrics, therefore we adapt the guidelines as follows: +The guidelines are written in the context of a k8s controller. Our [epp](../pkg/epp/) does more things such as handling requests and scraping metrics, therefore we adapt the guidelines as follows: 1. The server startup process and configuration. diff --git a/docs/proposals/003-endpoint-picker-protocol/README.md b/docs/proposals/003-endpoint-picker-protocol/README.md index 8e96a630c..6876135d2 100644 --- a/docs/proposals/003-endpoint-picker-protocol/README.md +++ b/docs/proposals/003-endpoint-picker-protocol/README.md @@ -2,7 +2,7 @@ The Endpoint Picker, or EPP, is a core component of the inference extension. Ultimately it's responsible for picking an endpoint from the `InferencePool`. A reference implementation can be -found [here](../../../pkg/ext-proc/). +found [here](../../../pkg/epp/). 
## Proxy Protocol diff --git a/pkg/ext-proc/backend/fake.go b/pkg/epp/backend/fake.go similarity index 90% rename from pkg/ext-proc/backend/fake.go rename to pkg/epp/backend/fake.go index 2de34c16c..e81b38177 100644 --- a/pkg/ext-proc/backend/fake.go +++ b/pkg/epp/backend/fake.go @@ -22,8 +22,8 @@ import ( "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha1" - "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/datastore" - logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore" + logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging" ) type FakePodMetricsClient struct { diff --git a/pkg/ext-proc/backend/provider.go b/pkg/epp/backend/provider.go similarity index 95% rename from pkg/ext-proc/backend/provider.go rename to pkg/epp/backend/provider.go index 974319f7a..a12f84d5c 100644 --- a/pkg/ext-proc/backend/provider.go +++ b/pkg/epp/backend/provider.go @@ -25,9 +25,9 @@ import ( "github.com/go-logr/logr" "go.uber.org/multierr" "sigs.k8s.io/controller-runtime/pkg/log" - "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/datastore" - "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/metrics" - logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/metrics" + logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging" ) const ( diff --git a/pkg/ext-proc/backend/provider_test.go b/pkg/epp/backend/provider_test.go similarity index 98% rename from pkg/ext-proc/backend/provider_test.go rename to pkg/epp/backend/provider_test.go index 7736dd8dd..1e11afe2c 100644 --- a/pkg/ext-proc/backend/provider_test.go +++ b/pkg/epp/backend/provider_test.go @@ -27,7 +27,7 @@ import ( "github.com/google/go-cmp/cmp/cmpopts" 
"github.com/stretchr/testify/assert" "k8s.io/apimachinery/pkg/types" - "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/datastore" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore" ) var ( diff --git a/pkg/ext-proc/backend/vllm/metrics.go b/pkg/epp/backend/vllm/metrics.go similarity index 97% rename from pkg/ext-proc/backend/vllm/metrics.go rename to pkg/epp/backend/vllm/metrics.go index 59a132c8d..8648e24ce 100644 --- a/pkg/ext-proc/backend/vllm/metrics.go +++ b/pkg/epp/backend/vllm/metrics.go @@ -30,8 +30,8 @@ import ( "github.com/prometheus/common/expfmt" "go.uber.org/multierr" "sigs.k8s.io/controller-runtime/pkg/log" - "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/datastore" - logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore" + logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging" ) const ( diff --git a/pkg/ext-proc/backend/vllm/metrics_test.go b/pkg/epp/backend/vllm/metrics_test.go similarity index 97% rename from pkg/ext-proc/backend/vllm/metrics_test.go rename to pkg/epp/backend/vllm/metrics_test.go index 1c9d54489..12aac1a1e 100644 --- a/pkg/ext-proc/backend/vllm/metrics_test.go +++ b/pkg/epp/backend/vllm/metrics_test.go @@ -23,8 +23,8 @@ import ( dto "github.com/prometheus/client_model/go" "github.com/stretchr/testify/assert" "google.golang.org/protobuf/proto" - "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/datastore" - logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore" + logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging" ) func TestPromToPodMetrics(t *testing.T) { diff --git a/pkg/ext-proc/controller/inferencemodel_reconciler.go b/pkg/epp/controller/inferencemodel_reconciler.go similarity index 95% rename from pkg/ext-proc/controller/inferencemodel_reconciler.go rename to 
pkg/epp/controller/inferencemodel_reconciler.go index cca05fcef..99a1eb26e 100644 --- a/pkg/ext-proc/controller/inferencemodel_reconciler.go +++ b/pkg/epp/controller/inferencemodel_reconciler.go @@ -28,8 +28,8 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha1" - "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/datastore" - logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore" + logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging" ) type InferenceModelReconciler struct { diff --git a/pkg/ext-proc/controller/inferencemodel_reconciler_test.go b/pkg/epp/controller/inferencemodel_reconciler_test.go similarity index 98% rename from pkg/ext-proc/controller/inferencemodel_reconciler_test.go rename to pkg/epp/controller/inferencemodel_reconciler_test.go index 583f5f750..cf94b168f 100644 --- a/pkg/ext-proc/controller/inferencemodel_reconciler_test.go +++ b/pkg/epp/controller/inferencemodel_reconciler_test.go @@ -29,8 +29,8 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha1" - "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/datastore" - logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore" + logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging" ) var ( diff --git a/pkg/ext-proc/controller/inferencepool_reconciler.go b/pkg/epp/controller/inferencepool_reconciler.go similarity index 96% rename from pkg/ext-proc/controller/inferencepool_reconciler.go rename to pkg/epp/controller/inferencepool_reconciler.go index b2cd01c0b..f2c56991c 100644 --- a/pkg/ext-proc/controller/inferencepool_reconciler.go +++ 
b/pkg/epp/controller/inferencepool_reconciler.go @@ -28,8 +28,8 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha1" - "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/datastore" - logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore" + logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging" ) // InferencePoolReconciler utilizes the controller runtime to reconcile Instance Gateway resources diff --git a/pkg/ext-proc/controller/inferencepool_reconciler_test.go b/pkg/epp/controller/inferencepool_reconciler_test.go similarity index 97% rename from pkg/ext-proc/controller/inferencepool_reconciler_test.go rename to pkg/epp/controller/inferencepool_reconciler_test.go index 925cb236d..6263fa165 100644 --- a/pkg/ext-proc/controller/inferencepool_reconciler_test.go +++ b/pkg/epp/controller/inferencepool_reconciler_test.go @@ -31,8 +31,8 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha1" - "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/datastore" - utiltesting "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/testing" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore" + utiltesting "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/testing" ) var ( diff --git a/pkg/ext-proc/controller/pod_reconciler.go b/pkg/epp/controller/pod_reconciler.go similarity index 95% rename from pkg/ext-proc/controller/pod_reconciler.go rename to pkg/epp/controller/pod_reconciler.go index 871e1da52..5b0c25c99 100644 --- a/pkg/ext-proc/controller/pod_reconciler.go +++ b/pkg/epp/controller/pod_reconciler.go @@ -28,8 +28,8 @@ import ( ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" 
"sigs.k8s.io/controller-runtime/pkg/log" - "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/datastore" - logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore" + logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging" ) type PodReconciler struct { diff --git a/pkg/ext-proc/controller/pod_reconciler_test.go b/pkg/epp/controller/pod_reconciler_test.go similarity index 99% rename from pkg/ext-proc/controller/pod_reconciler_test.go rename to pkg/epp/controller/pod_reconciler_test.go index c87ee54d5..b3869113c 100644 --- a/pkg/ext-proc/controller/pod_reconciler_test.go +++ b/pkg/epp/controller/pod_reconciler_test.go @@ -32,7 +32,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha1" - "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/datastore" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore" ) var ( diff --git a/pkg/ext-proc/datastore/datastore.go b/pkg/epp/datastore/datastore.go similarity index 98% rename from pkg/ext-proc/datastore/datastore.go rename to pkg/epp/datastore/datastore.go index 602364967..eecea59ce 100644 --- a/pkg/ext-proc/datastore/datastore.go +++ b/pkg/epp/datastore/datastore.go @@ -29,7 +29,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha1" - logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging" + logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging" ) // The datastore is a local cache of relevant data for the given InferencePool (currently all pulled from k8s-api) diff --git a/pkg/ext-proc/datastore/datastore_test.go b/pkg/epp/datastore/datastore_test.go similarity index 97% rename from pkg/ext-proc/datastore/datastore_test.go rename to 
pkg/epp/datastore/datastore_test.go index f32d8d770..bd5c50209 100644 --- a/pkg/ext-proc/datastore/datastore_test.go +++ b/pkg/epp/datastore/datastore_test.go @@ -21,7 +21,7 @@ import ( v1 "k8s.io/apimachinery/pkg/apis/meta/v1" "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha1" - logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging" + logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging" ) func TestHasSynced(t *testing.T) { diff --git a/pkg/ext-proc/datastore/types.go b/pkg/epp/datastore/types.go similarity index 100% rename from pkg/ext-proc/datastore/types.go rename to pkg/epp/datastore/types.go diff --git a/pkg/ext-proc/handlers/request.go b/pkg/epp/handlers/request.go similarity index 95% rename from pkg/ext-proc/handlers/request.go rename to pkg/epp/handlers/request.go index 34db206de..b9ffd0b02 100644 --- a/pkg/ext-proc/handlers/request.go +++ b/pkg/epp/handlers/request.go @@ -26,10 +26,10 @@ import ( extProcPb "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3" "google.golang.org/protobuf/types/known/structpb" "sigs.k8s.io/controller-runtime/pkg/log" - "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/datastore" - "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/scheduling" - errutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/error" - logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling" + errutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/error" + logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging" ) // HandleRequestBody handles body of the request to the backend server, such as parsing the "model" diff --git a/pkg/ext-proc/handlers/response.go b/pkg/epp/handlers/response.go similarity index 97% rename from pkg/ext-proc/handlers/response.go rename to 
pkg/epp/handlers/response.go index ed3082c51..f9396acf0 100644 --- a/pkg/ext-proc/handlers/response.go +++ b/pkg/epp/handlers/response.go @@ -24,8 +24,8 @@ import ( configPb "github.com/envoyproxy/go-control-plane/envoy/config/core/v3" extProcPb "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3" "sigs.k8s.io/controller-runtime/pkg/log" - errutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/error" - logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging" + errutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/error" + logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging" ) // HandleResponseHeaders processes response headers from the backend model server. diff --git a/pkg/ext-proc/handlers/response_test.go b/pkg/epp/handlers/response_test.go similarity index 97% rename from pkg/ext-proc/handlers/response_test.go rename to pkg/epp/handlers/response_test.go index dbb7e7007..01f02d094 100644 --- a/pkg/ext-proc/handlers/response_test.go +++ b/pkg/epp/handlers/response_test.go @@ -22,7 +22,7 @@ import ( extProcPb "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3" "github.com/google/go-cmp/cmp" - logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging" + logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging" ) const ( diff --git a/pkg/ext-proc/handlers/server.go b/pkg/epp/handlers/server.go similarity index 95% rename from pkg/ext-proc/handlers/server.go rename to pkg/epp/handlers/server.go index 506eaa97d..2c61118cc 100644 --- a/pkg/ext-proc/handlers/server.go +++ b/pkg/epp/handlers/server.go @@ -27,11 +27,11 @@ import ( "google.golang.org/grpc/codes" "google.golang.org/grpc/status" "sigs.k8s.io/controller-runtime/pkg/log" - "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/datastore" - "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/metrics" - 
"sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/scheduling" - errutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/error" - logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/metrics" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling" + errutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/error" + logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging" ) func NewServer(scheduler Scheduler, targetEndpointKey string, datastore datastore.Datastore) *Server { @@ -185,7 +185,6 @@ func (s *Server) Process(srv extProcPb.ExternalProcessor_ProcessServer) error { return status.Errorf(codes.Unknown, "failed to send response back to Envoy: %v", err) } } - } // RequestContext stores context information during the life time of an HTTP request. diff --git a/pkg/ext-proc/metrics/README.md b/pkg/epp/metrics/README.md similarity index 100% rename from pkg/ext-proc/metrics/README.md rename to pkg/epp/metrics/README.md diff --git a/pkg/ext-proc/metrics/metrics.go b/pkg/epp/metrics/metrics.go similarity index 98% rename from pkg/ext-proc/metrics/metrics.go rename to pkg/epp/metrics/metrics.go index cc21d531b..e86ca901e 100644 --- a/pkg/ext-proc/metrics/metrics.go +++ b/pkg/epp/metrics/metrics.go @@ -24,7 +24,7 @@ import ( compbasemetrics "k8s.io/component-base/metrics" "k8s.io/component-base/metrics/legacyregistry" "sigs.k8s.io/controller-runtime/pkg/log" - logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging" + logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging" ) const ( diff --git a/pkg/ext-proc/metrics/metrics_test.go b/pkg/epp/metrics/metrics_test.go similarity index 93% rename from pkg/ext-proc/metrics/metrics_test.go rename to pkg/epp/metrics/metrics_test.go index 2e891066b..c2436bab1 100644 --- 
a/pkg/ext-proc/metrics/metrics_test.go +++ b/pkg/epp/metrics/metrics_test.go @@ -24,8 +24,8 @@ import ( "k8s.io/component-base/metrics/legacyregistry" "k8s.io/component-base/metrics/testutil" - errutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/error" - logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging" + errutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/error" + logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging" ) const ( @@ -119,31 +119,32 @@ func TestRecordRequestErrorCounter(t *testing.T) { name string reqs []requests invalid bool - }{{ - name: "multiple requests", - reqs: []requests{ - { - modelName: "m10", - targetModelName: "t10", - error: errutil.Internal, - }, - { - modelName: "m10", - targetModelName: "t10", - error: errutil.Internal, - }, - { - modelName: "m10", - targetModelName: "t11", - error: errutil.ModelServerError, - }, - { - modelName: "m20", - targetModelName: "t20", - error: errutil.InferencePoolResourceExhausted, + }{ + { + name: "multiple requests", + reqs: []requests{ + { + modelName: "m10", + targetModelName: "t10", + error: errutil.Internal, + }, + { + modelName: "m10", + targetModelName: "t10", + error: errutil.Internal, + }, + { + modelName: "m10", + targetModelName: "t11", + error: errutil.ModelServerError, + }, + { + modelName: "m20", + targetModelName: "t20", + error: errutil.InferencePoolResourceExhausted, + }, }, }, - }, } Register() for _, scenario := range scenarios { diff --git a/pkg/ext-proc/metrics/testdata/input_tokens_metric b/pkg/epp/metrics/testdata/input_tokens_metric similarity index 100% rename from pkg/ext-proc/metrics/testdata/input_tokens_metric rename to pkg/epp/metrics/testdata/input_tokens_metric diff --git a/pkg/ext-proc/metrics/testdata/kv_cache_avg_metrics b/pkg/epp/metrics/testdata/kv_cache_avg_metrics similarity index 100% rename from pkg/ext-proc/metrics/testdata/kv_cache_avg_metrics rename to 
pkg/epp/metrics/testdata/kv_cache_avg_metrics diff --git a/pkg/ext-proc/metrics/testdata/output_tokens_metric b/pkg/epp/metrics/testdata/output_tokens_metric similarity index 100% rename from pkg/ext-proc/metrics/testdata/output_tokens_metric rename to pkg/epp/metrics/testdata/output_tokens_metric diff --git a/pkg/ext-proc/metrics/testdata/queue_avg_size_metrics b/pkg/epp/metrics/testdata/queue_avg_size_metrics similarity index 100% rename from pkg/ext-proc/metrics/testdata/queue_avg_size_metrics rename to pkg/epp/metrics/testdata/queue_avg_size_metrics diff --git a/pkg/ext-proc/metrics/testdata/request_duration_seconds_metric b/pkg/epp/metrics/testdata/request_duration_seconds_metric similarity index 100% rename from pkg/ext-proc/metrics/testdata/request_duration_seconds_metric rename to pkg/epp/metrics/testdata/request_duration_seconds_metric diff --git a/pkg/ext-proc/metrics/testdata/request_error_total_metric b/pkg/epp/metrics/testdata/request_error_total_metric similarity index 100% rename from pkg/ext-proc/metrics/testdata/request_error_total_metric rename to pkg/epp/metrics/testdata/request_error_total_metric diff --git a/pkg/ext-proc/metrics/testdata/request_sizes_metric b/pkg/epp/metrics/testdata/request_sizes_metric similarity index 100% rename from pkg/ext-proc/metrics/testdata/request_sizes_metric rename to pkg/epp/metrics/testdata/request_sizes_metric diff --git a/pkg/ext-proc/metrics/testdata/request_total_metric b/pkg/epp/metrics/testdata/request_total_metric similarity index 100% rename from pkg/ext-proc/metrics/testdata/request_total_metric rename to pkg/epp/metrics/testdata/request_total_metric diff --git a/pkg/ext-proc/metrics/testdata/response_sizes_metric b/pkg/epp/metrics/testdata/response_sizes_metric similarity index 100% rename from pkg/ext-proc/metrics/testdata/response_sizes_metric rename to pkg/epp/metrics/testdata/response_sizes_metric diff --git a/pkg/ext-proc/scheduling/filter.go b/pkg/epp/scheduling/filter.go similarity index 98% 
rename from pkg/ext-proc/scheduling/filter.go rename to pkg/epp/scheduling/filter.go index 36691a735..b7881468c 100644 --- a/pkg/ext-proc/scheduling/filter.go +++ b/pkg/epp/scheduling/filter.go @@ -21,8 +21,8 @@ import ( "math" "github.com/go-logr/logr" - "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/datastore" - logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore" + logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging" ) type Filter interface { diff --git a/pkg/ext-proc/scheduling/filter_test.go b/pkg/epp/scheduling/filter_test.go similarity index 98% rename from pkg/ext-proc/scheduling/filter_test.go rename to pkg/epp/scheduling/filter_test.go index 01909fea2..ac765b780 100644 --- a/pkg/ext-proc/scheduling/filter_test.go +++ b/pkg/epp/scheduling/filter_test.go @@ -23,8 +23,8 @@ import ( "github.com/go-logr/logr" "github.com/google/go-cmp/cmp" "k8s.io/apimachinery/pkg/types" - "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/datastore" - logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore" + logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging" ) func TestFilter(t *testing.T) { diff --git a/pkg/ext-proc/scheduling/scheduler.go b/pkg/epp/scheduling/scheduler.go similarity index 94% rename from pkg/ext-proc/scheduling/scheduler.go rename to pkg/epp/scheduling/scheduler.go index b5f2f4f23..a969948ef 100644 --- a/pkg/ext-proc/scheduling/scheduler.go +++ b/pkg/epp/scheduling/scheduler.go @@ -24,9 +24,9 @@ import ( "github.com/go-logr/logr" "sigs.k8s.io/controller-runtime/pkg/log" - "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/datastore" - errutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/error" - logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging" + 
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore" + errutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/error" + logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging" ) const ( @@ -102,7 +102,8 @@ var ( filter: func(logger logr.Logger, req *LLMRequest, pods []*datastore.PodMetrics) ([]*datastore.PodMetrics, error) { logger.V(logutil.DEFAULT).Info("Request dropped", "request", req) return []*datastore.PodMetrics{}, errutil.Error{ - Code: errutil.InferencePoolResourceExhausted, Msg: "dropping request due to limited backend resources"} + Code: errutil.InferencePoolResourceExhausted, Msg: "dropping request due to limited backend resources", + } }, }, } diff --git a/pkg/ext-proc/scheduling/types.go b/pkg/epp/scheduling/types.go similarity index 100% rename from pkg/ext-proc/scheduling/types.go rename to pkg/epp/scheduling/types.go diff --git a/pkg/ext-proc/server/runserver.go b/pkg/epp/server/runserver.go similarity index 95% rename from pkg/ext-proc/server/runserver.go rename to pkg/epp/server/runserver.go index 795b242d0..92b7be7f1 100644 --- a/pkg/ext-proc/server/runserver.go +++ b/pkg/epp/server/runserver.go @@ -36,11 +36,11 @@ import ( ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/manager" "sigs.k8s.io/gateway-api-inference-extension/internal/runnable" - "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/backend" - "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/controller" - "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/datastore" - "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/handlers" - "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/scheduling" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/controller" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/handlers" + 
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling" ) // ExtProcServerRunner provides methods to manage an external process server. diff --git a/pkg/ext-proc/server/runserver_test.go b/pkg/epp/server/runserver_test.go similarity index 87% rename from pkg/ext-proc/server/runserver_test.go rename to pkg/epp/server/runserver_test.go index 438dc0969..b02688c58 100644 --- a/pkg/ext-proc/server/runserver_test.go +++ b/pkg/epp/server/runserver_test.go @@ -21,8 +21,8 @@ import ( "sigs.k8s.io/controller-runtime/pkg/manager" - "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/server" - logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/server" + logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging" ) func TestRunnable(t *testing.T) { diff --git a/pkg/ext-proc/test/benchmark/benchmark.go b/pkg/epp/test/benchmark/benchmark.go similarity index 93% rename from pkg/ext-proc/test/benchmark/benchmark.go rename to pkg/epp/test/benchmark/benchmark.go index dc06a27a8..10987b47f 100644 --- a/pkg/ext-proc/test/benchmark/benchmark.go +++ b/pkg/epp/test/benchmark/benchmark.go @@ -32,10 +32,10 @@ import ( "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/log/zap" "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha1" - "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/datastore" - runserver "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/server" - "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/test" - logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore" + runserver "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/server" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/test" + logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging" ) var ( diff --git a/pkg/ext-proc/test/utils.go 
b/pkg/epp/test/utils.go similarity index 88% rename from pkg/ext-proc/test/utils.go rename to pkg/epp/test/utils.go index ef83c9327..f82084d97 100644 --- a/pkg/ext-proc/test/utils.go +++ b/pkg/epp/test/utils.go @@ -30,12 +30,12 @@ import ( "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha1" - "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/backend" - "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/datastore" - "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/handlers" - "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/scheduling" - logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging" - utiltesting "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/testing" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/handlers" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling" + logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging" + utiltesting "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/testing" ) func StartExtProc( diff --git a/pkg/ext-proc/util/error/error.go b/pkg/epp/util/error/error.go similarity index 100% rename from pkg/ext-proc/util/error/error.go rename to pkg/epp/util/error/error.go diff --git a/pkg/ext-proc/util/logging/fatal.go b/pkg/epp/util/logging/fatal.go similarity index 100% rename from pkg/ext-proc/util/logging/fatal.go rename to pkg/epp/util/logging/fatal.go diff --git a/pkg/ext-proc/util/logging/logger.go b/pkg/epp/util/logging/logger.go similarity index 100% rename from pkg/ext-proc/util/logging/logger.go rename to pkg/epp/util/logging/logger.go diff --git a/pkg/ext-proc/util/logging/logging_const.go b/pkg/epp/util/logging/logging_const.go similarity index 100% rename from pkg/ext-proc/util/logging/logging_const.go 
rename to pkg/epp/util/logging/logging_const.go diff --git a/pkg/ext-proc/util/testing/wrappers.go b/pkg/epp/util/testing/wrappers.go similarity index 100% rename from pkg/ext-proc/util/testing/wrappers.go rename to pkg/epp/util/testing/wrappers.go diff --git a/test/integration/hermetic_test.go b/test/integration/hermetic_test.go index 18efe7bff..eb2ca40ef 100644 --- a/test/integration/hermetic_test.go +++ b/test/integration/hermetic_test.go @@ -47,12 +47,12 @@ import ( k8sclient "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/envtest" "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha1" - "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/backend" - "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/datastore" - runserver "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/server" - extprocutils "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/test" - logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/logging" - utiltesting "sigs.k8s.io/gateway-api-inference-extension/pkg/ext-proc/util/testing" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend" + "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore" + runserver "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/server" + extprocutils "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/test" + logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging" + utiltesting "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/testing" "sigs.k8s.io/yaml" ) diff --git a/tools/dashboards/README.md b/tools/dashboards/README.md index c8258b638..7be2a5b8e 100644 --- a/tools/dashboards/README.md +++ b/tools/dashboards/README.md @@ -4,7 +4,7 @@ This documentation provides instructions for setting up grafana dashboards to se ## Requirements -Please follow [metrics](https://github.com/kubernetes-sigs/gateway-api-inference-extension/tree/main/pkg/ext-proc/metrics) page to configure the proxy to enable 
all metrics. +Please follow [metrics](https://github.com/kubernetes-sigs/gateway-api-inference-extension/tree/main/pkg/epp/metrics) page to configure the proxy to enable all metrics. ## Load Inference Extension dashboard into Grafana @@ -21,6 +21,7 @@ If you run the inferece gateway with [Google Managed Prometheus](https://cloud.g Please configure the `scrape_interval` of your prometheus configuration to lower than `15s`, `rate` function returns empty string if data falls too apart. See https://www.robustperception.io/what-range-should-i-use-with-rate/ for more details. Example: + ``` global: scrape_interval: 5s diff --git a/tools/dashboards/inference_gateway.json b/tools/dashboards/inference_gateway.json index 3af667036..4e872739a 100644 --- a/tools/dashboards/inference_gateway.json +++ b/tools/dashboards/inference_gateway.json @@ -39,7 +39,7 @@ "showLineNumbers": false, "showMiniMap": false }, - "content": "# Inferece Gateway Dashboard\n\nPlease see https://github.com/kubernetes-sigs/gateway-api-inference-extension/tree/main/pkg/ext-proc/metrics for more details of underlying metrics used in the dashboard.", + "content": "# Inference Gateway Dashboard\n\nPlease see https://github.com/kubernetes-sigs/gateway-api-inference-extension/tree/main/pkg/epp/metrics for more details of underlying metrics used in the dashboard.", "mode": "markdown" }, "pluginVersion": "11.5.0",