Skip to content

Commit 32176e6

Browse files
authored
Merge pull request kubernetes-sigs#50 from shaneutt/kgateway-deployment
Add Kubernetes dev environment deployments with KGateway
2 parents 8eb8707 + 7275019 commit 32176e6

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

44 files changed

+485
-71
lines changed

Makefile

+10-27
Original file line numberDiff line numberDiff line change
@@ -721,7 +721,11 @@ clean.environment.dev.kind:
721721
environment.dev.kubernetes.infrastructure:
722722
ifeq ($(strip $(INFRASTRUCTURE_OVERRIDE)),true)
723723
@echo "Deploying OpenShift Infrastructure Components"
724-
kustomize build deploy/environments/dev/kubernetes-infra | kubectl apply --server-side --force-conflicts -f -
724+
kustomize build deploy/components/crds-gateway-api | kubectl apply --server-side --force-conflicts -f -
725+
kustomize build deploy/components/crds-gie | kubectl apply --server-side --force-conflicts -f -
726+
kustomize build --enable-helm deploy/components/crds-kgateway | kubectl apply --server-side --force-conflicts -f -
727+
kustomize build --enable-helm deploy/environments/dev/kubernetes-kgateway-infra | kubectl apply --server-side --force-conflicts -f -
728+
kubectl -n kgateway-system wait deployment/kgateway --for=condition=Available --timeout=60s
725729
else
726730
$(error "Error: The environment variable INFRASTRUCTURE_OVERRIDE must be set to true in order to run this target.")
727731
endif
@@ -743,7 +747,10 @@ ifeq ($(strip $(INFRASTRUCTURE_OVERRIDE)),true)
743747
@echo "This is extremely destructive. We'll provide 5 seconds before starting to give you a chance to cancel."
744748
sleep 5
745749
@echo "Tearing Down OpenShift Infrastructure Components"
746-
kustomize build deploy/environments/dev/kubernetes-infra | kubectl delete -f - || true
750+
kustomize build --enable-helm deploy/environments/dev/kubernetes-kgateway-infra | kubectl delete -f - || true
751+
kustomize build --enable-helm deploy/components/crds-kgateway | kubectl delete -f - || true
752+
kustomize build deploy/components/crds-gie | kubectl delete -f - || true
753+
kustomize build deploy/components/crds-gateway-api | kubectl delete -f - || true
747754
else
748755
$(error "Error: The environment variable INFRASTRUCTURE_OVERRIDE must be set to true in order to run this target.")
749756
endif
@@ -754,33 +761,9 @@ endif
754761
# This target deploys the GIE stack in a specific namespace for development and
755762
# testing.
756763
# ------------------------------------------------------------------------------
757-
VLLM_SIM_IMAGE ?= quay.io/vllm-d/vllm-sim
758-
VLLM_SIM_TAG ?= 0.0.2
759-
EPP_IMAGE ?= us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/epp
760-
EPP_TAG ?= main
761764
.PHONY: environment.dev.kubernetes
762765
environment.dev.kubernetes: check-kubectl check-kustomize check-envsubst
763-
@echo "INFO: checking required vars"
764-
ifndef NAMESPACE
765-
$(error "Error: NAMESPACE is required but not set")
766-
endif
767-
export NAMESPACE=$(NAMESPACE)
768-
ifndef REGISTRY_SECRET
769-
$(error "Error: REGISTRY_SECRET is required but not set")
770-
endif
771-
export REGISTRY_SECRET=$(REGISTRY_SECRET)
772-
export VLLM_SIM_IMAGE=$(VLLM_SIM_IMAGE)
773-
export VLLM_SIM_TAG=$(VLLM_SIM_TAG)
774-
export EPP_IMAGE=$(EPP_IMAGE)
775-
export EPP_TAG=$(EPP_TAG)
776-
@echo "INFO: Creating namespace (if needed) and setting context to $(NAMESPACE)..."
777-
kubectl create namespace $(NAMESPACE) 2>/dev/null || true
778-
@echo "INFO: Deploying Development Environment in namespace $(NAMESPACE)"
779-
kustomize build deploy/environments/dev/kubernetes | envsubst | kubectl -n $(NAMESPACE) apply -f -
780-
@echo "INFO: Waiting for Pods in namespace $(NAMESPACE) to become ready"
781-
kubectl -n $(NAMESPACE) wait --for=condition=Ready --all pods --timeout=300s
782-
@echo "INFO: Waiting for Gateway in namespace $(NAMESPACE) to become ready"
783-
kubectl -n $(NAMESPACE) wait gateway/inference-gateway --for=condition=Programmed --timeout=60s
766+
./scripts/kubernetes-dev-env.sh 2>&1
784767
@echo "INFO: Development environment deployed to namespace $(NAMESPACE)"
785768

786769
# ------------------------------------------------------------------------------
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,5 @@
11
# ------------------------------------------------------------------------------
2-
# Custom Resource Definitions (CRDs)
3-
#
4-
# This deploys the CRDs needed for development environments (e.g. Gateway API).
2+
# Custom Resource Definitions (CRDs) for Gateway API
53
#
64
# **Warning**: CRDs are cluster-level, so in a shared development environment
75
# this needs to be done in a controlled and communicated manner.
@@ -10,10 +8,4 @@ apiVersion: kustomize.config.k8s.io/v1beta1
108
kind: Kustomization
119

1210
resources:
13-
# Gateway API CRDs
1411
- https://github.com/kubernetes-sigs/gateway-api/config/crd?ref=v1.2.1
15-
# Gateway API Inference Extension (GIE) CRDs
16-
# NOTE: deploys whatever is in the current branch
17-
- ../../../config/crd # GIE CRDs
18-
# Istio CRDs
19-
- istio.yaml
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# ------------------------------------------------------------------------------
2+
# Custom Resource Definitions (CRDs) for Gateway API Inference Extension (GIE)
3+
#
4+
# This deploys the GIE CRDs from this repository's local config/crd directory.
5+
#
6+
# **Warning**: CRDs are cluster-level, so in a shared development environment
7+
# this needs to be done in a controlled and communicated manner.
8+
# ------------------------------------------------------------------------------
9+
apiVersion: kustomize.config.k8s.io/v1beta1
10+
kind: Kustomization
11+
12+
resources:
13+
- ../../../config/crd
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# ------------------------------------------------------------------------------
2+
# Custom Resource Definitions (CRDs) for Istio
3+
#
4+
# **Warning**: CRDs are cluster-level, so in a shared development environment
5+
# this needs to be done in a controlled and communicated manner.
6+
# ------------------------------------------------------------------------------
7+
apiVersion: kustomize.config.k8s.io/v1beta1
8+
kind: Kustomization
9+
10+
resources:
11+
- istio.yaml
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
charts/
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# ------------------------------------------------------------------------------
2+
# Custom Resource Definitions (CRDs) for KGateway
3+
#
4+
# **Warning**: CRDs are cluster-level, so in a shared development environment
5+
# this needs to be done in a controlled and communicated manner.
6+
# ------------------------------------------------------------------------------
7+
apiVersion: kustomize.config.k8s.io/v1beta1
8+
kind: Kustomization
9+
10+
resources:
11+
- namespaces.yaml
12+
13+
helmCharts:
14+
- name: kgateway-crds
15+
repo: oci://cr.kgateway.dev/kgateway-dev/charts
16+
version: v2.0.0
17+
releaseName: kgateway-crds
18+
namespace: kgateway-system
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
apiVersion: v1
2+
kind: Namespace
3+
metadata:
4+
name: kgateway-system

deploy/components/inference-gateway/deployments.yaml

-2
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,6 @@ spec:
2121
image: quay.io/vllm-d/gateway-api-inference-extension/epp:latest
2222
imagePullPolicy: IfNotPresent
2323
args:
24-
- -refreshMetricsInterval
25-
- "500ms"
2624
- -poolName
2725
- "vllm-llama3-8b-instruct"
2826
- -v

deploy/components/inference-gateway/gateways.yaml

-5
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,7 @@ apiVersion: gateway.networking.k8s.io/v1
22
kind: Gateway
33
metadata:
44
name: inference-gateway
5-
labels:
6-
istio.io/enable-inference-extproc: "true"
7-
annotations:
8-
networking.istio.io/service-type: ClusterIP
95
spec:
10-
gatewayClassName: istio
116
listeners:
127
- name: default
138
port: 80

deploy/components/inference-gateway/httproutes.yaml

+2
Original file line numberDiff line numberDiff line change
@@ -15,3 +15,5 @@ spec:
1515
kind: InferencePool
1616
name: vllm-llama3-8b-instruct
1717
port: 8000
18+
timeouts:
19+
request: 30s

deploy/components/inference-gateway/inference-models.yaml

+3
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,6 @@ spec:
77
criticality: Critical
88
poolRef:
99
name: vllm-llama3-8b-instruct
10+
targetModels:
11+
- name: food-review
12+
weight: 100

deploy/components/inference-gateway/kustomization.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,9 @@ kind: Kustomization
2020
resources:
2121
- service-accounts.yaml
2222
- rbac.yaml
23-
- destination-rules.yaml
2423
- inference-pools.yaml
2524
- inference-models.yaml
25+
- services.yaml
2626
- deployments.yaml
2727
- gateways.yaml
2828
- httproutes.yaml

deploy/components/inference-gateway/rbac.yaml

+1-8
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ rules:
66
- apiGroups:
77
- "inference.networking.x-k8s.io"
88
resources:
9+
- "inferencepools"
910
- "inferencemodels"
1011
verbs:
1112
- "get"
@@ -19,14 +20,6 @@ rules:
1920
- "get"
2021
- "watch"
2122
- "list"
22-
- apiGroups:
23-
- "inference.networking.x-k8s.io"
24-
resources:
25-
- "inferencepools"
26-
verbs:
27-
- "get"
28-
- "watch"
29-
- "list"
3023
- apiGroups:
3124
- "discovery.k8s.io"
3225
resources:
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
apiVersion: v1
2+
kind: Service
3+
metadata:
4+
name: endpoint-picker
5+
spec:
6+
selector:
7+
app: endpoint-picker
8+
ports:
9+
- protocol: TCP
10+
port: 9002
11+
targetPort: 9002
12+
appProtocol: http2
13+
type: ClusterIP

deploy/components/istio-control-plane/kustomization.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -23,3 +23,4 @@ resources:
2323
- webhooks.yaml
2424
- deployments.yaml
2525
- hpa.yaml
26+
- telemetry.yaml
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# Enables access logging (using the "envoy" provider) for Gateways
2+
apiVersion: telemetry.istio.io/v1
3+
kind: Telemetry
4+
metadata:
5+
name: mesh-default
6+
namespace: istio-system
7+
spec:
8+
accessLogging:
9+
- providers:
10+
- name: envoy
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
charts/
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# ------------------------------------------------------------------------------
2+
# KGateway Control Plane
3+
#
4+
# A Gateway API implementation with support for the GIE.
5+
# ------------------------------------------------------------------------------
6+
apiVersion: kustomize.config.k8s.io/v1beta1
7+
kind: Kustomization
8+
9+
resources:
10+
- namespaces.yaml
11+
12+
helmCharts:
13+
- name: kgateway
14+
repo: oci://cr.kgateway.dev/kgateway-dev/charts
15+
version: v2.0.0
16+
releaseName: kgateway
17+
namespace: kgateway-system
18+
valuesInline:
19+
inferenceExtension:
20+
enabled: true
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
apiVersion: v1
2+
kind: Namespace
3+
metadata:
4+
name: kgateway-system

deploy/components/vllm-sim/deployments.yaml

+6-3
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,10 @@ spec:
2222
args:
2323
- "--port=8000"
2424
- "--model=food-review"
25-
# - "--lora=lora10,lora20,lora30"
26-
# - "--time-to-first-token=500"
2725
ports:
28-
- containerPort: 8000
26+
- name: http
27+
containerPort: 8000
28+
protocol: TCP
29+
env:
30+
- name: PORT
31+
value: "8000"

deploy/environments/dev/kind/kustomization.yaml renamed to deploy/environments/dev/kind-istio/kustomization.yaml

+3-1
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,12 @@ apiVersion: kustomize.config.k8s.io/v1beta1
1313
kind: Kustomization
1414

1515
resources:
16+
- destination-rules.yaml
1617
- services.yaml
1718
- ../../../components/istio-control-plane/
1819
- ../../../components/vllm-sim/
1920
- ../../../components/inference-gateway/
2021

2122
patches:
22-
- path: gateway.yaml
23+
- path: patch-deployments.yaml
24+
- path: patch-gateways.yaml
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
apiVersion: apps/v1
2+
kind: Deployment
3+
metadata:
4+
name: endpoint-picker
5+
spec:
6+
template:
7+
spec:
8+
containers:
9+
- name: epp
10+
args:
11+
- -poolName
12+
- "vllm-llama3-8b-instruct"
13+
- -poolNamespace
14+
- "default"
15+
- -v
16+
- "4"
17+
- --zap-encoder
18+
- "json"
19+
- -grpcPort
20+
- "9002"
21+
- -grpcHealthPort
22+
- "9003"
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
apiVersion: gateway.networking.k8s.io/v1
2+
kind: Gateway
3+
metadata:
4+
name: inference-gateway
5+
labels:
6+
istio.io/enable-inference-extproc: "true"
7+
annotations:
8+
networking.istio.io/service-type: ClusterIP
9+
spec:
10+
gatewayClassName: istio
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# ------------------------------------------------------------------------------
2+
# Kubernetes In Docker (KIND) Environment
3+
#
4+
# This will deploy the full development stack on a KIND cluster:
5+
#
6+
# * KGateway Control Plane
7+
# * VLLM Simulator
8+
# * Inference Gateway
9+
#
10+
# This will expose the VLLM simulator via an InferencePool and an HTTPRoute.
11+
# ------------------------------------------------------------------------------
12+
apiVersion: kustomize.config.k8s.io/v1beta1
13+
kind: Kustomization
14+
15+
resources:
16+
- services.yaml
17+
- ../../../components/kgateway-control-plane/
18+
- ../../../components/vllm-sim/
19+
- ../../../components/inference-gateway/
20+
21+
patches:
22+
- path: patch-deployments.yaml
23+
- path: patch-gateways.yaml
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
apiVersion: apps/v1
2+
kind: Deployment
3+
metadata:
4+
name: endpoint-picker
5+
spec:
6+
template:
7+
spec:
8+
containers:
9+
- name: epp
10+
args:
11+
- -poolName
12+
- "vllm-llama3-8b-instruct"
13+
- -poolNamespace
14+
- "default"
15+
- -v
16+
- "4"
17+
- --zap-encoder
18+
- "json"
19+
- -grpcPort
20+
- "9002"
21+
- -grpcHealthPort
22+
- "9003"

deploy/environments/dev/kind/gateway.yaml renamed to deploy/environments/dev/kind-kgateway/patch-gateways.yaml

+2-2
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,5 @@ apiVersion: gateway.networking.k8s.io/v1
22
kind: Gateway
33
metadata:
44
name: inference-gateway
5-
annotations:
6-
networking.istio.io/service-type: NodePort
5+
spec:
6+
gatewayClassName: kgateway

0 commit comments

Comments
 (0)