Skip to content

Commit c679724

Browse files
committed
feat: kind dev env deployment script
Signed-off-by: Shane Utt <[email protected]>
1 parent f606e0d commit c679724

File tree

1 file changed

+140
-0
lines changed

1 file changed

+140
-0
lines changed

scripts/run-kind.sh

+140
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
#!/bin/bash

# Deploys a kind cluster with an Istio-based Gateway API implementation fully
# configured. The vllm simulator is deployed and exposed through a Gateway and
# an HTTPRoute, and the Gateway is configured with a filter for the ext_proc
# endpoint picker.

# Abort on the first failing command, including a failure in any stage of a
# pipeline.
set -o errexit -o pipefail
9+
10+
# ------------------------------------------------------------------------------
11+
# Variables
12+
# ------------------------------------------------------------------------------
13+
14+
# Each setting keeps a non-empty value supplied by the caller's environment
# and otherwise falls back to the default shown here.

# Name of the kind cluster to create or re-use
CLUSTER_NAME="${CLUSTER_NAME:-inference-gateway}"

# Tag of the vllm simulator image that gets loaded into the cluster
VLLM_SIMULATOR_VERSION="${VLLM_SIMULATOR_VERSION:-0.0.2}"

# Tag of the ext_proc endpoint-picker image that gets loaded into the cluster
ENDPOINT_PICKER_VERSION="${ENDPOINT_PICKER_VERSION:-0.0.1}"
22+
23+
# ------------------------------------------------------------------------------
24+
# Setup & Requirement Checks
25+
# ------------------------------------------------------------------------------
26+
27+
# Pick a container runtime unless the caller already chose one through
# CONTAINER_RUNTIME. docker is preferred; podman is the fallback.
if [[ -z "${CONTAINER_RUNTIME:-}" ]]; then
  for candidate in docker podman; do
    if command -v "${candidate}" > /dev/null 2>&1; then
      CONTAINER_RUNTIME="${candidate}"
      break
    fi
  done
  unset candidate

  # Still empty: neither candidate is available, so nothing can run the cluster
  if [[ -z "${CONTAINER_RUNTIME:-}" ]]; then
    echo "Neither docker nor podman could be found in PATH" >&2
    exit 1
  fi
fi
38+
39+
# From here on every variable is expected to be set, so treat any unset
# variable as an error.
set -u

# Check for required programs. CONTAINER_RUNTIME is quoted so that it is always
# looked up as a single command name, and the diagnostic goes to stderr like
# the other error messages in this script.
for required_cmd in kind kubectl "${CONTAINER_RUNTIME}"; do
  if ! command -v "${required_cmd}" > /dev/null 2>&1; then
    echo "Error: ${required_cmd} is not installed or not in the PATH." >&2
    exit 1
  fi
done
48+
49+
# ------------------------------------------------------------------------------
50+
# Cluster Deployment
51+
# ------------------------------------------------------------------------------
52+
53+
# Check if the cluster already exists.
#
# The cluster list is captured before it is searched: with `pipefail` enabled,
# piping straight into `grep -q` can report a failure when grep exits on the
# first match and the producer is terminated by SIGPIPE, which would wrongly
# send us down the "create" path. A failing `kind get clusters` is tolerated
# and simply treated as "no clusters".
existing_clusters="$(kind get clusters 2> /dev/null || true)"

# -x matches whole lines only and -F treats the name as a literal string, so
# characters such as '.' in CLUSTER_NAME are not interpreted as regex syntax.
if grep -qxF -- "${CLUSTER_NAME}" <<< "${existing_clusters}"; then
  echo "Cluster '${CLUSTER_NAME}' already exists, re-using"
else
  kind create cluster --name "${CLUSTER_NAME}"
fi

# Set the kubectl context to the kind cluster
KUBE_CONTEXT="kind-${CLUSTER_NAME}"
62+
63+
# Trace every command from this point on
set -x

# Hotfix for https://github.com/kubernetes-sigs/kind/issues/3880
#
# The command is run without -i/-t: it needs neither stdin nor a terminal, and
# requesting a TTY makes the exec fail when the script runs non-interactively
# (for example in CI).
CONTAINER_NAME="${CLUSTER_NAME}-control-plane"
"${CONTAINER_RUNTIME}" exec "${CONTAINER_NAME}" /bin/bash -c "sysctl net.ipv4.conf.all.arp_ignore=0"

# Wait for all pods to be ready
kubectl --context "${KUBE_CONTEXT}" -n kube-system wait --for=condition=Ready --all pods --timeout=300s
kubectl --context "${KUBE_CONTEXT}" -n local-path-storage wait --for=condition=Ready --all pods --timeout=300s
72+
73+
#######################################
# Load a locally built image into the kind cluster.
# With podman the image is first tagged from localhost/ to docker.io/ and then
# streamed into kind as an image archive; otherwise kind loads it directly
# from the docker image store.
# Globals:   CONTAINER_RUNTIME (read), CLUSTER_NAME (read)
# Arguments: $1 - image repository without a registry prefix
#            $2 - image tag
# Returns:   status of the last podman/kind command that ran
#######################################
load_image_into_cluster() {
  local image_ref="${1}:${2}"

  if [[ "${CONTAINER_RUNTIME}" == "podman" ]]; then
    podman tag "localhost/${image_ref}" "docker.io/${image_ref}"
    podman save "docker.io/${image_ref}" -o /dev/stdout \
      | kind --name "${CLUSTER_NAME}" load image-archive /dev/stdin
  else
    kind --name "${CLUSTER_NAME}" load docker-image "${image_ref}"
  fi
}

# Load the vllm simulator image into the cluster
load_image_into_cluster vllm-sim/vllm-sim "${VLLM_SIMULATOR_VERSION}"

# Load the ext_proc endpoint-picker image into the cluster
load_image_into_cluster inference-router/router-ext-proc "${ENDPOINT_PICKER_VERSION}"
88+
89+
# ------------------------------------------------------------------------------
90+
# Sail Operator Deployment
91+
# ------------------------------------------------------------------------------
92+
93+
# Deploy the Sail Operator: render the kustomization (with Helm support
# enabled) and apply the result server-side, forcing ownership of any
# conflicting fields.
kubectl kustomize --enable-helm deploy/components/sail-operator \
  | kubectl --context "${KUBE_CONTEXT}" apply --server-side --force-conflicts -f -

# Wait for the Sail Operator to be ready
kubectl --context "${KUBE_CONTEXT}" -n sail-operator wait deployment/sail-operator --for=condition=Available --timeout=60s
99+
100+
# ------------------------------------------------------------------------------
101+
# Development Environment
102+
# ------------------------------------------------------------------------------
103+
104+
# Deploy the environment to the "default" namespace by substituting the
# REPLACE_NAMESPACE placeholder in the rendered manifests.
# NOTE(review): the case-insensitive "I" flag is a GNU sed extension - confirm
# it is accepted by the sed on every platform this script must support.
kubectl kustomize deploy/environments/kind \
  | sed 's/REPLACE_NAMESPACE/default/gI' \
  | kubectl --context "${KUBE_CONTEXT}" apply -f -

# Wait for all pods to be ready
# NOTE(review): presumably some pods are created asynchronously after the
# apply; if none exist yet, `kubectl wait --all` may fail immediately instead
# of waiting - verify against the manifests in deploy/environments/kind.
kubectl --context "${KUBE_CONTEXT}" wait --for=condition=Ready --all pods --timeout=300s

# Wait for the gateway to be ready
kubectl --context "${KUBE_CONTEXT}" wait gateway/inference-gateway --for=condition=Programmed --timeout=60s
113+
114+
# Print a summary with follow-up commands. The heredoc delimiter is unquoted so
# that ${CLUSTER_NAME} and ${KUBE_CONTEXT} are expanded; a "$" followed by a
# space is left as a literal prompt character.
cat <<DEPLOYMENT_SUMMARY
-----------------------------------------
Deployment completed!

* Kind Cluster Name: ${CLUSTER_NAME}
* Kubectl Context: ${KUBE_CONTEXT}

Status:

* The vllm simulator is running
* The Gateway is exposing the simulator
* The Endpoint Picker is loaded into the Gateway via ext_proc

You can watch the Endpoint Picker logs with:

$ kubectl --context ${KUBE_CONTEXT} logs -f deployments/endpoint-picker

You can use a port-forward to access the Gateway:

$ kubectl --context ${KUBE_CONTEXT} port-forward service/inference-gateway-istio 8080:80

With that running in the background, you can make requests:

$ curl -v -w '\n' -X POST -H 'Content-Type: application/json' -d '{"model":"model1","messages":[{"role":"user","content":"help"}]}' http://localhost:8080

-----------------------------------------
DEPLOYMENT_SUMMARY

0 commit comments

Comments
 (0)