#!/bin/bash

# Deploys a kind cluster with a fully configured Istio-based Gateway API
# implementation. The vllm simulator is deployed into the cluster and exposed
# through a Gateway and HTTPRoute, and the Gateway is configured with a filter
# for the ext_proc endpoint picker.
#
# Optional environment overrides (defaults are applied below):
#   CLUSTER_NAME             name of the kind cluster
#   VLLM_SIMULATOR_VERSION   tag of the vllm simulator image to load
#   ENDPOINT_PICKER_VERSION  tag of the ext_proc endpoint-picker image to load
#   CONTAINER_RUNTIME        container runtime to use; auto-detected when unset

# Abort on the first failing command, including failures inside a pipeline.
set -e
set -o pipefail

# ------------------------------------------------------------------------------
# Variables
# ------------------------------------------------------------------------------

# Each setting keeps a non-empty value supplied by the caller and otherwise
# falls back to the default shown here.
CLUSTER_NAME="${CLUSTER_NAME:-inference-gateway}"
VLLM_SIMULATOR_VERSION="${VLLM_SIMULATOR_VERSION:-0.0.2}"
ENDPOINT_PICKER_VERSION="${ENDPOINT_PICKER_VERSION:-0.0.1}"

# ------------------------------------------------------------------------------
# Setup & Requirement Checks
# ------------------------------------------------------------------------------

# Keep an explicitly provided CONTAINER_RUNTIME. Otherwise take the first of
# docker, podman that is found in PATH, and give up if neither is available.
if [ -z "${CONTAINER_RUNTIME}" ]; then
  for runtime_candidate in docker podman; do
    if command -v "${runtime_candidate}" &> /dev/null; then
      CONTAINER_RUNTIME="${runtime_candidate}"
      break
    fi
  done
  unset -v runtime_candidate

  if [ -z "${CONTAINER_RUNTIME}" ]; then
    echo "Neither docker nor podman could be found in PATH" >&2
    exit 1
  fi
fi

# nounset is enabled only from here on: the probe above reads
# CONTAINER_RUNTIME while it may still be unset.
set -u

# Verify that every program the script invokes later is available in PATH.
# The failure message is written to stderr, matching the container-runtime
# check earlier in the script.
#
# CONTAINER_RUNTIME stays unquoted exactly as before, so a value made of
# several words is still split and each word is checked on its own.
# shellcheck disable=SC2086
for cmd in kind kubectl ${CONTAINER_RUNTIME}; do
  if ! command -v "$cmd" &> /dev/null; then
    echo "Error: $cmd is not installed or not in the PATH." >&2
    exit 1
  fi
done

# ------------------------------------------------------------------------------
# Cluster Deployment
# ------------------------------------------------------------------------------

#######################################
# Reports whether a kind cluster with exactly the given name exists.
# The cluster list is captured before it is searched, so grep exiting early
# cannot fail the producer under pipefail, and the name is compared as a
# fixed whole-line string rather than as a regular expression.
# Arguments: $1 - cluster name to look for
# Returns:   0 if the name is listed; non-zero if it is not listed, if the
#            name is empty, or if "kind get clusters" fails
#######################################
cluster_exists() {
  local name="$1"
  local clusters

  [ -n "${name}" ] || return 1
  clusters="$(kind get clusters 2>/dev/null)" || return 1
  grep -Fxq -- "${name}" <<< "${clusters}"
}

# Re-use the cluster when it is already there, otherwise create it
if cluster_exists "${CLUSTER_NAME}"; then
  echo "Cluster '${CLUSTER_NAME}' already exists, re-using"
else
  kind create cluster --name "${CLUSTER_NAME}"
fi

# Set the kubectl context to the kind cluster
KUBE_CONTEXT="kind-${CLUSTER_NAME}"

# Trace every command from here on
set -x

# Hotfix for https://github.com/kubernetes-sigs/kind/issues/3880
CONTAINER_NAME="${CLUSTER_NAME}-control-plane"

# The sysctl call reads no input, so no interactive TTY is requested; asking
# for one makes the exec fail when the script runs without a terminal (CI).
# CONTAINER_RUNTIME stays unquoted exactly as before, so a value made of
# several words is still split into a command plus arguments.
# shellcheck disable=SC2086
${CONTAINER_RUNTIME} exec "${CONTAINER_NAME}" /bin/bash -c "sysctl net.ipv4.conf.all.arp_ignore=0"

# Wait for all pods in the kube-system and local-path-storage namespaces to
# be ready, in that order, allowing up to 300s per namespace.
for system_namespace in kube-system local-path-storage; do
  kubectl --context "${KUBE_CONTEXT}" -n "${system_namespace}" wait --for=condition=Ready --all pods --timeout=300s
done
unset -v system_namespace

#######################################
# Loads a locally available image into the kind cluster.
# With podman the image is first re-tagged from localhost/<image> to
# docker.io/<image>, then streamed as an archive into "kind load
# image-archive". With any other runtime "kind load docker-image" is used.
# Globals:   CONTAINER_RUNTIME (read), CLUSTER_NAME (read)
# Arguments: $1 - image reference without registry prefix, e.g. repo/name:tag
#######################################
load_image_into_kind() {
  local image="$1"

  if [ "${CONTAINER_RUNTIME}" == "podman" ]; then
    podman tag "localhost/${image}" "docker.io/${image}"
    podman save "docker.io/${image}" -o /dev/stdout \
      | kind --name "${CLUSTER_NAME}" load image-archive /dev/stdin
  else
    kind --name "${CLUSTER_NAME}" load docker-image "${image}"
  fi
}

# Load the vllm simulator image into the cluster
load_image_into_kind "vllm-sim/vllm-sim:${VLLM_SIMULATOR_VERSION}"

# Load the ext_proc endpoint-picker image into the cluster
load_image_into_kind "inference-router/router-ext-proc:${ENDPOINT_PICKER_VERSION}"

# ------------------------------------------------------------------------------
# Sail Operator Deployment
# ------------------------------------------------------------------------------

# Render the Sail Operator kustomization with Helm support enabled and apply
# the result server-side, forcing field-ownership conflicts.
# NOTE(review): the kustomization path is relative, so this presumably has to
# be run from the repository root - confirm.
kubectl kustomize --enable-helm deploy/components/sail-operator \
  | kubectl --context "${KUBE_CONTEXT}" apply --server-side --force-conflicts -f -

# Wait up to 60s for the Sail Operator deployment to become Available
kubectl --context "${KUBE_CONTEXT}" -n sail-operator wait deployment/sail-operator \
  --for=condition=Available --timeout=60s

# ------------------------------------------------------------------------------
# Development Environment
# ------------------------------------------------------------------------------

# Deploy the environment to the "default" namespace by rendering the kind
# kustomization and replacing every REPLACE_NAMESPACE placeholder (matched
# case-insensitively) with "default" before applying it.
# NOTE(review): the `I` flag of the s command is not part of POSIX sed; confirm
# it is supported by the sed on every platform this script targets.
kubectl kustomize deploy/environments/kind \
  | sed 's/REPLACE_NAMESPACE/default/gI' \
  | kubectl --context "${KUBE_CONTEXT}" apply -f -

# Wait up to 300s for all pods in the context's current namespace to be ready
kubectl --context "${KUBE_CONTEXT}" wait --for=condition=Ready --all pods --timeout=300s

# Wait up to 60s for the gateway to be programmed
kubectl --context "${KUBE_CONTEXT}" wait gateway/inference-gateway --for=condition=Programmed --timeout=60s

#######################################
# Prints the post-deployment summary: the cluster name, the kubectl context,
# and example commands for tailing logs, port-forwarding and sending a request.
# Globals: CLUSTER_NAME (read), KUBE_CONTEXT (read)
# Outputs: writes the summary to stdout
#######################################
print_summary() {
  cat <<EOF
-----------------------------------------
Deployment completed!

* Kind Cluster Name: ${CLUSTER_NAME}
* Kubectl Context: ${KUBE_CONTEXT}

Status:

* The vllm simulator is running
* The Gateway is exposing the simulator
* The Endpoint Picker is loaded into the Gateway via ext_proc

You can watch the Endpoint Picker logs with:

 $ kubectl --context ${KUBE_CONTEXT} logs -f deployments/endpoint-picker

You can use a port-forward to access the Gateway:

 $ kubectl --context ${KUBE_CONTEXT} port-forward service/inference-gateway-istio 8080:80

With that running in the background, you can make requests:

 $ curl -v -w '\n' -X POST -H 'Content-Type: application/json' -d '{"model":"model1","messages":[{"role":"user","content":"help"}]}' http://localhost:8080

-----------------------------------------
EOF
}

print_summary