Skip to content

Commit 6bfbf0f

Browse files
committed
PoC implementation
1 parent dfa8af6 commit 6bfbf0f

15 files changed

+1479
-0
lines changed

examples/poc/README.md

+63
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
# Envoy Ext Proc Gateway with LoRA Integration
2+
3+
This project sets up an Envoy gateway to handle gRPC calls with integration of LoRA (Low-Rank Adaptation). The configuration aims to manage gRPC traffic through Envoy's external processing and custom routing based on headers and load balancing rules. The setup includes Kubernetes services and deployments for both the gRPC server and the vllm-lora application.
4+
5+
## Requirements
6+
- A vLLM-based deployment (using the custom image provided below) with LoRA adapters
7+
- Kubernetes cluster
8+
- Envoy Gateway v1.1 installed on your cluster: https://gateway.envoyproxy.io/v1.1/tasks/quickstart/
9+
- `kubectl` command-line tool
10+
- Go (for local development)
11+
12+
## vLLM
13+
***This PoC uses a modified vLLM fork. The public image of the fork is available at `ghcr.io/tomatillo-and-multiverse/vllm:demo`.***
14+
15+
16+
## Overview
17+
18+
This project contains the configurations and code needed to set up and deploy a service using Kubernetes, Envoy, and Go. The service routes requests based on the model specified in the request (using the OpenAI API format), collects metrics, and balances load efficiently across replicas.
19+
20+
![Envoy gateway bootstrap diagram](https://github.com/tomatillo-and-multiverse/lora-inference-gateway/blob/final-poc/envoy-gateway-bootstrap.png)
21+
22+
23+
## Quickstart
24+
25+
### Steps
26+
27+
1. **Apply Kubernetes Manifests**
28+
```bash
29+
cd manifests
30+
kubectl apply -f ext_proc.yaml
31+
kubectl apply -f vllm/vllm-lora-service.yaml
32+
kubectl apply -f vllm/vllm-lora-deployment.yaml
33+
```
34+
35+
2. **Update `ext_proc.yaml`**
36+
- Ensure `ext_proc.yaml` is updated with the pod names and internal IP addresses of the vLLM replicas, then re-apply it (`kubectl apply -f ext_proc.yaml`) so the change takes effect. This step is crucial for the correct routing of requests based on headers.
37+
38+
3. **Update and apply `gateway.yaml`**
39+
- Ensure the `gateway.yaml` is updated with the internal IP addresses of the ExtProc service. This step is also crucial for the correct routing of requests based on headers.
40+
```bash
41+
cd manifests
42+
kubectl apply -f gateway.yaml
43+
```
44+
45+
### Monitoring and Metrics
46+
47+
- The Go application collects metrics and saves the latest response headers in memory.
48+
- Ensure Envoy is configured to route based on the metrics collected from the `/metrics` endpoint of the different service pods.
49+
50+
## Contributing
51+
52+
1. Fork the repository.
53+
2. Create a new branch.
54+
3. Make your changes.
55+
4. Open a pull request.
56+
57+
## License
58+
59+
This project is licensed under the MIT License.
60+
61+
---
62+
63+
Feel free to customize this README to better fit your specific project details and requirements.
911 KB
Loading

examples/poc/ext-proc/Dockerfile

+19
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
## Multistage build
# Stage 1: compile a statically linked linux/amd64 binary (CGO disabled).
FROM golang:1.22.5-alpine AS build
ENV CGO_ENABLED=0
ENV GOOS=linux
ENV GOARCH=amd64

WORKDIR /src
COPY . .
RUN go mod download
RUN go build -o /ext-proc

## Multistage deploy
# Stage 2: runtime image. Only the compiled binary is copied in from the build
# stage; no extra packages are installed here.
# NOTE(review): base-debian10 is an old distroless tag; consider moving to a
# currently maintained tag (e.g. a debian12 variant) after verifying the image.
FROM gcr.io/distroless/base-debian10

WORKDIR /
COPY --from=build /ext-proc /ext-proc

ENTRYPOINT ["/ext-proc"]

examples/poc/ext-proc/README.md

Whitespace-only changes.

examples/poc/ext-proc/cache/cache.go

+91
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
package cache
2+
3+
import (
4+
"encoding/json"
5+
"fmt"
6+
7+
"github.com/coocood/freecache"
8+
)
9+
10+
// ActiveLoraModelMetrics is the record cached per (pod, model) pair.
//
// It is serialized to JSON before being stored, and PodName and ModelName are
// combined into the cache key "<PodName>:<ModelName>". Date is carried as an
// opaque string; this package does not parse or validate it.
// NumberOfPendingRequests is the pending-request count reported for the model
// on that pod.
type ActiveLoraModelMetrics struct {
	Date, PodName, ModelName string
	NumberOfPendingRequests  int
}
16+
17+
// PendingRequestActiveAdaptersMetrics is the record cached per pod.
//
// It is serialized to JSON before being stored under the key "<PodName>:".
// Date is carried as an opaque string; this package does not parse or
// validate it. PendingRequests and NumberOfActiveAdapters are the pod-level
// counters reported for that pod.
type PendingRequestActiveAdaptersMetrics struct {
	Date, PodName                           string
	PendingRequests, NumberOfActiveAdapters int
}
23+
24+
func SetCacheActiveLoraModel(cache *freecache.Cache, metric ActiveLoraModelMetrics) error {
25+
cacheKey := fmt.Sprintf("%s:%s", metric.PodName, metric.ModelName)
26+
cacheValue, err := json.Marshal(metric)
27+
if err != nil {
28+
return fmt.Errorf("error marshaling ActiveLoraModelMetrics for key %s: %v", cacheKey, err)
29+
}
30+
err = cache.Set([]byte(cacheKey), cacheValue, 0)
31+
if err != nil {
32+
return fmt.Errorf("error setting cacheActiveLoraModel for key %s: %v", cacheKey, err)
33+
}
34+
fmt.Printf("Set cacheActiveLoraModel - Key: %s, Value: %s\n", cacheKey, cacheValue)
35+
return nil
36+
}
37+
38+
func SetCachePendingRequestActiveAdapters(cache *freecache.Cache, metric PendingRequestActiveAdaptersMetrics) error {
39+
cacheKey := fmt.Sprintf("%s:", metric.PodName)
40+
cacheValue, err := json.Marshal(metric)
41+
if err != nil {
42+
return fmt.Errorf("error marshaling PendingRequestActiveAdaptersMetrics for key %s: %v", cacheKey, err)
43+
}
44+
err = cache.Set([]byte(cacheKey), cacheValue, 0)
45+
if err != nil {
46+
return fmt.Errorf("error setting cachePendingRequestActiveAdapters for key %s: %v", cacheKey, err)
47+
}
48+
fmt.Printf("Set cachePendingRequestActiveAdapters - Key: %s, Value: %s\n", cacheKey, cacheValue)
49+
return nil
50+
}
51+
52+
func GetCacheActiveLoraModel(cache *freecache.Cache, podName, modelName string) (*ActiveLoraModelMetrics, error) {
53+
cacheKey := fmt.Sprintf("%s:%s", podName, modelName)
54+
55+
value, err := cache.Get([]byte(cacheKey))
56+
if err != nil {
57+
return nil, fmt.Errorf("error fetching cacheActiveLoraModel for key %s: %v", cacheKey, err)
58+
}
59+
var metric ActiveLoraModelMetrics
60+
err = json.Unmarshal(value, &metric)
61+
if err != nil {
62+
return nil, fmt.Errorf("error unmarshaling ActiveLoraModelMetrics for key %s: %v", cacheKey, err)
63+
}
64+
fmt.Printf("Got cacheActiveLoraModel - Key: %s, Value: %s\n", cacheKey, value)
65+
return &metric, nil
66+
}
67+
68+
func GetCachePendingRequestActiveAdapters(cache *freecache.Cache, podName string) (*PendingRequestActiveAdaptersMetrics, error) {
69+
cacheKey := fmt.Sprintf("%s:", podName)
70+
71+
value, err := cache.Get([]byte(cacheKey))
72+
if err != nil {
73+
return nil, fmt.Errorf("error fetching cachePendingRequestActiveAdapters for key %s: %v", cacheKey, err)
74+
}
75+
var metric PendingRequestActiveAdaptersMetrics
76+
err = json.Unmarshal(value, &metric)
77+
if err != nil {
78+
return nil, fmt.Errorf("error unmarshaling PendingRequestActiveAdaptersMetrics for key %s: %v", cacheKey, err)
79+
}
80+
fmt.Printf("Got cachePendingRequestActiveAdapters - Key: %s, Value: %s\n", cacheKey, value)
81+
return &metric, nil
82+
}
83+
84+
// PodCache holds two string-to-string lookup tables.
//
// NOTE(review): judging by the field names, PodIPMap maps a pod name to its
// IP and IpPodMap is the reverse mapping — nothing in this file populates or
// reads them yet, so confirm against the intended callers.
type PodCache struct {
	PodIPMap, IpPodMap map[string]string
}
88+
89+
func SetPodCache(cache *freecache.Cache, pods []string) {
90+
cacheKey := fmt.Sprintf("")
91+
}

examples/poc/ext-proc/go.mod

+24
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
module ext-proc
2+
3+
go 1.21
4+
5+
require (
6+
github.com/coocood/freecache v1.2.4
7+
github.com/envoyproxy/go-control-plane v0.12.0
8+
github.com/prometheus/client_model v0.6.1
9+
github.com/prometheus/common v0.55.0
10+
google.golang.org/grpc v1.65.0
11+
)
12+
13+
require (
14+
github.com/cespare/xxhash/v2 v2.3.0 // indirect
15+
github.com/cncf/xds/go v0.0.0-20240423153145-555b57ec207b // indirect
16+
github.com/envoyproxy/protoc-gen-validate v1.0.4 // indirect
17+
github.com/golang/protobuf v1.5.4 // indirect
18+
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
19+
golang.org/x/net v0.26.0 // indirect
20+
golang.org/x/sys v0.21.0 // indirect
21+
golang.org/x/text v0.16.0 // indirect
22+
google.golang.org/genproto/googleapis/rpc v0.0.0-20240528184218-531527333157 // indirect
23+
google.golang.org/protobuf v1.34.2 // indirect
24+
)

examples/poc/ext-proc/go.sum

+33
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
github.com/cespare/xxhash/v2 v2.1.2/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
2+
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
3+
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
4+
github.com/cncf/xds/go v0.0.0-20240423153145-555b57ec207b h1:ga8SEFjZ60pxLcmhnThWgvH2wg8376yUJmPhEH4H3kw=
5+
github.com/cncf/xds/go v0.0.0-20240423153145-555b57ec207b/go.mod h1:W+zGtBO5Y1IgJhy4+A9GOqVhqLpfZi+vwmdNXUehLA8=
6+
github.com/coocood/freecache v1.2.4 h1:UdR6Yz/X1HW4fZOuH0Z94KwG851GWOSknua5VUbb/5M=
7+
github.com/coocood/freecache v1.2.4/go.mod h1:RBUWa/Cy+OHdfTGFEhEuE1pMCMX51Ncizj7rthiQ3vk=
8+
github.com/envoyproxy/go-control-plane v0.12.0 h1:4X+VP1GHd1Mhj6IB5mMeGbLCleqxjletLK6K0rbxyZI=
9+
github.com/envoyproxy/go-control-plane v0.12.0/go.mod h1:ZBTaoJ23lqITozF0M6G4/IragXCQKCnYbmlmtHvwRG0=
10+
github.com/envoyproxy/protoc-gen-validate v1.0.4 h1:gVPz/FMfvh57HdSJQyvBtF00j8JU4zdyUgIUNhlgg0A=
11+
github.com/envoyproxy/protoc-gen-validate v1.0.4/go.mod h1:qys6tmnRsYrQqIhm2bvKZH4Blx/1gTIZ2UKVY1M+Yew=
12+
github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
13+
github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
14+
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
15+
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
16+
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
17+
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
18+
github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E=
19+
github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY=
20+
github.com/prometheus/common v0.55.0 h1:KEi6DK7lXW/m7Ig5i47x0vRzuBsHuvJdi5ee6Y3G1dc=
21+
github.com/prometheus/common v0.55.0/go.mod h1:2SECS4xJG1kd8XF9IcM1gMX6510RAEL65zxzNImwdc8=
22+
golang.org/x/net v0.26.0 h1:soB7SVo0PWrY4vPW/+ay0jKDNScG2X9wFeYlXIvJsOQ=
23+
golang.org/x/net v0.26.0/go.mod h1:5YKkiSynbBIh3p6iOc/vibscux0x38BZDkn8sCUPxHE=
24+
golang.org/x/sys v0.21.0 h1:rF+pYz3DAGSQAxAu1CbC7catZg4ebC4UIeIhKxBZvws=
25+
golang.org/x/sys v0.21.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
26+
golang.org/x/text v0.16.0 h1:a94ExnEXNtEwYLGJSIUxnWoxoRz/ZcCsV63ROupILh4=
27+
golang.org/x/text v0.16.0/go.mod h1:GhwF1Be+LQoKShO3cGOHzqOgRrGaYc9AvblQOmPVHnI=
28+
google.golang.org/genproto/googleapis/rpc v0.0.0-20240528184218-531527333157 h1:Zy9XzmMEflZ/MAaA7vNcoebnRAld7FsPW1EeBB7V0m8=
29+
google.golang.org/genproto/googleapis/rpc v0.0.0-20240528184218-531527333157/go.mod h1:EfXuqaE1J41VCDicxHzUDm+8rk+7ZdXzHV0IhO/I6s0=
30+
google.golang.org/grpc v1.65.0 h1:bs/cUb4lp1G5iImFFd3u5ixQzweKizoZJAwBNLR42lc=
31+
google.golang.org/grpc v1.65.0/go.mod h1:WgYC2ypjlB0EiQi6wdKixMqukr6lBc0Vo+oOgjrM5ZQ=
32+
google.golang.org/protobuf v1.34.2 h1:6xV6lTsCfpGD21XK49h7MhtcApnLqkfYgPcdHftf6hg=
33+
google.golang.org/protobuf v1.34.2/go.mod h1:qYOHts0dSfpeUzUFpOMr/WGzszTmLH+DiWniOlNbLDw=

0 commit comments

Comments
 (0)