Skip to content

Commit 6f5b9e7

Browse files
committed
Add makefile and cloudbuild file to build and push lora-syncer
Signed-off-by: Kunjan <[email protected]>
1 parent 62adbb1 commit 6f5b9e7

File tree

3 files changed

+42
-51
lines changed

3 files changed

+42
-51
lines changed

pkg/manifests/vllm/deployment-with-syncer.yaml

Lines changed: 6 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -43,18 +43,8 @@ spec:
4343
- "--max-cpu-loras"
4444
- "12"
4545
- "--lora-modules"
46-
- '{"name": "sql-lora-0", "path": "yard1/llama-2-7b-sql-lora-test", "base_model_name": "llama-2"}'
47-
- '{"name": "sql-lora-1", "path": "yard1/llama-2-7b-sql-lora-test", "base_model_name": "llama-2"}'
48-
- '{"name": "sql-lora-2", "path": "yard1/llama-2-7b-sql-lora-test", "base_model_name": "llama-2"}'
49-
- '{"name": "sql-lora-3", "path": "yard1/llama-2-7b-sql-lora-test", "base_model_name": "llama-2"}'
50-
- '{"name": "sql-lora-4", "path": "yard1/llama-2-7b-sql-lora-test", "base_model_name": "llama-2"}'
5146
- '{"name": "tweet-summary-0", "path": "vineetsharma/qlora-adapter-Llama-2-7b-hf-TweetSumm", "base_model_name": "llama-2"}'
5247
- '{"name": "tweet-summary-1", "path": "vineetsharma/qlora-adapter-Llama-2-7b-hf-TweetSumm", "base_model_name": "llama-2"}'
53-
- '{"name": "tweet-summary-2", "path": "vineetsharma/qlora-adapter-Llama-2-7b-hf-TweetSumm", "base_model_name": "llama-2"}'
54-
- '{"name": "tweet-summary-3", "path": "vineetsharma/qlora-adapter-Llama-2-7b-hf-TweetSumm", "base_model_name": "llama-2"}'
55-
- '{"name": "tweet-summary-4", "path": "vineetsharma/qlora-adapter-Llama-2-7b-hf-TweetSumm", "base_model_name": "llama-2"}'
56-
- '{"name": "sql-lora", "path": "yard1/llama-2-7b-sql-lora-test", "base_model_name": "llama-2"}'
57-
- '{"name": "tweet-summary", "path": "vineetsharma/qlora-adapter-Llama-2-7b-hf-TweetSumm", "base_model_name": "llama-2"}'
5848
env:
5949
- name: PORT
6050
value: "8000"
@@ -143,16 +133,13 @@ data:
143133
ensureExist:
144134
models:
145135
- base-model: meta-llama/Llama-2-7b-hf
146-
id: sql-lora-v1
147-
source: yard1/llama-2-7b-sql-lora-test
136+
id: tweet-summary-0
137+
source: vineetsharma/qlora-adapter-Llama-2-7b-hf-TweetSumm
148138
- base-model: meta-llama/Llama-2-7b-hf
149-
id: sql-lora-v3
150-
source: yard1/llama-2-7b-sql-lora-test
151-
- base-model: meta-llama/Llama-2-7b-hf
152-
id: sql-lora-v4
153-
source: yard1/llama-2-7b-sql-lora-test
139+
id: tweet-summary-1
140+
source: vineetsharma/qlora-adapter-Llama-2-7b-hf-TweetSumm
154141
ensureNotExist:
155142
models:
156143
- base-model: meta-llama/Llama-2-7b-hf
157-
id: sql-lora-v2
158-
source: yard1/llama-2-7b-sql-lora-test
144+
id: tweet-summary-2
145+
source: vineetsharma/qlora-adapter-Llama-2-7b-hf-TweetSumm

pkg/manifests/vllm/deployment.yaml

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -43,18 +43,8 @@ spec:
4343
- "--max-cpu-loras"
4444
- "12"
4545
- "--lora-modules"
46-
- '{"name": "sql-lora-0", "path": "yard1/llama-2-7b-sql-lora-test", "base_model_name": "llama-2"}'
47-
- '{"name": "sql-lora-1", "path": "yard1/llama-2-7b-sql-lora-test", "base_model_name": "llama-2"}'
48-
- '{"name": "sql-lora-2", "path": "yard1/llama-2-7b-sql-lora-test", "base_model_name": "llama-2"}'
49-
- '{"name": "sql-lora-3", "path": "yard1/llama-2-7b-sql-lora-test", "base_model_name": "llama-2"}'
50-
- '{"name": "sql-lora-4", "path": "yard1/llama-2-7b-sql-lora-test", "base_model_name": "llama-2"}'
5146
- '{"name": "tweet-summary-0", "path": "vineetsharma/qlora-adapter-Llama-2-7b-hf-TweetSumm", "base_model_name": "llama-2"}'
5247
- '{"name": "tweet-summary-1", "path": "vineetsharma/qlora-adapter-Llama-2-7b-hf-TweetSumm", "base_model_name": "llama-2"}'
53-
- '{"name": "tweet-summary-2", "path": "vineetsharma/qlora-adapter-Llama-2-7b-hf-TweetSumm", "base_model_name": "llama-2"}'
54-
- '{"name": "tweet-summary-3", "path": "vineetsharma/qlora-adapter-Llama-2-7b-hf-TweetSumm", "base_model_name": "llama-2"}'
55-
- '{"name": "tweet-summary-4", "path": "vineetsharma/qlora-adapter-Llama-2-7b-hf-TweetSumm", "base_model_name": "llama-2"}'
56-
- '{"name": "sql-lora", "path": "yard1/llama-2-7b-sql-lora-test", "base_model_name": "llama-2"}'
57-
- '{"name": "tweet-summary", "path": "vineetsharma/qlora-adapter-Llama-2-7b-hf-TweetSumm", "base_model_name": "llama-2"}'
5848
env:
5949
- name: PORT
6050
value: "8000"

site-src/guides/dynamic-lora.md

Lines changed: 36 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -29,33 +29,40 @@ Rest of the steps are same as [general setup](https://github.com/kubernetes-sigs
2929
name: dynamic-lora-config
3030
data:
3131
configmap.yaml: |
32-
vLLMLoRAConfig:
33-
ensureExist:
34-
models:
35-
- id: tweet-summary-v1
36-
source: tweet-summary-1=/adapters/vineetsharma/qlora-adapter-Llama-2-7b-hf-TweetSumm_1
37-
- id: tweet-summary-v2
38-
source: tweet-summary-2=/adapters/vineetsharma/qlora-adapter-Llama-2-7b-hf-TweetSumm_2
32+
vLLMLoRAConfig:
33+
name: sql-loras-llama
34+
port: 8000
35+
ensureExist:
36+
models:
37+
- base-model: meta-llama/Llama-2-7b-hf
38+
id: tweet-summary-0
39+
source: vineetsharma/qlora-adapter-Llama-2-7b-hf-TweetSumm
40+
- base-model: meta-llama/Llama-2-7b-hf
41+
id: tweet-summary-1
42+
source: vineetsharma/qlora-adapter-Llama-2-7b-hf-TweetSumm
3943
```
4044
41-
2. Configure a canary rollout with traffic split using InferenceModel. In this example, 10% of traffic to the chatbot model will be sent to `tweet-summary-3`.
45+
2. Configure a canary rollout with traffic split using LLMService. In this example, 40% of traffic for the tweet-summary model will be sent to the ***tweet-summary-2*** adapter.
4246
4347
``` yaml
4448
model:
45-
name: chatbot
49+
name: tweet-summary
4650
targetModels:
47-
targetModelName: chatbot-v1
48-
weight: 90
49-
targetModelName: chatbot-v2
51+
targetModelName: tweet-summary-0
5052
weight: 10
53+
targetModelName: tweet-summary-1
54+
weight: 40
55+
targetModelName: tweet-summary-2
56+
weight: 40
57+
5158
```
5259
5360
3. Finish the rollout by sending 100% of the traffic to the new version.
5461
```yaml
5562
model:
56-
name: chatbot
63+
name: tweet-summary
5764
targetModels:
58-
targetModelName: chatbot-v2
65+
targetModelName: tweet-summary-2
5966
weight: 100
6067
```
6168
@@ -68,12 +75,19 @@ model:
6875
data:
6976
configmap.yaml: |
7077
vLLMLoRAConfig:
71-
ensureExist:
72-
models:
73-
- id: chatbot-v2
74-
source: gs://[TEAM-A-MODELS-BUCKET]/chatbot-v2
75-
ensureNotExist: # Explicitly unregisters the adapter from model servers
76-
models:
77-
- id: chatbot-v1
78-
source: gs://[TEAM-A-MODELS-BUCKET]/chatbot-v1
78+
name: sql-loras-llama
79+
port: 8000
80+
ensureExist:
81+
models:
82+
- base-model: meta-llama/Llama-2-7b-hf
83+
id: tweet-summary-2
84+
source: vineetsharma/qlora-adapter-Llama-2-7b-hf-TweetSumm
85+
ensureNotExist:
86+
models:
87+
- base-model: meta-llama/Llama-2-7b-hf
88+
id: tweet-summary-1
89+
source: vineetsharma/qlora-adapter-Llama-2-7b-hf-TweetSumm
90+
- base-model: meta-llama/Llama-2-7b-hf
91+
id: tweet-summary-0
92+
source: vineetsharma/qlora-adapter-Llama-2-7b-hf-TweetSumm
7993
```

0 commit comments

Comments
 (0)