From 0562977b554ae419326681c19aeadb68451e79e0 Mon Sep 17 00:00:00 2001 From: kaushikmitr Date: Fri, 28 Mar 2025 22:40:00 +0000 Subject: [PATCH] Allow defining a default base model in the lora syncer configuration --- config/manifests/vllm/gpu-deployment.yaml | 7 +- site-src/guides/adapter-rollout.md | 14 ++-- tools/dynamic-lora-sidecar/README.md | 69 ++++++++++++++++--- tools/dynamic-lora-sidecar/deployment.yaml | 17 ++--- tools/dynamic-lora-sidecar/sidecar/sidecar.py | 17 ++++- .../sidecar/validation.yaml | 11 +-- 6 files changed, 95 insertions(+), 40 deletions(-) diff --git a/config/manifests/vllm/gpu-deployment.yaml b/config/manifests/vllm/gpu-deployment.yaml index c405b33c2..beb19bbd3 100644 --- a/config/manifests/vllm/gpu-deployment.yaml +++ b/config/manifests/vllm/gpu-deployment.yaml @@ -246,11 +246,10 @@ data: vLLMLoRAConfig: name: vllm-llama3.1-8b-instruct port: 8000 + defaultBaseModel: meta-llama/Llama-3.1-8B-Instruct ensureExist: models: - - base-model: meta-llama/Llama-3.1-8B-Instruct - id: food-review + - id: food-review source: Kawon/llama3.1-food-finetune_v14_r8 - - base-model: meta-llama/Llama-3.1-8B-Instruct - id: cad-fabricator + - id: cad-fabricator source: redcathode/fabricator diff --git a/site-src/guides/adapter-rollout.md b/site-src/guides/adapter-rollout.md index 18d60ecec..a398c1246 100644 --- a/site-src/guides/adapter-rollout.md +++ b/site-src/guides/adapter-rollout.md @@ -33,13 +33,12 @@ Change the ConfigMap to match the following (note the new entry under models): vLLMLoRAConfig: name: vllm-llama3-8b-instruct-adapters port: 8000 + defaultBaseModel: meta-llama/Llama-3.1-8B-Instruct ensureExist: models: - - base-model: meta-llama/Llama-3.1-8B-Instruct - id: food-review-1 + - id: food-review-1 source: vineetsharma/qlora-adapter-Llama-2-7b-hf-TweetSumm - - base-model: meta-llama/Llama-3.1-8B-Instruct - id: food-review-2 + - id: food-review-2 source: mahimairaja/tweet-summarization-llama-2-finetuned ``` @@ -118,15 +117,14 @@ Unload the older versions from the servers by updating the LoRA syncer ConfigMap vLLMLoRAConfig: name: sql-loras-llama port: 8000 + defaultBaseModel: meta-llama/Llama-3.1-8B-Instruct ensureExist: models: - - base-model: meta-llama/Llama-3.1-8B-Instruct - id: food-review-2 + - id: food-review-2 source: mahimairaja/tweet-summarization-llama-2-finetuned ensureNotExist: models: - - base-model: meta-llama/Llama-3.1-8B-Instruct - id: food-review-1 + - id: food-review-1 source: vineetsharma/qlora-adapter-Llama-2-7b-hf-TweetSumm ``` diff --git a/tools/dynamic-lora-sidecar/README.md b/tools/dynamic-lora-sidecar/README.md index f14dbfc7c..bebaa8854 100644 --- a/tools/dynamic-lora-sidecar/README.md +++ b/tools/dynamic-lora-sidecar/README.md @@ -60,20 +60,67 @@ The sidecar supports the following command-line arguments: ## Configuration Fields - `vLLMLoRAConfig`[**required**] base key -- `host` [*optional*]Model server's host. defaults to localhost +- `host` [*optional*] Model server's host. defaults to localhost - `port` [*optional*] Model server's port. defaults to 8000 -- `name`[*optional*] Name of this config -- `ensureExist`[*optional*] List of models to ensure existence on specified model server. - - `models`[**required**] [list] - - `base-model`[*optional*] Base model for lora adapter - - `id`[**required**] unique id of lora adapter - - `source`[**required**] path (remote or local) to lora adapter +- `name` [*optional*] Name of this config +- `defaultBaseModel` [*optional*] Default base model to use for all adapters when not specified individually +- `ensureExist` [*optional*] List of models to ensure existence on specified model server. + - `models` [**required**] [list] + - `id` [**required**] unique id of lora adapter + - `source` [**required**] path (remote or local) to lora adapter + - `base-model` [*optional*] Base model for lora adapter (overrides defaultBaseModel) - `ensureNotExist` [*optional*] - - `models`[**required**] [list] - - `id`[**required**] unique id of lora adapter - - `source`[**required**] path (remote or local) to lora adapter - - `base-model`[*optional*] Base model for lora adapter + - `models` [**required**] [list] + - `id` [**required**] unique id of lora adapter + - `source` [**required**] path (remote or local) to lora adapter + - `base-model` [*optional*] Base model for lora adapter (overrides defaultBaseModel) +## Example Configuration + +Here's an example of using the `defaultBaseModel` field to avoid repetition in your configuration: + +```yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: vllm-llama2-7b-adapters +data: + configmap.yaml: | + vLLMLoRAConfig: + name: vllm-llama2-7b + port: 8000 + defaultBaseModel: meta-llama/Llama-2-7b-hf + ensureExist: + models: + - id: tweet-summary-1 + source: vineetsharma/qlora-adapter-Llama-2-7b-hf-TweetSumm + - id: tweet-summary-2 + source: mahimairaja/tweet-summarization-llama-2-finetuned +``` + +In this example, both adapters will use `meta-llama/Llama-2-7b-hf` as their base model without needing to specify it for each adapter individually. + +You can still override the default base model for specific adapters when needed: + +```yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: vllm-mixed-adapters +data: + configmap.yaml: | + vLLMLoRAConfig: + name: vllm-mixed + port: 8000 + defaultBaseModel: meta-llama/Llama-2-7b-hf + ensureExist: + models: + - id: tweet-summary-1 + source: vineetsharma/qlora-adapter-Llama-2-7b-hf-TweetSumm + - id: code-assistant + source: huggingface/code-assistant-lora + base-model: meta-llama/Llama-2-13b-hf # Override for this specific adapter +``` ## Example Deployment The [deployment.yaml](deployment.yaml) file shows an example of deploying the sidecar with custom parameters: diff --git a/tools/dynamic-lora-sidecar/deployment.yaml b/tools/dynamic-lora-sidecar/deployment.yaml index 0a20ec666..0c0c1781e 100644 --- a/tools/dynamic-lora-sidecar/deployment.yaml +++ b/tools/dynamic-lora-sidecar/deployment.yaml @@ -66,7 +66,7 @@ spec: - name: lora-adapter-syncer tty: true stdin: true - image: + image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/lora-syncer:main restartPolicy: Always imagePullPolicy: Always env: @@ -106,22 +106,17 @@ metadata: data: configmap.yaml: | vLLMLoRAConfig: - host: modelServerHost name: sql-loras-llama - port: modelServerPort + defaultBaseModel: meta-llama/Llama-2-7b-hf ensureExist: models: - - base-model: meta-llama/Llama-3.1-8B-Instruct - id: sql-lora-v1 + - id: sql-lora-v1 source: yard1/llama-2-7b-sql-lora-test - - base-model: meta-llama/Llama-3.1-8B-Instruct - id: sql-lora-v3 + - id: sql-lora-v3 source: yard1/llama-2-7b-sql-lora-test - - base-model: meta-llama/Llama-3.1-8B-Instruct - id: sql-lora-v4 + - id: sql-lora-v4 source: yard1/llama-2-7b-sql-lora-test ensureNotExist: models: - - base-model: meta-llama/Llama-3.1-8B-Instruct - id: sql-lora-v2 + - id: sql-lora-v2 source: yard1/llama-2-7b-sql-lora-test \ No newline at end of file diff --git a/tools/dynamic-lora-sidecar/sidecar/sidecar.py b/tools/dynamic-lora-sidecar/sidecar/sidecar.py index 00de99e34..30724478a 100644 --- a/tools/dynamic-lora-sidecar/sidecar/sidecar.py +++ b/tools/dynamic-lora-sidecar/sidecar/sidecar.py @@ -135,15 +135,24 @@ def port(self): def model_server(self): """Model server {host}:{port}""" return f"{self.host}:{self.port}" + + @property + def default_base_model(self): + """Default base model to use when not specified at adapter level""" + return self.config.get("defaultBaseModel", "") @property def ensure_exist_adapters(self): """Lora adapters in config under key `ensureExist` in set""" adapters = self.config.get("ensureExist", {}).get("models", set()) + default_model = self.default_base_model + return set( [ LoraAdapter( - adapter["id"], adapter["source"], adapter.get("base-model", "") + adapter["id"], + adapter["source"], + adapter.get("base-model", default_model) ) for adapter in adapters ] @@ -153,10 +162,14 @@ def ensure_exist_adapters(self): def ensure_not_exist_adapters(self): """Lora adapters in config under key `ensureNotExist` in set""" adapters = self.config.get("ensureNotExist", {}).get("models", set()) + default_model = self.default_base_model + return set( [ LoraAdapter( - adapter["id"], adapter["source"], adapter.get("base-model", "") + adapter["id"], + adapter["source"], + adapter.get("base-model", default_model) ) for adapter in adapters ] diff --git a/tools/dynamic-lora-sidecar/sidecar/validation.yaml b/tools/dynamic-lora-sidecar/sidecar/validation.yaml index 9dd98f875..30d23b7f1 100644 --- a/tools/dynamic-lora-sidecar/sidecar/validation.yaml +++ b/tools/dynamic-lora-sidecar/sidecar/validation.yaml @@ -16,6 +16,9 @@ properties: name: type: string description: Name of this config + defaultBaseModel: + type: string + description: Default base model to use when not specified at adapter level ensureExist: type: object description: List of models to ensure existence on specified model server @@ -26,9 +29,9 @@ properties: items: type: object properties: - base_model: + base-model: type: string - description: Base model for LoRA adapter + description: Base model for LoRA adapter (overrides defaultBaseModel) id: type: string description: Unique ID of LoRA adapter @@ -50,9 +53,9 @@ properties: items: type: object properties: - base_model: + base-model: type: string - description: Base model for LoRA adapter + description: Base model for LoRA adapter (overrides defaultBaseModel) id: type: string description: Unique ID of LoRA adapter