kubernetes-sigs · k8s-ci-robot · Mar 28, 2025 · Mar 21, 2025 · Mar 28, 2025 · Mar 28, 2025
diff --git a/tools/dynamic-lora-sidecar/Dockerfile b/tools/dynamic-lora-sidecar/Dockerfile
@@ -2,7 +2,7 @@ FROM python:3.9-slim-buster AS test
 
 WORKDIR /dynamic-lora-reconciler-test
 COPY requirements.txt .
-COPY sidecar/* . 
+COPY sidecar/* ./ 
 RUN pip install -r requirements.txt
 RUN python -m unittest discover || exit 1  
 
@@ -18,6 +18,6 @@ RUN pip install --upgrade pip
 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
 
-COPY sidecar/* . 
+COPY sidecar/* ./
 
 CMD ["python", "sidecar.py"]
diff --git a/tools/dynamic-lora-sidecar/README.md b/tools/dynamic-lora-sidecar/README.md
@@ -29,21 +29,34 @@ The sidecar uses the vLLM server's API to load or unload adapters based on the c
 
 ## Usage
 
+
 1. **Build the Docker Image:**
    ```bash
    docker build -t <your-image-name> .
+   ```
+
 2. **Create a configmap:**
-    ```bash
-    kubectl create configmap name-of-your-configmap --from-file=your-file.yaml
+   ```bash
+   kubectl create configmap name-of-your-configmap --from-file=your-file.yaml
+   ```
+
 3. **Mount the configmap and configure sidecar in your pod**
-    ```yaml
-    volumeMounts: # DO NOT USE subPath
-          - name: config-volume
-            mountPath:  /config
-    ```
-    Do not use subPath, since configmap updates are not reflected in the file
+   ```yaml
+   volumeMounts: # DO NOT USE subPath
+         - name: config-volume
+           mountPath:  /config
+   ```
+   Do not use `subPath`: ConfigMap updates are not propagated to files mounted via `subPath`, so the sidecar would never see config changes.
 
-[deployment]: deployment.yaml it uses [sidecar](https://kubernetes.io/docs/concepts/workloads/pods/sidecar-containers/)(`initContainer` with `restartPolicy` set to `always`) which is beta feature enabled by default since k8s version 1.29. They need to be enabled in 1.28 and prior to 1.28 sidecar are not officially supported.
+## Command Line Arguments
+
+The sidecar supports the following command-line arguments:
+
+- `--health-check-timeout`: Maximum time in seconds to wait for the vLLM server health check (default: 300)
+- `--health-check-interval`: Interval in seconds between health check attempts (default: 2)
+- `--reconcile-trigger`: Time in seconds between forced reconciliation runs (default: 5)
+- `--config`: Path to the config map file (default: value from DYNAMIC_LORA_ROLLOUT_CONFIG env var or "/config/configmap.yaml")
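+- `--config-validation`: Enable config validation (default: True; because the flag already defaults to enabled, passing it has no effect and validation cannot currently be disabled from the command line)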
 
 ## Configuration Fields
 - `vLLMLoRAConfig`[**required**]  base key 
@@ -61,11 +74,41 @@ The sidecar uses the vLLM server's API to load or unload adapters based on the c
         -  `source`[**required**] path (remote or local) to lora adapter
         - `base-model`[*optional*] Base model for lora adapter
 
-
-
+## Example Deployment
+
+The [deployment.yaml](deployment.yaml) file shows an example of deploying the sidecar with custom parameters:
+
+```yaml
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: dynamic-lora-reconciler
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: dynamic-lora-reconciler
+  template:
+    metadata:
+      labels:
+        app: dynamic-lora-reconciler
+    spec:
+      containers:
+      - name: reconciler
+        image: your-image:tag
+        command: ["python", "sidecar.py", "--health-check-timeout", "600", "--health-check-interval", "5", "--reconcile-trigger", "10"] # optional: only needed when overriding the default values
+        volumeMounts:
+        - name: config-volume
+          mountPath: /config
+      volumes:
+      - name: config-volume
+        configMap:
+          name: name-of-your-configmap
+```
+
+Note: This uses a [sidecar](https://kubernetes.io/docs/concepts/workloads/pods/sidecar-containers/) (an `initContainer` with `restartPolicy` set to `Always`), which is a beta feature enabled by default since Kubernetes 1.29. In 1.28 the feature must be enabled explicitly, and before 1.28 sidecar containers are not officially supported.
 
 ## Screenshots & Testing
 The sidecar was tested with the Deployment and ConfigMap specified in this repo. Here are screen grabs of the logs from the sidecar and vllm server. One can verify that the adapters were loaded by querying `v1/models` and looking at vllm logs.
-![lora-adapter-syncer](screenshots/lora-syncer-sidecar.png)
-![config map change](screenshots/configmap-change.png)
+![lora-adapter-syncer](screenshots/lora-syncer-logs.png)
 ![vllm-logs](screenshots/vllm-logs.png)
diff --git a/tools/dynamic-lora-sidecar/screenshots/configmap-change.png b/tools/dynamic-lora-sidecar/screenshots/configmap-change.png
diff --git a/tools/dynamic-lora-sidecar/screenshots/lora-syncer-logs.png b/tools/dynamic-lora-sidecar/screenshots/lora-syncer-logs.png
diff --git a/tools/dynamic-lora-sidecar/screenshots/lora-syncer-sidecar.png b/tools/dynamic-lora-sidecar/screenshots/lora-syncer-sidecar.png
diff --git a/tools/dynamic-lora-sidecar/sidecar/sidecar.py b/tools/dynamic-lora-sidecar/sidecar/sidecar.py
@@ -1,6 +1,7 @@
 import requests
 import yaml
 import time
+import argparse
 from jsonschema import validate
 from watchfiles import awatch
 from dataclasses import dataclass
@@ -30,18 +31,35 @@ def current_time_human() -> str:
     return now.strftime("%Y-%m-%d %H:%M:%S %Z%z")
 
 
+def parse_arguments():
+    """Parse command line arguments."""
+    parser = argparse.ArgumentParser(description='vLLM LoRA Adapter Reconciler')
+    parser.add_argument('--health-check-timeout', type=int, default=300,
+                        help='Health check timeout in seconds (default: 300)')
+    parser.add_argument('--health-check-interval', type=int, default=2,
+                        help='Health check interval in seconds (default: 2)')
+    parser.add_argument('--reconcile-trigger', type=int, default=5,
+                        help='Reconciliation trigger interval in seconds (default: 5)')
+    parser.add_argument('--config', type=str, default=CONFIG_MAP_FILE,
+                        help=f'Path to config map file (default: {CONFIG_MAP_FILE})')
+    parser.add_argument('--config-validation', action='store_true', default=True,
+                        help='Enable config validation (default: True)')
+    return parser.parse_args()
+
+
 class FileChangeHandler(FileSystemEventHandler):
     """Custom event handler that handles file modifications."""
 
-    def __init__(self, reconciler):
+    def __init__(self, reconciler, config_file):
         super().__init__()
         self.reconciler = reconciler
+        self.config_file = config_file
 
     def on_modified(self, event):
         logging.info("modified!")
-        logging.info(f"Config '{CONFIG_MAP_FILE}' modified!")
+        logging.info(f"Config '{self.config_file}' modified!")
         self.reconciler.reconcile()
-        logging.info(f"model server reconcile to Config '{CONFIG_MAP_FILE}' !")
+        logging.info(f"model server reconcile to Config '{self.config_file}' !")
 
 
 @dataclass
@@ -65,10 +83,17 @@ class LoraReconciler:
     Reconciles adapters registered on vllm server with adapters listed in configmap in current state
     """
 
-    def __init__(self, config_validation=True):
-        self.health_check_timeout = datetime.timedelta(seconds=300)
-        self.health_check_interval = datetime.timedelta(seconds=15)
+    def __init__(self, config_file, health_check_timeout, health_check_interval, 
+                 reconcile_trigger_seconds, config_validation=True):
+        self.config_file = config_file
         self.config_validation = config_validation
+        self.health_check_timeout = datetime.timedelta(seconds=health_check_timeout)
+        self.health_check_interval = datetime.timedelta(seconds=health_check_interval)
+        self.reconcile_trigger_seconds = reconcile_trigger_seconds
+
+        logging.info(f"Settings initialized: health check timeout={health_check_timeout}s, "
+                     f"interval={health_check_interval}s, "
+                     f"reconcile trigger={self.reconcile_trigger_seconds}s")
 
     def validate_config(self, c) -> bool:
         try:
@@ -77,14 +102,14 @@ def validate_config(self, c) -> bool:
                 validate(instance=c, schema=schema)
                 return True
         except Exception as e:
-            logging.error(f"Cannot load config {CONFIG_MAP_FILE} validation error: {e}")
+            logging.error(f"Cannot load config {self.config_file} validation error: {e}")
             return False
 
     @property
     def config(self):
         """Load configmap into memory"""
         try:
-            with open(CONFIG_MAP_FILE, "r") as f:
+            with open(self.config_file, "r") as f:
                 c = yaml.safe_load(f)
                 if self.config_validation and not self.validate_config(c):
                     return {}
@@ -93,7 +118,7 @@ def config(self):
                 c = c.get("vLLMLoRAConfig", {})
                 return c
         except Exception as e:
-            logging.error(f"cannot load config {CONFIG_MAP_FILE} {e}")
+            logging.error(f"cannot load config {self.config_file} {e}")
             return {}
 
     @property
@@ -215,8 +240,9 @@ def unload_adapter(self, adapter: LoraAdapter):
     def reconcile(self):
         """Reconciles model server with current version of configmap"""
         logging.info(
-            f"reconciling model server {self.model_server} with config stored at {CONFIG_MAP_FILE}"
+            f"reconciling model server {self.model_server} with config stored at {self.config_file}"
         )
+
         if not self.is_server_healthy:
             logging.error(f"vllm server at {self.model_server} not healthy")
             return
@@ -240,26 +266,45 @@ def reconcile(self):
 
 
 async def main():
-    reconciler_instance = LoraReconciler()
-    logging.info(f"Running initial reconcile for config map {CONFIG_MAP_FILE}")
+    args = parse_arguments()
+
+    # Use the config path from --config (defaults to CONFIG_MAP_FILE); the module-level constant itself is not modified
+    config_file = args.config
+
+    reconciler_instance = LoraReconciler(
+        config_file=config_file,
+        health_check_timeout=args.health_check_timeout,
+        health_check_interval=args.health_check_interval,
+        reconcile_trigger_seconds=args.reconcile_trigger,
+        config_validation=args.config_validation
+    )
+
+    logging.info(f"Running initial reconcile for config map {config_file}")
     reconciler_instance.reconcile()
 
-    event_handler = FileChangeHandler(reconciler_instance)
+    event_handler = FileChangeHandler(reconciler_instance, config_file)
     observer = Observer()
     observer.schedule(
-        event_handler, path=os.path.dirname(CONFIG_MAP_FILE), recursive=False
+        event_handler, path=os.path.dirname(config_file), recursive=False
     )
     observer.start()
 
     try:
-        logging.info(f"Starting to watch {CONFIG_MAP_FILE} for changes...")
+        logging.info(f"Starting to watch {config_file} for changes and performing periodic reconciliation...")
         while True:
-            await asyncio.sleep(1)
+            # Get current trigger interval from reconciler
+            trigger_seconds = reconciler_instance.reconcile_trigger_seconds
+            logging.info(f"Waiting {trigger_seconds}s before next reconciliation...")
+            # Wait for configured trigger interval
+            await asyncio.sleep(trigger_seconds)
+            # Force trigger reconciliation
+            logging.info("Periodic reconciliation triggered")
+            reconciler_instance.reconcile()
     except KeyboardInterrupt:
         logging.info("Stopped by user.")
         observer.stop()
     observer.join()
 
 
 if __name__ == "__main__":
-    asyncio.run(main())
+    asyncio.run(main())
diff --git a/tools/dynamic-lora-sidecar/sidecar/test_sidecar.py b/tools/dynamic-lora-sidecar/sidecar/test_sidecar.py
@@ -2,8 +2,10 @@
 from unittest.mock import patch, Mock, mock_open, call
 import yaml
 import os
-from sidecar import LoraReconciler, CONFIG_MAP_FILE, BASE_FIELD, LoraAdapter
+import datetime
+from sidecar import LoraReconciler, LoraAdapter, CONFIG_MAP_FILE, BASE_FIELD
 
+# Update TEST_CONFIG_DATA to include the new configuration parameters
 TEST_CONFIG_DATA = {
     BASE_FIELD: {
         "host": "localhost",
@@ -49,13 +51,14 @@
         },
     }
 }
+
 EXIST_ADAPTERS = [
-    LoraAdapter(a["id"], a["base-model"], a["source"])
+    LoraAdapter(a["id"], a["source"], a["base-model"])
     for a in TEST_CONFIG_DATA[BASE_FIELD]["ensureExist"]["models"]
 ]
 
 NOT_EXIST_ADAPTERS = [
-    LoraAdapter(a["id"], a["base-model"], a["source"])
+    LoraAdapter(a["id"], a["source"], a["base-model"])
     for a in TEST_CONFIG_DATA[BASE_FIELD]["ensureNotExist"]["models"]
 ]
 RESPONSES = {
@@ -101,7 +104,15 @@ def setUp(self, mock_get, mock_file):
             mock_response = getMockResponse()
             mock_response.json.return_value = RESPONSES["v1/models"]
             mock_get.return_value = mock_response
-            self.reconciler = LoraReconciler(False)
+
+            # Create reconciler with command line argument values instead of config file values
+            self.reconciler = LoraReconciler(
+                config_file=CONFIG_MAP_FILE,
+                health_check_timeout=180,
+                health_check_interval=10,
+                reconcile_trigger_seconds=30,
+                config_validation=False
+            )
             self.maxDiff = None
 
     @patch("sidecar.requests.get")
@@ -167,20 +178,47 @@ def test_reconcile(self, mock_post, mock_get, mock_file):
                         mock_get_response.json.return_value = RESPONSES["v1/models"]
                         mock_get.return_value = mock_get_response
                         mock_post.return_value = getMockResponse()
-                        self.reconciler = LoraReconciler()
-                        self.reconciler.reconcile()
 
-                        # 1 adapter is in both exist and not exist list, only 2 are expected to be loaded
-                        mock_load.assert_has_calls(
-                            calls=[call(EXIST_ADAPTERS[0]), call(EXIST_ADAPTERS[2])]
+                        # Create reconciler with command line argument values
+                        self.reconciler = LoraReconciler(
+                            config_file=CONFIG_MAP_FILE,
+                            health_check_timeout=180,
+                            health_check_interval=10,
+                            reconcile_trigger_seconds=30,
+                            config_validation=False
                         )
-                        assert mock_load.call_count == 2
+                        self.reconciler.reconcile()
 
-                        # 1 adapter is in both exist and not exist list, only 2 are expected to be unloaded
-                        mock_unload.assert_has_calls(
-                            calls=[call(NOT_EXIST_ADAPTERS[0]), call(NOT_EXIST_ADAPTERS[2])]
-                        )
-                        assert mock_unload.call_count == 2
+                        # First check the call count
+                        self.assertEqual(mock_load.call_count, 2, "Expected 2 load adapter calls")
+                        self.assertEqual(mock_unload.call_count, 2, "Expected 2 unload adapter calls")
+
+                        # Check that the adapters with the correct IDs were loaded
+                        loaded_ids = [call.args[0].id for call in mock_load.call_args_list]
+                        self.assertIn("sql-lora-v1", loaded_ids, "sql-lora-v1 should have been loaded")
+                        self.assertIn("already_exists", loaded_ids, "already_exists should have been loaded")
+
+                        # Check that the adapters with the correct IDs were unloaded
+                        unloaded_ids = [call.args[0].id for call in mock_unload.call_args_list]
+                        self.assertIn("sql-lora-v2", unloaded_ids, "sql-lora-v2 should have been unloaded")
+                        self.assertIn("to_remove", unloaded_ids, "to_remove should have been unloaded")
+
+    def test_health_check_settings(self):
+        """Test that health check settings are properly initialized from command line args"""
+        # Create reconciler with specific values
+        reconciler = LoraReconciler(
+            config_file=CONFIG_MAP_FILE,
+            health_check_timeout=240,
+            health_check_interval=15,
+            reconcile_trigger_seconds=45,
+            config_validation=False
+        )
+
+        # Check that values are properly set
+        self.assertEqual(reconciler.health_check_timeout, datetime.timedelta(seconds=240))
+        self.assertEqual(reconciler.health_check_interval, datetime.timedelta(seconds=15))
+        self.assertEqual(reconciler.reconcile_trigger_seconds, 45)
+
 
 if __name__ == "__main__":
-    unittest.main()
+    unittest.main()