kubernetes-sigs · k8s-ci-robot · Mar 28, 2025 · Mar 21, 2025 · Mar 28, 2025 · Mar 28, 2025
diff --git a/tools/dynamic-lora-sidecar/Dockerfile b/tools/dynamic-lora-sidecar/Dockerfile
@@ -2,7 +2,7 @@ FROM python:3.9-slim-buster AS test
 
 WORKDIR /dynamic-lora-reconciler-test
 COPY requirements.txt .
-COPY sidecar/* . 
+COPY sidecar/* ./ 
 RUN pip install -r requirements.txt
 RUN python -m unittest discover || exit 1  
 
@@ -18,6 +18,6 @@ RUN pip install --upgrade pip
 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
 
-COPY sidecar/* . 
+COPY sidecar/* ./
 
 CMD ["python", "sidecar.py"]
diff --git a/tools/dynamic-lora-sidecar/screenshots/configmap-change.png b/tools/dynamic-lora-sidecar/screenshots/configmap-change.png
diff --git a/tools/dynamic-lora-sidecar/screenshots/lora-syncer-logs.png b/tools/dynamic-lora-sidecar/screenshots/lora-syncer-logs.png
diff --git a/tools/dynamic-lora-sidecar/screenshots/lora-syncer-sidecar.png b/tools/dynamic-lora-sidecar/screenshots/lora-syncer-sidecar.png
diff --git a/tools/dynamic-lora-sidecar/sidecar/sidecar.py b/tools/dynamic-lora-sidecar/sidecar/sidecar.py
@@ -66,9 +66,25 @@ class LoraReconciler:
     """
 
     def __init__(self, config_validation=True):
-        self.health_check_timeout = datetime.timedelta(seconds=300)
-        self.health_check_interval = datetime.timedelta(seconds=15)
         self.config_validation = config_validation
+        # Load health check and reconcile trigger settings from config now; reconcile() refreshes them on each run
+        self._update_health_check_settings()
+
+    def _update_health_check_settings(self):
+        """Refresh health check and reconcile trigger settings from config."""
+        config = self.config
+        # Get health check timeout from config with default of 300 seconds
+        timeout_seconds = config.get("healthCheckTimeoutSeconds", 300)
+        self.health_check_timeout = datetime.timedelta(seconds=timeout_seconds)
+
+        # Get health check interval from config with default of 2 seconds
+        interval_seconds = config.get("healthCheckIntervalSeconds", 2)
+        self.health_check_interval = datetime.timedelta(seconds=interval_seconds)
+
+        # Get reconciliation trigger interval from config with default of 1 second
+        self.reconcile_trigger_seconds = config.get("reconcileTriggerSeconds", 1)
+
+        logging.info(f"Settings updated: health check timeout={timeout_seconds}s, interval={interval_seconds}s, reconcile trigger={self.reconcile_trigger_seconds}s")
 
     def validate_config(self, c) -> bool:
         try:
@@ -217,6 +233,9 @@ def reconcile(self):
         logging.info(
             f"reconciling model server {self.model_server} with config stored at {CONFIG_MAP_FILE}"
         )
+        # Update health check settings from config before reconciliation
+        self._update_health_check_settings()
+
         if not self.is_server_healthy:
             logging.error(f"vllm server at {self.model_server} not healthy")
             return
@@ -252,14 +271,21 @@ async def main():
     observer.start()
 
     try:
-        logging.info(f"Starting to watch {CONFIG_MAP_FILE} for changes...")
+        logging.info(f"Starting to watch {CONFIG_MAP_FILE} for changes and performing periodic reconciliation...")
         while True:
-            await asyncio.sleep(1)
+            # Get current trigger interval from reconciler config
+            trigger_seconds = reconciler_instance.reconcile_trigger_seconds
+            logging.info(f"Waiting {trigger_seconds}s before next reconciliation...")
+            # Wait for configured trigger interval
+            await asyncio.sleep(trigger_seconds)
+            # Force trigger reconciliation
+            logging.info("Periodic reconciliation triggered")
+            reconciler_instance.reconcile()
     except KeyboardInterrupt:
         logging.info("Stopped by user.")
         observer.stop()
     observer.join()
 
 
 if __name__ == "__main__":
-    asyncio.run(main())
+    asyncio.run(main())
diff --git a/tools/dynamic-lora-sidecar/sidecar/test_sidecar.py b/tools/dynamic-lora-sidecar/sidecar/test_sidecar.py
@@ -4,11 +4,15 @@
 import os
 from sidecar import LoraReconciler, CONFIG_MAP_FILE, BASE_FIELD, LoraAdapter
 
+# Test config, including custom health check and reconcile trigger settings
 TEST_CONFIG_DATA = {
     BASE_FIELD: {
         "host": "localhost",
         "name": "sql-loras-llama",
         "port": 8000,
+        "healthCheckTimeoutSeconds": 180,  # Custom health check timeout
+        "healthCheckIntervalSeconds": 10,   # Custom health check interval
+        "reconcileTriggerSeconds": 30,      # Custom reconcile trigger interval
         "ensureExist": {
             "models": [
                 {
@@ -49,13 +53,14 @@
         },
     }
 }
+
 EXIST_ADAPTERS = [
-    LoraAdapter(a["id"], a["base-model"], a["source"])
+    LoraAdapter(a["id"], a["source"], a["base-model"])
     for a in TEST_CONFIG_DATA[BASE_FIELD]["ensureExist"]["models"]
 ]
 
 NOT_EXIST_ADAPTERS = [
-    LoraAdapter(a["id"], a["base-model"], a["source"])
+    LoraAdapter(a["id"], a["source"], a["base-model"])
     for a in TEST_CONFIG_DATA[BASE_FIELD]["ensureNotExist"]["models"]
 ]
 RESPONSES = {
@@ -170,17 +175,21 @@ def test_reconcile(self, mock_post, mock_get, mock_file):
                         self.reconciler = LoraReconciler()
                         self.reconciler.reconcile()
 
-                        # 1 adapter is in both exist and not exist list, only 2 are expected to be loaded
-                        mock_load.assert_has_calls(
-                            calls=[call(EXIST_ADAPTERS[0]), call(EXIST_ADAPTERS[2])]
-                        )
-                        assert mock_load.call_count == 2
+                        # First check the call count
+                        self.assertEqual(mock_load.call_count, 2, "Expected 2 load adapter calls")
+                        self.assertEqual(mock_unload.call_count, 2, "Expected 2 unload adapter calls")
+
+                        # Check that the adapters with the correct IDs were loaded
+                        loaded_ids = [call.args[0].id for call in mock_load.call_args_list]
+                        self.assertIn("sql-lora-v1", loaded_ids, "sql-lora-v1 should have been loaded")
+                        self.assertIn("already_exists", loaded_ids, "already_exists should have been loaded")
 
-                        # 1 adapter is in both exist and not exist list, only 2 are expected to be unloaded
-                        mock_unload.assert_has_calls(
-                            calls=[call(NOT_EXIST_ADAPTERS[0]), call(NOT_EXIST_ADAPTERS[2])]
-                        )
-                        assert mock_unload.call_count == 2
+                        # Check that the adapters with the correct IDs were unloaded
+                        unloaded_ids = [call.args[0].id for call in mock_unload.call_args_list]
+                        self.assertIn("sql-lora-v2", unloaded_ids, "sql-lora-v2 should have been unloaded")
+                        self.assertIn("to_remove", unloaded_ids, "to_remove should have been unloaded")
+
+
 
 if __name__ == "__main__":
-    unittest.main()
+    unittest.main()