Make reconciling non-blocking

coolkp · coolkp · commit 100f636e51af · 2024-11-11T11:36:55.000-08:00
diff --git a/examples/dynamic-lora-sidecar/Dockerfile b/examples/dynamic-lora-sidecar/Dockerfile
@@ -1,3 +1,10 @@
+# Test stage on the runtime stage's base image. NOTE(review): nothing visible copies from it, so BuildKit may skip it without --target test.
+FROM python:3.10-slim-buster AS test
+WORKDIR /dynamic-lora-reconciler-test
+COPY requirements.txt .
+COPY sidecar/* .
+RUN pip install -r requirements.txt
+RUN python -m unittest discover
 
 FROM python:3.10-slim-buster
 
diff --git a/examples/dynamic-lora-sidecar/requirements.txt b/examples/dynamic-lora-sidecar/requirements.txt
@@ -2,4 +2,5 @@ aiohttp
 jsonschema
 pyyaml
 requests 
-watchfiles
+watchfiles
+watchdog
diff --git a/examples/dynamic-lora-sidecar/sidecar/sidecar.py b/examples/dynamic-lora-sidecar/sidecar/sidecar.py
@@ -3,30 +3,51 @@
 import time
 from jsonschema import validate
 from watchfiles import awatch
-import ipaddress
 from dataclasses import dataclass
 import asyncio
 import logging
 import datetime
 import os
+import sys
+from watchdog.observers.polling import PollingObserver as Observer
+from watchdog.events import FileSystemEventHandler
 
-CONFIG_MAP_FILE = os.environ.get("DYNAMIC_LORA_ROLLOUT_CONFIG", "/config/configmap.yaml")
+CONFIG_MAP_FILE = os.environ.get(
+    "DYNAMIC_LORA_ROLLOUT_CONFIG", "/config/configmap.yaml"
+)
 BASE_FIELD = "vLLMLoRAConfig"
 logging.basicConfig(
-    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(filename)s:%(lineno)d -  %(message)s",
-    datefmt='%Y-%m-%d %H:%M:%S',
-    handlers=[logging.StreamHandler()]
+    level=logging.INFO,
+    format="%(asctime)s - %(levelname)s - %(filename)s:%(lineno)d -  %(message)s",
+    datefmt="%Y-%m-%d %H:%M:%S",
+    handlers=[logging.StreamHandler(sys.stdout)],
 )
-logging.Formatter.converter = time.localtime 
+logging.Formatter.converter = time.localtime
 
 
 def current_time_human() -> str:
     now = datetime.datetime.now(datetime.timezone.utc).astimezone()
     return now.strftime("%Y-%m-%d %H:%M:%S %Z%z")
 
+
+class FileChangeHandler(FileSystemEventHandler):
+    """Event handler that runs a reconcile each time a modification event is delivered to it."""
+
+    def __init__(self, reconciler):
+        super().__init__()
+        self.reconciler = reconciler  # object exposing reconcile(); invoked from on_modified
+
+    def on_modified(self, event):
+        logging.info("modified!")
+        logging.info(f"Config '{CONFIG_MAP_FILE}' modified!")
+        self.reconciler.reconcile()  # NOTE(review): `event` is never inspected, so every delivered modification triggers this call
+        logging.info(f"model server reconcile to Config '{CONFIG_MAP_FILE}' !")
+
+
 @dataclass
 class LoraAdapter:
     """Class representation of lora adapters in config"""
+
     def __init__(self, id, source="", base_model=""):
         self.id = id
         self.source = source
@@ -48,34 +69,33 @@ def __init__(self, config_validation=True):
         self.health_check_timeout = datetime.timedelta(seconds=300)
         self.health_check_interval = datetime.timedelta(seconds=15)
         self.config_validation = config_validation
-        
-    def validate_config(self, c)-> bool:
+
+    def validate_config(self, c) -> bool:
         try:
-            with open('validation.yaml', 'r') as f:
+            with open("validation.yaml", "r") as f:
                 schema = yaml.safe_load(f)
                 validate(instance=c, schema=schema)
                 return True
         except Exception as e:
             logging.error(f"Cannot load config {CONFIG_MAP_FILE} validation error: {e}")
             return False
-        
+
     @property
     def config(self):
         """Load configmap into memory"""
         try:
-            
             with open(CONFIG_MAP_FILE, "r") as f:
                 c = yaml.safe_load(f)
                 if self.config_validation and not self.validate_config(c):
                     return {}
                 if c is None:
                     c = {}
-                c = c.get("vLLMLoRAConfig",{})
+                c = c.get("vLLMLoRAConfig", {})
                 return c
         except Exception as e:
             logging.error(f"cannot load config {CONFIG_MAP_FILE} {e}")
             return {}
-    
+
     @property
     def host(self):
         """Model server host"""
@@ -85,7 +105,7 @@ def host(self):
     def port(self):
         """Model server port"""
         return self.config.get("port", 8000)
-    
+
     @property
     def model_server(self):
         """Model server {host}:{port}"""
@@ -95,13 +115,27 @@ def model_server(self):
     def ensure_exist_adapters(self):
         """Lora adapters in config under key `ensureExist` in set"""
         adapters = self.config.get("ensureExist", {}).get("models", set())
-        return set([LoraAdapter(adapter["id"], adapter["source"], adapter.get("base-model","")) for adapter in adapters])
+        # Build the set directly instead of via a throwaway list. Each entry
+        # must carry "id" and "source"; a missing "base-model" becomes "".
+        return {
+            LoraAdapter(
+                entry["id"], entry["source"], entry.get("base-model", "")
+            )
+            for entry in adapters
+        }
 
     @property
     def ensure_not_exist_adapters(self):
         """Lora adapters in config under key `ensureNotExist` in set"""
         adapters = self.config.get("ensureNotExist", {}).get("models", set())
-        return set([LoraAdapter(adapter["id"], adapter["source"], adapter.get("base-model","")) for adapter in adapters])
+        # Build the set directly instead of via a throwaway list. Each entry
+        # must carry "id" and "source"; a missing "base-model" becomes "".
+        return {
+            LoraAdapter(
+                entry["id"], entry["source"], entry.get("base-model", "")
+            )
+            for entry in adapters
+        }
 
     @property
     def registered_adapters(self):
@@ -123,7 +157,7 @@ def registered_adapters(self):
     @property
     def is_server_healthy(self) -> bool:
         """probe server's health endpoint until timeout or success"""
-        
+
         def check_health() -> bool:
             """Checks server health"""
             url = f"http://{self.model_server}/health"
@@ -132,24 +166,26 @@ def check_health() -> bool:
                 return response.status_code == 200
             except requests.exceptions.RequestException:
                 return False
-        
+
         start_time = datetime.datetime.now()
         while datetime.datetime.now() - start_time < self.health_check_timeout:
             if check_health():
                 return True
             time.sleep(self.health_check_interval.seconds)
         return False
-    
+
     def load_adapter(self, adapter: LoraAdapter):
         """Sends a request to load the specified model."""
         if adapter in self.registered_adapters:
-            logging.info(f"{adapter.id} already present on model server {self.model_server}")
+            logging.info(
+                f"{adapter.id} already present on model server {self.model_server}"
+            )
             return
         url = f"http://{self.model_server}/v1/load_lora_adapter"
         payload = {
             "lora_name": adapter.id,
             "lora_path": adapter.source,
-            "base_model_name": adapter.base_model
+            "base_model_name": adapter.base_model,
         }
         try:
             response = requests.post(url, json=payload)
@@ -161,7 +197,9 @@ def load_adapter(self, adapter: LoraAdapter):
     def unload_adapter(self, adapter: LoraAdapter):
         """Sends a request to unload the specified model."""
         if adapter not in self.registered_adapters:
-            logging.info(f"{adapter.id} already doesn't exist on model server {self.model_server}")
+            logging.info(
+                f"{adapter.id} already doesn't exist on model server {self.model_server}"
+            )
             return
         url = f"http://{self.model_server}/v1/unload_lora_adapter"
         payload = {"lora_name": adapter.id}
@@ -176,12 +214,19 @@ def unload_adapter(self, adapter: LoraAdapter):
 
     def reconcile(self):
         """Reconciles model server with current version of configmap"""
-        logging.info(f"reconciling model server {self.model_server} with config stored at {CONFIG_MAP_FILE}")
+        logging.info(
+            f"reconciling model server {self.model_server} with config stored at {CONFIG_MAP_FILE}"
+        )
         if not self.is_server_healthy:
             logging.error(f"vllm server at {self.model_server} not healthy")
             return
-        invalid_adapters = ", ".join(str(a.id) for a in self.ensure_exist_adapters & self.ensure_not_exist_adapters)
-        logging.warning(f"skipped adapters found in both `ensureExist` and `ensureNotExist` {invalid_adapters}")
+        invalid_adapters = ", ".join(
+            str(a.id)
+            for a in self.ensure_exist_adapters & self.ensure_not_exist_adapters
+        )
+        logging.warning(
+            f"skipped adapters found in both `ensureExist` and `ensureNotExist` {invalid_adapters}"
+        )
         adapters_to_load = self.ensure_exist_adapters - self.ensure_not_exist_adapters
         adapters_to_load_id = ", ".join(str(a.id) for a in adapters_to_load)
         logging.info(f"adapter to load {adapters_to_load_id}")
@@ -194,18 +239,26 @@ def reconcile(self):
             self.unload_adapter(adapter)
 
 
-
 async def main():
-    """Loads the target configuration, compares it with the server's models,
-    and loads/unloads models accordingly."""
-
-    reconcilerInstance = LoraReconciler()
-    logging.info(f"running reconcile for initial loading of configmap {CONFIG_MAP_FILE}")
-    reconcilerInstance.reconcile()
-    logging.info(f"beginning watching of configmap {CONFIG_MAP_FILE}")
-    async for _ in awatch('/config/configmap.yaml'):
-        logging.info(f"Config '{CONFIG_MAP_FILE}' modified!'" )
-        reconcilerInstance.reconcile()
+    """Run the initial reconcile, then watch the config directory and reconcile on changes."""
+    reconciler_instance = LoraReconciler()
+    logging.info(f"Running initial reconcile for config map {CONFIG_MAP_FILE}")
+    reconciler_instance.reconcile()
+    event_handler = FileChangeHandler(reconciler_instance)
+    observer = Observer()
+    observer.schedule(
+        event_handler, path=os.path.dirname(CONFIG_MAP_FILE), recursive=False
+    )
+    observer.start()
+    try:
+        logging.info(f"Starting to watch {CONFIG_MAP_FILE} for changes...")
+        while True:
+            await asyncio.sleep(1)  # idle; the observer delivers events from its own thread
+    except KeyboardInterrupt:
+        logging.info("Stopped by user.")
+    finally:  # also runs on task cancellation and errors, so the observer is always shut down
+        observer.stop()
+        observer.join()
 
 
 if __name__ == "__main__":