Skip to content

make dynamic lora sidecar health check parameters configurable and force reconcile #605

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Mar 28, 2025
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions tools/dynamic-lora-sidecar/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ FROM python:3.9-slim-buster AS test

WORKDIR /dynamic-lora-reconciler-test
COPY requirements.txt .
COPY sidecar/* .
COPY sidecar/* ./
RUN pip install -r requirements.txt
RUN python -m unittest discover || exit 1

Expand All @@ -18,6 +18,6 @@ RUN pip install --upgrade pip
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY sidecar/* .
COPY sidecar/* ./

CMD ["python", "sidecar.py"]
Binary file not shown.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file not shown.
36 changes: 31 additions & 5 deletions tools/dynamic-lora-sidecar/sidecar/sidecar.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,25 @@ class LoraReconciler:
"""

def __init__(self, config_validation=True):
self.health_check_timeout = datetime.timedelta(seconds=300)
self.health_check_interval = datetime.timedelta(seconds=15)
self.config_validation = config_validation
# Health check values will be initialized from config in first reconcile
self._update_health_check_settings()

def _update_health_check_settings(self):
"""Update health check settings from config"""
config = self.config
# Get health check timeout from config with default of 300 seconds
timeout_seconds = config.get("healthCheckTimeoutSeconds", 300)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes made them cmd line flags with defaults

self.health_check_timeout = datetime.timedelta(seconds=timeout_seconds)

# Get health check interval from config with default of 15 seconds
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

default of 2 seconds

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes made this 2 and refresh 5 and timeout 300

interval_seconds = config.get("healthCheckIntervalSeconds", 2)
self.health_check_interval = datetime.timedelta(seconds=interval_seconds)

# Get reconciliation trigger interval from config with default of 5 seconds
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

default is 1

self.reconcile_trigger_seconds = config.get("reconcileTriggerSeconds", 1)

logging.info(f"Settings updated: health check timeout={timeout_seconds}s, interval={interval_seconds}s, reconcile trigger={self.reconcile_trigger_seconds}s")

def validate_config(self, c) -> bool:
try:
Expand Down Expand Up @@ -217,6 +233,9 @@ def reconcile(self):
logging.info(
f"reconciling model server {self.model_server} with config stored at {CONFIG_MAP_FILE}"
)
# Update health check settings from config before reconciliation
self._update_health_check_settings()
Copy link
Contributor

@ahg-g ahg-g Mar 28, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why do we need to call it here again?


if not self.is_server_healthy:
logging.error(f"vllm server at {self.model_server} not healthy")
return
Expand Down Expand Up @@ -252,14 +271,21 @@ async def main():
observer.start()

try:
logging.info(f"Starting to watch {CONFIG_MAP_FILE} for changes...")
logging.info(f"Starting to watch {CONFIG_MAP_FILE} for changes and performing periodic reconciliation...")
while True:
await asyncio.sleep(1)
# Get current trigger interval from reconciler config
trigger_seconds = reconciler_instance.reconcile_trigger_seconds
logging.info(f"Waiting {trigger_seconds}s before next reconciliation...")
# Wait for configured trigger interval
await asyncio.sleep(trigger_seconds)
# Force trigger reconciliation
logging.info("Periodic reconciliation triggered")
reconciler_instance.reconcile()
except KeyboardInterrupt:
logging.info("Stopped by user.")
observer.stop()
observer.join()


if __name__ == "__main__":
asyncio.run(main())
asyncio.run(main())
35 changes: 22 additions & 13 deletions tools/dynamic-lora-sidecar/sidecar/test_sidecar.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,15 @@
import os
from sidecar import LoraReconciler, CONFIG_MAP_FILE, BASE_FIELD, LoraAdapter

# Update TEST_CONFIG_DATA to include the new configuration parameters
TEST_CONFIG_DATA = {
BASE_FIELD: {
"host": "localhost",
"name": "sql-loras-llama",
"port": 8000,
"healthCheckTimeoutSeconds": 180, # Custom health check timeout
"healthCheckIntervalSeconds": 10, # Custom health check interval
"reconcileTriggerSeconds": 30, # Custom reconcile trigger interval
"ensureExist": {
"models": [
{
Expand Down Expand Up @@ -49,13 +53,14 @@
},
}
}

EXIST_ADAPTERS = [
LoraAdapter(a["id"], a["base-model"], a["source"])
LoraAdapter(a["id"], a["source"], a["base-model"])
for a in TEST_CONFIG_DATA[BASE_FIELD]["ensureExist"]["models"]
]

NOT_EXIST_ADAPTERS = [
LoraAdapter(a["id"], a["base-model"], a["source"])
LoraAdapter(a["id"], a["source"], a["base-model"])
for a in TEST_CONFIG_DATA[BASE_FIELD]["ensureNotExist"]["models"]
]
RESPONSES = {
Expand Down Expand Up @@ -170,17 +175,21 @@ def test_reconcile(self, mock_post, mock_get, mock_file):
self.reconciler = LoraReconciler()
self.reconciler.reconcile()

# 1 adapter is in both exist and not exist list, only 2 are expected to be loaded
mock_load.assert_has_calls(
calls=[call(EXIST_ADAPTERS[0]), call(EXIST_ADAPTERS[2])]
)
assert mock_load.call_count == 2
# First check the call count
self.assertEqual(mock_load.call_count, 2, "Expected 2 load adapter calls")
self.assertEqual(mock_unload.call_count, 2, "Expected 2 unload adapter calls")

# Check that the adapters with the correct IDs were loaded
loaded_ids = [call.args[0].id for call in mock_load.call_args_list]
self.assertIn("sql-lora-v1", loaded_ids, "sql-lora-v1 should have been loaded")
self.assertIn("already_exists", loaded_ids, "already_exists should have been loaded")

# 1 adapter is in both exist and not exist list, only 2 are expected to be unloaded
mock_unload.assert_has_calls(
calls=[call(NOT_EXIST_ADAPTERS[0]), call(NOT_EXIST_ADAPTERS[2])]
)
assert mock_unload.call_count == 2
# Check that the adapters with the correct IDs were unloaded
unloaded_ids = [call.args[0].id for call in mock_unload.call_args_list]
self.assertIn("sql-lora-v2", unloaded_ids, "sql-lora-v2 should have been unloaded")
self.assertIn("to_remove", unloaded_ids, "to_remove should have been unloaded")



if __name__ == "__main__":
unittest.main()
unittest.main()