-
Notifications
You must be signed in to change notification settings - Fork 76
make dynamic lora sidecar health check parameters configurable and force reconcile #605
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 6 commits
0e714fe
22cbdaf
f252457
560d0be
f2a6f44
bc9ddb6
ea87c17
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -66,9 +66,25 @@ class LoraReconciler: | |
""" | ||
|
||
def __init__(self, config_validation=True): | ||
self.health_check_timeout = datetime.timedelta(seconds=300) | ||
self.health_check_interval = datetime.timedelta(seconds=15) | ||
self.config_validation = config_validation | ||
# Health check values will be initialized from config in first reconcile | ||
self._update_health_check_settings() | ||
|
||
def _update_health_check_settings(self): | ||
"""Update health check settings from config""" | ||
config = self.config | ||
# Get health check timeout from config with default of 300 seconds | ||
timeout_seconds = config.get("healthCheckTimeoutSeconds", 300) | ||
self.health_check_timeout = datetime.timedelta(seconds=timeout_seconds) | ||
|
||
# Get health check interval from config with default of 15 seconds | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. default of 2 seconds There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yes made this 2 and refresh 5 and timeout 300 |
||
interval_seconds = config.get("healthCheckIntervalSeconds", 2) | ||
self.health_check_interval = datetime.timedelta(seconds=interval_seconds) | ||
|
||
# Get reconciliation trigger interval from config with default of 5 seconds | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. default is 1 |
||
self.reconcile_trigger_seconds = config.get("reconcileTriggerSeconds", 1) | ||
|
||
logging.info(f"Settings updated: health check timeout={timeout_seconds}s, interval={interval_seconds}s, reconcile trigger={self.reconcile_trigger_seconds}s") | ||
|
||
def validate_config(self, c) -> bool: | ||
try: | ||
|
@@ -217,6 +233,9 @@ def reconcile(self): | |
logging.info( | ||
f"reconciling model server {self.model_server} with config stored at {CONFIG_MAP_FILE}" | ||
) | ||
# Update health check settings from config before reconciliation | ||
self._update_health_check_settings() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why do we need to call it here again? |
||
|
||
if not self.is_server_healthy: | ||
logging.error(f"vllm server at {self.model_server} not healthy") | ||
return | ||
|
@@ -252,14 +271,21 @@ async def main(): | |
observer.start() | ||
|
||
try: | ||
logging.info(f"Starting to watch {CONFIG_MAP_FILE} for changes...") | ||
logging.info(f"Starting to watch {CONFIG_MAP_FILE} for changes and performing periodic reconciliation...") | ||
while True: | ||
await asyncio.sleep(1) | ||
# Get current trigger interval from reconciler config | ||
trigger_seconds = reconciler_instance.reconcile_trigger_seconds | ||
logging.info(f"Waiting {trigger_seconds}s before next reconciliation...") | ||
# Wait for configured trigger interval | ||
await asyncio.sleep(trigger_seconds) | ||
# Force trigger reconciliation | ||
logging.info("Periodic reconciliation triggered") | ||
reconciler_instance.reconcile() | ||
except KeyboardInterrupt: | ||
logging.info("Stopped by user.") | ||
observer.stop() | ||
observer.join() | ||
|
||
|
||
if __name__ == "__main__": | ||
asyncio.run(main()) | ||
asyncio.run(main()) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Those are better as command line flags or env vars to the container, similar to https://github.com/kubernetes-sigs/gateway-api-inference-extension/pull/605/files#diff-e5ae211d8cba1ab987311c2506ed80add46fe1f9d8f8c4ad08084439bb135364R15
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yes made them cmd line flags with defaults