Skip to content

Commit 1af2df4

Browse files
committed
Check health of server before querying
Signed-off-by: Kunjan Patel <[email protected]>
1 parent 5a03f98 commit 1af2df4

File tree

1 file changed

+45
-11
lines changed

1 file changed

+45
-11
lines changed

examples/dynamic-lora-sidecar/sidecar/sidecar.py

+45-11
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,13 @@
77
import datetime
88
import os
99

10-
CONFIG_MAP_FILE = os.environ.get('DYNAMIC_LORA_ROLLOUT_CONFIG',"configmap.yaml")
10+
CONFIG_MAP_FILE = os.environ.get("DYNAMIC_LORA_ROLLOUT_CONFIG", "configmap.yaml")
11+
DYNAMIC_LORA_FLAG = "VLLM_ALLOW_RUNTIME_LORA_UPDATING"
1112
BASE_FIELD = "vLLMLoRAConfig"
12-
logging.basicConfig(level=logging.INFO,
13-
format='%(asctime)s - %(levelname)s - %(message)s')
13+
logging.basicConfig(
14+
level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
15+
)
16+
1417

1518
def current_time_human() -> str:
1619
now = datetime.datetime.now(datetime.timezone.utc).astimezone()
@@ -44,8 +47,15 @@ def __init__(self):
4447
self.deployment_name = ""
4548
self.registered_adapters = {}
4649
self.config_map_adapters = {}
50+
if not self.validate_dynamic_lora():
51+
logging.fatal(f"{DYNAMIC_LORA_FLAG} set to False")
4752
self.load_configmap()
4853
self.get_registered_adapters()
54+
self.health_check_timeout = datetime.timedelta(seconds=150)
55+
self.health_check_interval = datetime.timedelta(seconds=15)
56+
57+
def validate_dynamic_lora(self) -> bool:
    """Return whether runtime LoRA updating is enabled in the environment.

    Reads the environment variable named by ``DYNAMIC_LORA_FLAG``.
    Environment values are always strings, so a plain truthiness test
    would treat ``"False"`` or ``"0"`` as enabled; the value is compared
    against the accepted "on" spellings instead.

    Returns:
        True when the variable is set to ``1`` or ``true`` (case-insensitive,
        surrounding whitespace ignored); False when it is unset or holds
        any other value.
    """
    raw_value = os.environ.get(DYNAMIC_LORA_FLAG, "")
    return raw_value.strip().lower() in {"1", "true"}
4959

5060
def load_configmap(self):
5161
with open(CONFIG_MAP_FILE, "r") as f:
@@ -56,22 +66,45 @@ def load_configmap(self):
5666
deployment.get("host") or "localhost",
5767
deployment.get("port") or "8000",
5868
)
59-
self.config_map_adapters = {adapter["id"]:adapter for adapter in lora_adapters}
69+
self.config_map_adapters = {
70+
adapter["id"]: adapter for adapter in lora_adapters
71+
}
6072

6173
def get_registered_adapters(self):
    """Retrieve all models currently loaded on the server.

    Waits for the server to become healthy, then queries ``/v1/models`` and
    stores the returned entries in ``self.registered_adapters`` keyed by
    their ``id``. On any failure the error is logged and
    ``self.registered_adapters`` is left unchanged.
    """
    url = f"http://{self.host}:{self.port}/v1/models"
    if not self.wait_server_healthy():
        # Host and port are separate replacement fields; writing them as
        # {self.host:self.port} would make the port a format spec and raise.
        logging.error(f"Vllm server at {self.host}:{self.port} not healthy")
    # The request is still attempted after an unhealthy result; a failure is
    # reported by the handlers below.
    try:
        # Bound the request so an unresponsive server cannot hang the caller.
        response = requests.get(url, timeout=30)
        adapters = {adapter["id"]: adapter for adapter in response.json()["data"]}
        self.registered_adapters = adapters
    except requests.exceptions.RequestException as e:
        logging.error(f"Error communicating with vLLM server: {e}")
    except (KeyError, TypeError, ValueError) as e:
        # Body was not JSON or did not have the expected {"data": [{"id": ...}]} shape.
        logging.error(f"Unexpected response from vLLM server at {url}: {e}")
7084

85+
def check_health(self) -> bool:
    """Check server health with a single request to ``/health``.

    Returns:
        True when the endpoint answers with HTTP 200; False on any other
        status code or when the request fails (connection error, timeout).
    """
    url = f"http://{self.host}:{self.port}/health"
    try:
        # Without a timeout a stalled connection would block indefinitely and
        # defeat the caller's own polling deadline.
        response = requests.get(url, timeout=5)
        return response.status_code == 200
    except requests.exceptions.RequestException:
        return False
93+
94+
def wait_server_healthy(self) -> bool:
    """Poll ``check_health`` until the server is healthy or the timeout elapses.

    Reads ``self.health_check_timeout`` and ``self.health_check_interval``
    (both ``datetime.timedelta``) for the overall deadline and the pause
    between attempts.

    Returns:
        True as soon as a health check succeeds; False if the timeout
        elapses without a successful check.
    """
    # Imported locally so the function does not depend on a module-level
    # ``import time`` being present.
    import time

    # time.sleep() takes seconds as a number, not a timedelta.
    timeout_seconds = self.health_check_timeout.total_seconds()
    interval_seconds = self.health_check_interval.total_seconds()

    # A monotonic clock keeps the deadline immune to wall-clock adjustments.
    deadline = time.monotonic() + timeout_seconds
    while time.monotonic() < deadline:
        if self.check_health():
            return True
        time.sleep(interval_seconds)
    return False
100+
71101
def reconcile(self):
72102
"""Reconciles model server with current version of configmap"""
73103
self.get_registered_adapters()
74104
self.load_configmap()
105+
if not self.wait_server_healthy():
106+
logging.error(f"Vllm server at {self.host:self.port} not healthy")
107+
75108
for adapter_id, lora_adapter in self.config_map_adapters.items():
76109
logging.info(f"Processing adapter {adapter_id}")
77110
if lora_adapter.get("toRemove"):
@@ -99,21 +132,22 @@ def log_status_config(self):
99132
"port": self.port,
100133
"models": models,
101134
}
102-
config = {BASE_FIELD:deployment}
103-
yaml_string = yaml.dump(config,indent=2)
104-
logging.info(f"current status of lora adapters on model server at {self.host}:{self.port} \n {yaml_string}")
105-
135+
config = {BASE_FIELD: deployment}
136+
yaml_string = yaml.dump(config, indent=2)
137+
logging.info(
138+
f"current status of lora adapters on model server at {self.host}:{self.port} \n {yaml_string}"
139+
)
106140

107141
def load_adapter(self, adapter):
108142
"""Sends a request to load the specified model."""
109143
adapter_id = adapter["id"]
110144
if adapter_id in self.registered_adapters or adapter.get("toRemove"):
111-
return
145+
return
112146
url = f"http://{self.host}:{self.port}/v1/load_lora_adapter"
113147
payload = {
114148
"lora_name": adapter_id,
115149
"lora_path": adapter["source"],
116-
"base_model_name": adapter.get("base-model",""),
150+
"base_model_name": adapter.get("base-model", ""),
117151
}
118152
try:
119153
response = requests.post(url, json=payload)
@@ -129,7 +163,7 @@ def unload_adapter(self, adapter):
129163
"""Sends a request to unload the specified model."""
130164
adapter_id = adapter["id"]
131165
if adapter_id not in self.registered_adapters:
132-
return
166+
return
133167
url = f"http://{self.host}:{self.port}/v1/unload_lora_adapter"
134168
payload = {"lora_name": adapter_id}
135169
try:

0 commit comments

Comments
 (0)