7
7
import datetime
8
8
import os
9
9
10
- CONFIG_MAP_FILE = os .environ .get ('DYNAMIC_LORA_ROLLOUT_CONFIG' ,"configmap.yaml" )
10
+ CONFIG_MAP_FILE = os .environ .get ("DYNAMIC_LORA_ROLLOUT_CONFIG" , "configmap.yaml" )
11
+ DYNAMIC_LORA_FLAG = "VLLM_ALLOW_RUNTIME_LORA_UPDATING"
11
12
BASE_FIELD = "vLLMLoRAConfig"
12
- logging .basicConfig (level = logging .INFO ,
13
- format = '%(asctime)s - %(levelname)s - %(message)s' )
13
+ logging .basicConfig (
14
+ level = logging .INFO , format = "%(asctime)s - %(levelname)s - %(message)s"
15
+ )
16
+
14
17
15
18
def current_time_human () -> str :
16
19
now = datetime .datetime .now (datetime .timezone .utc ).astimezone ()
@@ -44,8 +47,15 @@ def __init__(self):
44
47
self .deployment_name = ""
45
48
self .registered_adapters = {}
46
49
self .config_map_adapters = {}
50
+ if not self .validate_dynamic_lora ():
51
+ logging .fatal (f"{ DYNAMIC_LORA_FLAG } set to False" )
47
52
self .load_configmap ()
48
53
self .get_registered_adapters ()
54
+ self .health_check_timeout = datetime .timedelta (seconds = 150 )
55
+ self .health_check_interval = datetime .timedelta (seconds = 15 )
56
+
57
def validate_dynamic_lora(self) -> bool:
    """Report whether runtime LoRA updating is enabled in the environment.

    Reads the environment variable named by ``DYNAMIC_LORA_FLAG``.

    Returns:
        True only when the variable is set to ``1`` or ``true`` (case and
        surrounding whitespace ignored); False when it is unset or holds
        any other value.
    """
    # Environment values are always strings, so a plain truthiness test
    # would accept "False" and "0" as enabled; compare explicitly instead.
    # NOTE(review): the accepted spellings are meant to mirror how vLLM
    # parses this flag - confirm against the vLLM version in use.
    raw_value = os.environ.get(DYNAMIC_LORA_FLAG, "")
    return raw_value.strip().lower() in ("1", "true")
49
59
50
60
def load_configmap (self ):
51
61
with open (CONFIG_MAP_FILE , "r" ) as f :
@@ -56,22 +66,45 @@ def load_configmap(self):
56
66
deployment .get ("host" ) or "localhost" ,
57
67
deployment .get ("port" ) or "8000" ,
58
68
)
59
- self .config_map_adapters = {adapter ["id" ]:adapter for adapter in lora_adapters }
69
+ self .config_map_adapters = {
70
+ adapter ["id" ]: adapter for adapter in lora_adapters
71
+ }
60
72
61
73
def get_registered_adapters(self):
    """Retrieves all loaded models on server.

    Waits for the server to report healthy, then queries ``/v1/models``
    and stores the result in ``self.registered_adapters`` as a dict keyed
    by each entry's ``id``. On any failure the error is logged and
    ``self.registered_adapters`` is left unchanged.
    """
    url = f"http://{self.host}:{self.port}/v1/models"
    if not self.wait_server_healthy():
        # Only logged, not fatal: the request below is still attempted and
        # reports its own error if the server really is unreachable.
        logging.error(f"Vllm server at {self.host}:{self.port} not healthy")
    try:
        # Bound the call so a stalled server cannot block reconciliation.
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        adapters = {adapter["id"]: adapter for adapter in response.json()["data"]}
    except requests.exceptions.RequestException as e:
        logging.error(f"Error communicating with vLLM server: {e}")
    except (KeyError, TypeError, ValueError) as e:
        # Body was not JSON or did not have the expected "data"/"id" shape.
        logging.error(f"Unexpected response from vLLM server at {url}: {e!r}")
    else:
        self.registered_adapters = adapters
70
84
85
def check_health(self) -> bool:
    """Checks server health.

    Sends a single GET to ``http://<host>:<port>/health``.

    Returns:
        True when the response status is 200; False for any other status
        or when the request raises a ``requests`` exception (including a
        timeout).
    """
    url = f"http://{self.host}:{self.port}/health"
    try:
        # Without a timeout one hung connection would block the probe
        # indefinitely and defeat the caller's overall deadline.
        response = requests.get(url, timeout=5)
    except requests.exceptions.RequestException:
        return False
    return response.status_code == 200
93
+
94
def wait_server_healthy(self) -> bool:
    """Poll ``check_health`` until it succeeds or the timeout elapses.

    Uses ``self.health_check_timeout`` as the overall deadline and
    ``self.health_check_interval`` as the pause between probes (both
    ``datetime.timedelta``). At least one probe is always made.

    Returns:
        True as soon as a probe succeeds; False once the deadline has
        passed without a successful probe.
    """
    # As far as this file shows, __init__ calls get_registered_adapters
    # (which ends up here) before it assigns the health_check_* attributes,
    # so fall back to the same values it sets later.
    timeout = getattr(self, "health_check_timeout", datetime.timedelta(seconds=150))
    interval = getattr(self, "health_check_interval", datetime.timedelta(seconds=15))

    # Monotonic clock: unaffected by wall-clock adjustments while waiting.
    deadline = time.monotonic() + timeout.total_seconds()
    while True:
        if self.check_health():
            return True
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            return False
        # time.sleep() takes seconds, not a timedelta; never sleep past
        # the deadline.
        time.sleep(min(interval.total_seconds(), remaining))
100
+
71
101
def reconcile (self ):
72
102
"""Reconciles model server with current version of configmap"""
73
103
self .get_registered_adapters ()
74
104
self .load_configmap ()
105
+ if not self .wait_server_healthy ():
106
+ logging .error (f"Vllm server at { self .host :self.port} not healthy" )
107
+
75
108
for adapter_id , lora_adapter in self .config_map_adapters .items ():
76
109
logging .info (f"Processing adapter { adapter_id } " )
77
110
if lora_adapter .get ("toRemove" ):
@@ -99,21 +132,22 @@ def log_status_config(self):
99
132
"port" : self .port ,
100
133
"models" : models ,
101
134
}
102
- config = {BASE_FIELD :deployment }
103
- yaml_string = yaml .dump (config ,indent = 2 )
104
- logging .info (f"current status of lora adapters on model server at { self .host } :{ self .port } \n { yaml_string } " )
105
-
135
+ config = {BASE_FIELD : deployment }
136
+ yaml_string = yaml .dump (config , indent = 2 )
137
+ logging .info (
138
+ f"current status of lora adapters on model server at { self .host } :{ self .port } \n { yaml_string } "
139
+ )
106
140
107
141
def load_adapter (self , adapter ):
108
142
"""Sends a request to load the specified model."""
109
143
adapter_id = adapter ["id" ]
110
144
if adapter_id in self .registered_adapters or adapter .get ("toRemove" ):
111
- return
145
+ return
112
146
url = f"http://{ self .host } :{ self .port } /v1/load_lora_adapter"
113
147
payload = {
114
148
"lora_name" : adapter_id ,
115
149
"lora_path" : adapter ["source" ],
116
- "base_model_name" : adapter .get ("base-model" ,"" ),
150
+ "base_model_name" : adapter .get ("base-model" , "" ),
117
151
}
118
152
try :
119
153
response = requests .post (url , json = payload )
@@ -129,7 +163,7 @@ def unload_adapter(self, adapter):
129
163
"""Sends a request to unload the specified model."""
130
164
adapter_id = adapter ["id" ]
131
165
if adapter_id not in self .registered_adapters :
132
- return
166
+ return
133
167
url = f"http://{ self .host } :{ self .port } /v1/unload_lora_adapter"
134
168
payload = {"lora_name" : adapter_id }
135
169
try :
0 commit comments