@@ -76,7 +76,7 @@ func main() {
76
76
77
77
// Validate flags
78
78
if err := validateFlags (); err != nil {
79
- klog .Fatalf ("flag validation failed : %v" , err )
79
+ klog .Fatalf ("Failed to validate flags : %v" , err )
80
80
}
81
81
82
82
// Print all flag values
@@ -89,7 +89,7 @@ func main() {
89
89
// Create a new manager to manage controllers
90
90
mgr , err := ctrl .NewManager (ctrl .GetConfigOrDie (), ctrl.Options {Scheme : scheme })
91
91
if err != nil {
92
- klog .Fatalf ("failed to start manager: %v" , err )
92
+ klog .Fatalf ("Failed to create controller manager: %v" , err )
93
93
}
94
94
95
95
// Create the data store used to cache watched resources
@@ -106,7 +106,7 @@ func main() {
106
106
},
107
107
Record : mgr .GetEventRecorderFor ("InferencePool" ),
108
108
}).SetupWithManager (mgr ); err != nil {
109
- klog .Fatalf ("Error setting up InferencePoolReconciler: %v" , err )
109
+ klog .Fatalf ("Failed setting up InferencePoolReconciler: %v" , err )
110
110
}
111
111
112
112
if err := (& backend.InferenceModelReconciler {
@@ -119,7 +119,7 @@ func main() {
119
119
},
120
120
Record : mgr .GetEventRecorderFor ("InferenceModel" ),
121
121
}).SetupWithManager (mgr ); err != nil {
122
- klog .Fatalf ("Error setting up InferenceModelReconciler: %v" , err )
122
+ klog .Fatalf ("Failed setting up InferenceModelReconciler: %v" , err )
123
123
}
124
124
125
125
if err := (& backend.EndpointSliceReconciler {
@@ -130,112 +130,95 @@ func main() {
130
130
ServiceName : * serviceName ,
131
131
Zone : * zone ,
132
132
}).SetupWithManager (mgr ); err != nil {
133
- klog .Fatalf ("Error setting up EndpointSliceReconciler: %v" , err )
133
+ klog .Fatalf ("Failed setting up EndpointSliceReconciler: %v" , err )
134
134
}
135
135
136
- // Channel to handle error signals for goroutines
137
- errChan := make (chan error , 1 )
138
-
139
- // Start each component in its own goroutine
140
- startControllerManager (mgr , errChan )
141
- healthSvr := startHealthServer (mgr , errChan , * grpcHealthPort )
136
+ // Start health and ext-proc servers in goroutines
137
+ healthSvr := startHealthServer (datastore , * grpcHealthPort )
142
138
extProcSvr := startExternalProcessorServer (
143
- errChan ,
144
139
datastore ,
145
140
* grpcPort ,
146
141
* refreshPodsInterval ,
147
142
* refreshMetricsInterval ,
148
143
* targetPodHeader ,
149
144
)
150
145
151
- // Wait for first error from any goroutine
152
- err = <- errChan
153
- if err != nil {
154
- klog .Errorf ("goroutine failed: %v" , err )
155
- } else {
156
- klog .Infof ("Manager exited gracefully" )
146
+ // Start the controller manager. Blocking and will return when shutdown is complete.
147
+ klog .Infof ("Starting controller manager" )
148
+ if err := mgr .Start (ctrl .SetupSignalHandler ()); err != nil {
149
+ klog .Fatalf ("Error starting controller manager: %v" , err )
157
150
}
151
+ klog .Info ("Controller manager shutting down" )
158
152
159
- // Gracefully shutdown components
153
+ // Gracefully shutdown servers
160
154
if healthSvr != nil {
161
- klog .Info ("Health server shutting down... " )
155
+ klog .Info ("Health server shutting down" )
162
156
healthSvr .GracefulStop ()
163
157
}
164
158
if extProcSvr != nil {
165
- klog .Info ("Ext-proc server shutting down... " )
159
+ klog .Info ("Ext-proc server shutting down" )
166
160
extProcSvr .GracefulStop ()
167
161
}
168
162
169
- klog .Info ("All components stopped gracefully" )
170
- }
171
-
172
- // startControllerManager runs the controller manager in a goroutine.
173
- func startControllerManager (mgr ctrl.Manager , errChan chan <- error ) {
174
- go func () {
175
- // Blocking and will return when shutdown is complete.
176
- if err := mgr .Start (ctrl .SetupSignalHandler ()); err != nil {
177
- errChan <- fmt .Errorf ("controller manager failed to start: %w" , err )
178
- }
179
- // Manager exited gracefully
180
- klog .Info ("Controller manager shutting down..." )
181
- errChan <- nil
182
- }()
163
+ klog .Info ("All components shutdown" )
183
164
}
184
165
185
166
// startHealthServer starts the gRPC health probe server in a goroutine.
186
- func startHealthServer (mgr ctrl. Manager , errChan chan <- error , port int ) * grpc.Server {
187
- healthSvr := grpc .NewServer ()
188
- healthPb .RegisterHealthServer (healthSvr , & healthServer {Client : mgr . GetClient () })
167
+ func startHealthServer (ds * backend. K8sDatastore , port int ) * grpc.Server {
168
+ svr := grpc .NewServer ()
169
+ healthPb .RegisterHealthServer (svr , & healthServer {datastore : ds })
189
170
190
171
go func () {
191
- healthLis , err := net .Listen ("tcp" , fmt .Sprintf (":%d" , port ))
172
+ lis , err := net .Listen ("tcp" , fmt .Sprintf (":%d" , port ))
192
173
if err != nil {
193
- errChan <- fmt . Errorf ( "health server failed to listen: %w " , err )
174
+ klog . Fatalf ( "Health server failed to listen: %v " , err )
194
175
}
195
176
klog .Infof ("Health server listening on port: %d" , port )
196
177
197
178
// Blocking and will return when shutdown is complete.
198
- if serveErr := healthSvr .Serve (healthLis ); serveErr != nil && serveErr != grpc .ErrServerStopped {
199
- errChan <- fmt . Errorf ( "health server failed: %w " , serveErr )
179
+ if err := svr .Serve (lis ); err != nil && err != grpc .ErrServerStopped {
180
+ klog . Fatalf ( "Health server failed: %v " , err )
200
181
}
182
+ klog .Info ("Health server shutting down" )
201
183
}()
202
- return healthSvr
184
+ return svr
203
185
}
204
186
205
187
// startExternalProcessorServer starts the Envoy external processor server in a goroutine.
206
188
func startExternalProcessorServer (
207
- errChan chan <- error ,
208
189
datastore * backend.K8sDatastore ,
209
190
port int ,
210
191
refreshPodsInterval , refreshMetricsInterval time.Duration ,
211
192
targetPodHeader string ,
212
193
) * grpc.Server {
213
- extSvr := grpc .NewServer ()
194
+ svr := grpc .NewServer ()
195
+
214
196
go func () {
215
197
lis , err := net .Listen ("tcp" , fmt .Sprintf (":%d" , port ))
216
198
if err != nil {
217
- errChan <- fmt . Errorf ( "ext -proc server failed to listen: %w " , err )
199
+ klog . Fatalf ( "Ext -proc server failed to listen: %v " , err )
218
200
}
219
201
klog .Infof ("Ext-proc server listening on port: %d" , port )
220
202
221
203
// Initialize backend provider
222
204
pp := backend .NewProvider (& vllm.PodMetricsClientImpl {}, datastore )
223
205
if err := pp .Init (refreshPodsInterval , refreshMetricsInterval ); err != nil {
224
- errChan <- fmt . Errorf ( "failed to initialize backend provider: %w " , err )
206
+ klog . Fatalf ( "Failed to initialize backend provider: %v " , err )
225
207
}
226
208
227
209
// Register ext_proc handlers
228
210
extProcPb .RegisterExternalProcessorServer (
229
- extSvr ,
211
+ svr ,
230
212
handlers .NewServer (pp , scheduling .NewScheduler (pp ), targetPodHeader , datastore ),
231
213
)
232
214
233
215
// Blocking and will return when shutdown is complete.
234
- if serveErr := extSvr .Serve (lis ); serveErr != nil && serveErr != grpc .ErrServerStopped {
235
- errChan <- fmt . Errorf ( "ext -proc server failed: %w " , serveErr )
216
+ if err := svr .Serve (lis ); err != nil && err != grpc .ErrServerStopped {
217
+ klog . Fatalf ( "Ext -proc server failed: %v " , err )
236
218
}
219
+ klog .Info ("Ext-proc server shutting down" )
237
220
}()
238
- return extSvr
221
+ return svr
239
222
}
240
223
241
224
func validateFlags () error {
0 commit comments