Skip to content

Commit 92646a5

Browse files
authored
Merge pull request #2321 from sbueringer/pr-fix-deadlock
🐛 Revert: move health probes to runnable
2 parents 7415ca7 + dd0cb45 commit 92646a5

File tree

1 file changed

+40
-11
lines changed

1 file changed

+40
-11
lines changed

pkg/manager/internal.go

Lines changed: 40 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -317,9 +317,9 @@ func (cm *controllerManager) addMetricsServer() error {
317317
})
318318
}
319319

320-
func (cm *controllerManager) addHealthProbeServer() error {
320+
func (cm *controllerManager) serveHealthProbes() {
321321
mux := http.NewServeMux()
322-
srv := httpserver.New(mux)
322+
server := httpserver.New(mux)
323323

324324
if cm.readyzHandler != nil {
325325
mux.Handle(cm.readinessEndpointName, http.StripPrefix(cm.readinessEndpointName, cm.readyzHandler))
@@ -332,12 +332,7 @@ func (cm *controllerManager) addHealthProbeServer() error {
332332
mux.Handle(cm.livenessEndpointName+"/", http.StripPrefix(cm.livenessEndpointName, cm.healthzHandler))
333333
}
334334

335-
return cm.add(&server{
336-
Kind: "health probe",
337-
Log: cm.logger,
338-
Server: srv,
339-
Listener: cm.healthProbeListener,
340-
})
335+
go cm.httpServe("health probe", cm.logger, server, cm.healthProbeListener)
341336
}
342337

343338
func (cm *controllerManager) addPprofServer() error {
@@ -358,6 +353,42 @@ func (cm *controllerManager) addPprofServer() error {
358353
})
359354
}
360355

356+
func (cm *controllerManager) httpServe(kind string, log logr.Logger, server *http.Server, ln net.Listener) {
357+
log = log.WithValues("kind", kind, "addr", ln.Addr())
358+
359+
go func() {
360+
log.Info("Starting server")
361+
if err := server.Serve(ln); err != nil {
362+
if errors.Is(err, http.ErrServerClosed) {
363+
return
364+
}
365+
if atomic.LoadInt64(cm.stopProcedureEngaged) > 0 {
366+
// There might be cases where connections are still open and we try to shutdown
367+
// but not having enough time to close the connection causes an error in Serve
368+
//
369+
// In that case we want to avoid returning an error to the main error channel.
370+
log.Error(err, "error on Serve after stop has been engaged")
371+
return
372+
}
373+
cm.errChan <- err
374+
}
375+
}()
376+
377+
// Shutdown the server when stop is closed.
378+
<-cm.internalProceduresStop
379+
if err := server.Shutdown(cm.shutdownCtx); err != nil {
380+
if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) {
381+
// Avoid logging context related errors.
382+
return
383+
}
384+
if atomic.LoadInt64(cm.stopProcedureEngaged) > 0 {
385+
cm.logger.Error(err, "error on Shutdown after stop has been engaged")
386+
return
387+
}
388+
cm.errChan <- err
389+
}
390+
}
391+
361392
// Start starts the manager and waits indefinitely.
362393
// There is only two ways to have start return:
363394
// An error has occurred during in one of the internal operations,
@@ -418,9 +449,7 @@ func (cm *controllerManager) Start(ctx context.Context) (err error) {
418449

419450
// Serve health probes.
420451
if cm.healthProbeListener != nil {
421-
if err := cm.addHealthProbeServer(); err != nil {
422-
return fmt.Errorf("failed to add health probe server: %w", err)
423-
}
452+
cm.serveHealthProbes()
424453
}
425454

426455
// Add pprof server

0 commit comments

Comments
 (0)