Skip to content

Commit 471ab88

Browse files
committed
add context to long running operations
1 parent 5558643 commit 471ab88

12 files changed

+82
-90
lines changed

Diff for: cmd/nodeproblemdetector/node_problem_detector.go

+5-12
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@ limitations under the License.
1717
package main
1818

1919
import (
20+
"context"
21+
2022
"github.com/golang/glog"
2123

2224
_ "k8s.io/node-problem-detector/cmd/nodeproblemdetector/exporterplugins"
@@ -31,16 +33,7 @@ import (
3133
"k8s.io/node-problem-detector/pkg/version"
3234
)
3335

34-
func npdInteractive(npdo *options.NodeProblemDetectorOptions) {
35-
termCh := make(chan error, 1)
36-
defer close(termCh)
37-
38-
if err := npdMain(npdo, termCh); err != nil {
39-
glog.Fatalf("Problem detector failed with error: %v", err)
40-
}
41-
}
42-
43-
func npdMain(npdo *options.NodeProblemDetectorOptions, termCh <-chan error) error {
36+
func npdMain(ctx context.Context, npdo *options.NodeProblemDetectorOptions) error {
4437
if npdo.PrintVersion {
4538
version.PrintVersion()
4639
return nil
@@ -58,7 +51,7 @@ func npdMain(npdo *options.NodeProblemDetectorOptions, termCh <-chan error) erro
5851

5952
// Initialize exporters.
6053
defaultExporters := []types.Exporter{}
61-
if ke := k8sexporter.NewExporterOrDie(npdo); ke != nil {
54+
if ke := k8sexporter.NewExporterOrDie(ctx, npdo); ke != nil {
6255
defaultExporters = append(defaultExporters, ke)
6356
glog.Info("K8s exporter started.")
6457
}
@@ -79,5 +72,5 @@ func npdMain(npdo *options.NodeProblemDetectorOptions, termCh <-chan error) erro
7972

8073
// Initialize NPD core.
8174
p := problemdetector.NewProblemDetector(problemDaemons, npdExporters)
82-
return p.Run(termCh)
75+
return p.Run(ctx)
8376
}

Diff for: cmd/nodeproblemdetector/node_problem_detector_linux.go

+6-1
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,9 @@ limitations under the License.
1717
package main
1818

1919
import (
20+
"context"
21+
22+
"github.com/golang/glog"
2023
"github.com/spf13/pflag"
2124
"k8s.io/node-problem-detector/cmd/options"
2225
)
@@ -26,5 +29,7 @@ func main() {
2629
npdo.AddFlags(pflag.CommandLine)
2730

2831
pflag.Parse()
29-
npdInteractive(npdo)
32+
if err := npdMain(context.Background(), npdo); err != nil {
33+
glog.Fatalf("Problem detector failed with error: %v", err)
34+
}
3035
}

Diff for: cmd/nodeproblemdetector/node_problem_detector_test.go

+4-6
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ limitations under the License.
2020
package main
2121

2222
import (
23-
"errors"
23+
"context"
2424
"fmt"
2525
"os"
2626
"strings"
@@ -81,11 +81,9 @@ func TestNPDMain(t *testing.T) {
8181
npdo, cleanup := setupNPD(t)
8282
defer cleanup()
8383

84-
termCh := make(chan error, 2)
85-
termCh <- errors.New("close")
86-
defer close(termCh)
87-
88-
if err := npdMain(npdo, termCh); err != nil {
84+
ctx, cancelFunc := context.WithCancel(context.Background())
85+
cancelFunc()
86+
if err := npdMain(ctx, npdo); err != nil {
8987
t.Errorf("termination signal should not return error got, %v", err)
9088
}
9189
}

Diff for: cmd/nodeproblemdetector/node_problem_detector_windows.go

+28-42
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ limitations under the License.
1717
package main
1818

1919
import (
20-
"errors"
20+
"context"
2121
"fmt"
2222
"sync"
2323
"time"
@@ -102,26 +102,20 @@ type npdService struct {
102102
}
103103

104104
func (s *npdService) Execute(args []string, r <-chan svc.ChangeRequest, changes chan<- svc.Status) (bool, uint32) {
105-
appTermCh := make(chan error, 1)
106-
svcLoopTermCh := make(chan error, 1)
107-
defer func() {
108-
close(appTermCh)
109-
close(svcLoopTermCh)
110-
}()
111-
112105
changes <- svc.Status{State: svc.StartPending}
113106
changes <- svc.Status{State: svc.Running, Accepts: svcCommandsAccepted}
114107
var appWG sync.WaitGroup
115108
var svcWG sync.WaitGroup
116109

117110
options := s.options
111+
ctx, cancelFunc := context.WithCancel(context.Background())
118112

119113
// NPD application goroutine.
120114
appWG.Add(1)
121115
go func() {
122116
defer appWG.Done()
123117

124-
if err := npdMain(options, appTermCh); err != nil {
118+
if err := npdMain(ctx, options); err != nil {
125119
elog.Warning(windowsEventLogID, err.Error())
126120
}
127121

@@ -132,16 +126,36 @@ func (s *npdService) Execute(args []string, r <-chan svc.ChangeRequest, changes
132126
svcWG.Add(1)
133127
go func() {
134128
defer svcWG.Done()
135-
136-
serviceLoop(r, changes, appTermCh, svcLoopTermCh)
129+
for {
130+
select {
131+
case <-ctx.Done():
132+
return
133+
case c := <-r:
134+
switch c.Cmd {
135+
case svc.Interrogate:
136+
changes <- c.CurrentStatus
137+
// Testing deadlock from https://code.google.com/p/winsvc/issues/detail?id=4
138+
time.Sleep(100 * time.Millisecond)
139+
changes <- c.CurrentStatus
140+
case svc.Stop, svc.Shutdown:
141+
elog.Info(windowsEventLogID, fmt.Sprintf("Stopping %s service, %v", svcName, c.Context))
142+
cancelFunc()
143+
case svc.Pause:
144+
elog.Info(windowsEventLogID, "ignoring pause command from Windows service control, not supported")
145+
changes <- svc.Status{State: svc.Paused, Accepts: svcCommandsAccepted}
146+
case svc.Continue:
147+
elog.Info(windowsEventLogID, "ignoring continue command from Windows service control, not supported")
148+
changes <- svc.Status{State: svc.Running, Accepts: svcCommandsAccepted}
149+
default:
150+
elog.Error(windowsEventLogID, fmt.Sprintf("unexpected control request #%d", c))
151+
}
152+
}
153+
}
137154
}()
138155

139156
// Wait for the application go routine to die.
140157
appWG.Wait()
141158

142-
// Ensure that the service control loop is killed.
143-
svcLoopTermCh <- nil
144-
145159
// Wait for the service control loop to terminate.
146160
// Otherwise it's possible that the channel closures cause the application to panic.
147161
svcWG.Wait()
@@ -151,31 +165,3 @@ func (s *npdService) Execute(args []string, r <-chan svc.ChangeRequest, changes
151165

152166
return false, uint32(0)
153167
}
154-
155-
func serviceLoop(r <-chan svc.ChangeRequest, changes chan<- svc.Status, appTermCh chan error, svcLoopTermCh chan error) {
156-
for {
157-
select {
158-
case <-svcLoopTermCh:
159-
return
160-
case c := <-r:
161-
switch c.Cmd {
162-
case svc.Interrogate:
163-
changes <- c.CurrentStatus
164-
// Testing deadlock from https://code.google.com/p/winsvc/issues/detail?id=4
165-
time.Sleep(100 * time.Millisecond)
166-
changes <- c.CurrentStatus
167-
case svc.Stop, svc.Shutdown:
168-
elog.Info(windowsEventLogID, fmt.Sprintf("Stopping %s service, %v", svcName, c.Context))
169-
appTermCh <- errors.New("stopping service")
170-
case svc.Pause:
171-
elog.Info(windowsEventLogID, "ignoring pause command from Windows service control, not supported")
172-
changes <- svc.Status{State: svc.Paused, Accepts: svcCommandsAccepted}
173-
case svc.Continue:
174-
elog.Info(windowsEventLogID, "ignoring continue command from Windows service control, not supported")
175-
changes <- svc.Status{State: svc.Running, Accepts: svcCommandsAccepted}
176-
default:
177-
elog.Error(windowsEventLogID, fmt.Sprintf("unexpected control request #%d", c))
178-
}
179-
}
180-
}
181-
}

Diff for: cmd/nodeproblemdetector/node_problem_detector_windows_test.go

+1
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ limitations under the License.
2020
package main
2121

2222
import (
23+
"context"
2324
"testing"
2425

2526
"golang.org/x/sys/windows/svc"

Diff for: pkg/exporters/k8sexporter/condition/manager.go

+10-7
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ limitations under the License.
1717
package condition
1818

1919
import (
20+
"context"
2021
"reflect"
2122
"sync"
2223
"time"
@@ -49,7 +50,7 @@ const (
4950
// not. This addresses 3).
5051
type ConditionManager interface {
5152
// Start starts the condition manager.
52-
Start()
53+
Start(ctx context.Context)
5354
// UpdateCondition updates a specific condition.
5455
UpdateCondition(types.Condition)
5556
// GetConditions returns all current conditions.
@@ -88,8 +89,8 @@ func NewConditionManager(client problemclient.Client, clock clock.Clock, heartbe
8889
}
8990
}
9091

91-
func (c *conditionManager) Start() {
92-
go c.syncLoop()
92+
func (c *conditionManager) Start(ctx context.Context) {
93+
go c.syncLoop(ctx)
9394
}
9495

9596
func (c *conditionManager) UpdateCondition(condition types.Condition) {
@@ -110,15 +111,17 @@ func (c *conditionManager) GetConditions() []types.Condition {
110111
return conditions
111112
}
112113

113-
func (c *conditionManager) syncLoop() {
114+
func (c *conditionManager) syncLoop(ctx context.Context) {
114115
ticker := c.clock.NewTicker(updatePeriod)
115116
defer ticker.Stop()
116117
for {
117118
select {
118119
case <-ticker.C():
119120
if c.needUpdates() || c.needResync() || c.needHeartbeat() {
120-
c.sync()
121+
c.sync(ctx)
121122
}
123+
case <-ctx.Done():
124+
break
122125
}
123126
}
124127
}
@@ -150,14 +153,14 @@ func (c *conditionManager) needHeartbeat() bool {
150153
}
151154

152155
// sync synchronizes node conditions with the apiserver.
153-
func (c *conditionManager) sync() {
156+
func (c *conditionManager) sync(ctx context.Context) {
154157
c.latestTry = c.clock.Now()
155158
c.resyncNeeded = false
156159
conditions := []v1.NodeCondition{}
157160
for i := range c.conditions {
158161
conditions = append(conditions, problemutil.ConvertToAPICondition(c.conditions[i]))
159162
}
160-
if err := c.client.SetConditions(conditions); err != nil {
163+
if err := c.client.SetConditions(ctx, conditions); err != nil {
161164
// The conditions will be updated again in future sync
162165
glog.Errorf("failed to update node conditions: %v", err)
163166
c.resyncNeeded = true

Diff for: pkg/exporters/k8sexporter/condition/manager_test.go

+4-3
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ limitations under the License.
1717
package condition
1818

1919
import (
20+
"context"
2021
"fmt"
2122
"testing"
2223
"time"
@@ -109,7 +110,7 @@ func TestResync(t *testing.T) {
109110
m, fakeClient, fakeClock := newTestManager()
110111
condition := newTestCondition("TestCondition")
111112
m.conditions = map[string]types.Condition{condition.Type: condition}
112-
m.sync()
113+
m.sync(context.Background())
113114
expected := []v1.NodeCondition{problemutil.ConvertToAPICondition(condition)}
114115
assert.Nil(t, fakeClient.AssertConditions(expected), "Condition should be updated via client")
115116

@@ -118,7 +119,7 @@ func TestResync(t *testing.T) {
118119
assert.False(t, m.needResync(), "Should not resync after resync period without resync needed")
119120

120121
fakeClient.InjectError("SetConditions", fmt.Errorf("injected error"))
121-
m.sync()
122+
m.sync(context.Background())
122123

123124
assert.False(t, m.needResync(), "Should not resync before resync period")
124125
fakeClock.Step(resyncPeriod)
@@ -129,7 +130,7 @@ func TestHeartbeat(t *testing.T) {
129130
m, fakeClient, fakeClock := newTestManager()
130131
condition := newTestCondition("TestCondition")
131132
m.conditions = map[string]types.Condition{condition.Type: condition}
132-
m.sync()
133+
m.sync(context.Background())
133134
expected := []v1.NodeCondition{problemutil.ConvertToAPICondition(condition)}
134135
assert.Nil(t, fakeClient.AssertConditions(expected), "Condition should be updated via client")
135136

Diff for: pkg/exporters/k8sexporter/k8s_exporter.go

+6-5
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ limitations under the License.
1717
package k8sexporter
1818

1919
import (
20+
"context"
2021
"net"
2122
"net/http"
2223
_ "net/http/pprof"
@@ -44,15 +45,15 @@ type k8sExporter struct {
4445
//
4546
// Note that this function may be blocked (until a timeout occurs) before
4647
// kube-apiserver becomes ready.
47-
func NewExporterOrDie(npdo *options.NodeProblemDetectorOptions) types.Exporter {
48+
func NewExporterOrDie(ctx context.Context, npdo *options.NodeProblemDetectorOptions) types.Exporter {
4849
if !npdo.EnableK8sExporter {
4950
return nil
5051
}
5152

5253
c := problemclient.NewClientOrDie(npdo)
5354

5455
glog.Infof("Waiting for kube-apiserver to be ready (timeout %v)...", npdo.APIServerWaitTimeout)
55-
if err := waitForAPIServerReadyWithTimeout(c, npdo); err != nil {
56+
if err := waitForAPIServerReadyWithTimeout(ctx, c, npdo); err != nil {
5657
glog.Warningf("kube-apiserver did not become ready: timed out on waiting for kube-apiserver to return the node object: %v", err)
5758
}
5859

@@ -62,7 +63,7 @@ func NewExporterOrDie(npdo *options.NodeProblemDetectorOptions) types.Exporter {
6263
}
6364

6465
ke.startHTTPReporting(npdo)
65-
ke.conditionManager.Start()
66+
ke.conditionManager.Start(ctx)
6667

6768
return &ke
6869
}
@@ -103,11 +104,11 @@ func (ke *k8sExporter) startHTTPReporting(npdo *options.NodeProblemDetectorOptio
103104
}()
104105
}
105106

106-
func waitForAPIServerReadyWithTimeout(c problemclient.Client, npdo *options.NodeProblemDetectorOptions) error {
107+
func waitForAPIServerReadyWithTimeout(ctx context.Context, c problemclient.Client, npdo *options.NodeProblemDetectorOptions) error {
107108
return wait.PollImmediate(npdo.APIServerWaitInterval, npdo.APIServerWaitTimeout, func() (done bool, err error) {
108109
// If NPD can get the node object from kube-apiserver, the server is
109110
// ready and the RBAC permission is set correctly.
110-
if _, err := c.GetNode(); err != nil {
111+
if _, err := c.GetNode(ctx); err != nil {
111112
glog.Errorf("Can't get node object: %v", err)
112113
return false, nil
113114
}

Diff for: pkg/exporters/k8sexporter/problemclient/fake_problem_client.go

+4-3
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ limitations under the License.
1717
package problemclient
1818

1919
import (
20+
"context"
2021
"fmt"
2122
"reflect"
2223
"sync"
@@ -60,7 +61,7 @@ func (f *FakeProblemClient) AssertConditions(expected []v1.NodeCondition) error
6061
}
6162

6263
// SetConditions is a fake mimic of SetConditions, it only update the internal condition cache.
63-
func (f *FakeProblemClient) SetConditions(conditions []v1.NodeCondition) error {
64+
func (f *FakeProblemClient) SetConditions(ctx context.Context, conditions []v1.NodeCondition) error {
6465
f.Lock()
6566
defer f.Unlock()
6667
if err, ok := f.errors["SetConditions"]; ok {
@@ -73,7 +74,7 @@ func (f *FakeProblemClient) SetConditions(conditions []v1.NodeCondition) error {
7374
}
7475

7576
// GetConditions is a fake mimic of GetConditions, it returns the conditions cached internally.
76-
func (f *FakeProblemClient) GetConditions(types []v1.NodeConditionType) ([]*v1.NodeCondition, error) {
77+
func (f *FakeProblemClient) GetConditions(ctx context.Context, types []v1.NodeConditionType) ([]*v1.NodeCondition, error) {
7778
f.Lock()
7879
defer f.Unlock()
7980
if err, ok := f.errors["GetConditions"]; ok {
@@ -93,6 +94,6 @@ func (f *FakeProblemClient) GetConditions(types []v1.NodeConditionType) ([]*v1.N
9394
func (f *FakeProblemClient) Eventf(eventType string, source, reason, messageFmt string, args ...interface{}) {
9495
}
9596

96-
func (f *FakeProblemClient) GetNode() (*v1.Node, error) {
97+
func (f *FakeProblemClient) GetNode(ctx context.Context) (*v1.Node, error) {
9798
return nil, fmt.Errorf("GetNode() not implemented")
9899
}

0 commit comments

Comments
 (0)