Skip to content

Commit 146dfd7

Browse files
authored
Merge pull request #299 from xueweiz/start
Correctly identify failures in problem daemon starting.
2 parents c95c375 + 225de07 commit 146dfd7

File tree

3 files changed

+9
-6
lines changed

3 files changed

+9
-6
lines changed

cmd/nodeproblemdetector/node_problem_detector.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,8 @@ import (
2222
"github.com/golang/glog"
2323
"github.com/spf13/pflag"
2424

25-
"k8s.io/node-problem-detector/cmd/options"
2625
_ "k8s.io/node-problem-detector/cmd/nodeproblemdetector/problemdaemonplugins"
26+
"k8s.io/node-problem-detector/cmd/options"
2727
"k8s.io/node-problem-detector/pkg/exporters/k8sexporter"
2828
"k8s.io/node-problem-detector/pkg/exporters/prometheusexporter"
2929
"k8s.io/node-problem-detector/pkg/problemdaemon"

pkg/problemdetector/problem_detector.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,18 +47,20 @@ func NewProblemDetector(monitors []types.Monitor, exporters []types.Exporter) Pr
4747
func (p *problemDetector) Run() error {
4848
// Start the monitors one by one.
4949
var chans []<-chan *types.Status
50+
failureCount := 0
5051
for _, m := range p.monitors {
5152
ch, err := m.Start()
5253
if err != nil {
5354
// Do not return the error; keep trying to start the remaining monitors.
5455
glog.Errorf("Failed to start problem daemon %v: %v", m, err)
56+
failureCount += 1
5557
continue
5658
}
5759
if ch != nil {
5860
chans = append(chans, ch)
5961
}
6062
}
61-
if len(chans) == 0 {
63+
if len(p.monitors) == failureCount {
6264
return fmt.Errorf("no problem daemon is successfully setup")
6365
}
6466
ch := groupChannel(chans)

pkg/types/types.go

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -99,12 +99,13 @@ const (
9999
Perm Type = "permanent"
100100
)
101101

102-
// Monitor monitors log and custom plugins and reports node problem condition and event according to
103-
// the rules.
102+
// Monitor monitors the system and reports problems and metrics according to the rules.
104103
type Monitor interface {
105-
// Start starts the log monitor.
104+
// Start starts the monitor.
105+
// The Status channel is used to report problems. If the Monitor does not report any
106+
// problems (i.e. it only reports metrics), the returned channel should be nil.
106107
Start() (<-chan *Status, error)
107-
// Stop stops the log monitor.
108+
// Stop stops the monitor.
108109
Stop()
109110
}
110111

0 commit comments

Comments
 (0)