Skip to content

Commit 89ab58b

Browse files
authored
Merge pull request #312 from yguo0905/wait-apiserver
Cherry pick #308 to v0.6: Support waiting for kube-apiserver to be ready with timeout during NPD startup
2 parents df2bc3d + 938f2a8 commit 89ab58b

File tree

4 files changed

+42
-3
lines changed

4 files changed

+42
-3
lines changed

cmd/node_problem_detector.go

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ import (
2626
"github.com/golang/glog"
2727
"github.com/spf13/pflag"
2828

29+
"k8s.io/apimachinery/pkg/util/wait"
2930
"k8s.io/node-problem-detector/cmd/options"
3031
"k8s.io/node-problem-detector/pkg/custompluginmonitor"
3132
"k8s.io/node-problem-detector/pkg/problemclient"
@@ -95,7 +96,25 @@ func main() {
9596
startHTTPServer(p, npdo)
9697
}
9798

99+
// This function may be blocked (until a timeout occurs) before
100+
// kube-apiserver becomes ready.
101+
glog.Infof("Waiting for kube-apiserver to be ready (timeout %v)...", npdo.APIServerWaitTimeout)
102+
if err := waitForAPIServerReadyWithTimeout(c, npdo); err != nil {
103+
glog.Warningf("kube-apiserver did not become ready: timed out on waiting for kube-apiserver to return the node object: %v", err)
104+
}
105+
98106
if err := p.Run(); err != nil {
99107
glog.Fatalf("Problem detector failed with error: %v", err)
100108
}
101109
}
110+
111+
func waitForAPIServerReadyWithTimeout(c problemclient.Client, npdo *options.NodeProblemDetectorOptions) error {
112+
return wait.PollImmediate(npdo.APIServerWaitInterval, npdo.APIServerWaitTimeout, func() (done bool, err error) {
113+
// If NPD can get the node object from kube-apiserver, the server is
114+
// ready and the RBAC permission is set correctly.
115+
if _, err := c.GetNode(); err == nil {
116+
return true, nil
117+
}
118+
return false, nil
119+
})
120+
}

cmd/options/options.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import (
2020
"flag"
2121
"fmt"
2222
"os"
23+
"time"
2324

2425
"net/url"
2526

@@ -46,6 +47,12 @@ type NodeProblemDetectorOptions struct {
4647
ServerPort int
4748
// ServerAddress is the address to bind the node problem detector server.
4849
ServerAddress string
50+
// APIServerWaitTimeout is the timeout on waiting for kube-apiserver to be
51+
// ready.
52+
APIServerWaitTimeout time.Duration
53+
// APIServerWaitInterval is the interval between the checks on the
54+
// readiness of kube-apiserver.
55+
APIServerWaitInterval time.Duration
4956

5057
// application options
5158

@@ -65,6 +72,8 @@ func (npdo *NodeProblemDetectorOptions) AddFlags(fs *pflag.FlagSet) {
6572
[]string{}, "List of paths to custom plugin monitor config files, comma separated.")
6673
fs.StringVar(&npdo.ApiServerOverride, "apiserver-override",
6774
"", "Custom URI used to connect to Kubernetes ApiServer")
75+
fs.DurationVar(&npdo.APIServerWaitTimeout, "apiserver-wait-timeout", time.Duration(5)*time.Minute, "The timeout on waiting for kube-apiserver to be ready. This is ignored if --enable-k8s-exporter is false.")
76+
fs.DurationVar(&npdo.APIServerWaitInterval, "apiserver-wait-interval", time.Duration(5)*time.Second, "The interval between the checks on the readiness of kube-apiserver. This is ignored if --enable-k8s-exporter is false.")
6877
fs.BoolVar(&npdo.PrintVersion, "version", false, "Print version information and quit")
6978
fs.StringVar(&npdo.HostnameOverride, "hostname-override",
7079
"", "Custom node name used to override hostname")

pkg/problemclient/fake_problem_client.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ import (
2121
"reflect"
2222
"sync"
2323

24-
"k8s.io/api/core/v1"
24+
v1 "k8s.io/api/core/v1"
2525
)
2626

2727
// FakeProblemClient is a fake problem client for debug.
@@ -92,3 +92,7 @@ func (f *FakeProblemClient) GetConditions(types []v1.NodeConditionType) ([]*v1.N
9292
// Eventf does nothing now.
9393
func (f *FakeProblemClient) Eventf(eventType string, source, reason, messageFmt string, args ...interface{}) {
9494
}
95+
96+
func (f *FakeProblemClient) GetNode() (*v1.Node, error) {
97+
return nil, fmt.Errorf("GetNode() not implemented")
98+
}

pkg/problemclient/problem_client.go

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ import (
2626
typedcorev1 "k8s.io/client-go/kubernetes/typed/core/v1"
2727
"k8s.io/kubernetes/pkg/api/legacyscheme"
2828

29-
"k8s.io/api/core/v1"
29+
v1 "k8s.io/api/core/v1"
3030
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
3131
"k8s.io/apimachinery/pkg/types"
3232
"k8s.io/apimachinery/pkg/util/clock"
@@ -47,6 +47,9 @@ type Client interface {
4747
SetConditions(conditions []v1.NodeCondition) error
4848
// Eventf reports the event.
4949
Eventf(eventType string, source, reason, messageFmt string, args ...interface{})
50+
// GetNode returns the Node object of the node on which the
51+
// node-problem-detector runs.
52+
GetNode() (*v1.Node, error)
5053
}
5154

5255
type nodeProblemClient struct {
@@ -79,7 +82,7 @@ func NewClientOrDie(npdo *options.NodeProblemDetectorOptions) Client {
7982
}
8083

8184
func (c *nodeProblemClient) GetConditions(conditionTypes []v1.NodeConditionType) ([]*v1.NodeCondition, error) {
82-
node, err := c.client.Nodes().Get(c.nodeName, metav1.GetOptions{})
85+
node, err := c.GetNode()
8386
if err != nil {
8487
return nil, err
8588
}
@@ -116,6 +119,10 @@ func (c *nodeProblemClient) Eventf(eventType, source, reason, messageFmt string,
116119
recorder.Eventf(c.nodeRef, eventType, reason, messageFmt, args...)
117120
}
118121

122+
func (c *nodeProblemClient) GetNode() (*v1.Node, error) {
123+
return c.client.Nodes().Get(c.nodeName, metav1.GetOptions{})
124+
}
125+
119126
// generatePatch generates condition patch
120127
func generatePatch(conditions []v1.NodeCondition) ([]byte, error) {
121128
raw, err := json.Marshal(&conditions)

0 commit comments

Comments
 (0)