Skip to content

Commit 4943443

Browse files
committed
Support waiting for kube-apiserver to be ready with timeout during NPD startup
1 parent 30babe9 commit 4943443

File tree

3 files changed

+41
-3
lines changed

3 files changed

+41
-3
lines changed

cmd/options/options.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import (
2020
"flag"
2121
"fmt"
2222
"os"
23+
"time"
2324

2425
"net/url"
2526

@@ -49,6 +50,12 @@ type NodeProblemDetectorOptions struct {
4950
EnableK8sExporter bool
5051
// ApiServerOverride is the custom URI used to connect to Kubernetes ApiServer.
5152
ApiServerOverride string
53+
// APIServerWaitTimeout is the timeout on waiting for kube-apiserver to be
54+
// ready.
55+
APIServerWaitTimeout time.Duration
56+
// APIServerWaitInterval is the interval between the checks on the
57+
// readiness of kube-apiserver.
58+
APIServerWaitInterval time.Duration
5259

5360
// prometheusExporter options
5461
// PrometheusServerPort is the port to bind the Prometheus scrape endpoint. Use 0 to disable.
@@ -96,6 +103,8 @@ func (npdo *NodeProblemDetectorOptions) AddFlags(fs *pflag.FlagSet) {
96103
fs.BoolVar(&npdo.EnableK8sExporter, "enable-k8s-exporter", true, "Enables reporting to Kubernetes API server.")
97104
fs.StringVar(&npdo.ApiServerOverride, "apiserver-override",
98105
"", "Custom URI used to connect to Kubernetes ApiServer. This is ignored if --enable-k8s-exporter is false.")
106+
fs.DurationVar(&npdo.APIServerWaitTimeout, "apiserver-wait-timeout", time.Duration(5)*time.Minute, "The timeout on waiting for kube-apiserver to be ready. This is ignored if --enable-k8s-exporter is false.")
107+
fs.DurationVar(&npdo.APIServerWaitInterval, "apiserver-wait-interval", time.Duration(5)*time.Second, "The interval between the checks on the readiness of kube-apiserver. This is ignored if --enable-k8s-exporter is false.")
99108
fs.BoolVar(&npdo.PrintVersion, "version", false, "Print version information and quit")
100109
fs.StringVar(&npdo.HostnameOverride, "hostname-override",
101110
"", "Custom node name used to override hostname")

pkg/exporters/k8sexporter/k8s_exporter.go

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ import (
2525
"github.com/golang/glog"
2626

2727
"k8s.io/apimachinery/pkg/util/clock"
28+
"k8s.io/apimachinery/pkg/util/wait"
2829

2930
"k8s.io/node-problem-detector/cmd/options"
3031
"k8s.io/node-problem-detector/pkg/exporters/k8sexporter/condition"
@@ -38,13 +39,23 @@ type k8sExporter struct {
3839
conditionManager condition.ConditionManager
3940
}
4041

41-
// NewExporterOrDie creates a exporter for Kubernetes apiserver exporting, panics if error occurs.
42+
// NewExporterOrDie creates an exporter for Kubernetes apiserver exporting,
43+
// panics if error occurs.
44+
//
45+
// Note that this function may be blocked (until a timeout occurs) before
46+
// kube-apiserver becomes ready.
4247
func NewExporterOrDie(npdo *options.NodeProblemDetectorOptions) types.Exporter {
4348
if !npdo.EnableK8sExporter {
4449
return nil
4550
}
4651

4752
c := problemclient.NewClientOrDie(npdo)
53+
54+
glog.Infof("Waiting for kube-apiserver to be ready (timeout %v)...", npdo.APIServerWaitTimeout)
55+
if err := waitForAPIServerReadyWithTimeout(c, npdo); err != nil {
56+
glog.Warningf("kube-apiserver did not become ready: %v", err)
57+
}
58+
4859
ke := k8sExporter{
4960
client: c,
5061
conditionManager: condition.NewConditionManager(c, clock.RealClock{}),
@@ -91,3 +102,14 @@ func (ke *k8sExporter) startHTTPReporting(npdo *options.NodeProblemDetectorOptio
91102
}
92103
}()
93104
}
105+
106+
func waitForAPIServerReadyWithTimeout(c problemclient.Client, npdo *options.NodeProblemDetectorOptions) error {
107+
return wait.PollImmediate(npdo.APIServerWaitInterval, npdo.APIServerWaitTimeout, func() (done bool, err error) {
108+
// If NPD can get the node object from kube-apiserver, the server is
109+
// ready and the RBAC permission is set correctly.
110+
if _, err := c.GetNode(); err == nil {
111+
return true, nil
112+
}
113+
return false, nil
114+
})
115+
}

pkg/exporters/k8sexporter/problemclient/problem_client.go

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ import (
2626
typedcorev1 "k8s.io/client-go/kubernetes/typed/core/v1"
2727
"k8s.io/kubernetes/pkg/api/legacyscheme"
2828

29-
"k8s.io/api/core/v1"
29+
v1 "k8s.io/api/core/v1"
3030
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
3131
"k8s.io/apimachinery/pkg/types"
3232
"k8s.io/apimachinery/pkg/util/clock"
@@ -47,6 +47,9 @@ type Client interface {
4747
SetConditions(conditions []v1.NodeCondition) error
4848
// Eventf reports the event.
4949
Eventf(eventType string, source, reason, messageFmt string, args ...interface{})
50+
// GetNode returns the Node object of the node on which the
51+
// node-problem-detector runs.
52+
GetNode() (*v1.Node, error)
5053
}
5154

5255
type nodeProblemClient struct {
@@ -79,7 +82,7 @@ func NewClientOrDie(npdo *options.NodeProblemDetectorOptions) Client {
7982
}
8083

8184
func (c *nodeProblemClient) GetConditions(conditionTypes []v1.NodeConditionType) ([]*v1.NodeCondition, error) {
82-
node, err := c.client.Nodes().Get(c.nodeName, metav1.GetOptions{})
85+
node, err := c.GetNode()
8386
if err != nil {
8487
return nil, err
8588
}
@@ -116,6 +119,10 @@ func (c *nodeProblemClient) Eventf(eventType, source, reason, messageFmt string,
116119
recorder.Eventf(c.nodeRef, eventType, reason, messageFmt, args...)
117120
}
118121

122+
func (c *nodeProblemClient) GetNode() (*v1.Node, error) {
123+
return c.client.Nodes().Get(c.nodeName, metav1.GetOptions{})
124+
}
125+
119126
// generatePatch generates condition patch
120127
func generatePatch(conditions []v1.NodeCondition) ([]byte, error) {
121128
raw, err := json.Marshal(&conditions)

0 commit comments

Comments
 (0)