Skip to content
This repository was archived by the owner on Jul 30, 2021. It is now read-only.

Commit c562833

Browse files
committed
🐛 refresh token for provisioning machines
Includes: a configurable token duration, so that the token TTL can be lengthened to accommodate a longer sync interval when necessary. Also adds a requeueAfter parameter, because I realized while doing this work that we should get an immediate second crack at the config before it's consumed. Finally, reconciles status when the config has already been used. For us to get to that point, the owner ref must already have been set by the machine controller, which implies that the machine has a pointer to this config object. So either it's an extremely unlikely user error, or we mistakenly dropped the "ready" flag for this config.
1 parent b22e54b commit c562833

File tree

4 files changed

+275
-17
lines changed

4 files changed

+275
-17
lines changed

controllers/kubeadmconfig_controller.go

Lines changed: 36 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -100,12 +100,6 @@ func (r *KubeadmConfigReconciler) Reconcile(req ctrl.Request) (_ ctrl.Result, re
100100
return ctrl.Result{}, err
101101
}
102102

103-
// bail super early if it's already ready
104-
if config.Status.Ready {
105-
log.Info("ignoring an already ready config")
106-
return ctrl.Result{}, nil
107-
}
108-
109103
// Look up the Machine that owns this KubeConfig if there is one
110104
machine, err := util.GetOwnerMachine(ctx, r.Client, config.ObjectMeta)
111105
if err != nil {
@@ -118,12 +112,6 @@ func (r *KubeadmConfigReconciler) Reconcile(req ctrl.Request) (_ ctrl.Result, re
118112
}
119113
log = log.WithValues("machine-name", machine.Name)
120114

121-
// Ignore machines that already have bootstrap data
122-
if machine.Spec.Bootstrap.Data != nil {
123-
// TODO: mark the config as ready?
124-
return ctrl.Result{}, nil
125-
}
126-
127115
// Lookup the cluster the machine is associated with
128116
cluster, err := util.GetClusterFromMetadata(ctx, r.Client, machine.ObjectMeta)
129117
if err != nil {
@@ -140,10 +128,45 @@ func (r *KubeadmConfigReconciler) Reconcile(req ctrl.Request) (_ ctrl.Result, re
140128
return ctrl.Result{}, err
141129
}
142130

131+
switch {
143132
// Wait patiently for the infrastructure to be ready
144-
if !cluster.Status.InfrastructureReady {
133+
case !cluster.Status.InfrastructureReady:
145134
log.Info("Infrastructure is not ready, waiting until ready.")
146135
return ctrl.Result{}, nil
136+
// bail super early if it's already ready
137+
case config.Status.Ready && machine.Status.InfrastructureReady:
138+
log.Info("ignoring config for an already ready machine")
139+
return ctrl.Result{}, nil
140+
// Reconcile status for machines that have already copied bootstrap data
141+
case machine.Spec.Bootstrap.Data != nil && !config.Status.Ready:
142+
config.Status.Ready = true
143+
// Initialize the patch helper
144+
patchHelper, err := patch.NewHelper(config, r)
145+
if err != nil {
146+
return ctrl.Result{}, err
147+
}
148+
err = patchHelper.Patch(ctx, config)
149+
return ctrl.Result{}, err
150+
// If we've already embedded a time-limited join token into a config, but are still waiting for the token to be used, refresh it
151+
case config.Status.Ready:
152+
token := config.Spec.JoinConfiguration.Discovery.BootstrapToken.Token
153+
154+
// gets the remote secret interface client for the current cluster
155+
secretsClient, err := r.SecretsClientFactory.NewSecretsClient(r.Client, cluster)
156+
if err != nil {
157+
return ctrl.Result{}, err
158+
}
159+
160+
log.Info("refreshing token until the infrastructure has a chance to consume it")
161+
err = refreshToken(secretsClient, token)
162+
if err != nil {
163+
// It would be nice to re-create the bootstrap token if the error was "not found", but we have no way to update the Machine's bootstrap data
164+
return ctrl.Result{}, errors.Wrapf(err, "failed to refresh bootstrap token")
165+
}
166+
// NB: this may not be sufficient to keep the token live if we don't see it before it expires, but when we generate a config we will set the status to "ready" which should generate an update event
167+
return ctrl.Result{
168+
RequeueAfter: DefaultTokenTTL / 2,
169+
}, nil
147170
}
148171

149172
// Initialize the patch helper

controllers/kubeadmconfig_controller_test.go

Lines changed: 201 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ limitations under the License.
1717
package controllers
1818

1919
import (
20+
"bytes"
2021
"context"
2122
"fmt"
2223
"reflect"
@@ -30,6 +31,7 @@ import (
3031
"k8s.io/apimachinery/pkg/types"
3132
fakeclient "k8s.io/client-go/kubernetes/fake"
3233
typedcorev1 "k8s.io/client-go/kubernetes/typed/core/v1"
34+
bootstrapapi "k8s.io/cluster-bootstrap/token/api"
3335
"k8s.io/klog/klogr"
3436
bootstrapv1 "sigs.k8s.io/cluster-api-bootstrap-provider-kubeadm/api/v1alpha2"
3537
internalcluster "sigs.k8s.io/cluster-api-bootstrap-provider-kubeadm/internal/cluster"
@@ -577,7 +579,7 @@ func TestReconcileIfJoinNodesAndControlPlaneIsReady(t *testing.T) {
577579
myremoteclient, _ := k.SecretsClientFactory.NewSecretsClient(nil, nil)
578580
l, err := myremoteclient.List(metav1.ListOptions{})
579581
if err != nil {
580-
t.Fatal(fmt.Sprintf("Failed to get secrets after reconcyle:\n %+v", err))
582+
t.Fatal(fmt.Sprintf("Failed to get secrets after reconcile:\n %+v", err))
581583
}
582584

583585
if len(l.Items) != 1 {
@@ -588,6 +590,204 @@ func TestReconcileIfJoinNodesAndControlPlaneIsReady(t *testing.T) {
588590
}
589591
}
590592

593+
func TestBootstrapTokenTTLExtension(t *testing.T) {
594+
cluster := newCluster("cluster")
595+
cluster.Status.InfrastructureReady = true
596+
cluster.Status.ControlPlaneInitialized = true
597+
cluster.Status.APIEndpoints = []clusterv1.APIEndpoint{{Host: "100.105.150.1", Port: 6443}}
598+
599+
controlPlaneInitMachine := newControlPlaneMachine(cluster, "control-plane-init-machine")
600+
initConfig := newControlPlaneInitKubeadmConfig(controlPlaneInitMachine, "control-plane-init-config")
601+
workerMachine := newWorkerMachine(cluster)
602+
workerJoinConfig := newWorkerJoinKubeadmConfig(workerMachine)
603+
controlPlaneJoinMachine := newControlPlaneMachine(cluster, "control-plane-join-machine")
604+
controlPlaneJoinConfig := newControlPlaneJoinKubeadmConfig(controlPlaneJoinMachine, "control-plane-join-cfg")
605+
objects := []runtime.Object{
606+
cluster,
607+
workerMachine,
608+
workerJoinConfig,
609+
controlPlaneJoinMachine,
610+
controlPlaneJoinConfig,
611+
}
612+
613+
objects = append(objects, createSecrets(t, cluster, initConfig)...)
614+
myclient := fake.NewFakeClientWithScheme(setupScheme(), objects...)
615+
k := &KubeadmConfigReconciler{
616+
Log: log.Log,
617+
Client: myclient,
618+
SecretsClientFactory: newFakeSecretFactory(),
619+
KubeadmInitLock: &myInitLocker{},
620+
}
621+
request := ctrl.Request{
622+
NamespacedName: types.NamespacedName{
623+
Namespace: "default",
624+
Name: "worker-join-cfg",
625+
},
626+
}
627+
result, err := k.Reconcile(request)
628+
if err != nil {
629+
t.Fatalf("Failed to reconcile:\n %+v", err)
630+
}
631+
if result.Requeue == true {
632+
t.Fatal("did not expect to requeue")
633+
}
634+
if result.RequeueAfter != time.Duration(0) {
635+
t.Fatal("did not expect to requeue after")
636+
}
637+
cfg, err := getKubeadmConfig(myclient, "worker-join-cfg")
638+
if err != nil {
639+
t.Fatalf("Failed to reconcile:\n %+v", err)
640+
}
641+
if cfg.Status.Ready != true {
642+
t.Fatal("Expected status ready")
643+
}
644+
if cfg.Status.BootstrapData == nil {
645+
t.Fatal("Expected status ready")
646+
}
647+
request = ctrl.Request{
648+
NamespacedName: types.NamespacedName{
649+
Namespace: "default",
650+
Name: "control-plane-join-cfg",
651+
},
652+
}
653+
result, err = k.Reconcile(request)
654+
if err != nil {
655+
t.Fatalf("Failed to reconcile:\n %+v", err)
656+
}
657+
if result.Requeue == true {
658+
t.Fatal("did not expect to requeue")
659+
}
660+
if result.RequeueAfter != time.Duration(0) {
661+
t.Fatal("did not expect to requeue after")
662+
}
663+
cfg, err = getKubeadmConfig(myclient, "control-plane-join-cfg")
664+
if err != nil {
665+
t.Fatalf("Failed to reconcile:\n %+v", err)
666+
}
667+
if cfg.Status.Ready != true {
668+
t.Fatal("Expected status ready")
669+
}
670+
if cfg.Status.BootstrapData == nil {
671+
t.Fatal("Expected status ready")
672+
}
673+
674+
myremoteclient, _ := k.SecretsClientFactory.NewSecretsClient(nil, nil)
675+
l, err := myremoteclient.List(metav1.ListOptions{})
676+
if err != nil {
677+
t.Fatalf("Failed to read secrets:\n %+v", err)
678+
}
679+
680+
if len(l.Items) != 2 {
681+
t.Fatalf("Expected two bootstrap tokens, saw:\n %+d", len(l.Items))
682+
}
683+
684+
// ensure that the token is refreshed...
685+
tokenExpires := make([][]byte, len(l.Items))
686+
687+
for i, item := range l.Items {
688+
tokenExpires[i] = item.Data[bootstrapapi.BootstrapTokenExpirationKey]
689+
}
690+
691+
<-time.After(1 * time.Second)
692+
693+
for _, req := range []ctrl.Request{
694+
{
695+
NamespacedName: types.NamespacedName{
696+
Namespace: "default",
697+
Name: "worker-join-cfg",
698+
},
699+
},
700+
{
701+
NamespacedName: types.NamespacedName{
702+
Namespace: "default",
703+
Name: "control-plane-join-cfg",
704+
},
705+
},
706+
} {
707+
708+
result, err := k.Reconcile(req)
709+
if err != nil {
710+
t.Fatalf("Failed to reconcile:\n %+v", err)
711+
}
712+
if result.RequeueAfter >= DefaultTokenTTL {
713+
t.Fatal("expected a requeue duration less than the token TTL")
714+
}
715+
}
716+
717+
l, err = myremoteclient.List(metav1.ListOptions{})
718+
if err != nil {
719+
t.Fatalf("Failed to read secrets:\n %+v", err)
720+
}
721+
722+
if len(l.Items) != 2 {
723+
t.Fatalf("Expected two bootstrap tokens, saw:\n %+d", len(l.Items))
724+
}
725+
726+
for i, item := range l.Items {
727+
if bytes.Equal(tokenExpires[i], item.Data[bootstrapapi.BootstrapTokenExpirationKey]) {
728+
t.Fatal("Reconcile should have refreshed bootstrap token's expiration until the infrastructure was ready")
729+
}
730+
tokenExpires[i] = item.Data[bootstrapapi.BootstrapTokenExpirationKey]
731+
}
732+
733+
// ...until the infrastructure is marked "ready"
734+
workerMachine.Status.InfrastructureReady = true
735+
err = myclient.Update(context.Background(), workerMachine)
736+
if err != nil {
737+
t.Fatalf("unable to set machine infrastructure ready: %v", err)
738+
}
739+
740+
controlPlaneJoinMachine.Status.InfrastructureReady = true
741+
err = myclient.Update(context.Background(), controlPlaneJoinMachine)
742+
if err != nil {
743+
t.Fatalf("unable to set machine infrastructure ready: %v", err)
744+
}
745+
746+
<-time.After(1 * time.Second)
747+
748+
for _, req := range []ctrl.Request{
749+
{
750+
NamespacedName: types.NamespacedName{
751+
Namespace: "default",
752+
Name: "worker-join-cfg",
753+
},
754+
},
755+
{
756+
NamespacedName: types.NamespacedName{
757+
Namespace: "default",
758+
Name: "control-plane-join-cfg",
759+
},
760+
},
761+
} {
762+
763+
result, err := k.Reconcile(req)
764+
if err != nil {
765+
t.Fatalf("Failed to reconcile:\n %+v", err)
766+
}
767+
if result.Requeue == true {
768+
t.Fatal("did not expect to requeue")
769+
}
770+
if result.RequeueAfter != time.Duration(0) {
771+
t.Fatal("did not expect to requeue after")
772+
}
773+
}
774+
775+
l, err = myremoteclient.List(metav1.ListOptions{})
776+
if err != nil {
777+
t.Fatalf("Failed to read secrets:\n %+v", err)
778+
}
779+
780+
if len(l.Items) != 2 {
781+
t.Fatalf("Expected two bootstrap tokens, saw:\n %+d", len(l.Items))
782+
}
783+
784+
for i, item := range l.Items {
785+
if !bytes.Equal(tokenExpires[i], item.Data[bootstrapapi.BootstrapTokenExpirationKey]) {
786+
t.Fatal("Reconcile should have let the bootstrap token expire after the infrastructure was ready")
787+
}
788+
}
789+
}
790+
591791
// Ensure the discovery portion of the JoinConfiguration gets generated correctly.
592792
func TestKubeadmConfigReconciler_Reconcile_DisocveryReconcileBehaviors(t *testing.T) {
593793
k := &KubeadmConfigReconciler{

controllers/token.go

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,9 @@ import (
3030
"sigs.k8s.io/controller-runtime/pkg/client"
3131
)
3232

33-
const (
34-
defaultTokenTTL = 10 * time.Minute
33+
var (
34+
// DefaultTokenTTL is the amount of time a bootstrap token (and therefore a KubeadmConfig) will be valid
35+
DefaultTokenTTL = 15 * time.Minute
3536
)
3637

3738
// ClusterSecretsClientFactory support creation of secrets client for clusters
@@ -76,7 +77,7 @@ func createToken(client corev1.SecretInterface) (string, error) {
7677
Data: map[string][]byte{
7778
bootstrapapi.BootstrapTokenIDKey: []byte(tokenID),
7879
bootstrapapi.BootstrapTokenSecretKey: []byte(tokenSecret),
79-
bootstrapapi.BootstrapTokenExpirationKey: []byte(time.Now().UTC().Add(defaultTokenTTL).Format(time.RFC3339)),
80+
bootstrapapi.BootstrapTokenExpirationKey: []byte(time.Now().UTC().Add(DefaultTokenTTL).Format(time.RFC3339)),
8081
bootstrapapi.BootstrapTokenUsageSigningKey: []byte("true"),
8182
bootstrapapi.BootstrapTokenUsageAuthentication: []byte("true"),
8283
bootstrapapi.BootstrapTokenExtraGroupsKey: []byte("system:bootstrappers:kubeadm:default-node-token"),
@@ -89,3 +90,26 @@ func createToken(client corev1.SecretInterface) (string, error) {
8990
}
9091
return token, nil
9192
}
93+
94+
// refreshToken extends the TTL for an existing token
95+
func refreshToken(client corev1.SecretInterface, token string) error {
96+
substrs := bootstraputil.BootstrapTokenRegexp.FindStringSubmatch(token)
97+
if len(substrs) != 3 {
98+
return errors.Errorf("the bootstrap token %q was not of the form %q", token, bootstrapapi.BootstrapTokenPattern)
99+
}
100+
tokenID := substrs[1]
101+
102+
secretName := bootstraputil.BootstrapTokenSecretName(tokenID)
103+
secret, err := client.Get(secretName, metav1.GetOptions{})
104+
if err != nil {
105+
return err
106+
}
107+
108+
if secret.Data == nil {
109+
return errors.Errorf("Invalid bootstrap secret %q, remove the token from the kubadm config to re-create", secretName)
110+
}
111+
secret.Data[bootstrapapi.BootstrapTokenExpirationKey] = []byte(time.Now().UTC().Add(DefaultTokenTTL).Format(time.RFC3339))
112+
113+
_, err = client.Update(secret)
114+
return err
115+
}

main.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,13 @@ func main() {
7777
"The minimum interval at which watched resources are reconciled (e.g. 10m)",
7878
)
7979

80+
flag.DurationVar(
81+
&controllers.DefaultTokenTTL,
82+
"bootstrap-token-ttl",
83+
15*time.Minute,
84+
"The amount of time the bootstrap token will be valid",
85+
)
86+
8087
flag.StringVar(
8188
&watchNamespace,
8289
"namespace",
@@ -88,6 +95,10 @@ func main() {
8895

8996
ctrl.SetLogger(klogr.New())
9097

98+
if controllers.DefaultTokenTTL-syncPeriod < 1*time.Minute {
99+
setupLog.Info("warning: the sync interval is close to the configured token TTL, tokens may expire temporarily before being refreshed")
100+
}
101+
91102
mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{
92103
Scheme: scheme,
93104
MetricsBindAddress: metricsAddr,

0 commit comments

Comments
 (0)