diff --git a/controllers/kubeadmconfig_controller.go b/controllers/kubeadmconfig_controller.go
index f218c19..d8e3220 100644
--- a/controllers/kubeadmconfig_controller.go
+++ b/controllers/kubeadmconfig_controller.go
@@ -100,12 +100,6 @@ func (r *KubeadmConfigReconciler) Reconcile(req ctrl.Request) (_ ctrl.Result, rerr error) {
 		return ctrl.Result{}, err
 	}
 
-	// bail super early if it's already ready
-	if config.Status.Ready {
-		log.Info("ignoring an already ready config")
-		return ctrl.Result{}, nil
-	}
-
 	// Look up the Machine that owns this KubeConfig if there is one
 	machine, err := util.GetOwnerMachine(ctx, r.Client, config.ObjectMeta)
 	if err != nil {
@@ -118,12 +112,6 @@ func (r *KubeadmConfigReconciler) Reconcile(req ctrl.Request) (_ ctrl.Result, rerr error) {
 	}
 	log = log.WithValues("machine-name", machine.Name)
 
-	// Ignore machines that already have bootstrap data
-	if machine.Spec.Bootstrap.Data != nil {
-		// TODO: mark the config as ready?
-		return ctrl.Result{}, nil
-	}
-
 	// Lookup the cluster the machine is associated with
 	cluster, err := util.GetClusterFromMetadata(ctx, r.Client, machine.ObjectMeta)
 	if err != nil {
@@ -140,10 +128,45 @@ func (r *KubeadmConfigReconciler) Reconcile(req ctrl.Request) (_ ctrl.Result, rerr error) {
 		return ctrl.Result{}, err
 	}
 
+	switch {
 	// Wait patiently for the infrastructure to be ready
-	if !cluster.Status.InfrastructureReady {
+	case !cluster.Status.InfrastructureReady:
 		log.Info("Infrastructure is not ready, waiting until ready.")
 		return ctrl.Result{}, nil
+	// bail super early if it's already ready
+	case config.Status.Ready && machine.Status.InfrastructureReady:
+		log.Info("ignoring config for an already ready machine")
+		return ctrl.Result{}, nil
+	// Reconcile status for machines that have already copied bootstrap data
+	case machine.Spec.Bootstrap.Data != nil && !config.Status.Ready:
+		config.Status.Ready = true
+		// Initialize the patch helper
+		patchHelper, err := patch.NewHelper(config, r)
+		if err != nil {
+			return ctrl.Result{}, err
+		}
+		err = patchHelper.Patch(ctx, config)
+		return ctrl.Result{}, err
+	// If we've already embedded a time-limited join token into a config, but are still waiting for the token to be used, refresh it
+	case config.Status.Ready:
+		token := config.Spec.JoinConfiguration.Discovery.BootstrapToken.Token
+
+		// gets the remote secret interface client for the current cluster
+		secretsClient, err := r.SecretsClientFactory.NewSecretsClient(r.Client, cluster)
+		if err != nil {
+			return ctrl.Result{}, err
+		}
+
+		log.Info("refreshing token until the infrastructure has a chance to consume it")
+		err = refreshToken(secretsClient, token)
+		if err != nil {
+			// It would be nice to re-create the bootstrap token if the error was "not found", but we have no way to update the Machine's bootstrap data
+			return ctrl.Result{}, errors.Wrapf(err, "failed to refresh bootstrap token")
+		}
+		// NB: this may not be sufficient to keep the token live if we don't see it before it expires, but when we generate a config we will set the status to "ready" which should generate an update event
+		return ctrl.Result{
+			RequeueAfter: DefaultTokenTTL / 2,
+		}, nil
 	}
 
 	// Initialize the patch helper
diff --git a/controllers/kubeadmconfig_controller_test.go b/controllers/kubeadmconfig_controller_test.go
index 00a2455..a8ff120 100644
--- a/controllers/kubeadmconfig_controller_test.go
+++ b/controllers/kubeadmconfig_controller_test.go
@@ -17,6 +17,7 @@ limitations under the License.
 package controllers
 
 import (
+	"bytes"
 	"context"
 	"fmt"
 	"reflect"
@@ -30,6 +31,7 @@ import (
 	"k8s.io/apimachinery/pkg/types"
 	fakeclient "k8s.io/client-go/kubernetes/fake"
 	typedcorev1 "k8s.io/client-go/kubernetes/typed/core/v1"
+	bootstrapapi "k8s.io/cluster-bootstrap/token/api"
 	"k8s.io/klog/klogr"
 	bootstrapv1 "sigs.k8s.io/cluster-api-bootstrap-provider-kubeadm/api/v1alpha2"
 	internalcluster "sigs.k8s.io/cluster-api-bootstrap-provider-kubeadm/internal/cluster"
@@ -577,7 +579,7 @@ func TestReconcileIfJoinNodesAndControlPlaneIsReady(t *testing.T) {
 	myremoteclient, _ := k.SecretsClientFactory.NewSecretsClient(nil, nil)
 	l, err := myremoteclient.List(metav1.ListOptions{})
 	if err != nil {
-		t.Fatal(fmt.Sprintf("Failed to get secrets after reconcyle:\n %+v", err))
+		t.Fatalf("Failed to get secrets after reconcile:\n %+v", err)
 	}
 
 	if len(l.Items) != 1 {
@@ -588,6 +590,204 @@ func TestReconcileIfJoinNodesAndControlPlaneIsReady(t *testing.T) {
 	}
 }
 
+func TestBootstrapTokenTTLExtension(t *testing.T) {
+	cluster := newCluster("cluster")
+	cluster.Status.InfrastructureReady = true
+	cluster.Status.ControlPlaneInitialized = true
+	cluster.Status.APIEndpoints = []clusterv1.APIEndpoint{{Host: "100.105.150.1", Port: 6443}}
+
+	controlPlaneInitMachine := newControlPlaneMachine(cluster, "control-plane-init-machine")
+	initConfig := newControlPlaneInitKubeadmConfig(controlPlaneInitMachine, "control-plane-init-config")
+	workerMachine := newWorkerMachine(cluster)
+	workerJoinConfig := newWorkerJoinKubeadmConfig(workerMachine)
+	controlPlaneJoinMachine := newControlPlaneMachine(cluster, "control-plane-join-machine")
+	controlPlaneJoinConfig := newControlPlaneJoinKubeadmConfig(controlPlaneJoinMachine, "control-plane-join-cfg")
+	objects := []runtime.Object{
+		cluster,
+		workerMachine,
+		workerJoinConfig,
+		controlPlaneJoinMachine,
+		controlPlaneJoinConfig,
+	}
+
+	objects = append(objects, createSecrets(t, cluster, initConfig)...)
+	myclient := fake.NewFakeClientWithScheme(setupScheme(), objects...)
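+
+	// The reconciler under test is wired to the fake management-cluster client
+	// and the fake secrets-client factory, so the test can list and inspect the
+	// bootstrap token secrets that Reconcile creates and refreshes.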
+	k := &KubeadmConfigReconciler{
+		Log:                  log.Log,
+		Client:               myclient,
+		SecretsClientFactory: newFakeSecretFactory(),
+		KubeadmInitLock:      &myInitLocker{},
+	}
+	request := ctrl.Request{
+		NamespacedName: types.NamespacedName{
+			Namespace: "default",
+			Name:      "worker-join-cfg",
+		},
+	}
+	result, err := k.Reconcile(request)
+	if err != nil {
+		t.Fatalf("Failed to reconcile:\n %+v", err)
+	}
+	if result.Requeue == true {
+		t.Fatal("did not expect to requeue")
+	}
+	if result.RequeueAfter != time.Duration(0) {
+		t.Fatal("did not expect to requeue after")
+	}
+	cfg, err := getKubeadmConfig(myclient, "worker-join-cfg")
+	if err != nil {
+		t.Fatalf("Failed to get KubeadmConfig:\n %+v", err)
+	}
+	if cfg.Status.Ready != true {
+		t.Fatal("Expected status ready")
+	}
+	if cfg.Status.BootstrapData == nil {
+		t.Fatal("Expected bootstrap data")
+	}
+
+	request = ctrl.Request{
+		NamespacedName: types.NamespacedName{
+			Namespace: "default",
+			Name:      "control-plane-join-cfg",
+		},
+	}
+	result, err = k.Reconcile(request)
+	if err != nil {
+		t.Fatalf("Failed to reconcile:\n %+v", err)
+	}
+	if result.Requeue == true {
+		t.Fatal("did not expect to requeue")
+	}
+	if result.RequeueAfter != time.Duration(0) {
+		t.Fatal("did not expect to requeue after")
+	}
+	cfg, err = getKubeadmConfig(myclient, "control-plane-join-cfg")
+	if err != nil {
+		t.Fatalf("Failed to get KubeadmConfig:\n %+v", err)
+	}
+	if cfg.Status.Ready != true {
+		t.Fatal("Expected status ready")
+	}
+	if cfg.Status.BootstrapData == nil {
+		t.Fatal("Expected bootstrap data")
+	}
+
+	myremoteclient, _ := k.SecretsClientFactory.NewSecretsClient(nil, nil)
+	l, err := myremoteclient.List(metav1.ListOptions{})
+	if err != nil {
+		t.Fatalf("Failed to read secrets:\n %+v", err)
+	}
+
+	if len(l.Items) != 2 {
+		t.Fatalf("Expected two bootstrap tokens, saw:\n %d", len(l.Items))
+	}
+
+	// ensure that the token is refreshed...
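+	// Record each token's expiration timestamp so later reconcile passes can be
+	// checked against it: a refresh must rewrite the expiration value.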
+	tokenExpires := make([][]byte, len(l.Items))
+
+	for i, item := range l.Items {
+		tokenExpires[i] = item.Data[bootstrapapi.BootstrapTokenExpirationKey]
+	}
+
+	<-time.After(1 * time.Second)
+
+	for _, req := range []ctrl.Request{
+		{
+			NamespacedName: types.NamespacedName{
+				Namespace: "default",
+				Name:      "worker-join-cfg",
+			},
+		},
+		{
+			NamespacedName: types.NamespacedName{
+				Namespace: "default",
+				Name:      "control-plane-join-cfg",
+			},
+		},
+	} {
+
+		result, err := k.Reconcile(req)
+		if err != nil {
+			t.Fatalf("Failed to reconcile:\n %+v", err)
+		}
+		if result.RequeueAfter >= DefaultTokenTTL {
+			t.Fatal("expected a requeue duration less than the token TTL")
+		}
+	}
+
+	l, err = myremoteclient.List(metav1.ListOptions{})
+	if err != nil {
+		t.Fatalf("Failed to read secrets:\n %+v", err)
+	}
+
+	if len(l.Items) != 2 {
+		t.Fatalf("Expected two bootstrap tokens, saw:\n %d", len(l.Items))
+	}
+
+	for i, item := range l.Items {
+		if bytes.Equal(tokenExpires[i], item.Data[bootstrapapi.BootstrapTokenExpirationKey]) {
+			t.Fatal("Reconcile should have refreshed the bootstrap token's expiration until the infrastructure was ready")
+		}
+		tokenExpires[i] = item.Data[bootstrapapi.BootstrapTokenExpirationKey]
+	}
+
+	// ...until the infrastructure is marked "ready"
+	workerMachine.Status.InfrastructureReady = true
+	err = myclient.Update(context.Background(), workerMachine)
+	if err != nil {
+		t.Fatalf("unable to set machine infrastructure ready: %v", err)
+	}
+
+	controlPlaneJoinMachine.Status.InfrastructureReady = true
+	err = myclient.Update(context.Background(), controlPlaneJoinMachine)
+	if err != nil {
+		t.Fatalf("unable to set machine infrastructure ready: %v", err)
+	}
+
+	<-time.After(1 * time.Second)
+
+	for _, req := range []ctrl.Request{
+		{
+			NamespacedName: types.NamespacedName{
+				Namespace: "default",
+				Name:      "worker-join-cfg",
+			},
+		},
+		{
+			NamespacedName: types.NamespacedName{
+				Namespace: "default",
+				Name:      "control-plane-join-cfg",
+			},
+		},
+	} {
+
+		result, err := k.Reconcile(req)
+		if err != nil {
+			t.Fatalf("Failed to reconcile:\n %+v", err)
+		}
+		if result.Requeue == true {
+			t.Fatal("did not expect to requeue")
+		}
+		if result.RequeueAfter != time.Duration(0) {
+			t.Fatal("did not expect to requeue after")
+		}
+	}
+
+	l, err = myremoteclient.List(metav1.ListOptions{})
+	if err != nil {
+		t.Fatalf("Failed to read secrets:\n %+v", err)
+	}
+
+	if len(l.Items) != 2 {
+		t.Fatalf("Expected two bootstrap tokens, saw:\n %d", len(l.Items))
+	}
+
+	for i, item := range l.Items {
+		if !bytes.Equal(tokenExpires[i], item.Data[bootstrapapi.BootstrapTokenExpirationKey]) {
+			t.Fatal("Reconcile should have let the bootstrap token expire after the infrastructure was ready")
+		}
+	}
+}
+
 // Ensure the discovery portion of the JoinConfiguration gets generated correctly.
 func TestKubeadmConfigReconciler_Reconcile_DisocveryReconcileBehaviors(t *testing.T) {
 	k := &KubeadmConfigReconciler{
diff --git a/controllers/token.go b/controllers/token.go
index 5e2ba21..76b63db 100644
--- a/controllers/token.go
+++ b/controllers/token.go
@@ -30,8 +30,9 @@ import (
 	"sigs.k8s.io/controller-runtime/pkg/client"
 )
 
-const (
-	defaultTokenTTL = 10 * time.Minute
+var (
+	// DefaultTokenTTL is the amount of time a bootstrap token (and therefore a KubeadmConfig) will be valid
+	DefaultTokenTTL = 15 * time.Minute
 )
 
 // ClusterSecretsClientFactory support creation of secrets client for clusters
@@ -76,7 +77,7 @@ func createToken(client corev1.SecretInterface) (string, error) {
 		Data: map[string][]byte{
 			bootstrapapi.BootstrapTokenIDKey:               []byte(tokenID),
 			bootstrapapi.BootstrapTokenSecretKey:           []byte(tokenSecret),
-			bootstrapapi.BootstrapTokenExpirationKey:       []byte(time.Now().UTC().Add(defaultTokenTTL).Format(time.RFC3339)),
+			bootstrapapi.BootstrapTokenExpirationKey:       []byte(time.Now().UTC().Add(DefaultTokenTTL).Format(time.RFC3339)),
 			bootstrapapi.BootstrapTokenUsageSigningKey:     []byte("true"),
 			bootstrapapi.BootstrapTokenUsageAuthentication: []byte("true"),
 			bootstrapapi.BootstrapTokenExtraGroupsKey:      []byte("system:bootstrappers:kubeadm:default-node-token"),
@@ -89,3 +90,26 @@ func createToken(client corev1.SecretInterface) (string, error) {
 	}
 	return token, nil
 }
+
+// refreshToken extends the TTL for an existing token
+func refreshToken(client corev1.SecretInterface, token string) error {
+	substrs := bootstraputil.BootstrapTokenRegexp.FindStringSubmatch(token)
+	if len(substrs) != 3 {
+		return errors.Errorf("the bootstrap token %q was not of the form %q", token, bootstrapapi.BootstrapTokenPattern)
+	}
+	tokenID := substrs[1]
+
+	secretName := bootstraputil.BootstrapTokenSecretName(tokenID)
+	secret, err := client.Get(secretName, metav1.GetOptions{})
+	if err != nil {
+		return err
+	}
+
+	if secret.Data == nil {
+		return errors.Errorf("invalid bootstrap secret %q, remove the token from the kubeadm config to re-create", secretName)
+	}
+	secret.Data[bootstrapapi.BootstrapTokenExpirationKey] = []byte(time.Now().UTC().Add(DefaultTokenTTL).Format(time.RFC3339))
+
+	_, err = client.Update(secret)
+	return err
+}
diff --git a/main.go b/main.go
index 9e7db03..e3b1935 100644
--- a/main.go
+++ b/main.go
@@ -77,6 +77,13 @@ func main() {
 		"The minimum interval at which watched resources are reconciled (e.g. 10m)",
 	)
 
+	flag.DurationVar(
+		&controllers.DefaultTokenTTL,
+		"bootstrap-token-ttl",
+		15*time.Minute,
+		"The amount of time the bootstrap token will be valid",
+	)
+
 	flag.StringVar(
 		&watchNamespace,
 		"namespace",
@@ -88,6 +95,10 @@ func main() {
 
 	ctrl.SetLogger(klogr.New())
 
+	if controllers.DefaultTokenTTL-syncPeriod < 1*time.Minute {
+		setupLog.Info("warning: the sync interval is close to the configured token TTL; tokens may temporarily expire before being refreshed")
+	}
+
 	mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{
 		Scheme:             scheme,
 		MetricsBindAddress: metricsAddr,