Skip to content
This repository was archived by the owner on Jul 30, 2021. It is now read-only.

🐛 Refresh token for provisioning machines #250

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 36 additions & 13 deletions controllers/kubeadmconfig_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -100,12 +100,6 @@ func (r *KubeadmConfigReconciler) Reconcile(req ctrl.Request) (_ ctrl.Result, re
return ctrl.Result{}, err
}

// bail super early if it's already ready
if config.Status.Ready {
log.Info("ignoring an already ready config")
return ctrl.Result{}, nil
}

// Look up the Machine that owns this KubeConfig if there is one
machine, err := util.GetOwnerMachine(ctx, r.Client, config.ObjectMeta)
if err != nil {
Expand All @@ -118,12 +112,6 @@ func (r *KubeadmConfigReconciler) Reconcile(req ctrl.Request) (_ ctrl.Result, re
}
log = log.WithValues("machine-name", machine.Name)

// Ignore machines that already have bootstrap data
if machine.Spec.Bootstrap.Data != nil {
// TODO: mark the config as ready?
return ctrl.Result{}, nil
}

// Lookup the cluster the machine is associated with
cluster, err := util.GetClusterFromMetadata(ctx, r.Client, machine.ObjectMeta)
if err != nil {
Expand All @@ -140,10 +128,45 @@ func (r *KubeadmConfigReconciler) Reconcile(req ctrl.Request) (_ ctrl.Result, re
return ctrl.Result{}, err
}

switch {
Copy link
Contributor

@chuckha chuckha Sep 30, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was looking more for a switch to organize the quick-to-return cases before generating the cloud-init

What do you think about restructuring this a little bit to include just a few cases that return early like:

case: is the cluster's infrastructure ready?
    if it's not ready then we should return
case: is the config ready but the infrastructure not?
    refresh the token and return
case: is the config ready and the infrastructure ready?
    we're done; return

this switch wouldn't need a default case since it's encapsulating the idea "should we return before doing the work we want to be doing?". The default is no, we should not return (which is the same as doing nothing)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oooh, yeah, I like that much more. I'll give it a shot.

// Wait patiently for the infrastructure to be ready
if !cluster.Status.InfrastructureReady {
case !cluster.Status.InfrastructureReady:
log.Info("Infrastructure is not ready, waiting until ready.")
return ctrl.Result{}, nil
// bail super early if it's already ready
case config.Status.Ready && machine.Status.InfrastructureReady:
log.Info("ignoring config for an already ready machine")
return ctrl.Result{}, nil
// Reconcile status for machines that have already copied bootstrap data
case machine.Spec.Bootstrap.Data != nil && !config.Status.Ready:
config.Status.Ready = true
// Initialize the patch helper
patchHelper, err := patch.NewHelper(config, r)
if err != nil {
return ctrl.Result{}, err
}
err = patchHelper.Patch(ctx, config)
return ctrl.Result{}, err
// If we've already embedded a time-limited join token into a config, but are still waiting for the token to be used, refresh it
case config.Status.Ready:
token := config.Spec.JoinConfiguration.Discovery.BootstrapToken.Token

// gets the remote secret interface client for the current cluster
secretsClient, err := r.SecretsClientFactory.NewSecretsClient(r.Client, cluster)
if err != nil {
return ctrl.Result{}, err
}

log.Info("refreshing token until the infrastructure has a chance to consume it")
err = refreshToken(secretsClient, token)
if err != nil {
// It would be nice to re-create the bootstrap token if the error was "not found", but we have no way to update the Machine's bootstrap data
return ctrl.Result{}, errors.Wrapf(err, "failed to refresh bootstrap token")
}
// NB: this may not be sufficient to keep the token live if we don't see it before it expires, but when we generate a config we will set the status to "ready" which should generate an update event
return ctrl.Result{
RequeueAfter: DefaultTokenTTL / 2,
}, nil
}

// Initialize the patch helper
Expand Down
202 changes: 201 additions & 1 deletion controllers/kubeadmconfig_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ limitations under the License.
package controllers

import (
"bytes"
"context"
"fmt"
"reflect"
Expand All @@ -30,6 +31,7 @@ import (
"k8s.io/apimachinery/pkg/types"
fakeclient "k8s.io/client-go/kubernetes/fake"
typedcorev1 "k8s.io/client-go/kubernetes/typed/core/v1"
bootstrapapi "k8s.io/cluster-bootstrap/token/api"
"k8s.io/klog/klogr"
bootstrapv1 "sigs.k8s.io/cluster-api-bootstrap-provider-kubeadm/api/v1alpha2"
internalcluster "sigs.k8s.io/cluster-api-bootstrap-provider-kubeadm/internal/cluster"
Expand Down Expand Up @@ -577,7 +579,7 @@ func TestReconcileIfJoinNodesAndControlPlaneIsReady(t *testing.T) {
myremoteclient, _ := k.SecretsClientFactory.NewSecretsClient(nil, nil)
l, err := myremoteclient.List(metav1.ListOptions{})
if err != nil {
t.Fatal(fmt.Sprintf("Failed to get secrets after reconcyle:\n %+v", err))
t.Fatal(fmt.Sprintf("Failed to get secrets after reconcile:\n %+v", err))
}

if len(l.Items) != 1 {
Expand All @@ -588,6 +590,204 @@ func TestReconcileIfJoinNodesAndControlPlaneIsReady(t *testing.T) {
}
}

// TestBootstrapTokenTTLExtension verifies that Reconcile keeps refreshing a
// join config's bootstrap token (by requeueing with a duration below the token
// TTL) for as long as the owning Machine's infrastructure is not ready, and
// stops refreshing — letting the token expire naturally — once the Machine's
// infrastructure is marked ready.
func TestBootstrapTokenTTLExtension(t *testing.T) {
	cluster := newCluster("cluster")
	cluster.Status.InfrastructureReady = true
	cluster.Status.ControlPlaneInitialized = true
	cluster.Status.APIEndpoints = []clusterv1.APIEndpoint{{Host: "100.105.150.1", Port: 6443}}

	controlPlaneInitMachine := newControlPlaneMachine(cluster, "control-plane-init-machine")
	initConfig := newControlPlaneInitKubeadmConfig(controlPlaneInitMachine, "control-plane-init-config")
	workerMachine := newWorkerMachine(cluster)
	workerJoinConfig := newWorkerJoinKubeadmConfig(workerMachine)
	controlPlaneJoinMachine := newControlPlaneMachine(cluster, "control-plane-join-machine")
	controlPlaneJoinConfig := newControlPlaneJoinKubeadmConfig(controlPlaneJoinMachine, "control-plane-join-cfg")
	objects := []runtime.Object{
		cluster,
		workerMachine,
		workerJoinConfig,
		controlPlaneJoinMachine,
		controlPlaneJoinConfig,
	}

	objects = append(objects, createSecrets(t, cluster, initConfig)...)
	myclient := fake.NewFakeClientWithScheme(setupScheme(), objects...)
	k := &KubeadmConfigReconciler{
		Log:                  log.Log,
		Client:               myclient,
		SecretsClientFactory: newFakeSecretFactory(),
		KubeadmInitLock:      &myInitLocker{},
	}

	// First reconcile of each join config generates bootstrap data (and a
	// bootstrap token secret) and marks the config ready.
	request := ctrl.Request{
		NamespacedName: types.NamespacedName{
			Namespace: "default",
			Name:      "worker-join-cfg",
		},
	}
	result, err := k.Reconcile(request)
	if err != nil {
		t.Fatalf("Failed to reconcile:\n %+v", err)
	}
	if result.Requeue {
		t.Fatal("did not expect to requeue")
	}
	if result.RequeueAfter != time.Duration(0) {
		t.Fatal("did not expect to requeue after")
	}
	cfg, err := getKubeadmConfig(myclient, "worker-join-cfg")
	if err != nil {
		t.Fatalf("Failed to get KubeadmConfig:\n %+v", err)
	}
	if cfg.Status.Ready != true {
		t.Fatal("Expected status ready")
	}
	if cfg.Status.BootstrapData == nil {
		t.Fatal("Expected bootstrap data")
	}
	request = ctrl.Request{
		NamespacedName: types.NamespacedName{
			Namespace: "default",
			Name:      "control-plane-join-cfg",
		},
	}
	result, err = k.Reconcile(request)
	if err != nil {
		t.Fatalf("Failed to reconcile:\n %+v", err)
	}
	if result.Requeue {
		t.Fatal("did not expect to requeue")
	}
	if result.RequeueAfter != time.Duration(0) {
		t.Fatal("did not expect to requeue after")
	}
	cfg, err = getKubeadmConfig(myclient, "control-plane-join-cfg")
	if err != nil {
		t.Fatalf("Failed to get KubeadmConfig:\n %+v", err)
	}
	if cfg.Status.Ready != true {
		t.Fatal("Expected status ready")
	}
	if cfg.Status.BootstrapData == nil {
		t.Fatal("Expected bootstrap data")
	}

	myremoteclient, _ := k.SecretsClientFactory.NewSecretsClient(nil, nil)
	l, err := myremoteclient.List(metav1.ListOptions{})
	if err != nil {
		t.Fatalf("Failed to read secrets:\n %+v", err)
	}

	if len(l.Items) != 2 {
		t.Fatalf("Expected two bootstrap tokens, saw:\n %d", len(l.Items))
	}

	// ensure that the token is refreshed...
	tokenExpires := make([][]byte, len(l.Items))

	for i, item := range l.Items {
		tokenExpires[i] = item.Data[bootstrapapi.BootstrapTokenExpirationKey]
	}

	// Wait so the refreshed expiration timestamp (RFC3339, second
	// resolution) is guaranteed to differ from the recorded one.
	<-time.After(1 * time.Second)

	for _, req := range []ctrl.Request{
		{
			NamespacedName: types.NamespacedName{
				Namespace: "default",
				Name:      "worker-join-cfg",
			},
		},
		{
			NamespacedName: types.NamespacedName{
				Namespace: "default",
				Name:      "control-plane-join-cfg",
			},
		},
	} {

		result, err := k.Reconcile(req)
		if err != nil {
			t.Fatalf("Failed to reconcile:\n %+v", err)
		}
		if result.RequeueAfter >= DefaultTokenTTL {
			t.Fatal("expected a requeue duration less than the token TTL")
		}
	}

	l, err = myremoteclient.List(metav1.ListOptions{})
	if err != nil {
		t.Fatalf("Failed to read secrets:\n %+v", err)
	}

	if len(l.Items) != 2 {
		t.Fatalf("Expected two bootstrap tokens, saw:\n %d", len(l.Items))
	}

	for i, item := range l.Items {
		if bytes.Equal(tokenExpires[i], item.Data[bootstrapapi.BootstrapTokenExpirationKey]) {
			t.Fatal("Reconcile should have refreshed bootstrap token's expiration until the infrastructure was ready")
		}
		tokenExpires[i] = item.Data[bootstrapapi.BootstrapTokenExpirationKey]
	}

	// ...until the infrastructure is marked "ready"
	workerMachine.Status.InfrastructureReady = true
	err = myclient.Update(context.Background(), workerMachine)
	if err != nil {
		t.Fatalf("unable to set machine infrastructure ready: %v", err)
	}

	controlPlaneJoinMachine.Status.InfrastructureReady = true
	err = myclient.Update(context.Background(), controlPlaneJoinMachine)
	if err != nil {
		t.Fatalf("unable to set machine infrastructure ready: %v", err)
	}

	<-time.After(1 * time.Second)

	for _, req := range []ctrl.Request{
		{
			NamespacedName: types.NamespacedName{
				Namespace: "default",
				Name:      "worker-join-cfg",
			},
		},
		{
			NamespacedName: types.NamespacedName{
				Namespace: "default",
				Name:      "control-plane-join-cfg",
			},
		},
	} {

		result, err := k.Reconcile(req)
		if err != nil {
			t.Fatalf("Failed to reconcile:\n %+v", err)
		}
		if result.Requeue {
			t.Fatal("did not expect to requeue")
		}
		if result.RequeueAfter != time.Duration(0) {
			t.Fatal("did not expect to requeue after")
		}
	}

	l, err = myremoteclient.List(metav1.ListOptions{})
	if err != nil {
		t.Fatalf("Failed to read secrets:\n %+v", err)
	}

	if len(l.Items) != 2 {
		t.Fatalf("Expected two bootstrap tokens, saw:\n %d", len(l.Items))
	}

	for i, item := range l.Items {
		if !bytes.Equal(tokenExpires[i], item.Data[bootstrapapi.BootstrapTokenExpirationKey]) {
			t.Fatal("Reconcile should have let the bootstrap token expire after the infrastructure was ready")
		}
	}
}

// Ensure the discovery portion of the JoinConfiguration gets generated correctly.
func TestKubeadmConfigReconciler_Reconcile_DisocveryReconcileBehaviors(t *testing.T) {
k := &KubeadmConfigReconciler{
Expand Down
30 changes: 27 additions & 3 deletions controllers/token.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,9 @@ import (
"sigs.k8s.io/controller-runtime/pkg/client"
)

const (
defaultTokenTTL = 10 * time.Minute
var (
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this still be a const rather than a var?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, that was a half-completed thought to make it configurable via the CLI. I've finished it, but I have no objection to tearing it back out and making it a const again.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

until we do make it configurable let's leave it as a const

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

oh can you make this change @sethp-nr ?

// DefaultTokenTTL is the amount of time a bootstrap token (and therefore a KubeadmConfig) will be valid
DefaultTokenTTL = 15 * time.Minute
)

// ClusterSecretsClientFactory support creation of secrets client for clusters
Expand Down Expand Up @@ -76,7 +77,7 @@ func createToken(client corev1.SecretInterface) (string, error) {
Data: map[string][]byte{
bootstrapapi.BootstrapTokenIDKey: []byte(tokenID),
bootstrapapi.BootstrapTokenSecretKey: []byte(tokenSecret),
bootstrapapi.BootstrapTokenExpirationKey: []byte(time.Now().UTC().Add(defaultTokenTTL).Format(time.RFC3339)),
bootstrapapi.BootstrapTokenExpirationKey: []byte(time.Now().UTC().Add(DefaultTokenTTL).Format(time.RFC3339)),
bootstrapapi.BootstrapTokenUsageSigningKey: []byte("true"),
bootstrapapi.BootstrapTokenUsageAuthentication: []byte("true"),
bootstrapapi.BootstrapTokenExtraGroupsKey: []byte("system:bootstrappers:kubeadm:default-node-token"),
Expand All @@ -89,3 +90,26 @@ func createToken(client corev1.SecretInterface) (string, error) {
}
return token, nil
}

// refreshToken extends the TTL for an existing token by rewriting the
// expiration key of its backing Secret in the workload cluster to
// now + DefaultTokenTTL.
//
// The token must match kubeadm's "<id>.<secret>" bootstrap token format; only
// the ID portion is needed to locate the Secret. It returns an error if the
// token is malformed, the Secret cannot be fetched or updated, or the Secret
// carries no data at all.
func refreshToken(client corev1.SecretInterface, token string) error {
	// FindStringSubmatch yields [full match, id, secret] on success.
	substrs := bootstraputil.BootstrapTokenRegexp.FindStringSubmatch(token)
	if len(substrs) != 3 {
		return errors.Errorf("the bootstrap token %q was not of the form %q", token, bootstrapapi.BootstrapTokenPattern)
	}
	tokenID := substrs[1]

	secretName := bootstraputil.BootstrapTokenSecretName(tokenID)
	secret, err := client.Get(secretName, metav1.GetOptions{})
	if err != nil {
		return err
	}

	if secret.Data == nil {
		return errors.Errorf("invalid bootstrap secret %q, remove the token from the kubeadm config to re-create", secretName)
	}
	// Push the expiration out; kubeadm stores it as an RFC3339 timestamp.
	secret.Data[bootstrapapi.BootstrapTokenExpirationKey] = []byte(time.Now().UTC().Add(DefaultTokenTTL).Format(time.RFC3339))

	_, err = client.Update(secret)
	return err
}
11 changes: 11 additions & 0 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,13 @@ func main() {
"The minimum interval at which watched resources are reconciled (e.g. 10m)",
)

flag.DurationVar(
&controllers.DefaultTokenTTL,
"bootstrap-token-ttl",
15*time.Minute,
"The amount of time the bootstrap token will be valid",
)

flag.StringVar(
&watchNamespace,
"namespace",
Expand All @@ -88,6 +95,10 @@ func main() {

ctrl.SetLogger(klogr.New())

if controllers.DefaultTokenTTL-syncPeriod < 1*time.Minute {
setupLog.Info("warning: the sync interval is close to the configured token TTL, tokens may expire temporarily before being refreshed")
}

mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{
Scheme: scheme,
MetricsBindAddress: metricsAddr,
Expand Down