Skip to content

Commit e48409b

Browse files
authored
Merge pull request #11941 from sbueringer/pr-ms-preflight-check
✨ Add ControlPlaneVersionSkew MS preflight check & preflight check cmd line flag
2 parents caabe3e + 2d061b8 commit e48409b

File tree

12 files changed

+310
-22
lines changed

12 files changed

+310
-22
lines changed

api/v1beta1/common_types.go

+13-5
Original file line numberDiff line numberDiff line change
@@ -218,27 +218,35 @@ const (
218218

219219
// MachineSetPreflightCheckKubeadmVersionSkew is the name of the preflight check
220220
// that verifies if the machine being created or remediated for the MachineSet conforms to the kubeadm version
221-
// skew policy that requires the machine to be at the same version as the control plane.
222-
// Note: This is a stopgap while the root cause of the problem is fixed in kubeadm; this check will become
223-
// a no-op when this check will be available in kubeadm, and then eventually be dropped when all the
224-
// supported Kuberenetes/kubeadm versions have implemented the fix.
221+
// skew policy that requires the machine to be at the same minor version as the control plane.
225222
// The preflight check is only run if a ControlPlane is used (controlPlaneRef must exist in the Cluster),
226223
// the ControlPlane has a version, the MachineSet has a version and the MachineSet uses the Kubeadm bootstrap
227224
// provider.
228225
MachineSetPreflightCheckKubeadmVersionSkew MachineSetPreflightCheck = "KubeadmVersionSkew"
229226

230227
// MachineSetPreflightCheckKubernetesVersionSkew is the name of the preflight check that verifies
231228
// if the machines being created or remediated for the MachineSet conform to the Kubernetes version skew policy
232-
// that requires the machines to be at a version that is not more than 2 minor lower than the ControlPlane version.
229+
// that requires the machines to be at a version that is not more than 2 (< v1.28) or 3 (>= v1.28) minor versions
230+
// lower than the ControlPlane version.
233231
// The preflight check is only run if a ControlPlane is used (controlPlaneRef must exist in the Cluster),
234232
// the ControlPlane has a version and the MachineSet has a version.
235233
MachineSetPreflightCheckKubernetesVersionSkew MachineSetPreflightCheck = "KubernetesVersionSkew"
236234

237235
// MachineSetPreflightCheckControlPlaneIsStable is the name of the preflight check
238236
// that verifies if the control plane is not provisioning and not upgrading.
237+
// For Clusters with a managed topology it also checks if a control plane upgrade is pending.
239238
// The preflight check is only run if a ControlPlane is used (controlPlaneRef must exist in the Cluster)
240239
// and the ControlPlane has a version.
241240
MachineSetPreflightCheckControlPlaneIsStable MachineSetPreflightCheck = "ControlPlaneIsStable"
241+
242+
// MachineSetPreflightCheckControlPlaneVersionSkew is the name of the preflight check
243+
// that verifies if the machine being created or remediated for the MachineSet has exactly the same version
244+
// as the control plane.
245+
// The idea behind this check is that it doesn't make sense to create a Machine with an old version, if we already
246+
// know based on the control plane version that the Machine has to be replaced soon.
247+
// The preflight check is only run if the Cluster has a managed topology, a ControlPlane is used (controlPlaneRef
248+
// must exist in the Cluster), the ControlPlane has a version and the MachineSet has a version.
249+
MachineSetPreflightCheckControlPlaneVersionSkew MachineSetPreflightCheck = "ControlPlaneVersionSkew"
242250
)
243251

244252
// NodeOutdatedRevisionTaint can be added to Nodes at rolling updates in general triggered by updating MachineDeployment

bootstrap/kubeadm/main.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,7 @@ func InitFlags(fs *pflag.FlagSet) {
144144
fs.IntVar(&kubeadmConfigConcurrency, "kubeadmconfig-concurrency", 10,
145145
"Number of kubeadm configs to process simultaneously")
146146

147-
fs.StringArrayVar(&skipCRDMigrationPhases, "skip-crd-migration-phases", []string{},
147+
fs.StringSliceVar(&skipCRDMigrationPhases, "skip-crd-migration-phases", []string{},
148148
"List of CRD migration phases to skip. Valid values are: StorageVersionMigration, CleanupManagedFields.")
149149

150150
fs.DurationVar(&syncPeriod, "sync-period", 10*time.Minute,

controllers/alias.go

+5
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,12 @@ import (
2121
"regexp"
2222
"time"
2323

24+
"k8s.io/apimachinery/pkg/util/sets"
2425
ctrl "sigs.k8s.io/controller-runtime"
2526
"sigs.k8s.io/controller-runtime/pkg/client"
2627
"sigs.k8s.io/controller-runtime/pkg/controller"
2728

29+
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
2830
"sigs.k8s.io/cluster-api/controllers/clustercache"
2931
runtimeclient "sigs.k8s.io/cluster-api/exp/runtime/client"
3032
clustercontroller "sigs.k8s.io/cluster-api/internal/controllers/cluster"
@@ -94,6 +96,8 @@ type MachineSetReconciler struct {
9496
APIReader client.Reader
9597
ClusterCache clustercache.ClusterCache
9698

99+
PreflightChecks sets.Set[clusterv1.MachineSetPreflightCheck]
100+
97101
// WatchFilterValue is the label value used to filter events prior to reconciliation.
98102
WatchFilterValue string
99103
}
@@ -103,6 +107,7 @@ func (r *MachineSetReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Ma
103107
Client: r.Client,
104108
APIReader: r.APIReader,
105109
ClusterCache: r.ClusterCache,
110+
PreflightChecks: r.PreflightChecks,
106111
WatchFilterValue: r.WatchFilterValue,
107112
}).SetupWithManager(ctx, mgr, options)
108113
}

controlplane/kubeadm/main.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,7 @@ func InitFlags(fs *pflag.FlagSet) {
148148
fs.IntVar(&kubeadmControlPlaneConcurrency, "kubeadmcontrolplane-concurrency", 10,
149149
"Number of kubeadm control planes to process simultaneously")
150150

151-
fs.StringArrayVar(&skipCRDMigrationPhases, "skip-crd-migration-phases", []string{},
151+
fs.StringSliceVar(&skipCRDMigrationPhases, "skip-crd-migration-phases", []string{},
152152
"List of CRD migration phases to skip. Valid values are: StorageVersionMigration, CleanupManagedFields.")
153153

154154
fs.IntVar(&clusterCacheConcurrency, "clustercache-concurrency", 100,

docs/book/src/tasks/experimental-features/machineset-preflight-checks.md

+20-5
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,8 @@ Enabling `MachineSetPreflightChecks` provides safety in such circumstances by ma
1818

1919
### `ControlPlaneIsStable`
2020

21-
* This preflight check ensures that the ControlPlane is currently stable i.e. the ControlPlane is currently neither provisioning, upgrading nor pending an upgrade.
21+
* This preflight check ensures that the ControlPlane is currently stable, i.e. the ControlPlane is currently neither provisioning nor upgrading.
22+
* For Clusters with a managed topology it also checks if a control plane upgrade is pending.
2223
* This preflight check is only performed if:
2324
* The Cluster uses a ControlPlane provider.
2425
* ControlPlane version is defined (`ControlPlane.spec.version` is set).
@@ -40,11 +41,25 @@ Enabling `MachineSetPreflightChecks` provides safety in such circumstances by ma
4041
* MachineSet version is defined (`MachineSet.spec.template.spec.version` is set).
4142
* MachineSet uses the `Kubeadm` Bootstrap provider.
4243

43-
## Opting out of PreflightChecks
44+
### `ControlPlaneVersionSkew`
4445

45-
Once the feature flag is enabled the preflight checks are enabled for all the MachineSets including new and existing MachineSets.
46-
It is possible to opt-out of one or all of the preflight checks on a per MachineSet basis by specifying a comma-separated list of the preflight checks on the
47-
`machineset.cluster.x-k8s.io/skip-preflight-checks` annotation on the MachineSet.
46+
* This preflight check ensures that the MachineSet and the ControlPlane have the same version. The idea behind this
47+
check is that it doesn't make sense to create a Machine with an old version, if we already know based on the control
48+
plane version that the Machine has to be replaced soon.
49+
* This preflight check is only performed if:
50+
* The Cluster has a managed topology.
51+
* The Cluster uses a ControlPlane provider.
52+
* ControlPlane version is defined (`ControlPlane.spec.version` is set).
53+
* MachineSet version is defined (`MachineSet.spec.template.spec.version` is set).
54+
55+
## Configuring MachineSet PreflightChecks
56+
57+
By default all preflight checks are enabled for all MachineSets including new and existing MachineSets.
58+
The enabled preflight checks can be overridden with the `--machineset-preflight-checks` command-line flag.
59+
60+
It is also possible to opt out of one or all of the preflight checks on a per MachineSet basis by specifying a
61+
comma-separated list of the preflight checks via the `machineset.cluster.x-k8s.io/skip-preflight-checks` annotation
62+
on the MachineSet.
4863

4964
Examples:
5065
* To opt out of all the preflight checks set the `machineset.cluster.x-k8s.io/skip-preflight-checks: All` annotation.

internal/controllers/machineset/machineset_controller.go

+2
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,8 @@ type Reconciler struct {
9595
APIReader client.Reader
9696
ClusterCache clustercache.ClusterCache
9797

98+
PreflightChecks sets.Set[clusterv1.MachineSetPreflightCheck]
99+
98100
// WatchFilterValue is the label value used to filter events prior to reconciliation.
99101
WatchFilterValue string
100102

internal/controllers/machineset/machineset_controller_test.go

+5-2
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ import (
3333
"k8s.io/apimachinery/pkg/runtime"
3434
"k8s.io/apimachinery/pkg/runtime/schema"
3535
"k8s.io/apimachinery/pkg/util/intstr"
36+
"k8s.io/apimachinery/pkg/util/sets"
3637
"k8s.io/client-go/tools/record"
3738
"k8s.io/utils/ptr"
3839
"sigs.k8s.io/controller-runtime/pkg/client"
@@ -1711,7 +1712,8 @@ func TestMachineSetReconciler_reconcileUnhealthyMachines(t *testing.T) {
17111712
machines := []*clusterv1.Machine{unhealthyMachine, healthyMachine}
17121713
fakeClient := fake.NewClientBuilder().WithObjects(controlPlaneUpgrading, unhealthyMachine, healthyMachine).WithStatusSubresource(&clusterv1.Machine{}).Build()
17131714
r := &Reconciler{
1714-
Client: fakeClient,
1715+
Client: fakeClient,
1716+
PreflightChecks: sets.Set[clusterv1.MachineSetPreflightCheck]{}.Insert(clusterv1.MachineSetPreflightCheckAll),
17151717
}
17161718
s := &scope{
17171719
cluster: cluster,
@@ -2324,7 +2326,8 @@ func TestMachineSetReconciler_syncReplicas(t *testing.T) {
23242326

23252327
fakeClient := fake.NewClientBuilder().WithObjects(controlPlaneUpgrading, machineSet).WithStatusSubresource(&clusterv1.MachineSet{}).Build()
23262328
r := &Reconciler{
2327-
Client: fakeClient,
2329+
Client: fakeClient,
2330+
PreflightChecks: sets.Set[clusterv1.MachineSetPreflightCheck]{}.Insert(clusterv1.MachineSetPreflightCheckAll),
23282331
}
23292332
s := &scope{
23302333
cluster: cluster,

internal/controllers/machineset/machineset_preflight.go

+26-4
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ func (r *Reconciler) runPreflightChecks(ctx context.Context, cluster *clusterv1.
5656

5757
skipped := skippedPreflightChecks(ms)
5858
// If all the preflight checks are skipped then return early.
59-
if skipped.Has(clusterv1.MachineSetPreflightCheckAll) {
59+
if len(r.PreflightChecks) == 0 || skipped.Has(clusterv1.MachineSetPreflightCheckAll) {
6060
return nil, nil
6161
}
6262

@@ -90,7 +90,7 @@ func (r *Reconciler) runPreflightChecks(ctx context.Context, cluster *clusterv1.
9090
errList := []error{}
9191
preflightCheckErrs := []preflightCheckErrorMessage{}
9292
// Run the control-plane-stable preflight check.
93-
if !skipped.Has(clusterv1.MachineSetPreflightCheckControlPlaneIsStable) {
93+
if shouldRun(r.PreflightChecks, skipped, clusterv1.MachineSetPreflightCheckControlPlaneIsStable) {
9494
preflightCheckErr, err := r.controlPlaneStablePreflightCheck(controlPlane, cluster, *cpVersion)
9595
if err != nil {
9696
errList = append(errList, err)
@@ -109,15 +109,15 @@ func (r *Reconciler) runPreflightChecks(ctx context.Context, cluster *clusterv1.
109109
}
110110

111111
// Run the kubernetes-version skew preflight check.
112-
if !skipped.Has(clusterv1.MachineSetPreflightCheckKubernetesVersionSkew) {
112+
if shouldRun(r.PreflightChecks, skipped, clusterv1.MachineSetPreflightCheckKubernetesVersionSkew) {
113113
preflightCheckErr := r.kubernetesVersionPreflightCheck(cpSemver, msSemver)
114114
if preflightCheckErr != nil {
115115
preflightCheckErrs = append(preflightCheckErrs, preflightCheckErr)
116116
}
117117
}
118118

119119
// Run the kubeadm-version skew preflight check.
120-
if !skipped.Has(clusterv1.MachineSetPreflightCheckKubeadmVersionSkew) {
120+
if shouldRun(r.PreflightChecks, skipped, clusterv1.MachineSetPreflightCheckKubeadmVersionSkew) {
121121
preflightCheckErr, err := r.kubeadmVersionPreflightCheck(cpSemver, msSemver, ms)
122122
if err != nil {
123123
errList = append(errList, err)
@@ -126,6 +126,13 @@ func (r *Reconciler) runPreflightChecks(ctx context.Context, cluster *clusterv1.
126126
preflightCheckErrs = append(preflightCheckErrs, preflightCheckErr)
127127
}
128128
}
129+
130+
// Run the control plane version skew preflight check.
131+
if shouldRun(r.PreflightChecks, skipped, clusterv1.MachineSetPreflightCheckControlPlaneVersionSkew) {
132+
if preflightCheckErr := r.controlPlaneVersionPreflightCheck(cluster, *cpVersion, msVersion); preflightCheckErr != nil {
133+
preflightCheckErrs = append(preflightCheckErrs, preflightCheckErr)
134+
}
135+
}
129136
}
130137

131138
if len(errList) > 0 {
@@ -142,6 +149,11 @@ func (r *Reconciler) runPreflightChecks(ctx context.Context, cluster *clusterv1.
142149
return nil, nil
143150
}
144151

152+
// shouldRun reports whether preflightCheck has to be executed.
// A check runs only if it is enabled, i.e. preflightChecks contains either
// clusterv1.MachineSetPreflightCheckAll or preflightCheck itself, and it is not skipped,
// i.e. skippedPreflightChecks contains neither clusterv1.MachineSetPreflightCheckAll
// nor preflightCheck. Being skipped always wins over being enabled.
func shouldRun(preflightChecks, skippedPreflightChecks sets.Set[clusterv1.MachineSetPreflightCheck], preflightCheck clusterv1.MachineSetPreflightCheck) bool {
153+
return (preflightChecks.Has(clusterv1.MachineSetPreflightCheckAll) || preflightChecks.Has(preflightCheck)) &&
154+
!(skippedPreflightChecks.Has(clusterv1.MachineSetPreflightCheckAll) || skippedPreflightChecks.Has(preflightCheck))
155+
}
156+
145157
func (r *Reconciler) controlPlaneStablePreflightCheck(controlPlane *unstructured.Unstructured, cluster *clusterv1.Cluster, controlPlaneVersion string) (preflightCheckErrorMessage, error) {
146158
cpKlogRef := klog.KRef(controlPlane.GetNamespace(), controlPlane.GetName())
147159

@@ -216,6 +228,16 @@ func (r *Reconciler) kubeadmVersionPreflightCheck(cpSemver, msSemver semver.Vers
216228
return nil, nil
217229
}
218230

231+
// controlPlaneVersionPreflightCheck returns a preflight check error message if the MachineSet version
// differs from the ControlPlane version, and nil otherwise.
// The comparison is only performed when the ClusterTopology feature gate is enabled and the Cluster
// has a topology (cluster.Spec.Topology is set); in all other cases the check passes.
// cpVersion and msVersion are compared as plain strings, so they have to match exactly.
// NOTE(review): preflightCheckErrorMessage appears to be a *string (the value is built via ptr.To) — confirm.
func (r *Reconciler) controlPlaneVersionPreflightCheck(cluster *clusterv1.Cluster, cpVersion, msVersion string) preflightCheckErrorMessage {
232+
if feature.Gates.Enabled(feature.ClusterTopology) && cluster.Spec.Topology != nil {
233+
if cpVersion != msVersion {
234+
return ptr.To(fmt.Sprintf("MachineSet version (%s) is not yet the same as the ControlPlane version (%s), waiting for version to be propagated to the MachineSet (%q preflight check failed)", msVersion, cpVersion, clusterv1.MachineSetPreflightCheckControlPlaneVersionSkew))
235+
}
236+
}
237+
238+
// Versions match, or the Cluster is not using a managed topology: nothing to report.
return nil
239+
}
240+
219241
func skippedPreflightChecks(ms *clusterv1.MachineSet) sets.Set[clusterv1.MachineSetPreflightCheck] {
220242
skipped := sets.Set[clusterv1.MachineSetPreflightCheck]{}
221243
if ms == nil {

0 commit comments

Comments
 (0)