Skip to content

Commit 3f214b9

Browse files
fabriziopandinik8s-infra-cherrypick-robot
authored and
k8s-infra-cherrypick-robot
committed
More feedback
1 parent 4945dde commit 3f214b9

File tree

6 files changed

+101
-15
lines changed

6 files changed

+101
-15
lines changed

controlplane/kubeadm/internal/control_plane.go

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -392,12 +392,12 @@ func (c *ControlPlane) InjectTestManagementCluster(managementCluster ManagementC
392392

393393
// StatusToLogKeyAndValues returns following key/value pairs describing the overall status of the control plane:
394394
// - machines is the list of KCP machines; each machine might have additional notes surfacing
395-
// - if the machine has been created in the current reconcile (new)
396-
// - if machines node is not yet (node ref not set)
397-
// - if the machine has bee marked for remediation (health check failed)
395+
// - if the machine has been created in the current reconcile
396+
// - if machines node ref is not yet set
397+
// - if the machine has been marked for remediation
398398
// - if there are unhealthy control plane component on the machine
399-
// - if the machine has a deletion timestamp/has been deleted in the current reconcile (deleting)
400-
// - if the machine is not up to date with the KCP spec (not up to date)
399+
// - if the machine has a deletion timestamp/has been deleted in the current reconcile
400+
// - if the machine is not up to date with the KCP spec
401401
//
402402
// - etcdMembers list as reported by etcd.
403403
func (c *ControlPlane) StatusToLogKeyAndValues(newMachine, deletedMachine *clusterv1.Machine) []any {
@@ -418,11 +418,11 @@ func (c *ControlPlane) StatusToLogKeyAndValues(newMachine, deletedMachine *clust
418418
notes := []string{}
419419

420420
if m.Status.NodeRef == nil {
421-
notes = append(notes, "node ref not set")
421+
notes = append(notes, "status.nodeRef not set")
422422
}
423423

424424
if c.MachinesToBeRemediatedByKCP().Has(m) {
425-
notes = append(notes, "health check failed")
425+
notes = append(notes, "marked for remediation")
426426
}
427427

428428
for _, condition := range controlPlaneMachineHealthConditions {
@@ -435,10 +435,12 @@ func (c *ControlPlane) StatusToLogKeyAndValues(newMachine, deletedMachine *clust
435435
}
436436

437437
if !c.UpToDateMachines().Has(m) {
438-
notes = append(notes, "not up to date")
438+
notes = append(notes, "not up-to-date")
439439
}
440440

441-
if !m.DeletionTimestamp.IsZero() || (deletedMachine != nil && m.Name == deletedMachine.Name) {
441+
if deletedMachine != nil && m.Name == deletedMachine.Name {
442+
notes = append(notes, "just deleted")
443+
} else if !m.DeletionTimestamp.IsZero() {
442444
notes = append(notes, "deleting")
443445
}
444446

@@ -450,7 +452,7 @@ func (c *ControlPlane) StatusToLogKeyAndValues(newMachine, deletedMachine *clust
450452
}
451453

452454
if newMachine != nil {
453-
machines = append(machines, fmt.Sprintf("%s (new)", newMachine.Name))
455+
machines = append(machines, fmt.Sprintf("%s (just created)", newMachine.Name))
454456
}
455457
sort.Strings(machines)
456458

controlplane/kubeadm/internal/control_plane_test.go

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,15 +17,18 @@ limitations under the License.
1717
package internal
1818

1919
import (
20+
"strings"
2021
"testing"
2122

23+
"github.com/google/go-cmp/cmp"
2224
. "github.com/onsi/gomega"
2325
corev1 "k8s.io/api/core/v1"
2426
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2527
"k8s.io/utils/ptr"
2628

2729
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
2830
controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1beta1"
31+
"sigs.k8s.io/cluster-api/controlplane/kubeadm/internal/etcd"
2932
"sigs.k8s.io/cluster-api/util/collections"
3033
"sigs.k8s.io/cluster-api/util/conditions"
3134
)
@@ -252,6 +255,75 @@ func TestHasHealthyMachineStillProvisioning(t *testing.T) {
252255
})
253256
}
254257

258+
func TestStatusToLogKeyAndValues(t *testing.T) {
259+
healthyMachine := &clusterv1.Machine{
260+
ObjectMeta: metav1.ObjectMeta{Name: "healthy"},
261+
Status: clusterv1.MachineStatus{
262+
NodeRef: &corev1.ObjectReference{Name: "healthy-node"},
263+
Conditions: []clusterv1.Condition{
264+
{Type: controlplanev1.MachineAPIServerPodHealthyCondition, Status: corev1.ConditionTrue},
265+
{Type: controlplanev1.MachineControllerManagerPodHealthyCondition, Status: corev1.ConditionTrue},
266+
{Type: controlplanev1.MachineSchedulerPodHealthyCondition, Status: corev1.ConditionTrue},
267+
{Type: controlplanev1.MachineEtcdPodHealthyCondition, Status: corev1.ConditionTrue},
268+
{Type: controlplanev1.MachineEtcdMemberHealthyCondition, Status: corev1.ConditionTrue},
269+
},
270+
},
271+
}
272+
273+
machineWithoutNode := &clusterv1.Machine{
274+
ObjectMeta: metav1.ObjectMeta{Name: "without-node"},
275+
Status: clusterv1.MachineStatus{
276+
NodeRef: nil,
277+
Conditions: []clusterv1.Condition{
278+
{Type: controlplanev1.MachineAPIServerPodHealthyCondition, Status: corev1.ConditionUnknown},
279+
{Type: controlplanev1.MachineControllerManagerPodHealthyCondition, Status: corev1.ConditionUnknown},
280+
{Type: controlplanev1.MachineSchedulerPodHealthyCondition, Status: corev1.ConditionUnknown},
281+
{Type: controlplanev1.MachineEtcdPodHealthyCondition, Status: corev1.ConditionUnknown},
282+
{Type: controlplanev1.MachineEtcdMemberHealthyCondition, Status: corev1.ConditionFalse}, // not a real use case, but used to test a code branch.
283+
},
284+
},
285+
}
286+
287+
machineJustCreated := &clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "just-created"}}
288+
289+
machineJustDeleted := healthyMachine.DeepCopy()
290+
machineJustDeleted.Name = "just-deleted"
291+
292+
machineNotUpToDate := healthyMachine.DeepCopy()
293+
machineNotUpToDate.Name = "not-up-to-date"
294+
295+
machineMarkedForRemediation := healthyMachine.DeepCopy()
296+
machineMarkedForRemediation.Name = "marked-for-remediation"
297+
machineMarkedForRemediation.Status.Conditions = append(machineMarkedForRemediation.Status.Conditions,
298+
clusterv1.Condition{Type: clusterv1.MachineHealthCheckSucceededCondition, Status: corev1.ConditionFalse},
299+
clusterv1.Condition{Type: clusterv1.MachineOwnerRemediatedCondition, Status: corev1.ConditionFalse},
300+
)
301+
302+
g := NewWithT(t)
303+
c := &ControlPlane{
304+
KCP: &controlplanev1.KubeadmControlPlane{},
305+
Machines: collections.FromMachines(healthyMachine, machineWithoutNode, machineJustDeleted, machineNotUpToDate, machineMarkedForRemediation),
306+
machinesNotUptoDate: collections.FromMachines(machineNotUpToDate),
307+
EtcdMembers: []*etcd.Member{{Name: "m1"}, {Name: "m2"}, {Name: "m3"}},
308+
}
309+
310+
got := c.StatusToLogKeyAndValues(machineJustCreated, machineJustDeleted)
311+
312+
g.Expect(got).To((HaveLen(4)))
313+
g.Expect(got[0]).To(Equal("machines"))
314+
machines := strings.Join([]string{
315+
"healthy",
316+
"just-created (just created)",
317+
"just-deleted (just deleted)",
318+
"marked-for-remediation (marked for remediation)",
319+
"not-up-to-date (not up-to-date)",
320+
"without-node (status.nodeRef not set, APIServerPod health unknown, ControllerManagerPod health unknown, SchedulerPod health unknown, EtcdPod health unknown, EtcdMember not healthy)",
321+
}, ", ")
322+
g.Expect(got[1]).To(Equal(machines), cmp.Diff(got[1], machines))
323+
g.Expect(got[2]).To(Equal("etcdMembers"))
324+
g.Expect(got[3]).To(Equal("m1, m2, m3"))
325+
}
326+
255327
type machineOpt func(*clusterv1.Machine)
256328

257329
func failureDomain(controlPlane bool) clusterv1.FailureDomainSpec {

controlplane/kubeadm/internal/controllers/controller.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -672,11 +672,13 @@ func (r *KubeadmControlPlaneReconciler) reconcileDelete(ctx context.Context, con
672672
continue
673673
}
674674

675-
log.WithValues(controlPlane.StatusToLogKeyAndValues(nil, machineToDelete)...).
676-
Info("Deleting Machine (KCP deleted)")
677675
if err := r.Client.Delete(ctx, machineToDelete); err != nil && !apierrors.IsNotFound(err) {
678676
errs = append(errs, errors.Wrapf(err, "failed to delete control plane Machine %s", klog.KObj(machineToDelete)))
679677
}
678+
// Note: We intentionally log after Delete because we want this log line to show up only after DeletionTimestamp has been set.
679+
// Also, setting DeletionTimestamp doesn't mean the Machine is actually deleted (deletion takes some time).
680+
log.WithValues(controlPlane.StatusToLogKeyAndValues(nil, machineToDelete)...).
681+
Info("Deleting Machine (KubeadmControlPlane deleted)")
680682
}
681683
if len(errs) > 0 {
682684
err := kerrors.NewAggregate(errs)

controlplane/kubeadm/internal/controllers/remediation.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -311,6 +311,8 @@ func (r *KubeadmControlPlaneReconciler) reconcileUnhealthyMachines(ctx context.C
311311
}
312312

313313
// Surface the operation is in progress.
314+
// Note: We intentionally log after Delete because we want this log line to show up only after DeletionTimestamp has been set.
315+
// Also, setting DeletionTimestamp doesn't mean the Machine is actually deleted (deletion takes some time).
314316
log.WithValues(controlPlane.StatusToLogKeyAndValues(nil, machineToBeRemediated)...).
315317
Info("Deleting Machine (remediating unhealthy Machine)")
316318
conditions.MarkFalse(machineToBeRemediated, clusterv1.MachineOwnerRemediatedCondition, clusterv1.RemediationInProgressReason, clusterv1.ConditionSeverityWarning, "")

controlplane/kubeadm/internal/controllers/scale.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,8 @@ func (r *KubeadmControlPlaneReconciler) scaleDownControlPlane(
158158
"Failed to delete control plane Machine %s for cluster %s control plane: %v", machineToDelete.Name, klog.KObj(controlPlane.Cluster), err)
159159
return ctrl.Result{}, err
160160
}
161+
// Note: We intentionally log after Delete because we want this log line to show up only after DeletionTimestamp has been set.
162+
// Also, setting DeletionTimestamp doesn't mean the Machine is actually deleted (deletion takes some time).
161163
logger.WithValues(controlPlane.StatusToLogKeyAndValues(nil, machineToDelete)...).
162164
Info("Deleting Machine (scale down)", "Machine", klog.KObj(machineToDelete))
163165

internal/controllers/machineset/machineset_controller.go

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -427,10 +427,12 @@ func (r *Reconciler) reconcileDelete(ctx context.Context, s *scope) (ctrl.Result
427427
// else delete owned machines.
428428
for _, machine := range machineList {
429429
if machine.DeletionTimestamp.IsZero() {
430-
log.Info("Deleting Machine (MachineSet deleted)", "Machine", klog.KObj(machine))
431430
if err := r.Client.Delete(ctx, machine); err != nil && !apierrors.IsNotFound(err) {
432431
return ctrl.Result{}, errors.Wrapf(err, "failed to delete Machine %s", klog.KObj(machine))
433432
}
433+
// Note: We intentionally log after Delete because we want this log line to show up only after DeletionTimestamp has been set.
434+
// Also, setting DeletionTimestamp doesn't mean the Machine is actually deleted (deletion takes some time).
435+
log.Info("Deleting Machine (MachineSet deleted)", "Machine", klog.KObj(machine))
434436
}
435437
}
436438

@@ -816,13 +818,15 @@ func (r *Reconciler) syncReplicas(ctx context.Context, s *scope) (ctrl.Result, e
816818
for i, machine := range machinesToDelete {
817819
log := log.WithValues("Machine", klog.KObj(machine))
818820
if machine.GetDeletionTimestamp().IsZero() {
819-
log.Info(fmt.Sprintf("Deleting Machine (scale down, deleting %d of %d)", i+1, diff))
820821
if err := r.Client.Delete(ctx, machine); err != nil {
821822
log.Error(err, "Unable to delete Machine")
822823
r.recorder.Eventf(ms, corev1.EventTypeWarning, "FailedDelete", "Failed to delete machine %q: %v", machine.Name, err)
823824
errs = append(errs, err)
824825
continue
825826
}
827+
// Note: We intentionally log after Delete because we want this log line to show up only after DeletionTimestamp has been set.
828+
// Also, setting DeletionTimestamp doesn't mean the Machine is actually deleted (deletion takes some time).
829+
log.Info(fmt.Sprintf("Deleting Machine (scale down, deleting %d of %d)", i+1, diff))
826830
r.recorder.Eventf(ms, corev1.EventTypeNormal, "SuccessfulDelete", "Deleted machine %q", machine.Name)
827831
} else {
828832
log.Info(fmt.Sprintf("Waiting for Machine to be deleted (scale down, deleting %d of %d)", i+1, diff))
@@ -1492,10 +1496,12 @@ func (r *Reconciler) reconcileUnhealthyMachines(ctx context.Context, s *scope) (
14921496
}
14931497
var errs []error
14941498
for _, m := range machinesToRemediate {
1495-
log.Info("Deleting Machine (remediating unhealthy Machine)", "Machine", klog.KObj(m))
14961499
if err := r.Client.Delete(ctx, m); err != nil && !apierrors.IsNotFound(err) {
14971500
errs = append(errs, errors.Wrapf(err, "failed to delete Machine %s", klog.KObj(m)))
14981501
}
1502+
// Note: We intentionally log after Delete because we want this log line to show up only after DeletionTimestamp has been set.
1503+
// Also, setting DeletionTimestamp doesn't mean the Machine is actually deleted (deletion takes some time).
1504+
log.Info("Deleting Machine (remediating unhealthy Machine)", "Machine", klog.KObj(m))
14991505
}
15001506
if len(errs) > 0 {
15011507
return ctrl.Result{}, errors.Wrapf(kerrors.NewAggregate(errs), "failed to delete unhealthy Machines")

0 commit comments

Comments
 (0)