
Commit 1b69b02

[release-1.9] 🌱 Improve KCP scale up when using failure domains (#11604)
* Improve KCP scale up when using failure domains
* Address comments
* Address feedback

Co-authored-by: fabriziopandini <[email protected]>
1 parent 2974524 commit 1b69b02

4 files changed: +822 -74 lines changed


controlplane/kubeadm/internal/control_plane.go

Lines changed: 17 additions & 10 deletions
@@ -153,9 +153,10 @@ func (c *ControlPlane) FailureDomains() clusterv1.FailureDomains {
 }
 
 // MachineInFailureDomainWithMostMachines returns the first matching failure domain with machines that has the most control-plane machines on it.
-func (c *ControlPlane) MachineInFailureDomainWithMostMachines(ctx context.Context, machines collections.Machines) (*clusterv1.Machine, error) {
-	fd := c.FailureDomainWithMostMachines(ctx, machines)
-	machinesInFailureDomain := machines.Filter(collections.InFailureDomains(fd))
+// Note: if there are eligible machines in failure domains that do not exist anymore, getting rid of those machines takes precedence.
+func (c *ControlPlane) MachineInFailureDomainWithMostMachines(ctx context.Context, eligibleMachines collections.Machines) (*clusterv1.Machine, error) {
+	fd := c.FailureDomainWithMostMachines(ctx, eligibleMachines)
+	machinesInFailureDomain := eligibleMachines.Filter(collections.InFailureDomains(fd))
 	machineToMark := machinesInFailureDomain.Oldest()
 	if machineToMark == nil {
 		return nil, errors.New("failed to pick control plane Machine to mark for deletion")
@@ -171,11 +172,11 @@ func (c *ControlPlane) MachineWithDeleteAnnotation(machines collections.Machines
 	return annotatedMachines
 }
 
-// FailureDomainWithMostMachines returns a fd which exists both in machines and control-plane machines and has the most
-// control-plane machines on it.
-func (c *ControlPlane) FailureDomainWithMostMachines(ctx context.Context, machines collections.Machines) *string {
+// FailureDomainWithMostMachines returns the fd with the most machines in it and at least one eligible machine in it.
+// Note: if there are eligible machines in failure domains that do not exist anymore, cleaning up those failure domains takes precedence.
+func (c *ControlPlane) FailureDomainWithMostMachines(ctx context.Context, eligibleMachines collections.Machines) *string {
 	// See if there are any Machines that are not in currently defined failure domains first.
-	notInFailureDomains := machines.Filter(
+	notInFailureDomains := eligibleMachines.Filter(
 		collections.Not(collections.InFailureDomains(c.FailureDomains().FilterControlPlane().GetIDs()...)),
 	)
 	if len(notInFailureDomains) > 0 {
@@ -184,15 +185,21 @@ func (c *ControlPlane) FailureDomainWithMostMachines(ctx context.Context, machin
 		// in the cluster status.
 		return notInFailureDomains.Oldest().Spec.FailureDomain
 	}
-	return failuredomains.PickMost(ctx, c.Cluster.Status.FailureDomains.FilterControlPlane(), c.Machines, machines)
+
+	// Pick the failure domain with the most machines in it and at least one eligible machine in it.
+	return failuredomains.PickMost(ctx, c.Cluster.Status.FailureDomains.FilterControlPlane(), c.Machines, eligibleMachines)
 }
 
-// NextFailureDomainForScaleUp returns the failure domain with the fewest number of up-to-date, not deleted machines.
+// NextFailureDomainForScaleUp returns the failure domain with the fewest number of up-to-date, not deleted machines
+// (the ultimate goal is to achieve the ideal spreading of machines at stable state, when only up-to-date machines will exist).
+//
+// In case of a tie (multiple failure domains with the same number of up-to-date, not deleted machines), the failure domain with the fewest
+// machines overall is picked to ensure better spreading of machines while the rollout is performed.
 func (c *ControlPlane) NextFailureDomainForScaleUp(ctx context.Context) (*string, error) {
 	if len(c.Cluster.Status.FailureDomains.FilterControlPlane()) == 0 {
 		return nil, nil
 	}
-	return failuredomains.PickFewest(ctx, c.FailureDomains().FilterControlPlane(), c.UpToDateMachines().Filter(collections.Not(collections.HasDeletionTimestamp))), nil
+	return failuredomains.PickFewest(ctx, c.FailureDomains().FilterControlPlane(), c.Machines, c.UpToDateMachines().Filter(collections.Not(collections.HasDeletionTimestamp))), nil
 }
 
 // InitialControlPlaneConfig returns a new KubeadmConfigSpec that is to be used for an initializing control plane.
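
The behavioral change on the scale-up path is easiest to see in isolation: `PickFewest` now receives all machines in addition to the up-to-date ones, so a tie on the up-to-date count is broken by the overall count. Below is a minimal, self-contained Go sketch of that two-level preference; `fdCounts` and `pickFewest` are illustrative stand-ins, not part of the cluster-api codebase.

```go
package main

import (
	"fmt"
	"sort"
)

// fdCounts stands in for the per-failure-domain data PickFewest now works on:
// the number of up-to-date, not deleted machines and the number of machines overall.
type fdCounts struct {
	id       string
	upToDate int
	all      int
}

// pickFewest mirrors the new two-level preference: fewest up-to-date machines
// first, fewest machines overall as the tie-breaker.
func pickFewest(fds []fdCounts) string {
	sort.Slice(fds, func(i, j int) bool {
		if fds[i].upToDate != fds[j].upToDate {
			return fds[i].upToDate < fds[j].upToDate
		}
		return fds[i].all < fds[j].all
	})
	return fds[0].id
}

func main() {
	// Mid-rollout: "a" and "b" each host one up-to-date machine, but "a" also
	// still hosts an outdated machine, so the next machine goes to "b".
	fds := []fdCounts{
		{id: "a", upToDate: 1, all: 2},
		{id: "b", upToDate: 1, all: 1},
		{id: "c", upToDate: 2, all: 2},
	}
	fmt.Println(pickFewest(fds)) // Output: b
}
```

Under the old single-count logic, "a" and "b" tie on one up-to-date machine each and the pick between them is effectively random; the new tie-breaker sends the next machine to "b", which has fewer machines overall.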

controlplane/kubeadm/internal/controllers/scale.go

Lines changed: 21 additions & 7 deletions
@@ -248,17 +248,31 @@ func preflightCheckCondition(kind string, obj conditions.Getter, condition clust
 	return nil
 }
 
+// selectMachineForScaleDown selects a machine candidate for scaling down. The selection is a two-phase process:
+//
+// In the first phase it selects a subset of machines eligible for deletion:
+// - if there are outdated machines with the delete machine annotation, use them as the eligible subset (priority to user requests, part 1)
+// - if there are machines (also not outdated) with the delete machine annotation, use them (priority to user requests, part 2)
+// - if there are outdated machines with unhealthy control plane components, use them (priority to restoring control plane health)
+// - if there are outdated machines, consider all the outdated machines as the eligible subset (rollout)
+// - otherwise consider all the machines
+//
+// Once the subset of machines eligible for deletion is identified, one machine is picked out of this subset by
+// selecting the machine in the failure domain with the most machines (including both eligible and not eligible machines).
 func selectMachineForScaleDown(ctx context.Context, controlPlane *internal.ControlPlane, outdatedMachines collections.Machines) (*clusterv1.Machine, error) {
-	machines := controlPlane.Machines
+	// Select the subset of machines eligible for scale down.
+	eligibleMachines := controlPlane.Machines
 	switch {
 	case controlPlane.MachineWithDeleteAnnotation(outdatedMachines).Len() > 0:
-		machines = controlPlane.MachineWithDeleteAnnotation(outdatedMachines)
-	case controlPlane.MachineWithDeleteAnnotation(machines).Len() > 0:
-		machines = controlPlane.MachineWithDeleteAnnotation(machines)
+		eligibleMachines = controlPlane.MachineWithDeleteAnnotation(outdatedMachines)
+	case controlPlane.MachineWithDeleteAnnotation(eligibleMachines).Len() > 0:
+		eligibleMachines = controlPlane.MachineWithDeleteAnnotation(eligibleMachines)
 	case controlPlane.UnhealthyMachinesWithUnhealthyControlPlaneComponents(outdatedMachines).Len() > 0:
-		machines = controlPlane.UnhealthyMachinesWithUnhealthyControlPlaneComponents(outdatedMachines)
+		eligibleMachines = controlPlane.UnhealthyMachinesWithUnhealthyControlPlaneComponents(outdatedMachines)
 	case outdatedMachines.Len() > 0:
-		machines = outdatedMachines
+		eligibleMachines = outdatedMachines
 	}
-	return controlPlane.MachineInFailureDomainWithMostMachines(ctx, machines)
+
+	// Pick an eligible machine from the failure domain with the most machines in it (including both eligible and not eligible machines).
+	return controlPlane.MachineInFailureDomainWithMostMachines(ctx, eligibleMachines)
 }
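
To make the precedence order of the two-phase selection concrete, here is a minimal, self-contained sketch of the eligibility cascade. Plain string sets stand in for `collections.Machines` and for the annotation and health lookups; all names here are illustrative.

```go
package main

import "fmt"

// machineSet is a stand-in for cluster-api's collections.Machines, used only
// to illustrate the selection cascade.
type machineSet map[string]bool

// withDeleteAnnotation returns the members of s that carry the delete
// annotation, mirroring controlPlane.MachineWithDeleteAnnotation(...).
func withDeleteAnnotation(s, annotated machineSet) machineSet {
	out := machineSet{}
	for m := range s {
		if annotated[m] {
			out[m] = true
		}
	}
	return out
}

// eligibleForScaleDown mirrors the switch in selectMachineForScaleDown:
// each case narrows the eligible subset, in priority order.
func eligibleForScaleDown(all, outdated, annotated, unhealthyOutdated machineSet) machineSet {
	eligible := all
	switch {
	case len(withDeleteAnnotation(outdated, annotated)) > 0: // user request on an outdated machine wins
		eligible = withDeleteAnnotation(outdated, annotated)
	case len(withDeleteAnnotation(all, annotated)) > 0: // then any user request
		eligible = withDeleteAnnotation(all, annotated)
	case len(unhealthyOutdated) > 0: // then restoring control plane health
		eligible = unhealthyOutdated
	case len(outdated) > 0: // then plain rollout
		eligible = outdated
	}
	return eligible
}

func main() {
	all := machineSet{"m1": true, "m2": true, "m3": true}
	outdated := machineSet{"m2": true, "m3": true}
	annotated := machineSet{"m3": true}

	// m3 is both outdated and carries the delete annotation,
	// so the first case wins and the eligible subset is {m3}.
	fmt.Println(eligibleForScaleDown(all, outdated, annotated, machineSet{}))
}
```

The cascade only narrows the subset; picking the concrete machine out of it is then delegated to `MachineInFailureDomainWithMostMachines`, as shown in the diff above.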

util/failuredomains/failure_domains.go

Lines changed: 74 additions & 38 deletions
@@ -30,8 +30,9 @@ import (
 )
 
 type failureDomainAggregation struct {
-	id    string
-	count int
+	id            string
+	countPriority int
+	countAll      int
 }
 type failureDomainAggregations []failureDomainAggregation
 
@@ -43,67 +44,87 @@ func (f failureDomainAggregations) Len() int {
 // Less reports whether the element with
 // index i should sort before the element with index j.
 func (f failureDomainAggregations) Less(i, j int) bool {
-	return f[i].count < f[j].count
+	// If a failure domain has fewer priority machines than the other, it goes first.
+	if f[i].countPriority < f[j].countPriority {
+		return true
+	}
+	if f[i].countPriority > f[j].countPriority {
+		return false
+	}
+
+	// If a failure domain has the same number of priority machines as the other,
+	// use the number of overall machines to pick which one goes first.
+	if f[i].countAll < f[j].countAll {
+		return true
+	}
+	if f[i].countAll > f[j].countAll {
+		return false
+	}
+
+	// If both failure domains have the same number of priority machines and overall machines, we keep the order
+	// in the list, which ensures a certain degree of randomness because the list originates from a map.
+	// This helps to spread machines e.g. when concurrently working on many clusters.
+	return i < j
 }
 
 // Swap swaps the elements with indexes i and j.
 func (f failureDomainAggregations) Swap(i, j int) {
 	f[i], f[j] = f[j], f[i]
 }
 
-// PickMost returns a failure domain that is in machines and has most of the group of machines on.
-func PickMost(ctx context.Context, failureDomains clusterv1.FailureDomains, groupMachines, machines collections.Machines) *string {
-	// orderDescending sorts failure domains according to all machines belonging to the group.
-	fds := orderDescending(ctx, failureDomains, groupMachines)
-	for _, fd := range fds {
-		for _, m := range machines {
-			if m.Spec.FailureDomain == nil {
-				continue
-			}
-			if *m.Spec.FailureDomain == fd.id {
-				return &fd.id
-			}
-		}
-	}
-	return nil
-}
-
-// orderDescending returns the sorted failure domains in decreasing order.
-func orderDescending(ctx context.Context, failureDomains clusterv1.FailureDomains, machines collections.Machines) failureDomainAggregations {
-	aggregations := pick(ctx, failureDomains, machines)
+// PickMost returns the failure domain from which we have to delete a control plane machine, which is the failure domain with the most machines and at least one eligible machine in it.
+func PickMost(ctx context.Context, failureDomains clusterv1.FailureDomains, allMachines, eligibleMachines collections.Machines) *string {
+	aggregations := countByFailureDomain(ctx, failureDomains, allMachines, eligibleMachines)
 	if len(aggregations) == 0 {
 		return nil
 	}
 	sort.Sort(sort.Reverse(aggregations))
-	return aggregations
+	if len(aggregations) > 0 && aggregations[0].countPriority > 0 {
+		return ptr.To(aggregations[0].id)
+	}
+	return nil
 }
 
-// PickFewest returns the failure domain with the fewest number of machines.
-func PickFewest(ctx context.Context, failureDomains clusterv1.FailureDomains, machines collections.Machines) *string {
-	aggregations := pick(ctx, failureDomains, machines)
+// PickFewest returns the failure domain that will be used for placement of a new control plane machine, which is the failure domain with the fewest
+// number of up-to-date, not deleted machines.
+//
+// Ensuring proper spreading of up-to-date, not deleted machines is the highest priority, to achieve the ideal spreading of machines
+// at stable state, when only up-to-date machines will exist.
+//
+// In case of a tie (multiple failure domains with the same number of up-to-date, not deleted machines), the failure domain with the fewest
+// machines overall is picked to ensure better spreading of machines while the rollout is performed.
func PickFewest(ctx context.Context, failureDomains clusterv1.FailureDomains, allMachines, upToDateMachines collections.Machines) *string {
+	aggregations := countByFailureDomain(ctx, failureDomains, allMachines, upToDateMachines)
 	if len(aggregations) == 0 {
 		return nil
 	}
 	sort.Sort(aggregations)
 	return ptr.To(aggregations[0].id)
 }
 
-func pick(ctx context.Context, failureDomains clusterv1.FailureDomains, machines collections.Machines) failureDomainAggregations {
+// countByFailureDomain returns the failure domains with the number of machines in each of them.
+// Note: countByFailureDomain computes both the overall number of machines as well as the number of machines in a higher-priority subset.
+// E.g. for deletion, outdated machines have higher priority than other machines.
+func countByFailureDomain(ctx context.Context, failureDomains clusterv1.FailureDomains, allMachines, priorityMachines collections.Machines) failureDomainAggregations {
 	log := ctrl.LoggerFrom(ctx)
 
 	if len(failureDomains) == 0 {
 		return failureDomainAggregations{}
 	}
 
-	counters := map[string]int{}
+	counters := map[string]failureDomainAggregation{}
 
 	// Initialize the known failure domain keys to find out if an existing machine is in an unsupported failure domain.
-	for fd := range failureDomains {
-		counters[fd] = 0
+	for id := range failureDomains {
+		counters[id] = failureDomainAggregation{
+			id:            id,
+			countPriority: 0,
+			countAll:      0,
+		}
 	}
 
 	// Count how many machines are in each failure domain.
-	for _, m := range machines {
+	for _, m := range allMachines {
 		if m.Spec.FailureDomain == nil {
 			continue
 		}
@@ -116,15 +137,30 @@ func pick(ctx context.Context, failureDomains clusterv1.FailureDomains, machines
 			log.Info(fmt.Sprintf("Unknown failure domain %q for Machine %s (known failure domains: %v)", id, m.GetName(), knownFailureDomains))
 			continue
 		}
-		counters[id]++
+		a := counters[id]
+		a.countAll++
+		counters[id] = a
 	}
 
-	aggregations := make(failureDomainAggregations, 0)
-
-	// Gather up tuples of failure domains ids and counts
-	for fd, count := range counters {
-		aggregations = append(aggregations, failureDomainAggregation{id: fd, count: count})
+	for _, m := range priorityMachines {
+		if m.Spec.FailureDomain == nil {
+			continue
+		}
+		id := *m.Spec.FailureDomain
+		if _, ok := failureDomains[id]; !ok {
+			continue
+		}
+		a := counters[id]
+		a.countPriority++
+		counters[id] = a
 	}
 
+	// Collect failure domain aggregations.
+	// Note: by creating the list from a map, we get a certain degree of randomness that helps to spread machines
+	// e.g. when concurrently working on many clusters.
+	aggregations := make(failureDomainAggregations, 0)
+	for _, count := range counters {
+		aggregations = append(aggregations, count)
+	}
 	return aggregations
 }
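
And the scale-down counterpart: a minimal, self-contained sketch of what the reverse sort plus the `countPriority > 0` guard in `PickMost` amounts to, using a local copy of the aggregation struct rather than the actual cluster-api types.

```go
package main

import (
	"fmt"
	"sort"
)

// agg is a local, illustrative copy of failureDomainAggregation.
type agg struct {
	id            string
	countPriority int // machines eligible for deletion in this failure domain
	countAll      int // machines overall in this failure domain
}

// pickMost mirrors PickMost: sort descending on (countPriority, countAll) and
// only return a failure domain that still holds at least one eligible machine.
func pickMost(aggs []agg) *string {
	sort.SliceStable(aggs, func(i, j int) bool {
		if aggs[i].countPriority != aggs[j].countPriority {
			return aggs[i].countPriority > aggs[j].countPriority
		}
		return aggs[i].countAll > aggs[j].countAll
	})
	if len(aggs) > 0 && aggs[0].countPriority > 0 {
		return &aggs[0].id
	}
	return nil
}

func main() {
	// "a" has the most machines overall, but none of them is eligible for
	// deletion; "b" holds an eligible machine, so "b" is picked.
	aggs := []agg{
		{id: "a", countPriority: 0, countAll: 3},
		{id: "b", countPriority: 1, countAll: 2},
	}
	fmt.Println(*pickMost(aggs)) // Output: b
}
```

The guard is what keeps `PickMost` from returning a failure domain that merely has many machines but nothing eligible to delete, a case the old `orderDescending`-based implementation handled with an explicit nested loop over the eligible machines.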
