Skip to content

Commit 1edc9c1

Browse files
authored
fix: Fix ownership of ClusterAutoscaler resources (#810)
The Cluster Autoscaler addon (either HelmChartProxy or ClusterResourceSet ) has to be deployed in the management cluster namespace and as such cannot be owned by the workload cluster, which commonly resides in a different namespace. This commit fixes that and introduces a BeforeClusterDelete hook to delete the HelmChartProxy or ClusterResourceSet rather than relying no Kubernetes GC as was previously expected. Tested locally. Created workload cluster in different namespace to management cluster. HCP does not have ownership set: ``` apiVersion: addons.cluster.x-k8s.io/v1alpha1 kind: HelmChartProxy metadata: creationTimestamp: "2024-07-18T13:02:30Z" finalizers: - helmchartproxy.addons.cluster.x-k8s.io generation: 1 name: cluster-autoscaler-my-docker-cluster namespace: test resourceVersion: "6238" uid: 0a5ebb11-5318-4114-bedd-9bcc9459f85e spec: ``` And is properly cleaned up on deletion: ``` $ k get hcp -A No resources found ```
1 parent 88d3ee8 commit 1edc9c1

File tree

13 files changed

+275
-39
lines changed

13 files changed

+275
-39
lines changed

pkg/handlers/generic/lifecycle/ccm/aws/strategy_crs.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@ func (s crsStrategy) Apply(
7474
ccmConfigMap.Name,
7575
s.client,
7676
cluster,
77+
handlersutils.DefaultEnsureCRSForClusterFromObjectsOptions(),
7778
ccmConfigMap,
7879
)
7980
if err != nil {

pkg/handlers/generic/lifecycle/clusterautoscaler/handler.go

Lines changed: 97 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,12 @@ type addonStrategy interface {
3030
string,
3131
logr.Logger,
3232
) error
33+
34+
delete(
35+
context.Context,
36+
*clusterv1.Cluster,
37+
logr.Logger,
38+
) error
3339
}
3440
type Config struct {
3541
*options.GlobalOptions
@@ -110,29 +116,18 @@ func (n *DefaultClusterAutoscaler) apply(
110116
clusterKey,
111117
)
112118

113-
varMap := variables.ClusterVariablesToVariablesMap(cluster.Spec.Topology.Variables)
114-
115-
caVar, err := variables.Get[v1alpha1.ClusterAutoscaler](
116-
varMap,
117-
n.variableName,
118-
n.variablePath...)
119+
caVar, err := n.getCAVariable(cluster)
119120
if err != nil {
120-
if variables.IsNotFoundError(err) {
121-
log.V(5).Info(
122-
"Skipping cluster-autoscaler handler, cluster does not specify request cluster-autoscaler addon deployment",
123-
)
124-
return
125-
}
126-
log.Error(
127-
err,
128-
"failed to read cluster-autoscaler variable from cluster definition",
129-
)
121+
log.Error(err, "failed to read cluster-autoscaler variable from cluster definition")
130122
resp.SetStatus(runtimehooksv1.ResponseStatusFailure)
131-
resp.SetMessage(
132-
fmt.Sprintf("failed to read cluster-autoscaler variable from cluster definition: %v",
133-
err,
134-
),
123+
resp.SetMessage(err.Error())
124+
return
125+
}
126+
if caVar == nil {
127+
log.V(5).Info(
128+
"Skipping cluster-autoscaler handler, cluster does not specify request cluster-autoscaler addon deployment",
135129
)
130+
resp.SetStatus(runtimehooksv1.ResponseStatusSuccess)
136131
return
137132
}
138133

@@ -186,3 +181,85 @@ func (n *DefaultClusterAutoscaler) apply(
186181

187182
resp.SetStatus(runtimehooksv1.ResponseStatusSuccess)
188183
}
184+
185+
func (n *DefaultClusterAutoscaler) BeforeClusterDelete(
186+
ctx context.Context,
187+
req *runtimehooksv1.BeforeClusterDeleteRequest,
188+
resp *runtimehooksv1.BeforeClusterDeleteResponse,
189+
) {
190+
cluster := &req.Cluster
191+
192+
clusterKey := ctrlclient.ObjectKeyFromObject(cluster)
193+
194+
log := ctrl.LoggerFrom(ctx).WithValues(
195+
"cluster",
196+
clusterKey,
197+
)
198+
199+
caVar, err := n.getCAVariable(cluster)
200+
if err != nil {
201+
log.Error(err, "failed to read cluster-autoscaler variable from cluster definition")
202+
resp.SetStatus(runtimehooksv1.ResponseStatusFailure)
203+
resp.SetMessage(err.Error())
204+
return
205+
}
206+
if caVar == nil {
207+
log.V(5).Info(
208+
"Skipping cluster-autoscaler before cluster delete handler, cluster does not specify request cluster-autoscaler" +
209+
"addon deployment",
210+
)
211+
resp.SetStatus(runtimehooksv1.ResponseStatusSuccess)
212+
return
213+
}
214+
215+
var strategy addonStrategy
216+
switch ptr.Deref(caVar.Strategy, "") {
217+
case v1alpha1.AddonStrategyClusterResourceSet:
218+
strategy = crsStrategy{
219+
config: n.config.crsConfig,
220+
client: n.client,
221+
}
222+
case v1alpha1.AddonStrategyHelmAddon:
223+
strategy = helmAddonStrategy{
224+
config: n.config.helmAddonConfig,
225+
client: n.client,
226+
}
227+
case "":
228+
resp.SetStatus(runtimehooksv1.ResponseStatusFailure)
229+
resp.SetMessage("strategy not specified for cluster-autoscaler addon")
230+
default:
231+
resp.SetStatus(runtimehooksv1.ResponseStatusFailure)
232+
resp.SetMessage(
233+
fmt.Sprintf("unknown cluster-autoscaler addon deployment strategy %q", *caVar.Strategy),
234+
)
235+
return
236+
}
237+
238+
if err = strategy.delete(ctx, cluster, log); err != nil {
239+
resp.SetStatus(runtimehooksv1.ResponseStatusFailure)
240+
resp.SetMessage(err.Error())
241+
return
242+
}
243+
244+
resp.SetStatus(runtimehooksv1.ResponseStatusSuccess)
245+
}
246+
247+
func (n *DefaultClusterAutoscaler) getCAVariable(
248+
cluster *clusterv1.Cluster,
249+
) (*v1alpha1.ClusterAutoscaler, error) {
250+
varMap := variables.ClusterVariablesToVariablesMap(cluster.Spec.Topology.Variables)
251+
252+
caVar, err := variables.Get[v1alpha1.ClusterAutoscaler](
253+
varMap,
254+
n.variableName,
255+
n.variablePath...)
256+
if err != nil {
257+
if variables.IsNotFoundError(err) {
258+
return nil, nil
259+
}
260+
261+
return nil, err
262+
}
263+
264+
return &caVar, nil
265+
}

pkg/handlers/generic/lifecycle/clusterautoscaler/strategy_crs.go

Lines changed: 51 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import (
1313
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1414
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
1515
"sigs.k8s.io/cluster-api/controllers/remote"
16+
crsv1 "sigs.k8s.io/cluster-api/exp/addons/api/v1beta1"
1617
ctrlclient "sigs.k8s.io/controller-runtime/pkg/client"
1718

1819
"github.com/nutanix-cloud-native/cluster-api-runtime-extensions-nutanix/common/pkg/k8s/client"
@@ -74,7 +75,7 @@ func (s crsStrategy) apply(
7475
},
7576
ObjectMeta: metav1.ObjectMeta{
7677
Namespace: cluster.Namespace,
77-
Name: defaultCM.Name + "-" + cluster.Name,
78+
Name: s.crsNameForCluster(cluster),
7879
},
7980
Data: data,
8081
}
@@ -110,7 +111,17 @@ func (s crsStrategy) apply(
110111
)
111112
}
112113

113-
if err = utils.EnsureCRSForClusterFromObjects(ctx, cm.Name, s.client, targetCluster, cm); err != nil {
114+
// NOTE Unlike other addons, the cluster-autoscaler ClusterResourceSet is created in the management cluster
115+
// namespace and thus cannot be owned by the workload cluster which will commonly exist in a different namespace.
116+
// Deletion is handled by a BeforeClusterDelete hook instead of relying on Kubernetes GC.
117+
if err = utils.EnsureCRSForClusterFromObjects(
118+
ctx,
119+
cm.Name,
120+
s.client,
121+
targetCluster,
122+
utils.EnsureCRSForClusterFromObjectsOptions{SetClusterOwnership: false},
123+
cm,
124+
); err != nil {
114125
return fmt.Errorf(
115126
"failed to apply cluster-autoscaler installation ClusterResourceSet: %w",
116127
err,
@@ -119,3 +130,41 @@ func (s crsStrategy) apply(
119130

120131
return nil
121132
}
133+
134+
func (s crsStrategy) delete(
135+
ctx context.Context,
136+
cluster *clusterv1.Cluster,
137+
log logr.Logger,
138+
) error {
139+
// The cluster-autoscaler is different from other addons.
140+
// It requires all resources to be created in the management cluster,
141+
// which means creating the ClusterResourceSet always targeting the management cluster.
142+
targetCluster, err := findTargetCluster(ctx, s.client, cluster)
143+
if err != nil {
144+
return err
145+
}
146+
147+
crs := &crsv1.ClusterResourceSet{
148+
TypeMeta: metav1.TypeMeta{
149+
APIVersion: crsv1.GroupVersion.String(),
150+
Kind: "ClusterResourceSet",
151+
},
152+
ObjectMeta: metav1.ObjectMeta{
153+
Namespace: targetCluster.Namespace,
154+
Name: s.crsNameForCluster(cluster),
155+
},
156+
}
157+
158+
if err := ctrlclient.IgnoreNotFound(s.client.Delete(ctx, crs)); err != nil {
159+
return fmt.Errorf(
160+
"failed to delete cluster-autoscaler installation ClusterResourceSet: %w",
161+
err,
162+
)
163+
}
164+
165+
return nil
166+
}
167+
168+
func (s crsStrategy) crsNameForCluster(cluster *clusterv1.Cluster) string {
169+
return s.config.defaultClusterAutoscalerConfigMap + "-" + cluster.Name
170+
}

pkg/handlers/generic/lifecycle/clusterautoscaler/strategy_helmaddon.go

Lines changed: 38 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@ import (
1212
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1313
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
1414
ctrlclient "sigs.k8s.io/controller-runtime/pkg/client"
15-
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
1615

1716
caaphv1 "github.com/nutanix-cloud-native/cluster-api-runtime-extensions-nutanix/api/external/sigs.k8s.io/cluster-api-addon-provider-helm/api/v1alpha1"
1817
"github.com/nutanix-cloud-native/cluster-api-runtime-extensions-nutanix/common/pkg/k8s/client"
@@ -103,16 +102,48 @@ func (s helmAddonStrategy) apply(
103102
}
104103

105104
handlersutils.SetTLSConfigForHelmChartProxyIfNeeded(hcp)
106-
if err = controllerutil.SetOwnerReference(cluster, hcp, s.client.Scheme()); err != nil {
107-
return fmt.Errorf(
108-
"failed to set owner reference on cluster-autoscaler installation HelmChartProxy: %w",
109-
err,
110-
)
111-
}
105+
106+
// NOTE Unlike other addons, the cluster-autoscaler HelmChartProxy is created in the management cluster
107+
// namespace and thus cannot be owned by the workload cluster which will commonly exist in a different namespace.
108+
// Deletion is handled by a BeforeClusterDelete hook instead of relying on Kubernetes GC.
112109

113110
if err = client.ServerSideApply(ctx, s.client, hcp, client.ForceOwnership); err != nil {
114111
return fmt.Errorf("failed to apply cluster-autoscaler installation HelmChartProxy: %w", err)
115112
}
116113

117114
return nil
118115
}
116+
117+
func (s helmAddonStrategy) delete(
118+
ctx context.Context,
119+
cluster *clusterv1.Cluster,
120+
log logr.Logger,
121+
) error {
122+
// The cluster-autoscaler is different from other addons.
123+
// It requires all resources to be created in the management cluster,
124+
// which means creating the HelmChartProxy always targeting the management cluster.
125+
targetCluster, err := findTargetCluster(ctx, s.client, cluster)
126+
if err != nil {
127+
return err
128+
}
129+
130+
hcp := &caaphv1.HelmChartProxy{
131+
TypeMeta: metav1.TypeMeta{
132+
APIVersion: caaphv1.GroupVersion.String(),
133+
Kind: "HelmChartProxy",
134+
},
135+
ObjectMeta: metav1.ObjectMeta{
136+
Namespace: targetCluster.Namespace,
137+
Name: "cluster-autoscaler-" + cluster.Name,
138+
},
139+
}
140+
141+
if err := ctrlclient.IgnoreNotFound(s.client.Delete(ctx, hcp)); err != nil {
142+
return fmt.Errorf(
143+
"failed to delete cluster-autoscaler installation HelmChartProxy: %w",
144+
err,
145+
)
146+
}
147+
148+
return nil
149+
}

pkg/handlers/generic/lifecycle/cni/calico/strategy_crs.go

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,15 @@ func (s crsStrategy) ensureCNICRSForCluster(
150150
)
151151
}
152152

153-
if err := utils.EnsureCRSForClusterFromObjects(ctx, cm.Name, s.client, cluster, tigeraConfigMap, cm); err != nil {
153+
if err := utils.EnsureCRSForClusterFromObjects(
154+
ctx,
155+
cm.Name,
156+
s.client,
157+
cluster,
158+
utils.DefaultEnsureCRSForClusterFromObjectsOptions(),
159+
tigeraConfigMap,
160+
cm,
161+
); err != nil {
154162
return fmt.Errorf(
155163
"failed to apply Calico CNI installation ClusterResourceSet: %w",
156164
err,

pkg/handlers/generic/lifecycle/cni/cilium/strategy_crs.go

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,14 @@ func (s crsStrategy) apply(
8585
)
8686
}
8787

88-
if err := utils.EnsureCRSForClusterFromObjects(ctx, cm.Name, s.client, cluster, cm); err != nil {
88+
if err := utils.EnsureCRSForClusterFromObjects(
89+
ctx,
90+
cm.Name,
91+
s.client,
92+
cluster,
93+
utils.DefaultEnsureCRSForClusterFromObjectsOptions(),
94+
cm,
95+
); err != nil {
8996
return fmt.Errorf(
9097
"failed to apply Cilium CNI installation ClusterResourceSet: %w",
9198
err,

pkg/handlers/generic/lifecycle/csi/awsebs/strategy_crs.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ func (s crsStrategy) Apply(
7272
cm.Name,
7373
s.client,
7474
cluster,
75+
handlersutils.DefaultEnsureCRSForClusterFromObjectsOptions(),
7576
cm,
7677
)
7778
if err != nil {

pkg/handlers/generic/lifecycle/csi/localpath/strategy_crs.go

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,14 @@ func (s crsStrategy) Apply(
8585
)
8686
}
8787

88-
if err := utils.EnsureCRSForClusterFromObjects(ctx, cm.Name, s.client, cluster, cm); err != nil {
88+
if err := utils.EnsureCRSForClusterFromObjects(
89+
ctx,
90+
cm.Name,
91+
s.client,
92+
cluster,
93+
utils.DefaultEnsureCRSForClusterFromObjectsOptions(),
94+
cm,
95+
); err != nil {
8996
return fmt.Errorf(
9097
"failed to apply local-path CSI installation ClusterResourceSet: %w",
9198
err,

pkg/handlers/generic/lifecycle/csi/snapshotcontroller/strategy_crs.go

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,14 @@ func (s crsStrategy) Apply(
8585
)
8686
}
8787

88-
if err := utils.EnsureCRSForClusterFromObjects(ctx, cm.Name, s.client, cluster, cm); err != nil {
88+
if err := utils.EnsureCRSForClusterFromObjects(
89+
ctx,
90+
cm.Name,
91+
s.client,
92+
cluster,
93+
utils.DefaultEnsureCRSForClusterFromObjectsOptions(),
94+
cm,
95+
); err != nil {
8996
return fmt.Errorf(
9097
"failed to apply snapshot-controller installation ClusterResourceSet: %w",
9198
err,

pkg/handlers/generic/lifecycle/nfd/strategy_crs.go

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,14 @@ func (s crsStrategy) Apply(
8585
)
8686
}
8787

88-
if err := utils.EnsureCRSForClusterFromObjects(ctx, cm.Name, s.client, cluster, cm); err != nil {
88+
if err := utils.EnsureCRSForClusterFromObjects(
89+
ctx,
90+
cm.Name,
91+
s.client,
92+
cluster,
93+
utils.DefaultEnsureCRSForClusterFromObjectsOptions(),
94+
cm,
95+
); err != nil {
8996
return fmt.Errorf(
9097
"failed to apply NFD installation ClusterResourceSet: %w",
9198
err,

0 commit comments

Comments
 (0)