Skip to content

Commit 10adc65

Browse files
committed
fix: Fix ownership of ClusterAutoscaler resources
The Cluster Autoscaler addon (either HelmChartProxy or ClusterResourceSet ) has to be deployed in the management cluster namespace and as such cannot be owned by the workload cluster, which commonly resides in a different namespace. This commit fixes that and introduces a BeforeClusterDelete hook to delete the HelmChartProxy or ClusterResourceSet rather than relying no Kubernetes GC as was previously expected.
1 parent d51805f commit 10adc65

File tree

14 files changed

+270
-43
lines changed

14 files changed

+270
-43
lines changed

api/v1alpha1/zz_generated.deepcopy.go

Lines changed: 1 addition & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pkg/handlers/generic/lifecycle/ccm/aws/strategy_crs.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@ func (s crsStrategy) Apply(
7474
ccmConfigMap.Name,
7575
s.client,
7676
cluster,
77+
handlersutils.DefaultEnsureCRSForClusterFromObjectsOptions(),
7778
ccmConfigMap,
7879
)
7980
if err != nil {

pkg/handlers/generic/lifecycle/clusterautoscaler/handler.go

Lines changed: 97 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,12 @@ type addonStrategy interface {
3030
string,
3131
logr.Logger,
3232
) error
33+
34+
delete(
35+
context.Context,
36+
*clusterv1.Cluster,
37+
logr.Logger,
38+
) error
3339
}
3440
type Config struct {
3541
*options.GlobalOptions
@@ -110,29 +116,18 @@ func (n *DefaultClusterAutoscaler) apply(
110116
clusterKey,
111117
)
112118

113-
varMap := variables.ClusterVariablesToVariablesMap(cluster.Spec.Topology.Variables)
114-
115-
caVar, err := variables.Get[v1alpha1.ClusterAutoscaler](
116-
varMap,
117-
n.variableName,
118-
n.variablePath...)
119+
caVar, err := n.getCAVariable(cluster)
119120
if err != nil {
120-
if variables.IsNotFoundError(err) {
121-
log.V(5).Info(
122-
"Skipping cluster-autoscaler handler, cluster does not specify request cluster-autoscaler addon deployment",
123-
)
124-
return
125-
}
126-
log.Error(
127-
err,
128-
"failed to read cluster-autoscaler variable from cluster definition",
129-
)
121+
log.Error(err, "failed to read cluster-autoscaler variable from cluster definition")
130122
resp.SetStatus(runtimehooksv1.ResponseStatusFailure)
131-
resp.SetMessage(
132-
fmt.Sprintf("failed to read cluster-autoscaler variable from cluster definition: %v",
133-
err,
134-
),
123+
resp.SetMessage(err.Error())
124+
return
125+
}
126+
if caVar == nil {
127+
log.V(5).Info(
128+
"Skipping cluster-autoscaler handler, cluster does not specify request cluster-autoscaler addon deployment",
135129
)
130+
resp.SetStatus(runtimehooksv1.ResponseStatusSuccess)
136131
return
137132
}
138133

@@ -186,3 +181,85 @@ func (n *DefaultClusterAutoscaler) apply(
186181

187182
resp.SetStatus(runtimehooksv1.ResponseStatusSuccess)
188183
}
184+
185+
func (n *DefaultClusterAutoscaler) BeforeClusterDelete(
186+
ctx context.Context,
187+
req *runtimehooksv1.BeforeClusterDeleteRequest,
188+
resp *runtimehooksv1.BeforeClusterDeleteResponse,
189+
) {
190+
cluster := &req.Cluster
191+
192+
clusterKey := ctrlclient.ObjectKeyFromObject(cluster)
193+
194+
log := ctrl.LoggerFrom(ctx).WithValues(
195+
"cluster",
196+
clusterKey,
197+
)
198+
199+
caVar, err := n.getCAVariable(cluster)
200+
if err != nil {
201+
log.Error(err, "failed to read cluster-autoscaler variable from cluster definition")
202+
resp.SetStatus(runtimehooksv1.ResponseStatusFailure)
203+
resp.SetMessage(err.Error())
204+
return
205+
}
206+
if caVar == nil {
207+
log.V(5).Info(
208+
"Skipping cluster-autoscaler before cluster delete handler, cluster does not specify request cluster-autoscaler" +
209+
"addon deployment",
210+
)
211+
resp.SetStatus(runtimehooksv1.ResponseStatusSuccess)
212+
return
213+
}
214+
215+
var strategy addonStrategy
216+
switch ptr.Deref(caVar.Strategy, "") {
217+
case v1alpha1.AddonStrategyClusterResourceSet:
218+
strategy = crsStrategy{
219+
config: n.config.crsConfig,
220+
client: n.client,
221+
}
222+
case v1alpha1.AddonStrategyHelmAddon:
223+
strategy = helmAddonStrategy{
224+
config: n.config.helmAddonConfig,
225+
client: n.client,
226+
}
227+
case "":
228+
resp.SetStatus(runtimehooksv1.ResponseStatusFailure)
229+
resp.SetMessage("strategy not specified for cluster-autoscaler addon")
230+
default:
231+
resp.SetStatus(runtimehooksv1.ResponseStatusFailure)
232+
resp.SetMessage(
233+
fmt.Sprintf("unknown cluster-autoscaler addon deployment strategy %q", *caVar.Strategy),
234+
)
235+
return
236+
}
237+
238+
if err = strategy.delete(ctx, cluster, log); err != nil {
239+
resp.SetStatus(runtimehooksv1.ResponseStatusFailure)
240+
resp.SetMessage(err.Error())
241+
return
242+
}
243+
244+
resp.SetStatus(runtimehooksv1.ResponseStatusSuccess)
245+
}
246+
247+
func (n *DefaultClusterAutoscaler) getCAVariable(
248+
cluster *clusterv1.Cluster,
249+
) (*v1alpha1.ClusterAutoscaler, error) {
250+
varMap := variables.ClusterVariablesToVariablesMap(cluster.Spec.Topology.Variables)
251+
252+
caVar, err := variables.Get[v1alpha1.ClusterAutoscaler](
253+
varMap,
254+
n.variableName,
255+
n.variablePath...)
256+
if err != nil {
257+
if variables.IsNotFoundError(err) {
258+
return nil, nil
259+
}
260+
261+
return nil, err
262+
}
263+
264+
return &caVar, nil
265+
}

pkg/handlers/generic/lifecycle/clusterautoscaler/strategy_crs.go

Lines changed: 46 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import (
1313
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1414
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
1515
"sigs.k8s.io/cluster-api/controllers/remote"
16+
crsv1 "sigs.k8s.io/cluster-api/exp/addons/api/v1beta1"
1617
ctrlclient "sigs.k8s.io/controller-runtime/pkg/client"
1718

1819
"github.com/nutanix-cloud-native/cluster-api-runtime-extensions-nutanix/common/pkg/k8s/client"
@@ -110,7 +111,17 @@ func (s crsStrategy) apply(
110111
)
111112
}
112113

113-
if err = utils.EnsureCRSForClusterFromObjects(ctx, cm.Name, s.client, targetCluster, cm); err != nil {
114+
// NOTE Unlike other addons, the cluster-autoscaler ClusterResourceSet is created in the management cluster
115+
// namespace and thus cannot be owned by the workload cluster which will commonly exist in a different namespace.
116+
// Deletion is handled by a BeforeClusterDelete hook instead of relying on Kubernetes GC.
117+
if err = utils.EnsureCRSForClusterFromObjects(
118+
ctx,
119+
cm.Name,
120+
s.client,
121+
targetCluster,
122+
utils.EnsureCRSForClusterFromObjectsOptions{SetClusterOwnership: false},
123+
cm,
124+
); err != nil {
114125
return fmt.Errorf(
115126
"failed to apply cluster-autoscaler installation ClusterResourceSet: %w",
116127
err,
@@ -119,3 +130,37 @@ func (s crsStrategy) apply(
119130

120131
return nil
121132
}
133+
134+
func (s crsStrategy) delete(
135+
ctx context.Context,
136+
cluster *clusterv1.Cluster,
137+
log logr.Logger,
138+
) error {
139+
// The cluster-autoscaler is different from other addons.
140+
// It requires all resources to be created in the management cluster,
141+
// which means creating the ClusterResourceSet always targeting the management cluster.
142+
targetCluster, err := findTargetCluster(ctx, s.client, cluster)
143+
if err != nil {
144+
return err
145+
}
146+
147+
crs := &crsv1.ClusterResourceSet{
148+
TypeMeta: metav1.TypeMeta{
149+
APIVersion: crsv1.GroupVersion.String(),
150+
Kind: "ClusterResourceSet",
151+
},
152+
ObjectMeta: metav1.ObjectMeta{
153+
Namespace: targetCluster.Namespace,
154+
Name: s.config.defaultClusterAutoscalerConfigMap + "-" + cluster.Name,
155+
},
156+
}
157+
158+
if err := ctrlclient.IgnoreNotFound(s.client.Delete(ctx, crs)); err != nil {
159+
return fmt.Errorf(
160+
"failed to delete cluster-autoscaler installation ClusterResourceSet: %w",
161+
err,
162+
)
163+
}
164+
165+
return nil
166+
}

pkg/handlers/generic/lifecycle/clusterautoscaler/strategy_helmaddon.go

Lines changed: 38 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@ import (
1212
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1313
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
1414
ctrlclient "sigs.k8s.io/controller-runtime/pkg/client"
15-
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
1615

1716
caaphv1 "github.com/nutanix-cloud-native/cluster-api-runtime-extensions-nutanix/api/external/sigs.k8s.io/cluster-api-addon-provider-helm/api/v1alpha1"
1817
"github.com/nutanix-cloud-native/cluster-api-runtime-extensions-nutanix/common/pkg/k8s/client"
@@ -103,16 +102,48 @@ func (s helmAddonStrategy) apply(
103102
}
104103

105104
handlersutils.SetTLSConfigForHelmChartProxyIfNeeded(hcp)
106-
if err = controllerutil.SetOwnerReference(cluster, hcp, s.client.Scheme()); err != nil {
107-
return fmt.Errorf(
108-
"failed to set owner reference on cluster-autoscaler installation HelmChartProxy: %w",
109-
err,
110-
)
111-
}
105+
106+
// NOTE Unlike other addons, the cluster-autoscaler HelmChartProxy is created in the management cluster
107+
// namespace and thus cannot be owned by the workload cluster which will commonly exist in a different namespace.
108+
// Deletion is handled by a BeforeClusterDelete hook instead of relying on Kubernetes GC.
112109

113110
if err = client.ServerSideApply(ctx, s.client, hcp, client.ForceOwnership); err != nil {
114111
return fmt.Errorf("failed to apply cluster-autoscaler installation HelmChartProxy: %w", err)
115112
}
116113

117114
return nil
118115
}
116+
117+
func (s helmAddonStrategy) delete(
118+
ctx context.Context,
119+
cluster *clusterv1.Cluster,
120+
log logr.Logger,
121+
) error {
122+
// The cluster-autoscaler is different from other addons.
123+
// It requires all resources to be created in the management cluster,
124+
// which means creating the HelmChartProxy always targeting the management cluster.
125+
targetCluster, err := findTargetCluster(ctx, s.client, cluster)
126+
if err != nil {
127+
return err
128+
}
129+
130+
hcp := &caaphv1.HelmChartProxy{
131+
TypeMeta: metav1.TypeMeta{
132+
APIVersion: caaphv1.GroupVersion.String(),
133+
Kind: "HelmChartProxy",
134+
},
135+
ObjectMeta: metav1.ObjectMeta{
136+
Namespace: targetCluster.Namespace,
137+
Name: "cluster-autoscaler-" + cluster.Name,
138+
},
139+
}
140+
141+
if err := ctrlclient.IgnoreNotFound(s.client.Delete(ctx, hcp)); err != nil {
142+
return fmt.Errorf(
143+
"failed to delete cluster-autoscaler installation HelmChartProxy: %w",
144+
err,
145+
)
146+
}
147+
148+
return nil
149+
}

pkg/handlers/generic/lifecycle/cni/calico/strategy_crs.go

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,15 @@ func (s crsStrategy) ensureCNICRSForCluster(
150150
)
151151
}
152152

153-
if err := utils.EnsureCRSForClusterFromObjects(ctx, cm.Name, s.client, cluster, tigeraConfigMap, cm); err != nil {
153+
if err := utils.EnsureCRSForClusterFromObjects(
154+
ctx,
155+
cm.Name,
156+
s.client,
157+
cluster,
158+
utils.DefaultEnsureCRSForClusterFromObjectsOptions(),
159+
tigeraConfigMap,
160+
cm,
161+
); err != nil {
154162
return fmt.Errorf(
155163
"failed to apply Calico CNI installation ClusterResourceSet: %w",
156164
err,

pkg/handlers/generic/lifecycle/cni/cilium/strategy_crs.go

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,14 @@ func (s crsStrategy) apply(
8585
)
8686
}
8787

88-
if err := utils.EnsureCRSForClusterFromObjects(ctx, cm.Name, s.client, cluster, cm); err != nil {
88+
if err := utils.EnsureCRSForClusterFromObjects(
89+
ctx,
90+
cm.Name,
91+
s.client,
92+
cluster,
93+
utils.DefaultEnsureCRSForClusterFromObjectsOptions(),
94+
cm,
95+
); err != nil {
8996
return fmt.Errorf(
9097
"failed to apply Cilium CNI installation ClusterResourceSet: %w",
9198
err,

pkg/handlers/generic/lifecycle/csi/awsebs/strategy_crs.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ func (s crsStrategy) Apply(
7272
cm.Name,
7373
s.client,
7474
cluster,
75+
handlersutils.DefaultEnsureCRSForClusterFromObjectsOptions(),
7576
cm,
7677
)
7778
if err != nil {

pkg/handlers/generic/lifecycle/csi/localpath/strategy_crs.go

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,14 @@ func (s crsStrategy) Apply(
8585
)
8686
}
8787

88-
if err := utils.EnsureCRSForClusterFromObjects(ctx, cm.Name, s.client, cluster, cm); err != nil {
88+
if err := utils.EnsureCRSForClusterFromObjects(
89+
ctx,
90+
cm.Name,
91+
s.client,
92+
cluster,
93+
utils.DefaultEnsureCRSForClusterFromObjectsOptions(),
94+
cm,
95+
); err != nil {
8996
return fmt.Errorf(
9097
"failed to apply local-path CSI installation ClusterResourceSet: %w",
9198
err,

pkg/handlers/generic/lifecycle/csi/snapshotcontroller/strategy_crs.go

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,14 @@ func (s crsStrategy) Apply(
8585
)
8686
}
8787

88-
if err := utils.EnsureCRSForClusterFromObjects(ctx, cm.Name, s.client, cluster, cm); err != nil {
88+
if err := utils.EnsureCRSForClusterFromObjects(
89+
ctx,
90+
cm.Name,
91+
s.client,
92+
cluster,
93+
utils.DefaultEnsureCRSForClusterFromObjectsOptions(),
94+
cm,
95+
); err != nil {
8996
return fmt.Errorf(
9097
"failed to apply snapshot-controller installation ClusterResourceSet: %w",
9198
err,

pkg/handlers/generic/lifecycle/nfd/strategy_crs.go

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,14 @@ func (s crsStrategy) Apply(
8585
)
8686
}
8787

88-
if err := utils.EnsureCRSForClusterFromObjects(ctx, cm.Name, s.client, cluster, cm); err != nil {
88+
if err := utils.EnsureCRSForClusterFromObjects(
89+
ctx,
90+
cm.Name,
91+
s.client,
92+
cluster,
93+
utils.DefaultEnsureCRSForClusterFromObjectsOptions(),
94+
cm,
95+
); err != nil {
8996
return fmt.Errorf(
9097
"failed to apply NFD installation ClusterResourceSet: %w",
9198
err,

0 commit comments

Comments
 (0)