Skip to content

fix: Fix ownership of ClusterAutoscaler resources #810

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pkg/handlers/generic/lifecycle/ccm/aws/strategy_crs.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ func (s crsStrategy) Apply(
ccmConfigMap.Name,
s.client,
cluster,
handlersutils.DefaultEnsureCRSForClusterFromObjectsOptions(),
ccmConfigMap,
)
if err != nil {
Expand Down
117 changes: 97 additions & 20 deletions pkg/handlers/generic/lifecycle/clusterautoscaler/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,12 @@ type addonStrategy interface {
string,
logr.Logger,
) error

delete(
context.Context,
*clusterv1.Cluster,
logr.Logger,
) error
}
type Config struct {
*options.GlobalOptions
Expand Down Expand Up @@ -110,29 +116,18 @@ func (n *DefaultClusterAutoscaler) apply(
clusterKey,
)

varMap := variables.ClusterVariablesToVariablesMap(cluster.Spec.Topology.Variables)

caVar, err := variables.Get[v1alpha1.ClusterAutoscaler](
varMap,
n.variableName,
n.variablePath...)
caVar, err := n.getCAVariable(cluster)
if err != nil {
if variables.IsNotFoundError(err) {
log.V(5).Info(
"Skipping cluster-autoscaler handler, cluster does not specify request cluster-autoscaler addon deployment",
)
return
}
log.Error(
err,
"failed to read cluster-autoscaler variable from cluster definition",
)
log.Error(err, "failed to read cluster-autoscaler variable from cluster definition")
resp.SetStatus(runtimehooksv1.ResponseStatusFailure)
resp.SetMessage(
fmt.Sprintf("failed to read cluster-autoscaler variable from cluster definition: %v",
err,
),
resp.SetMessage(err.Error())
return
}
if caVar == nil {
log.V(5).Info(
"Skipping cluster-autoscaler handler, cluster does not specify request cluster-autoscaler addon deployment",
)
resp.SetStatus(runtimehooksv1.ResponseStatusSuccess)
return
}

Expand Down Expand Up @@ -186,3 +181,85 @@ func (n *DefaultClusterAutoscaler) apply(

resp.SetStatus(runtimehooksv1.ResponseStatusSuccess)
}

// BeforeClusterDelete handles the BeforeClusterDelete lifecycle hook for the
// cluster-autoscaler addon.
//
// It reads the cluster-autoscaler variable from the cluster definition and:
//   - reports failure on resp if the variable cannot be read;
//   - reports success without doing anything if the variable is not set;
//   - reports failure if the strategy is empty or unknown;
//   - otherwise calls delete on the selected strategy and reports the outcome.
//
// The result is communicated exclusively through resp (status and message).
func (n *DefaultClusterAutoscaler) BeforeClusterDelete(
	ctx context.Context,
	req *runtimehooksv1.BeforeClusterDeleteRequest,
	resp *runtimehooksv1.BeforeClusterDeleteResponse,
) {
	cluster := &req.Cluster

	clusterKey := ctrlclient.ObjectKeyFromObject(cluster)

	log := ctrl.LoggerFrom(ctx).WithValues(
		"cluster",
		clusterKey,
	)

	caVar, err := n.getCAVariable(cluster)
	if err != nil {
		log.Error(err, "failed to read cluster-autoscaler variable from cluster definition")
		resp.SetStatus(runtimehooksv1.ResponseStatusFailure)
		resp.SetMessage(err.Error())
		return
	}
	if caVar == nil {
		// The addon was never requested for this cluster, so there is nothing to clean up.
		log.V(5).Info(
			"Skipping cluster-autoscaler before cluster delete handler, cluster does not specify request cluster-autoscaler " +
				"addon deployment",
		)
		resp.SetStatus(runtimehooksv1.ResponseStatusSuccess)
		return
	}

	var strategy addonStrategy
	switch ptr.Deref(caVar.Strategy, "") {
	case v1alpha1.AddonStrategyClusterResourceSet:
		strategy = crsStrategy{
			config: n.config.crsConfig,
			client: n.client,
		}
	case v1alpha1.AddonStrategyHelmAddon:
		strategy = helmAddonStrategy{
			config: n.config.helmAddonConfig,
			client: n.client,
		}
	case "":
		resp.SetStatus(runtimehooksv1.ResponseStatusFailure)
		resp.SetMessage("strategy not specified for cluster-autoscaler addon")
		// Return here: strategy is still nil, and continuing would panic on
		// strategy.delete below.
		return
	default:
		resp.SetStatus(runtimehooksv1.ResponseStatusFailure)
		resp.SetMessage(
			fmt.Sprintf("unknown cluster-autoscaler addon deployment strategy %q", *caVar.Strategy),
		)
		return
	}

	if err = strategy.delete(ctx, cluster, log); err != nil {
		resp.SetStatus(runtimehooksv1.ResponseStatusFailure)
		resp.SetMessage(err.Error())
		return
	}

	resp.SetStatus(runtimehooksv1.ResponseStatusSuccess)
}

// getCAVariable looks up the cluster-autoscaler variable in the cluster's
// topology variables, using the handler's configured variable name and path.
//
// It returns (nil, nil) when the variable is not found, a pointer to the
// decoded value when it is present, and (nil, err) for any other lookup error.
func (n *DefaultClusterAutoscaler) getCAVariable(
	cluster *clusterv1.Cluster,
) (*v1alpha1.ClusterAutoscaler, error) {
	clusterVars := variables.ClusterVariablesToVariablesMap(cluster.Spec.Topology.Variables)

	autoscaler, lookupErr := variables.Get[v1alpha1.ClusterAutoscaler](
		clusterVars,
		n.variableName,
		n.variablePath...)

	switch {
	case lookupErr == nil:
		return &autoscaler, nil
	case variables.IsNotFoundError(lookupErr):
		// A missing variable is not an error; callers treat nil as "addon not requested".
		return nil, nil
	default:
		return nil, lookupErr
	}
}
53 changes: 51 additions & 2 deletions pkg/handlers/generic/lifecycle/clusterautoscaler/strategy_crs.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
"sigs.k8s.io/cluster-api/controllers/remote"
crsv1 "sigs.k8s.io/cluster-api/exp/addons/api/v1beta1"
ctrlclient "sigs.k8s.io/controller-runtime/pkg/client"

"github.com/nutanix-cloud-native/cluster-api-runtime-extensions-nutanix/common/pkg/k8s/client"
Expand Down Expand Up @@ -74,7 +75,7 @@ func (s crsStrategy) apply(
},
ObjectMeta: metav1.ObjectMeta{
Namespace: cluster.Namespace,
Name: defaultCM.Name + "-" + cluster.Name,
Name: s.crsNameForCluster(cluster),
},
Data: data,
}
Expand Down Expand Up @@ -110,7 +111,17 @@ func (s crsStrategy) apply(
)
}

if err = utils.EnsureCRSForClusterFromObjects(ctx, cm.Name, s.client, targetCluster, cm); err != nil {
// NOTE Unlike other addons, the cluster-autoscaler ClusterResourceSet is created in the management cluster
// namespace and thus cannot be owned by the workload cluster which will commonly exist in a different namespace.
// Deletion is handled by a BeforeClusterDelete hook instead of relying on Kubernetes GC.
if err = utils.EnsureCRSForClusterFromObjects(
ctx,
cm.Name,
s.client,
targetCluster,
utils.EnsureCRSForClusterFromObjectsOptions{SetClusterOwnership: false},
cm,
); err != nil {
return fmt.Errorf(
"failed to apply cluster-autoscaler installation ClusterResourceSet: %w",
err,
Expand All @@ -119,3 +130,41 @@ func (s crsStrategy) apply(

return nil
}

// delete removes the cluster-autoscaler ClusterResourceSet created for the
// given cluster.
//
// Unlike other addons, the cluster-autoscaler resources live alongside the
// target (management) cluster resolved by findTargetCluster, so the
// ClusterResourceSet is looked up in that cluster's namespace. A resource that
// is already gone is not treated as an error.
func (s crsStrategy) delete(
	ctx context.Context,
	cluster *clusterv1.Cluster,
	log logr.Logger,
) error {
	targetCluster, err := findTargetCluster(ctx, s.client, cluster)
	if err != nil {
		return err
	}

	// Only the identifying metadata is needed to issue the delete.
	crsMeta := metav1.ObjectMeta{
		Namespace: targetCluster.Namespace,
		Name:      s.crsNameForCluster(cluster),
	}
	crsType := metav1.TypeMeta{
		APIVersion: crsv1.GroupVersion.String(),
		Kind:       "ClusterResourceSet",
	}
	crs := &crsv1.ClusterResourceSet{TypeMeta: crsType, ObjectMeta: crsMeta}

	err = ctrlclient.IgnoreNotFound(s.client.Delete(ctx, crs))
	if err == nil {
		return nil
	}

	return fmt.Errorf(
		"failed to delete cluster-autoscaler installation ClusterResourceSet: %w",
		err,
	)
}

// crsNameForCluster returns the per-cluster resource name used by this
// strategy: the configured default cluster-autoscaler ConfigMap name, a
// hyphen, and the cluster's name.
func (s crsStrategy) crsNameForCluster(cluster *clusterv1.Cluster) string {
	prefix := s.config.defaultClusterAutoscalerConfigMap
	return prefix + "-" + cluster.Name
}
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
ctrlclient "sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"

caaphv1 "github.com/nutanix-cloud-native/cluster-api-runtime-extensions-nutanix/api/external/sigs.k8s.io/cluster-api-addon-provider-helm/api/v1alpha1"
"github.com/nutanix-cloud-native/cluster-api-runtime-extensions-nutanix/common/pkg/k8s/client"
Expand Down Expand Up @@ -103,16 +102,48 @@ func (s helmAddonStrategy) apply(
}

handlersutils.SetTLSConfigForHelmChartProxyIfNeeded(hcp)
if err = controllerutil.SetOwnerReference(cluster, hcp, s.client.Scheme()); err != nil {
return fmt.Errorf(
"failed to set owner reference on cluster-autoscaler installation HelmChartProxy: %w",
err,
)
}

// NOTE Unlike other addons, the cluster-autoscaler HelmChartProxy is created in the management cluster
// namespace and thus cannot be owned by the workload cluster which will commonly exist in a different namespace.
// Deletion is handled by a BeforeClusterDelete hook instead of relying on Kubernetes GC.

if err = client.ServerSideApply(ctx, s.client, hcp, client.ForceOwnership); err != nil {
return fmt.Errorf("failed to apply cluster-autoscaler installation HelmChartProxy: %w", err)
}

return nil
}

// delete removes the cluster-autoscaler HelmChartProxy created for the given
// cluster.
//
// Unlike other addons, the cluster-autoscaler resources live alongside the
// target (management) cluster resolved by findTargetCluster, so the
// HelmChartProxy is looked up in that cluster's namespace under the name
// "cluster-autoscaler-<cluster name>". A resource that is already gone is not
// treated as an error.
func (s helmAddonStrategy) delete(
	ctx context.Context,
	cluster *clusterv1.Cluster,
	log logr.Logger,
) error {
	targetCluster, err := findTargetCluster(ctx, s.client, cluster)
	if err != nil {
		return err
	}

	// Only the identifying metadata is needed to issue the delete.
	hcpMeta := metav1.ObjectMeta{
		Namespace: targetCluster.Namespace,
		Name:      "cluster-autoscaler-" + cluster.Name,
	}
	hcpType := metav1.TypeMeta{
		APIVersion: caaphv1.GroupVersion.String(),
		Kind:       "HelmChartProxy",
	}
	hcp := &caaphv1.HelmChartProxy{TypeMeta: hcpType, ObjectMeta: hcpMeta}

	err = ctrlclient.IgnoreNotFound(s.client.Delete(ctx, hcp))
	if err == nil {
		return nil
	}

	return fmt.Errorf(
		"failed to delete cluster-autoscaler installation HelmChartProxy: %w",
		err,
	)
}
10 changes: 9 additions & 1 deletion pkg/handlers/generic/lifecycle/cni/calico/strategy_crs.go
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,15 @@ func (s crsStrategy) ensureCNICRSForCluster(
)
}

if err := utils.EnsureCRSForClusterFromObjects(ctx, cm.Name, s.client, cluster, tigeraConfigMap, cm); err != nil {
if err := utils.EnsureCRSForClusterFromObjects(
ctx,
cm.Name,
s.client,
cluster,
utils.DefaultEnsureCRSForClusterFromObjectsOptions(),
tigeraConfigMap,
cm,
); err != nil {
return fmt.Errorf(
"failed to apply Calico CNI installation ClusterResourceSet: %w",
err,
Expand Down
9 changes: 8 additions & 1 deletion pkg/handlers/generic/lifecycle/cni/cilium/strategy_crs.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,14 @@ func (s crsStrategy) apply(
)
}

if err := utils.EnsureCRSForClusterFromObjects(ctx, cm.Name, s.client, cluster, cm); err != nil {
if err := utils.EnsureCRSForClusterFromObjects(
ctx,
cm.Name,
s.client,
cluster,
utils.DefaultEnsureCRSForClusterFromObjectsOptions(),
cm,
); err != nil {
return fmt.Errorf(
"failed to apply Cilium CNI installation ClusterResourceSet: %w",
err,
Expand Down
1 change: 1 addition & 0 deletions pkg/handlers/generic/lifecycle/csi/awsebs/strategy_crs.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ func (s crsStrategy) Apply(
cm.Name,
s.client,
cluster,
handlersutils.DefaultEnsureCRSForClusterFromObjectsOptions(),
cm,
)
if err != nil {
Expand Down
9 changes: 8 additions & 1 deletion pkg/handlers/generic/lifecycle/csi/localpath/strategy_crs.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,14 @@ func (s crsStrategy) Apply(
)
}

if err := utils.EnsureCRSForClusterFromObjects(ctx, cm.Name, s.client, cluster, cm); err != nil {
if err := utils.EnsureCRSForClusterFromObjects(
ctx,
cm.Name,
s.client,
cluster,
utils.DefaultEnsureCRSForClusterFromObjectsOptions(),
cm,
); err != nil {
return fmt.Errorf(
"failed to apply local-path CSI installation ClusterResourceSet: %w",
err,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,14 @@ func (s crsStrategy) Apply(
)
}

if err := utils.EnsureCRSForClusterFromObjects(ctx, cm.Name, s.client, cluster, cm); err != nil {
if err := utils.EnsureCRSForClusterFromObjects(
ctx,
cm.Name,
s.client,
cluster,
utils.DefaultEnsureCRSForClusterFromObjectsOptions(),
cm,
); err != nil {
return fmt.Errorf(
"failed to apply snapshot-controller installation ClusterResourceSet: %w",
err,
Expand Down
9 changes: 8 additions & 1 deletion pkg/handlers/generic/lifecycle/nfd/strategy_crs.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,14 @@ func (s crsStrategy) Apply(
)
}

if err := utils.EnsureCRSForClusterFromObjects(ctx, cm.Name, s.client, cluster, cm); err != nil {
if err := utils.EnsureCRSForClusterFromObjects(
ctx,
cm.Name,
s.client,
cluster,
utils.DefaultEnsureCRSForClusterFromObjectsOptions(),
cm,
); err != nil {
return fmt.Errorf(
"failed to apply NFD installation ClusterResourceSet: %w",
err,
Expand Down
Loading
Loading