diff --git a/controllers/openstackcluster_controller.go b/controllers/openstackcluster_controller.go
index 3823b6ec3a..dfe56c8a9d 100644
--- a/controllers/openstackcluster_controller.go
+++ b/controllers/openstackcluster_controller.go
@@ -290,7 +290,18 @@ func deleteBastion(scope *scope.WithLogger, cluster *clusterv1.Cluster, openStac
 		}
 	}
 
-	if instanceStatus != nil {
+	// If no instance was created we currently need to check for orphaned
+	// volumes. This requires resolving the instance spec.
+	// TODO: write volumes to status resources on creation so this is no longer required.
+	if instanceStatus == nil {
+		instanceSpec, err := bastionToInstanceSpec(openStackCluster, cluster)
+		if err != nil {
+			return err
+		}
+		if err := computeService.DeleteVolumes(instanceSpec); err != nil {
+			return fmt.Errorf("delete volumes: %w", err)
+		}
+	} else {
 		instanceNS, err := instanceStatus.NetworkStatus()
 		if err != nil {
 			return err
@@ -307,11 +318,7 @@ func deleteBastion(scope *scope.WithLogger, cluster *clusterv1.Cluster, openStac
 		}
 	}
 
-	instanceSpec, err := bastionToInstanceSpec(openStackCluster, cluster)
-	if err != nil {
-		return err
-	}
-	if err = computeService.DeleteInstance(openStackCluster, instanceStatus, instanceSpec); err != nil {
+	if err = computeService.DeleteInstance(openStackCluster, instanceStatus); err != nil {
 		handleUpdateOSCError(openStackCluster, fmt.Errorf("failed to delete bastion: %w", err))
 		return fmt.Errorf("failed to delete bastion: %w", err)
 	}
diff --git a/controllers/openstackcluster_controller_test.go b/controllers/openstackcluster_controller_test.go
index 86fb45173d..62a7f6fcfa 100644
--- a/controllers/openstackcluster_controller_test.go
+++ b/controllers/openstackcluster_controller_test.go
@@ -208,6 +208,9 @@ var _ = Describe("OpenStackCluster controller", func() {
 			testCluster.Status = infrav1.OpenStackClusterStatus{
 				Bastion: &infrav1.BastionStatus{
 					ID: "bastion-uuid",
+					Resolved: &infrav1.ResolvedMachineSpec{
+						ImageID: "imageID",
+					},
 				},
 			}
 			err = k8sClient.Status().Update(ctx, testCluster)
@@ -221,8 +224,8 @@ var _ = Describe("OpenStackCluster controller", func() {
 		computeClientRecorder.GetServer("bastion-uuid").Return(nil, gophercloud.ErrResourceNotFound{})
 
 		err = deleteBastion(scope, capiCluster, testCluster)
-		Expect(testCluster.Status.Bastion).To(BeNil())
 		Expect(err).To(BeNil())
+		Expect(testCluster.Status.Bastion).To(BeNil())
 	})
 	It("should adopt an existing bastion even if its uuid is not stored in status", func() {
 		testCluster.SetName("adopt-existing-bastion")
diff --git a/controllers/openstackmachine_controller.go b/controllers/openstackmachine_controller.go
index 26691d8430..12eb53d465 100644
--- a/controllers/openstackmachine_controller.go
+++ b/controllers/openstackmachine_controller.go
@@ -169,18 +169,13 @@ func resolveMachineResources(scope *scope.WithLogger, clusterResourceName string
 		openStackMachine.Status.Resolved = resolved
 	}
 	// Resolve and store resources
-	changed, err := compute.ResolveMachineSpec(scope,
+	return compute.ResolveMachineSpec(scope,
 		&openStackMachine.Spec, resolved,
 		clusterResourceName, openStackMachine.Name,
 		openStackCluster, getManagedSecurityGroup(openStackCluster, machine))
-	if err != nil {
-		return false, err
-	}
-	if changed {
-		// If the resolved machine spec changed we need to start the reconcile again to prevent inconsistency between reconciles.
-		return true, nil
-	}
+}
 
+func adoptMachineResources(scope *scope.WithLogger, openStackMachine *infrav1.OpenStackMachine) error {
 	resources := openStackMachine.Status.Resources
 	if resources == nil {
 		resources = &infrav1.MachineResources{}
@@ -188,7 +183,7 @@
 	}
 
 	// Adopt any existing resources
-	return false, compute.AdoptMachineResources(scope, resolved, resources)
+	return compute.AdoptMachineResources(scope, openStackMachine.Status.Resolved, resources)
 }
 
 func patchMachine(ctx context.Context, patchHelper *patch.Helper, openStackMachine *infrav1.OpenStackMachine, machine *clusterv1.Machine, options ...patch.Option) error {
@@ -256,66 +251,72 @@ func (r *OpenStackMachineReconciler) reconcileDelete(scope *scope.WithLogger, cl
 		return ctrl.Result{}, err
 	}
 
-	// We may have resources to adopt if the cluster is ready
-	if openStackCluster.Status.Ready && openStackCluster.Status.Network != nil {
-		if _, err := resolveMachineResources(scope, clusterResourceName, openStackCluster, openStackMachine, machine); err != nil {
-			return ctrl.Result{}, err
-		}
+	// Nothing to do if the cluster is not ready because no machine resources were created.
+	if !openStackCluster.Status.Ready || openStackCluster.Status.Network == nil {
+		// The finalizer should not have been added yet in this case,
+		// but the following handles the upgrade case.
+		controllerutil.RemoveFinalizer(openStackMachine, infrav1.MachineFinalizer)
+		return ctrl.Result{}, nil
 	}
 
-	if openStackCluster.Spec.APIServerLoadBalancer.IsEnabled() {
-		loadBalancerService, err := loadbalancer.NewService(scope)
-		if err != nil {
-			return ctrl.Result{}, err
-		}
+	// For machines created after v0.10, or any machine which has been
+	// reconciled at least once by v0.10 or later, status.Resolved always
+	// exists before any resources are created. We can therefore assume
+	// that if it does not exist, no resources were created.
+	//
+	// There is an upgrade edge case where a machine may have been marked
+	// deleted before upgrade but we are completing it after upgrade. For
+	// this use case only we make a best effort to resolve resources before
+	// continuing, but if we get an error we log it and continue anyway.
+	// This has the potential to leak resources, but only in this specific
+	// edge case. The alternative is to continue retrying until it succeeds,
+	// but that risks never deleting a machine which cannot be resolved due
+	// to a spec error.
+	//
+	// This code can and should be deleted in a future release when we are
+	// sure that all machines have been reconciled at least by a v0.10 or
+	// later controller.
+	if _, err := resolveMachineResources(scope, clusterResourceName, openStackCluster, openStackMachine, machine); err != nil {
+		// Return the error, but allow the resource to be removed anyway.
+		controllerutil.RemoveFinalizer(openStackMachine, infrav1.MachineFinalizer)
+		return ctrl.Result{}, err
+	}
 
-		err = loadBalancerService.DeleteLoadBalancerMember(openStackCluster, machine, openStackMachine, clusterResourceName)
-		if err != nil {
-			conditions.MarkFalse(openStackMachine, infrav1.APIServerIngressReadyCondition, infrav1.LoadBalancerMemberErrorReason, clusterv1.ConditionSeverityWarning, "Machine could not be removed from load balancer: %v", err)
-			return ctrl.Result{}, err
-		}
+	// Check for any orphaned resources
+	// N.B. Unlike resolveMachineResources, we must always look for orphaned resources in the delete path.
+	if err := adoptMachineResources(scope, openStackMachine); err != nil {
+		return ctrl.Result{}, fmt.Errorf("adopting machine resources: %w", err)
 	}
 
-	var instanceStatus *compute.InstanceStatus
-	if openStackMachine.Status.InstanceID != nil {
-		instanceStatus, err = computeService.GetInstanceStatus(*openStackMachine.Status.InstanceID)
-		if err != nil {
-			return ctrl.Result{}, err
-		}
-	} else if instanceStatus, err = computeService.GetInstanceStatusByName(openStackMachine, openStackMachine.Name); err != nil {
+	instanceStatus, err := getInstanceStatus(openStackMachine, computeService)
+	if err != nil {
 		return ctrl.Result{}, err
 	}
 
-	if !openStackCluster.Spec.APIServerLoadBalancer.IsEnabled() && util.IsControlPlaneMachine(machine) && openStackCluster.Spec.APIServerFloatingIP == nil {
-		if instanceStatus != nil {
-			instanceNS, err := instanceStatus.NetworkStatus()
-			if err != nil {
-				openStackMachine.SetFailure(
-					capierrors.UpdateMachineError,
-					fmt.Errorf("get network status for OpenStack instance %s with ID %s: %v", instanceStatus.Name(), instanceStatus.ID(), err),
-				)
-				return ctrl.Result{}, nil
-			}
-			addresses := instanceNS.Addresses()
-			for _, address := range addresses {
-				if address.Type == corev1.NodeExternalIP {
-					if err = networkingService.DeleteFloatingIP(openStackMachine, address.Address); err != nil {
-						conditions.MarkFalse(openStackMachine, infrav1.APIServerIngressReadyCondition, infrav1.FloatingIPErrorReason, clusterv1.ConditionSeverityError, "Deleting floating IP failed: %v", err)
-						return ctrl.Result{}, fmt.Errorf("delete floating IP %q: %w", address.Address, err)
-					}
-				}
-			}
+	if util.IsControlPlaneMachine(machine) {
+		if err := removeAPIServerEndpoint(scope, openStackCluster, openStackMachine, instanceStatus, clusterResourceName); err != nil {
+			return ctrl.Result{}, err
 		}
 	}
 
-	instanceSpec, err := machineToInstanceSpec(openStackCluster, machine, openStackMachine, "")
-	if err != nil {
-		return ctrl.Result{}, err
+	// If no instance was created we currently need to check for orphaned
+	// volumes. This requires resolving the instance spec.
+	// TODO: write volumes to status resources on creation so this is no longer required.
+	if instanceStatus == nil && openStackMachine.Status.Resolved != nil {
+		instanceSpec, err := machineToInstanceSpec(openStackCluster, machine, openStackMachine, "")
+		if err != nil {
+			return ctrl.Result{}, err
+		}
+		if err := computeService.DeleteVolumes(instanceSpec); err != nil {
+			return ctrl.Result{}, fmt.Errorf("delete volumes: %w", err)
+		}
 	}
 
-	if err := computeService.DeleteInstance(openStackMachine, instanceStatus, instanceSpec); err != nil {
-		conditions.MarkFalse(openStackMachine, infrav1.InstanceReadyCondition, infrav1.InstanceDeleteFailedReason, clusterv1.ConditionSeverityError, "Deleting instance failed: %v", err)
-		return ctrl.Result{}, fmt.Errorf("delete instance: %w", err)
+	if instanceStatus != nil {
+		if err := computeService.DeleteInstance(openStackMachine, instanceStatus); err != nil {
+			conditions.MarkFalse(openStackMachine, infrav1.InstanceReadyCondition, infrav1.InstanceDeleteFailedReason, clusterv1.ConditionSeverityError, "Deleting instance failed: %v", err)
+			return ctrl.Result{}, fmt.Errorf("delete instance: %w", err)
+		}
 	}
 
 	trunkSupported, err := networkingService.IsTrunkExtSupported()
@@ -341,6 +342,62 @@ func (r *OpenStackMachineReconciler) reconcileDelete(scope *scope.WithLogger, cl
 	return ctrl.Result{}, nil
 }
 
+func getInstanceStatus(openStackMachine *infrav1.OpenStackMachine, computeService *compute.Service) (*compute.InstanceStatus, error) {
+	if openStackMachine.Status.InstanceID != nil {
+		return computeService.GetInstanceStatus(*openStackMachine.Status.InstanceID)
+	}
+	return computeService.GetInstanceStatusByName(openStackMachine, openStackMachine.Name)
+}
+
+func removeAPIServerEndpoint(scope *scope.WithLogger, openStackCluster *infrav1.OpenStackCluster, openStackMachine *infrav1.OpenStackMachine, instanceStatus *compute.InstanceStatus, clusterResourceName string) error {
+	if openStackCluster.Spec.APIServerLoadBalancer.IsEnabled() {
+		loadBalancerService, err := loadbalancer.NewService(scope)
+		if err != nil {
+			return err
+		}
+
+		err = loadBalancerService.DeleteLoadBalancerMember(openStackCluster, openStackMachine, clusterResourceName)
+		if err != nil {
+			conditions.MarkFalse(openStackMachine, infrav1.APIServerIngressReadyCondition, infrav1.LoadBalancerMemberErrorReason, clusterv1.ConditionSeverityWarning, "Machine could not be removed from load balancer: %v", err)
+			return err
+		}
+		return nil
+	}
+
+	// XXX(mdbooth): This looks wrong to me. Surely we should only ever
+	// disassociate the floating IP here. I would expect the API server
+	// floating IP to be created and deleted with the cluster. And if the
+	// delete behaviour is correct, we leak it if the instance was
+	// previously deleted.
+	if openStackCluster.Spec.APIServerFloatingIP == nil && instanceStatus != nil {
+		instanceNS, err := instanceStatus.NetworkStatus()
+		if err != nil {
+			openStackMachine.SetFailure(
+				capierrors.UpdateMachineError,
+				fmt.Errorf("get network status for OpenStack instance %s with ID %s: %v", instanceStatus.Name(), instanceStatus.ID(), err),
+			)
+			return nil
+		}
+
+		networkingService, err := networking.NewService(scope)
+		if err != nil {
+			return err
+		}
+
+		addresses := instanceNS.Addresses()
+		for _, address := range addresses {
+			if address.Type == corev1.NodeExternalIP {
+				if err = networkingService.DeleteFloatingIP(openStackMachine, address.Address); err != nil {
+					conditions.MarkFalse(openStackMachine, infrav1.APIServerIngressReadyCondition, infrav1.FloatingIPErrorReason, clusterv1.ConditionSeverityError, "Deleting floating IP failed: %v", err)
+					return fmt.Errorf("delete floating IP %q: %w", address.Address, err)
+				}
+			}
+		}
+	}
+
+	return nil
+}
+
 // GetPortIDs returns a list of port IDs from a list of PortStatus.
 func GetPortIDs(ports []infrav1.PortStatus) []string {
 	portIDs := make([]string, len(ports))
@@ -489,34 +546,50 @@ func (r *OpenStackMachineReconciler) reconcileNormal(ctx context.Context, scope
 		return ctrl.Result{}, nil
 	}
 
-	// If the OpenStackMachine doesn't have our finalizer, add it.
-	if controllerutil.AddFinalizer(openStackMachine, infrav1.MachineFinalizer) {
-		// Register the finalizer immediately to avoid orphaning OpenStack resources on delete
-		return ctrl.Result{}, nil
-	}
-
 	if !openStackCluster.Status.Ready {
 		scope.Logger().Info("Cluster infrastructure is not ready yet, re-queuing machine")
 		conditions.MarkFalse(openStackMachine, infrav1.InstanceReadyCondition, infrav1.WaitingForClusterInfrastructureReason, clusterv1.ConditionSeverityInfo, "")
 		return ctrl.Result{RequeueAfter: waitForClusterInfrastructureReadyDuration}, nil
 	}
 
-	if changed, err := resolveMachineResources(scope, clusterResourceName, openStackCluster, openStackMachine, machine); changed || err != nil {
-		scope.Logger().V(6).Info("Machine resources updated, requeuing")
-		return ctrl.Result{}, err
-	}
-
 	// Make sure bootstrap data is available and populated.
 	if machine.Spec.Bootstrap.DataSecretName == nil {
 		scope.Logger().Info("Bootstrap data secret reference is not yet available")
 		conditions.MarkFalse(openStackMachine, infrav1.InstanceReadyCondition, infrav1.WaitingForBootstrapDataReason, clusterv1.ConditionSeverityInfo, "")
 		return ctrl.Result{}, nil
 	}
-	userData, err := r.getBootstrapData(ctx, machine, openStackMachine)
+
+	changed, err := resolveMachineResources(scope, clusterResourceName, openStackCluster, openStackMachine, machine)
 	if err != nil {
 		return ctrl.Result{}, err
 	}
+
+	// Also add the finalizer when writing resolved resources so we can start creating resources on the next reconcile.
+	if controllerutil.AddFinalizer(openStackMachine, infrav1.MachineFinalizer) {
+		changed = true
+	}
+
+	// We requeue if we either added the finalizer or resolved machine
+	// resources. This means that we never create any resources unless we
+	// have observed that the finalizer and resolved machine resources were
+	// successfully written in a previous transaction. This in turn means
+	// that in the delete path we can be sure that if there are no resolved
+	// resources then no resources were created.
+ if changed { + scope.Logger().V(6).Info("Machine resources updated, requeuing") + return ctrl.Result{}, nil + } + + // Check for orphaned resources previously created but not written to the status + if err := adoptMachineResources(scope, openStackMachine); err != nil { + return ctrl.Result{}, fmt.Errorf("adopting machine resources: %w", err) + } + scope.Logger().Info("Reconciling Machine") + userData, err := r.getBootstrapData(ctx, machine, openStackMachine) + if err != nil { + return ctrl.Result{}, err + } computeService, err := compute.NewService(scope) if err != nil { diff --git a/controllers/openstackmachine_controller_test.go b/controllers/openstackmachine_controller_test.go index f40802aa98..c77220bc4c 100644 --- a/controllers/openstackmachine_controller_test.go +++ b/controllers/openstackmachine_controller_test.go @@ -17,17 +17,30 @@ limitations under the License. package controllers import ( + "fmt" "reflect" "testing" + "github.com/go-logr/logr/testr" + "github.com/golang/mock/gomock" "github.com/google/go-cmp/cmp" + "github.com/gophercloud/gophercloud" + "github.com/gophercloud/gophercloud/openstack/blockstorage/v3/volumes" + "github.com/gophercloud/gophercloud/openstack/compute/v2/servers" + "github.com/gophercloud/gophercloud/openstack/imageservice/v2/images" + "github.com/gophercloud/gophercloud/openstack/networking/v2/extensions" + "github.com/gophercloud/gophercloud/openstack/networking/v2/extensions/trunks" + "github.com/gophercloud/gophercloud/openstack/networking/v2/ports" . "github.com/onsi/gomega" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/utils/pointer" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" infrav1 "sigs.k8s.io/cluster-api-provider-openstack/api/v1beta1" + "sigs.k8s.io/cluster-api-provider-openstack/pkg/clients" + "sigs.k8s.io/cluster-api-provider-openstack/pkg/clients/mock" "sigs.k8s.io/cluster-api-provider-openstack/pkg/cloud/services/compute" + "sigs.k8s.io/cluster-api-provider-openstack/pkg/scope" ) const ( @@ -206,3 +219,355 @@ func TestGetPortIDs(t *testing.T) { }) } } + +func Test_reconcileDelete(t *testing.T) { + const ( + instanceUUID = "8308882f-5e46-47e6-8e12-1fe869c43d1d" + portUUID = "55eac199-4836-4a98-b31c-9f65f382ad46" + rootVolumeUUID = "4724a66d-bd5e-47f3-bb57-a67fcb4168e0" + trunkUUID = "9d348baa-93b1-4e63-932f-dd0527fbd789" + + imageName = "my-image" + ) + + // ******************* + // START OF TEST CASES + // ******************* + + type recorders struct { + compute *mock.MockComputeClientMockRecorder + image *mock.MockImageClientMockRecorder + network *mock.MockNetworkClientMockRecorder + volume *mock.MockVolumeClientMockRecorder + } + + defaultImage := infrav1.ImageParam{ + Filter: &infrav1.ImageFilter{ + Name: pointer.String(imageName), + }, + } + + defaultResolvedPorts := []infrav1.ResolvedPortSpec{ + { + Name: openStackMachineName + "-0", + Description: "my test port", + NetworkID: networkUUID, + }, + } + defaultPortsStatus := []infrav1.PortStatus{ + { + ID: portUUID, + }, + } + + deleteDefaultPorts := func(r *recorders) { + trunkExtension := extensions.Extension{} + trunkExtension.Alias = "trunk" + r.network.ListExtensions().Return([]extensions.Extension{trunkExtension}, nil) + r.network.ListTrunk(trunks.ListOpts{PortID: portUUID}).Return([]trunks.Trunk{{ID: trunkUUID}}, nil) + r.network.DeleteTrunk(trunkUUID).Return(nil) + r.network.DeletePort(portUUID).Return(nil) + } + + deleteServerByID := func(r *recorders) 
{ + r.compute.GetServer(instanceUUID).Return(&clients.ServerExt{ + Server: servers.Server{ + ID: instanceUUID, + Name: openStackMachineName, + }, + }, nil) + r.compute.DeleteServer(instanceUUID).Return(nil) + r.compute.GetServer(instanceUUID).Return(nil, gophercloud.ErrDefault404{}) + } + deleteServerByName := func(r *recorders) { + r.compute.ListServers(servers.ListOpts{ + Name: "^" + openStackMachineName + "$", + }).Return([]clients.ServerExt{ + {Server: servers.Server{ + ID: instanceUUID, + Name: openStackMachineName, + }}, + }, nil) + r.compute.DeleteServer(instanceUUID).Return(nil) + r.compute.GetServer(instanceUUID).Return(nil, gophercloud.ErrDefault404{}) + } + + deleteMissingServerByName := func(r *recorders) { + // Lookup server by name because it is not in status. + // Don't find it. + r.compute.ListServers(servers.ListOpts{ + Name: "^" + openStackMachineName + "$", + }).Return([]clients.ServerExt{}, nil) + } + + deleteRootVolume := func(r *recorders) { + // Fetch volume by name + volumeName := fmt.Sprintf("%s-root", openStackMachineName) + r.volume.ListVolumes(volumes.ListOpts{ + AllTenants: false, + Name: volumeName, + TenantID: "", + }).Return([]volumes.Volume{{ + ID: rootVolumeUUID, + Name: volumeName, + }}, nil) + + // Delete volume + r.volume.DeleteVolume(rootVolumeUUID, volumes.DeleteOpts{}).Return(nil) + } + + adoptExistingPorts := func(r *recorders) { + r.network.ListPort(ports.ListOpts{ + NetworkID: networkUUID, + Name: openStackMachineName + "-0", + }).Return([]ports.Port{{ID: portUUID}}, nil) + } + + resolveImage := func(r *recorders) { + r.image.ListImages(images.ListOpts{ + Name: imageName, + }).Return([]images.Image{{ID: imageUUID}}, nil) + } + + tests := []struct { + name string + osMachine infrav1.OpenStackMachine + expect func(r *recorders) + wantErr bool + wantRemoveFinalizer bool + clusterNotReady bool + }{ + { + name: "No volumes, resolved and resources populated", + osMachine: infrav1.OpenStackMachine{ + Spec: infrav1.OpenStackMachineSpec{ + Image: defaultImage, + }, + Status: infrav1.OpenStackMachineStatus{ + InstanceID: pointer.String(instanceUUID), + Resolved: &infrav1.ResolvedMachineSpec{ + ImageID: imageUUID, + Ports: defaultResolvedPorts, + }, + Resources: &infrav1.MachineResources{ + Ports: defaultPortsStatus, + }, + }, + }, + expect: func(r *recorders) { + deleteServerByID(r) + deleteDefaultPorts(r) + }, + wantRemoveFinalizer: true, + }, + { + name: "Root volume, resolved and resources populated", + osMachine: infrav1.OpenStackMachine{ + Spec: infrav1.OpenStackMachineSpec{ + Image: defaultImage, + RootVolume: &infrav1.RootVolume{ + Size: 50, + }, + }, + Status: infrav1.OpenStackMachineStatus{ + InstanceID: pointer.String(instanceUUID), + Resolved: &infrav1.ResolvedMachineSpec{ + ImageID: imageUUID, + Ports: defaultResolvedPorts, + }, + Resources: &infrav1.MachineResources{ + Ports: defaultPortsStatus, + }, + }, + }, + expect: func(r *recorders) { + // Server exists, so we don't delete root volume explicitly + deleteServerByID(r) + deleteDefaultPorts(r) + }, + wantRemoveFinalizer: true, + }, + { + name: "Root volume, machine not created, resolved and resources populated", + osMachine: infrav1.OpenStackMachine{ + Spec: infrav1.OpenStackMachineSpec{ + Image: defaultImage, + RootVolume: &infrav1.RootVolume{ + Size: 50, + }, + }, + Status: infrav1.OpenStackMachineStatus{ + Resolved: &infrav1.ResolvedMachineSpec{ + ImageID: imageUUID, + Ports: defaultResolvedPorts, + }, + Resources: &infrav1.MachineResources{ + Ports: defaultPortsStatus, + }, + }, + }, + 
expect: func(r *recorders) { + deleteMissingServerByName(r) + deleteRootVolume(r) + deleteDefaultPorts(r) + }, + wantRemoveFinalizer: true, + }, + { + // N.B. The 'no resolved but resource exist' case can + // only happen across an upgrade. At some point in the + // future we should stop handling it. + name: "No volumes, no resolved or resources, instance exists", + osMachine: infrav1.OpenStackMachine{ + Spec: infrav1.OpenStackMachineSpec{ + Image: defaultImage, + }, + Status: infrav1.OpenStackMachineStatus{ + // Unlike resolved and resources, + // instanceID will have been converted + // from the previous API version. + InstanceID: pointer.String(instanceUUID), + }, + }, + expect: func(r *recorders) { + resolveImage(r) + adoptExistingPorts(r) + deleteServerByID(r) + deleteDefaultPorts(r) + }, + wantRemoveFinalizer: true, + }, + { + // This is an upgrade case because from v0.10 onwards + // we don't add the finalizer until we add resolved, so + // this can no longer occur. This will stop working when + // we remove handling for empty resolved on delete. + name: "Invalid image, no resolved or resources", + osMachine: infrav1.OpenStackMachine{ + Spec: infrav1.OpenStackMachineSpec{ + Image: defaultImage, + }, + }, + expect: func(r *recorders) { + r.image.ListImages(images.ListOpts{Name: imageName}).Return([]images.Image{}, nil) + }, + wantErr: true, + wantRemoveFinalizer: true, + }, + { + name: "No instance id, server and ports exist", + osMachine: infrav1.OpenStackMachine{ + Spec: infrav1.OpenStackMachineSpec{ + Image: defaultImage, + }, + Status: infrav1.OpenStackMachineStatus{ + Resolved: &infrav1.ResolvedMachineSpec{ + ImageID: imageUUID, + Ports: defaultResolvedPorts, + }, + Resources: &infrav1.MachineResources{ + Ports: defaultPortsStatus, + }, + }, + }, + expect: func(r *recorders) { + deleteServerByName(r) + deleteDefaultPorts(r) + }, + wantRemoveFinalizer: true, + }, + { + name: "Adopt ports error should fail deletion and retry", + osMachine: infrav1.OpenStackMachine{ + Spec: infrav1.OpenStackMachineSpec{ + Image: defaultImage, + }, + Status: infrav1.OpenStackMachineStatus{ + Resolved: &infrav1.ResolvedMachineSpec{ + ImageID: imageUUID, + Ports: defaultResolvedPorts, + }, + }, + }, + expect: func(r *recorders) { + r.network.ListPort(ports.ListOpts{ + NetworkID: networkUUID, + Name: openStackMachineName + "-0", + }).Return(nil, fmt.Errorf("error adopting ports")) + }, + wantErr: true, + wantRemoveFinalizer: false, + }, + { + // This is an upgrade case because from v0.10 onwards we + // should not have added the finalizer until the cluster + // is ready. 
+ name: "Cluster not ready should remove finalizer", + osMachine: infrav1.OpenStackMachine{ + Spec: infrav1.OpenStackMachineSpec{ + Image: defaultImage, + }, + }, + clusterNotReady: true, + wantRemoveFinalizer: true, + }, + } + for i := range tests { + tt := &tests[i] + t.Run(tt.name, func(t *testing.T) { + g := NewGomegaWithT(t) + log := testr.New(t) + + mockCtrl := gomock.NewController(t) + mockScopeFactory := scope.NewMockScopeFactory(mockCtrl, "") + + reconciler := OpenStackMachineReconciler{} + + computeRecorder := mockScopeFactory.ComputeClient.EXPECT() + imageRecorder := mockScopeFactory.ImageClient.EXPECT() + networkRecorder := mockScopeFactory.NetworkClient.EXPECT() + volumeRecorder := mockScopeFactory.VolumeClient.EXPECT() + + if tt.expect != nil { + tt.expect(&recorders{computeRecorder, imageRecorder, networkRecorder, volumeRecorder}) + } + scopeWithLogger := scope.NewWithLogger(mockScopeFactory, log) + + openStackCluster := infrav1.OpenStackCluster{} + openStackCluster.Status.Ready = !tt.clusterNotReady + openStackCluster.Status.Network = &infrav1.NetworkStatusWithSubnets{ + NetworkStatus: infrav1.NetworkStatus{ + Name: "my-network", + ID: networkUUID, + }, + Subnets: []infrav1.Subnet{ + { + Name: "my-subnet", + ID: subnetUUID, + CIDR: "192.168.0.0/24", + }, + }, + } + + machine := clusterv1.Machine{} + + osMachine := &tt.osMachine + osMachine.Name = openStackMachineName + osMachine.Finalizers = []string{infrav1.MachineFinalizer} + + _, err := reconciler.reconcileDelete(scopeWithLogger, openStackMachineName, &openStackCluster, &machine, &tt.osMachine) + + if tt.wantErr { + g.Expect(err).To(HaveOccurred()) + } else { + g.Expect(err).ToNot(HaveOccurred()) + } + + if tt.wantRemoveFinalizer { + g.Expect(osMachine.Finalizers).To(BeEmpty()) + } else { + g.Expect(osMachine.Finalizers).To(ConsistOf(infrav1.MachineFinalizer)) + } + }) + } +} diff --git a/pkg/cloud/services/compute/instance.go b/pkg/cloud/services/compute/instance.go index ac0b377bed..ee808c05d6 100644 --- a/pkg/cloud/services/compute/instance.go +++ b/pkg/cloud/services/compute/instance.go @@ -383,33 +383,63 @@ func (s *Service) GetManagementPort(openStackCluster *infrav1.OpenStackCluster, return &allPorts[0], nil } -func (s *Service) DeleteInstance(eventObject runtime.Object, instanceStatus *InstanceStatus, instanceSpec *InstanceSpec) error { - if instanceStatus == nil { - /* - Attaching volumes to an instance is a two-step process: +func (s *Service) DeleteInstance(eventObject runtime.Object, instanceStatus *InstanceStatus) error { + instance := instanceStatus.InstanceIdentifier() - 1. Create the volume - 2. 
Create the instance with the created volumes in RootVolume and AdditionalBlockDevices fields with DeleteOnTermination=true + err := s.getComputeClient().DeleteServer(instance.ID) + if err != nil { + if capoerrors.IsNotFound(err) { + record.Eventf(eventObject, "SuccessfulDeleteServer", "Server %s with id %s did not exist", instance.Name, instance.ID) + return nil + } + record.Warnf(eventObject, "FailedDeleteServer", "Failed to delete server %s with id %s: %v", instance.Name, instance.ID, err) + return err + } + + err = wait.PollUntilContextTimeout(context.TODO(), retryIntervalInstanceStatus, timeoutInstanceDelete, true, func(_ context.Context) (bool, error) { + i, err := s.GetInstanceStatus(instance.ID) + if err != nil { + return false, err + } + if i != nil { + return false, nil + } + return true, nil + }) + if err != nil { + record.Warnf(eventObject, "FailedDeleteServer", "Failed to delete server %s with id %s: %v", instance.Name, instance.ID, err) + return err + } - This has a possible failure mode where creating the volume succeeds but creating the instance - fails. In this case, we want to make sure that the dangling volumes are cleaned up. + record.Eventf(eventObject, "SuccessfulDeleteServer", "Deleted server %s with id %s", instance.Name, instance.ID) + return nil +} - To handle this safely, we ensure that we never remove a machine finalizer until all resources - associated with the instance, including volumes, have been deleted. To achieve this: +// DeleteVolumes deletes any cinder volumes which were created for the instance. +// Note that this must only be called when the server was not successfully +// created. If the server was created the volume will have been added with +// DeleteOnTermination=true, and will be automatically cleaned up with the +// server. +func (s *Service) DeleteVolumes(instanceSpec *InstanceSpec) error { + /* + Attaching volumes to an instance is a two-step process: - * We always call DeleteInstance when reconciling a delete, even if the instance does not exist - * If the instance was already deleted we check that the volumes are also gone + 1. Create the volume + 2. Create the instance with the created volumes in RootVolume and AdditionalBlockDevices fields with DeleteOnTermination=true - Note that we don't need to separately delete the volumes when deleting the instance because - DeleteOnTermination will ensure it is deleted in that case. - */ - return s.deleteVolumes(instanceSpec) - } + This has a possible failure mode where creating the volume succeeds but creating the instance + fails. In this case, we want to make sure that the dangling volumes are cleaned up. - return s.deleteInstance(eventObject, instanceStatus.InstanceIdentifier()) -} + To handle this safely, we ensure that we never remove a machine finalizer until all resources + associated with the instance, including volumes, have been deleted. To achieve this: + + * We always call DeleteInstance when reconciling a delete, even if the instance does not exist + * If the instance was already deleted we check that the volumes are also gone + + Note that we don't need to separately delete the volumes when deleting the instance because + DeleteOnTermination will ensure it is deleted in that case. 
+ */ -func (s *Service) deleteVolumes(instanceSpec *InstanceSpec) error { if hasRootVolume(instanceSpec) { if err := s.deleteVolume(instanceSpec.Name, "root"); err != nil { return err @@ -437,36 +467,6 @@ func (s *Service) deleteVolume(instanceName string, nameSuffix string) error { return s.getVolumeClient().DeleteVolume(volume.ID, volumes.DeleteOpts{}) } -func (s *Service) deleteInstance(eventObject runtime.Object, instance *InstanceIdentifier) error { - err := s.getComputeClient().DeleteServer(instance.ID) - if err != nil { - if capoerrors.IsNotFound(err) { - record.Eventf(eventObject, "SuccessfulDeleteServer", "Server %s with id %s did not exist", instance.Name, instance.ID) - return nil - } - record.Warnf(eventObject, "FailedDeleteServer", "Failed to delete server %s with id %s: %v", instance.Name, instance.ID, err) - return err - } - - err = wait.PollUntilContextTimeout(context.TODO(), retryIntervalInstanceStatus, timeoutInstanceDelete, true, func(_ context.Context) (bool, error) { - i, err := s.GetInstanceStatus(instance.ID) - if err != nil { - return false, err - } - if i != nil { - return false, nil - } - return true, nil - }) - if err != nil { - record.Warnf(eventObject, "FailedDeleteServer", "Failed to delete server %s with id %s: %v", instance.Name, instance.ID, err) - return err - } - - record.Eventf(eventObject, "SuccessfulDeleteServer", "Deleted server %s with id %s", instance.Name, instance.ID) - return nil -} - func (s *Service) GetInstanceStatus(resourceID string) (instance *InstanceStatus, err error) { if resourceID == "" { return nil, fmt.Errorf("resourceId should be specified to get detail") diff --git a/pkg/cloud/services/compute/instance_test.go b/pkg/cloud/services/compute/instance_test.go index b571b25021..54677279aa 100644 --- a/pkg/cloud/services/compute/instance_test.go +++ b/pkg/cloud/services/compute/instance_test.go @@ -24,14 +24,12 @@ import ( "github.com/go-logr/logr/testr" "github.com/golang/mock/gomock" - "github.com/gophercloud/gophercloud" "github.com/gophercloud/gophercloud/openstack/blockstorage/v3/volumes" "github.com/gophercloud/gophercloud/openstack/compute/v2/extensions/availabilityzones" "github.com/gophercloud/gophercloud/openstack/compute/v2/flavors" "github.com/gophercloud/gophercloud/openstack/compute/v2/servers" "github.com/gophercloud/gophercloud/openstack/imageservice/v2/images" . 
"github.com/onsi/gomega" - "k8s.io/apimachinery/pkg/runtime" "k8s.io/utils/pointer" infrav1 "sigs.k8s.io/cluster-api-provider-openstack/api/v1beta1" @@ -710,98 +708,3 @@ func TestService_ReconcileInstance(t *testing.T) { }) } } - -func TestService_DeleteInstance(t *testing.T) { - RegisterTestingT(t) - - getDefaultInstanceStatus := func() *InstanceStatus { - return &InstanceStatus{ - server: &clients.ServerExt{ - Server: servers.Server{ - ID: instanceUUID, - }, - }, - } - } - - // ******************* - // START OF TEST CASES - // ******************* - - type recorders struct { - compute *mock.MockComputeClientMockRecorder - network *mock.MockNetworkClientMockRecorder - volume *mock.MockVolumeClientMockRecorder - } - - tests := []struct { - name string - eventObject runtime.Object - instanceStatus func() *InstanceStatus - rootVolume *infrav1.RootVolume - expect func(r *recorders) - wantErr bool - }{ - { - name: "Defaults", - eventObject: &infrav1.OpenStackMachine{}, - instanceStatus: getDefaultInstanceStatus, - expect: func(r *recorders) { - r.compute.DeleteServer(instanceUUID).Return(nil) - r.compute.GetServer(instanceUUID).Return(nil, gophercloud.ErrDefault404{}) - }, - wantErr: false, - }, - { - name: "Dangling volume", - eventObject: &infrav1.OpenStackMachine{}, - instanceStatus: func() *InstanceStatus { return nil }, - rootVolume: &infrav1.RootVolume{ - Size: 50, - }, - expect: func(r *recorders) { - // Fetch volume by name - volumeName := fmt.Sprintf("%s-root", openStackMachineName) - r.volume.ListVolumes(volumes.ListOpts{ - AllTenants: false, - Name: volumeName, - TenantID: "", - }).Return([]volumes.Volume{{ - ID: rootVolumeUUID, - Name: volumeName, - }}, nil) - - // Delete volume - r.volume.DeleteVolume(rootVolumeUUID, volumes.DeleteOpts{}).Return(nil) - }, - wantErr: false, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - mockCtrl := gomock.NewController(t) - log := testr.New(t) - mockScopeFactory := scope.NewMockScopeFactory(mockCtrl, "") - - computeRecorder := mockScopeFactory.ComputeClient.EXPECT() - networkRecorder := mockScopeFactory.NetworkClient.EXPECT() - volumeRecorder := mockScopeFactory.VolumeClient.EXPECT() - - tt.expect(&recorders{computeRecorder, networkRecorder, volumeRecorder}) - - s, err := NewService(scope.NewWithLogger(mockScopeFactory, log)) - if err != nil { - t.Fatalf("Failed to create service: %v", err) - } - - instanceSpec := &InstanceSpec{ - Name: openStackMachineName, - RootVolume: tt.rootVolume, - } - - if err := s.DeleteInstance(tt.eventObject, tt.instanceStatus(), instanceSpec); (err != nil) != tt.wantErr { - t.Errorf("Service.DeleteInstance() error = %v, wantErr %v", err, tt.wantErr) - } - }) - } -} diff --git a/pkg/cloud/services/loadbalancer/loadbalancer.go b/pkg/cloud/services/loadbalancer/loadbalancer.go index 2de72c81d6..fdff5e58ec 100644 --- a/pkg/cloud/services/loadbalancer/loadbalancer.go +++ b/pkg/cloud/services/loadbalancer/loadbalancer.go @@ -31,8 +31,6 @@ import ( "k8s.io/apimachinery/pkg/util/wait" utilsnet "k8s.io/utils/net" "k8s.io/utils/pointer" - clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" - "sigs.k8s.io/cluster-api/util" infrav1 "sigs.k8s.io/cluster-api-provider-openstack/api/v1beta1" "sigs.k8s.io/cluster-api-provider-openstack/pkg/record" @@ -645,9 +643,9 @@ func (s *Service) DeleteLoadBalancer(openStackCluster *infrav1.OpenStackCluster, return nil } -func (s *Service) DeleteLoadBalancerMember(openStackCluster *infrav1.OpenStackCluster, machine *clusterv1.Machine, openStackMachine 
*infrav1.OpenStackMachine, clusterResourceName string) error {
-	if openStackMachine == nil || !util.IsControlPlaneMachine(machine) {
-		return nil
+func (s *Service) DeleteLoadBalancerMember(openStackCluster *infrav1.OpenStackCluster, openStackMachine *infrav1.OpenStackMachine, clusterResourceName string) error {
+	if openStackMachine == nil {
+		return errors.New("openStackMachine is nil")
 	}
 
 	loadBalancerName := getLoadBalancerName(clusterResourceName)
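
For reference, below is a condensed sketch of the delete flow this change establishes in reconcileDelete. It is not part of the patch: error handling, condition updates and finalizer management are elided, and it only restates the logic shown in the hunks above using the helpers introduced there.

	// Look the server up by ID if we have one, otherwise fall back to a name lookup.
	instanceStatus, _ := getInstanceStatus(openStackMachine, computeService)

	// Control plane machines may also own a load balancer member or a floating IP.
	if util.IsControlPlaneMachine(machine) {
		_ = removeAPIServerEndpoint(scope, openStackCluster, openStackMachine, instanceStatus, clusterResourceName)
	}

	if instanceStatus == nil && openStackMachine.Status.Resolved != nil {
		// The server was never created (or is already gone), so any volumes
		// created ahead of it must be deleted explicitly.
		instanceSpec, _ := machineToInstanceSpec(openStackCluster, machine, openStackMachine, "")
		_ = computeService.DeleteVolumes(instanceSpec)
	}

	if instanceStatus != nil {
		// Volumes were attached with DeleteOnTermination=true, so they are
		// removed together with the server.
		_ = computeService.DeleteInstance(openStackMachine, instanceStatus)
	}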