diff --git a/api/v1alpha5/types.go b/api/v1alpha5/types.go
index d7b2c9ba28..fd3a58e240 100644
--- a/api/v1alpha5/types.go
+++ b/api/v1alpha5/types.go
@@ -273,8 +273,8 @@ func (r SecurityGroupRule) Equal(x SecurityGroupRule) bool {
 type InstanceState string
 
 var (
-	// InstanceStateBuilding is the string representing an instance in a building state.
-	InstanceStateBuilding = InstanceState("BUILDING")
+	// InstanceStateBuild is the string representing an instance in a build state.
+	InstanceStateBuild = InstanceState("BUILD")
 
 	// InstanceStateActive is the string representing an instance in an active state.
 	InstanceStateActive = InstanceState("ACTIVE")
@@ -290,6 +290,9 @@ var (
 
 	// InstanceStateDeleted is the string representing an instance in a deleted state.
 	InstanceStateDeleted = InstanceState("DELETED")
+
+	// InstanceStateUndefined is the string representing an undefined instance state.
+	InstanceStateUndefined = InstanceState("")
 )
 
 // Bastion represents basic information about the bastion node.
diff --git a/api/v1alpha6/types.go b/api/v1alpha6/types.go
index 2e81c35fd2..0301d8d8cc 100644
--- a/api/v1alpha6/types.go
+++ b/api/v1alpha6/types.go
@@ -285,8 +285,8 @@ func (r SecurityGroupRule) Equal(x SecurityGroupRule) bool {
 type InstanceState string
 
 var (
-	// InstanceStateBuilding is the string representing an instance in a building state.
-	InstanceStateBuilding = InstanceState("BUILDING")
+	// InstanceStateBuild is the string representing an instance in a build state.
+	InstanceStateBuild = InstanceState("BUILD")
 
 	// InstanceStateActive is the string representing an instance in an active state.
 	InstanceStateActive = InstanceState("ACTIVE")
@@ -302,6 +302,9 @@ var (
 
 	// InstanceStateDeleted is the string representing an instance in a deleted state.
 	InstanceStateDeleted = InstanceState("DELETED")
+
+	// InstanceStateUndefined is the string representing an undefined instance state.
+	InstanceStateUndefined = InstanceState("")
 )
 
 // Bastion represents basic information about the bastion node.
diff --git a/api/v1alpha7/types.go b/api/v1alpha7/types.go
index 3ef5305ff2..dd14b84877 100644
--- a/api/v1alpha7/types.go
+++ b/api/v1alpha7/types.go
@@ -315,8 +315,8 @@ func (r SecurityGroupRule) Equal(x SecurityGroupRule) bool {
 type InstanceState string
 
 var (
-	// InstanceStateBuilding is the string representing an instance in a building state.
-	InstanceStateBuilding = InstanceState("BUILDING")
+	// InstanceStateBuild is the string representing an instance in a build state.
+	InstanceStateBuild = InstanceState("BUILD")
 
 	// InstanceStateActive is the string representing an instance in an active state.
 	InstanceStateActive = InstanceState("ACTIVE")
@@ -332,6 +332,9 @@ var (
 
 	// InstanceStateDeleted is the string representing an instance in a deleted state.
 	InstanceStateDeleted = InstanceState("DELETED")
+
+	// InstanceStateUndefined is the string representing an undefined instance state.
+	InstanceStateUndefined = InstanceState("")
 )
 
 // Bastion represents basic information about the bastion node.
diff --git a/api/v1alpha8/types.go b/api/v1alpha8/types.go
index cccf992e70..d945249e00 100644
--- a/api/v1alpha8/types.go
+++ b/api/v1alpha8/types.go
@@ -321,8 +321,8 @@ func (r SecurityGroupRule) Equal(x SecurityGroupRule) bool {
 type InstanceState string
 
 var (
-	// InstanceStateBuilding is the string representing an instance in a building state.
-	InstanceStateBuilding = InstanceState("BUILDING")
+	// InstanceStateBuild is the string representing an instance in a build state.
+ InstanceStateBuild = InstanceState("BUILD") // InstanceStateActive is the string representing an instance in an active state. InstanceStateActive = InstanceState("ACTIVE") @@ -338,6 +338,9 @@ var ( // InstanceStateDeleted is the string representing an instance in a deleted state. InstanceStateDeleted = InstanceState("DELETED") + + // InstanceStateUndefined is the string representing an undefined instance state. + InstanceStateUndefined = InstanceState("") ) // Bastion represents basic information about the bastion node. diff --git a/controllers/openstackcluster_controller.go b/controllers/openstackcluster_controller.go index f83b8b75a9..cc574755d5 100644 --- a/controllers/openstackcluster_controller.go +++ b/controllers/openstackcluster_controller.go @@ -215,10 +215,24 @@ func deleteBastion(scope scope.Scope, cluster *clusterv1.Cluster, openStackClust return err } - instanceName := fmt.Sprintf("%s-bastion", cluster.Name) - instanceStatus, err := computeService.GetInstanceStatusByName(openStackCluster, instanceName) - if err != nil { - return err + if openStackCluster.Status.Bastion != nil && openStackCluster.Status.Bastion.FloatingIP != "" { + if err = networkingService.DeleteFloatingIP(openStackCluster, openStackCluster.Status.Bastion.FloatingIP); err != nil { + handleUpdateOSCError(openStackCluster, fmt.Errorf("failed to delete floating IP: %w", err)) + return fmt.Errorf("failed to delete floating IP: %w", err) + } + } + + var instanceStatus *compute.InstanceStatus + if openStackCluster.Status.Bastion != nil && openStackCluster.Status.Bastion.ID != "" { + instanceStatus, err = computeService.GetInstanceStatus(openStackCluster.Status.Bastion.ID) + if err != nil { + return err + } + } else { + instanceStatus, err = computeService.GetInstanceStatusByName(openStackCluster, bastionName(cluster.Name)) + if err != nil { + return err + } } if instanceStatus != nil { @@ -230,6 +244,7 @@ func deleteBastion(scope scope.Scope, cluster *clusterv1.Cluster, openStackClust for _, address := range addresses { if address.Type == corev1.NodeExternalIP { + // Floating IP may not have properly saved in bastion status (thus not deleted above), delete any remaining floating IP if err = networkingService.DeleteFloatingIP(openStackCluster, address.Address); err != nil { handleUpdateOSCError(openStackCluster, fmt.Errorf("failed to delete floating IP: %w", err)) return fmt.Errorf("failed to delete floating IP: %w", err) @@ -277,8 +292,9 @@ func reconcileNormal(scope scope.Scope, cluster *clusterv1.Cluster, openStackClu return reconcile.Result{}, err } - if err = reconcileBastion(scope, cluster, openStackCluster); err != nil { - return reconcile.Result{}, err + result, err := reconcileBastion(scope, cluster, openStackCluster) + if err != nil || !reflect.DeepEqual(result, reconcile.Result{}) { + return result, err } availabilityZones, err := computeService.GetAvailabilityZones() @@ -308,88 +324,112 @@ func reconcileNormal(scope scope.Scope, cluster *clusterv1.Cluster, openStackClu return reconcile.Result{}, nil } -func reconcileBastion(scope scope.Scope, cluster *clusterv1.Cluster, openStackCluster *infrav1.OpenStackCluster) error { +func reconcileBastion(scope scope.Scope, cluster *clusterv1.Cluster, openStackCluster *infrav1.OpenStackCluster) (ctrl.Result, error) { scope.Logger().Info("Reconciling Bastion") if openStackCluster.Spec.Bastion == nil || !openStackCluster.Spec.Bastion.Enabled { - return deleteBastion(scope, cluster, openStackCluster) + return reconcile.Result{}, deleteBastion(scope, cluster, 
openStackCluster) } computeService, err := compute.NewService(scope) if err != nil { - return err + return reconcile.Result{}, err } instanceSpec := bastionToInstanceSpec(openStackCluster, cluster.Name) bastionHash, err := compute.HashInstanceSpec(instanceSpec) if err != nil { - return fmt.Errorf("failed computing bastion hash from instance spec: %w", err) + return reconcile.Result{}, fmt.Errorf("failed computing bastion hash from instance spec: %w", err) + } + if bastionHashHasChanged(bastionHash, openStackCluster.ObjectMeta.Annotations) { + if err := deleteBastion(scope, cluster, openStackCluster); err != nil { + return ctrl.Result{}, err + } } - instanceStatus, err := computeService.GetInstanceStatusByName(openStackCluster, fmt.Sprintf("%s-bastion", cluster.Name)) - if err != nil { - return err + var instanceStatus *compute.InstanceStatus + if openStackCluster.Status.Bastion != nil && openStackCluster.Status.Bastion.ID != "" { + if instanceStatus, err = computeService.GetInstanceStatus(openStackCluster.Status.Bastion.ID); err != nil { + return reconcile.Result{}, err + } } - if instanceStatus != nil { - if !bastionHashHasChanged(bastionHash, openStackCluster.ObjectMeta.Annotations) { - bastion, err := instanceStatus.BastionStatus(openStackCluster) - if err != nil { - return err - } - // Add the current hash if no annotation is set. - if _, ok := openStackCluster.ObjectMeta.Annotations[BastionInstanceHashAnnotation]; !ok { - annotations.AddAnnotations(openStackCluster, map[string]string{BastionInstanceHashAnnotation: bastionHash}) - } - openStackCluster.Status.Bastion = bastion - return nil + if instanceStatus == nil { + // Check if there is an existing instance with bastion name, in case where bastion ID would not have been properly stored in cluster status + if instanceStatus, err = computeService.GetInstanceStatusByName(openStackCluster, instanceSpec.Name); err != nil { + return reconcile.Result{}, err } - - if err := deleteBastion(scope, cluster, openStackCluster); err != nil { - return err + } + if instanceStatus == nil { + instanceStatus, err = computeService.CreateInstance(openStackCluster, openStackCluster, instanceSpec, cluster.Name) + if err != nil { + return reconcile.Result{}, fmt.Errorf("failed to create bastion: %w", err) } } - instanceStatus, err = computeService.CreateInstance(openStackCluster, openStackCluster, instanceSpec, cluster.Name) - if err != nil { - return fmt.Errorf("failed to reconcile bastion: %w", err) + // Save hash & status as soon as we know we have an instance + instanceStatus.UpdateBastionStatus(openStackCluster) + annotations.AddAnnotations(openStackCluster, map[string]string{BastionInstanceHashAnnotation: bastionHash}) + + // Make sure that bastion instance has a valid state + switch instanceStatus.State() { + case infrav1.InstanceStateError: + return ctrl.Result{}, fmt.Errorf("failed to reconcile bastion, instance state is ERROR") + case infrav1.InstanceStateBuild, infrav1.InstanceStateUndefined: + scope.Logger().Info("Waiting for bastion instance to become ACTIVE", "id", instanceStatus.ID(), "status", instanceStatus.State()) + return ctrl.Result{RequeueAfter: waitForBuildingInstanceToReconcile}, nil + case infrav1.InstanceStateDeleted: + // This should normally be handled by deleteBastion + openStackCluster.Status.Bastion = nil + return ctrl.Result{}, nil } networkingService, err := networking.NewService(scope) if err != nil { - return err - } - clusterName := fmt.Sprintf("%s-%s", cluster.Namespace, cluster.Name) - fp, err := 
networkingService.GetOrCreateFloatingIP(openStackCluster, openStackCluster, clusterName, openStackCluster.Spec.Bastion.Instance.FloatingIP) - if err != nil { - handleUpdateOSCError(openStackCluster, fmt.Errorf("failed to get or create floating IP for bastion: %w", err)) - return fmt.Errorf("failed to get or create floating IP for bastion: %w", err) + return ctrl.Result{}, err } port, err := computeService.GetManagementPort(openStackCluster, instanceStatus) if err != nil { err = fmt.Errorf("getting management port for bastion: %w", err) handleUpdateOSCError(openStackCluster, err) - return err + return ctrl.Result{}, err } - err = networkingService.AssociateFloatingIP(openStackCluster, fp, port.ID) + fp, err := networkingService.GetFloatingIPByPortID(port.ID) if err != nil { - handleUpdateOSCError(openStackCluster, fmt.Errorf("failed to associate floating IP with bastion: %w", err)) - return fmt.Errorf("failed to associate floating IP with bastion: %w", err) + handleUpdateOSCError(openStackCluster, fmt.Errorf("failed to get or create floating IP for bastion: %w", err)) + return ctrl.Result{}, fmt.Errorf("failed to get floating IP for bastion port: %w", err) + } + if fp != nil { + // Floating IP is already attached to bastion, no need to proceed + openStackCluster.Status.Bastion.FloatingIP = fp.FloatingIP + return ctrl.Result{}, nil } - bastion, err := instanceStatus.BastionStatus(openStackCluster) + clusterName := fmt.Sprintf("%s-%s", cluster.Namespace, cluster.Name) + floatingIP := openStackCluster.Spec.Bastion.Instance.FloatingIP + if openStackCluster.Status.Bastion.FloatingIP != "" { + // Some floating IP has already been created for this bastion, make sure we re-use it + floatingIP = openStackCluster.Status.Bastion.FloatingIP + } + // Check if there is an existing floating IP attached to bastion, in case where FloatingIP would not yet have been stored in cluster status + fp, err = networkingService.GetOrCreateFloatingIP(openStackCluster, openStackCluster, clusterName, floatingIP) if err != nil { - return err + handleUpdateOSCError(openStackCluster, fmt.Errorf("failed to get or create floating IP for bastion: %w", err)) + return ctrl.Result{}, fmt.Errorf("failed to get or create floating IP for bastion: %w", err) } - bastion.FloatingIP = fp.FloatingIP - openStackCluster.Status.Bastion = bastion - annotations.AddAnnotations(openStackCluster, map[string]string{BastionInstanceHashAnnotation: bastionHash}) - return nil + openStackCluster.Status.Bastion.FloatingIP = fp.FloatingIP + + err = networkingService.AssociateFloatingIP(openStackCluster, fp, port.ID) + if err != nil { + handleUpdateOSCError(openStackCluster, fmt.Errorf("failed to associate floating IP with bastion: %w", err)) + return ctrl.Result{}, fmt.Errorf("failed to associate floating IP with bastion: %w", err) + } + + return ctrl.Result{}, nil } func bastionToInstanceSpec(openStackCluster *infrav1.OpenStackCluster, clusterName string) *compute.InstanceSpec { - name := fmt.Sprintf("%s-bastion", clusterName) instanceSpec := &compute.InstanceSpec{ - Name: name, + Name: bastionName(clusterName), Flavor: openStackCluster.Spec.Bastion.Instance.Flavor, SSHKeyName: openStackCluster.Spec.Bastion.Instance.SSHKeyName, Image: openStackCluster.Spec.Bastion.Instance.Image, @@ -412,6 +452,10 @@ func bastionToInstanceSpec(openStackCluster *infrav1.OpenStackCluster, clusterNa return instanceSpec } +func bastionName(clusterName string) string { + return fmt.Sprintf("%s-bastion", clusterName) +} + // bastionHashHasChanged returns a boolean whether 
if the latest bastion hash, built from the instance spec, has changed or not. func bastionHashHasChanged(computeHash string, clusterAnnotations map[string]string) bool { latestHash, ok := clusterAnnotations[BastionInstanceHashAnnotation] diff --git a/controllers/openstackcluster_controller_test.go b/controllers/openstackcluster_controller_test.go index d94e590cc2..5c59e7c807 100644 --- a/controllers/openstackcluster_controller_test.go +++ b/controllers/openstackcluster_controller_test.go @@ -22,10 +22,15 @@ import ( "github.com/go-logr/logr" "github.com/golang/mock/gomock" + "github.com/gophercloud/gophercloud" + "github.com/gophercloud/gophercloud/openstack/compute/v2/extensions/attachinterfaces" "github.com/gophercloud/gophercloud/openstack/compute/v2/servers" + "github.com/gophercloud/gophercloud/openstack/networking/v2/extensions" "github.com/gophercloud/gophercloud/openstack/networking/v2/extensions/external" + "github.com/gophercloud/gophercloud/openstack/networking/v2/extensions/layer3/floatingips" "github.com/gophercloud/gophercloud/openstack/networking/v2/extensions/security/groups" "github.com/gophercloud/gophercloud/openstack/networking/v2/networks" + "github.com/gophercloud/gophercloud/openstack/networking/v2/ports" "github.com/gophercloud/gophercloud/openstack/networking/v2/subnets" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" @@ -196,15 +201,176 @@ var _ = Describe("OpenStackCluster controller", func() { Expect(err).To(BeNil()) err = k8sClient.Create(ctx, capiCluster) Expect(err).To(BeNil()) + testCluster.Status = infrav1.OpenStackClusterStatus{ + Bastion: &infrav1.BastionStatus{ + ID: "bastion-uuid", + }, + } + err = k8sClient.Status().Update(ctx, testCluster) + Expect(err).To(BeNil()) scope, err := mockScopeFactory.NewClientScopeFromCluster(ctx, k8sClient, testCluster, nil, logr.Discard()) Expect(err).To(BeNil()) + computeClientRecorder := mockScopeFactory.ComputeClient.EXPECT() + computeClientRecorder.GetServer("bastion-uuid").Return(nil, gophercloud.ErrResourceNotFound{}) + + networkClientRecorder := mockScopeFactory.NetworkClient.EXPECT() + networkClientRecorder.ListSecGroup(gomock.Any()).Return([]groups.SecGroup{}, nil) + + err = deleteBastion(scope, capiCluster, testCluster) + Expect(testCluster.Status.Bastion).To(BeNil()) + Expect(err).To(BeNil()) + }) + It("should adopt an existing bastion even if its uuid is not stored in status", func() { + testCluster.SetName("adopt-existing-bastion") + testCluster.Spec = infrav1.OpenStackClusterSpec{ + Bastion: &infrav1.Bastion{ + Enabled: true, + }, + } + err := k8sClient.Create(ctx, testCluster) + Expect(err).To(BeNil()) + err = k8sClient.Create(ctx, capiCluster) + Expect(err).To(BeNil()) + testCluster.Status = infrav1.OpenStackClusterStatus{ + Network: &infrav1.NetworkStatusWithSubnets{ + NetworkStatus: infrav1.NetworkStatus{ + Name: "network-name", + }, + }, + } + err = k8sClient.Status().Update(ctx, testCluster) + Expect(err).To(BeNil()) + + scope, err := mockScopeFactory.NewClientScopeFromCluster(ctx, k8sClient, testCluster, nil, logr.Discard()) + Expect(err).To(BeNil()) + + server := clients.ServerExt{} + server.ID = "adopted-bastion-uuid" + server.Status = "ACTIVE" + + computeClientRecorder := mockScopeFactory.ComputeClient.EXPECT() + computeClientRecorder.ListServers(servers.ListOpts{ + Name: "^capi-cluster-bastion$", + }).Return([]clients.ServerExt{server}, nil) + + 
networkClientRecorder := mockScopeFactory.NetworkClient.EXPECT() + networkClientRecorder.ListPort(gomock.Any()).Return([]ports.Port{{ID: "portID"}}, nil) + networkClientRecorder.ListFloatingIP(floatingips.ListOpts{PortID: "portID"}).Return(make([]floatingips.FloatingIP, 1), nil) + + res, err := reconcileBastion(scope, capiCluster, testCluster) + Expect(testCluster.Status.Bastion).To(Equal(&infrav1.BastionStatus{ID: "adopted-bastion-uuid", State: "ACTIVE"})) + Expect(err).To(BeNil()) + Expect(res).To(Equal(reconcile.Result{})) + }) + It("should adopt an existing bastion Floating IP if even if its uuid is not stored in status", func() { + testCluster.SetName("requeue-bastion") + testCluster.Spec = infrav1.OpenStackClusterSpec{ + Bastion: &infrav1.Bastion{ + Enabled: true, + }, + } + err := k8sClient.Create(ctx, testCluster) + Expect(err).To(BeNil()) + err = k8sClient.Create(ctx, capiCluster) + Expect(err).To(BeNil()) + testCluster.Status = infrav1.OpenStackClusterStatus{ + Network: &infrav1.NetworkStatusWithSubnets{ + NetworkStatus: infrav1.NetworkStatus{ + Name: "network-name", + }, + }, + Bastion: &infrav1.BastionStatus{ + ID: "adopted-fip-bastion-uuid", + }, + } + err = k8sClient.Status().Update(ctx, testCluster) + Expect(err).To(BeNil()) + + scope, err := mockScopeFactory.NewClientScopeFromCluster(ctx, k8sClient, testCluster, nil, logr.Discard()) + Expect(err).To(BeNil()) + + server := clients.ServerExt{} + server.ID = "adopted-fip-bastion-uuid" + server.Status = "ACTIVE" + + computeClientRecorder := mockScopeFactory.ComputeClient.EXPECT() + computeClientRecorder.GetServer("adopted-fip-bastion-uuid").Return(&server, nil) + + networkClientRecorder := mockScopeFactory.NetworkClient.EXPECT() + networkClientRecorder.ListPort(gomock.Any()).Return([]ports.Port{{ID: "portID"}}, nil) + networkClientRecorder.ListFloatingIP(floatingips.ListOpts{PortID: "portID"}).Return([]floatingips.FloatingIP{{FloatingIP: "1.2.3.4"}}, nil) + + res, err := reconcileBastion(scope, capiCluster, testCluster) + Expect(testCluster.Status.Bastion).To(Equal(&infrav1.BastionStatus{ID: "adopted-fip-bastion-uuid", State: "ACTIVE", FloatingIP: "1.2.3.4"})) + Expect(err).To(BeNil()) + Expect(res).To(Equal(reconcile.Result{})) + }) + It("should requeue until bastion becomes active", func() { + testCluster.SetName("requeue-bastion") + testCluster.Spec = infrav1.OpenStackClusterSpec{ + Bastion: &infrav1.Bastion{ + Enabled: true, + }, + } + err := k8sClient.Create(ctx, testCluster) + Expect(err).To(BeNil()) + err = k8sClient.Create(ctx, capiCluster) + Expect(err).To(BeNil()) + testCluster.Status = infrav1.OpenStackClusterStatus{ + Network: &infrav1.NetworkStatusWithSubnets{ + NetworkStatus: infrav1.NetworkStatus{ + Name: "network-name", + }, + }, + Bastion: &infrav1.BastionStatus{ + ID: "requeue-bastion-uuid", + }, + } + err = k8sClient.Status().Update(ctx, testCluster) + Expect(err).To(BeNil()) + + scope, err := mockScopeFactory.NewClientScopeFromCluster(ctx, k8sClient, testCluster, nil, logr.Discard()) + Expect(err).To(BeNil()) + + server := clients.ServerExt{} + server.ID = "requeue-bastion-uuid" + server.Status = "BUILD" + + computeClientRecorder := mockScopeFactory.ComputeClient.EXPECT() + computeClientRecorder.GetServer("requeue-bastion-uuid").Return(&server, nil) + + res, err := reconcileBastion(scope, capiCluster, testCluster) + Expect(testCluster.Status.Bastion).To(Equal(&infrav1.BastionStatus{ID: "requeue-bastion-uuid", State: "BUILD"})) + Expect(err).To(BeNil()) + Expect(res).To(Equal(reconcile.Result{RequeueAfter: 
waitForBuildingInstanceToReconcile})) + }) + It("should delete an existing bastion even if its uuid is not stored in status", func() { + testCluster.SetName("delete-existing-bastion") + testCluster.Spec = infrav1.OpenStackClusterSpec{ + Bastion: &infrav1.Bastion{}, + } + err := k8sClient.Create(ctx, testCluster) + Expect(err).To(BeNil()) + err = k8sClient.Create(ctx, capiCluster) + Expect(err).To(BeNil()) + + scope, err := mockScopeFactory.NewClientScopeFromCluster(ctx, k8sClient, testCluster, nil, logr.Discard()) + Expect(err).To(BeNil()) + + server := clients.ServerExt{} + server.ID = "delete-bastion-uuid" + computeClientRecorder := mockScopeFactory.ComputeClient.EXPECT() computeClientRecorder.ListServers(servers.ListOpts{ Name: "^capi-cluster-bastion$", - }).Return([]clients.ServerExt{}, nil) + }).Return([]clients.ServerExt{server}, nil) + computeClientRecorder.ListAttachedInterfaces("delete-bastion-uuid").Return([]attachinterfaces.Interface{}, nil) + computeClientRecorder.DeleteServer("delete-bastion-uuid").Return(nil) + computeClientRecorder.GetServer("delete-bastion-uuid").Return(nil, gophercloud.ErrResourceNotFound{}) networkClientRecorder := mockScopeFactory.NetworkClient.EXPECT() + networkClientRecorder.ListExtensions().Return([]extensions.Extension{}, nil) networkClientRecorder.ListSecGroup(gomock.Any()).Return([]groups.SecGroup{}, nil) err = deleteBastion(scope, capiCluster, testCluster) diff --git a/controllers/openstackmachine_controller.go b/controllers/openstackmachine_controller.go index c797e218b9..83ac3726a5 100644 --- a/controllers/openstackmachine_controller.go +++ b/controllers/openstackmachine_controller.go @@ -67,6 +67,7 @@ type OpenStackMachineReconciler struct { const ( waitForClusterInfrastructureReadyDuration = 15 * time.Second waitForInstanceBecomeActiveToReconcile = 60 * time.Second + waitForBuildingInstanceToReconcile = 10 * time.Second ) // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=openstackmachines,verbs=get;list;watch;create;update;patch;delete @@ -252,8 +253,13 @@ func (r *OpenStackMachineReconciler) reconcileDelete(scope scope.Scope, cluster } } - instanceStatus, err := computeService.GetInstanceStatusByName(openStackMachine, openStackMachine.Name) - if err != nil { + var instanceStatus *compute.InstanceStatus + if openStackMachine.Spec.InstanceID != nil { + instanceStatus, err = computeService.GetInstanceStatus(*openStackMachine.Spec.InstanceID) + if err != nil { + return ctrl.Result{}, err + } + } else if instanceStatus, err = computeService.GetInstanceStatusByName(openStackMachine, openStackMachine.Name); err != nil { return ctrl.Result{}, err } if !openStackCluster.Spec.APIServerLoadBalancer.Enabled && util.IsControlPlaneMachine(machine) && openStackCluster.Spec.APIServerFloatingIP == "" { @@ -385,6 +391,9 @@ func (r *OpenStackMachineReconciler) reconcileNormal(ctx context.Context, scope scope.Logger().Info("Machine instance state is DELETED, no actions") conditions.MarkFalse(openStackMachine, infrav1.InstanceReadyCondition, infrav1.InstanceDeletedReason, clusterv1.ConditionSeverityError, "") return ctrl.Result{}, nil + case infrav1.InstanceStateBuild, infrav1.InstanceStateUndefined: + scope.Logger().Info("Waiting for instance to become ACTIVE", "id", instanceStatus.ID(), "status", instanceStatus.State()) + return ctrl.Result{RequeueAfter: waitForBuildingInstanceToReconcile}, nil default: // The other state is normal (for example, migrating, shutoff) but we don't want to proceed until it's ACTIVE // due to potential conflict 
or unexpected actions @@ -437,19 +446,28 @@ func (r *OpenStackMachineReconciler) reconcileNormal(ctx context.Context, scope } func (r *OpenStackMachineReconciler) getOrCreate(logger logr.Logger, cluster *clusterv1.Cluster, openStackCluster *infrav1.OpenStackCluster, machine *clusterv1.Machine, openStackMachine *infrav1.OpenStackMachine, computeService *compute.Service, userData string) (*compute.InstanceStatus, error) { - instanceStatus, err := computeService.GetInstanceStatusByName(openStackMachine, openStackMachine.Name) - if err != nil { - logger.Info("Unable to get OpenStack instance", "name", openStackMachine.Name) - conditions.MarkFalse(openStackMachine, infrav1.InstanceReadyCondition, infrav1.OpenStackErrorReason, clusterv1.ConditionSeverityError, err.Error()) - return nil, err + var instanceStatus *compute.InstanceStatus + var err error + if openStackMachine.Spec.InstanceID != nil { + instanceStatus, err = computeService.GetInstanceStatus(*openStackMachine.Spec.InstanceID) + if err != nil { + logger.Info("Unable to get OpenStack instance", "name", openStackMachine.Name) + conditions.MarkFalse(openStackMachine, infrav1.InstanceReadyCondition, infrav1.OpenStackErrorReason, clusterv1.ConditionSeverityError, err.Error()) + return nil, err + } } - if instanceStatus == nil { - if openStackMachine.Spec.InstanceID != nil { - logger.Info("Not reconciling machine in failed state. The previously existing OpenStack instance is no longer available") - conditions.MarkFalse(openStackMachine, infrav1.InstanceReadyCondition, infrav1.InstanceNotFoundReason, clusterv1.ConditionSeverityError, "virtual machine no longer exists") - openStackMachine.SetFailure(capierrors.UpdateMachineError, errors.New("virtual machine no longer exists")) - return nil, nil + // Check if there is an existing instance with machine name, in case where instance ID would not have been stored in machine status + if instanceStatus, err = computeService.GetInstanceStatusByName(openStackMachine, openStackMachine.Name); err == nil { + if instanceStatus != nil { + return instanceStatus, nil + } + if openStackMachine.Spec.InstanceID != nil { + logger.Info("Not reconciling machine in failed state. 
The previously existing OpenStack instance is no longer available") + conditions.MarkFalse(openStackMachine, infrav1.InstanceReadyCondition, infrav1.InstanceNotFoundReason, clusterv1.ConditionSeverityError, "virtual machine no longer exists") + openStackMachine.SetFailure(capierrors.UpdateMachineError, errors.New("virtual machine no longer exists")) + return nil, nil + } } instanceSpec := machineToInstanceSpec(openStackCluster, machine, openStackMachine, userData) logger.Info("Machine does not exist, creating Machine", "name", openStackMachine.Name) @@ -459,7 +477,6 @@ func (r *OpenStackMachineReconciler) getOrCreate(logger logr.Logger, cluster *cl return nil, fmt.Errorf("create OpenStack instance: %w", err) } } - return instanceStatus, nil } diff --git a/controllers/suite_test.go b/controllers/suite_test.go index 3a63e41d9a..fdbda08855 100644 --- a/controllers/suite_test.go +++ b/controllers/suite_test.go @@ -32,12 +32,14 @@ import ( "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/kubernetes/scheme" "k8s.io/client-go/rest" + "k8s.io/utils/pointer" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" "sigs.k8s.io/cluster-api/test/framework" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/envtest" infrav1 "sigs.k8s.io/cluster-api-provider-openstack/api/v1alpha8" + "sigs.k8s.io/cluster-api-provider-openstack/pkg/clients" "sigs.k8s.io/cluster-api-provider-openstack/pkg/cloud/services/compute" "sigs.k8s.io/cluster-api-provider-openstack/pkg/scope" "sigs.k8s.io/cluster-api-provider-openstack/test/helpers/external" @@ -148,9 +150,13 @@ var _ = Describe("When calling getOrCreate", func() { cluster := &clusterv1.Cluster{} openStackCluster := &infrav1.OpenStackCluster{} machine := &clusterv1.Machine{} - openStackMachine := &infrav1.OpenStackMachine{} + openStackMachine := &infrav1.OpenStackMachine{ + Spec: infrav1.OpenStackMachineSpec{ + InstanceID: pointer.String("machine-uuid"), + }, + } - mockScopeFactory.ComputeClient.EXPECT().ListServers(gomock.Any()).Return(nil, errors.New("Test error when listing servers")) + mockScopeFactory.ComputeClient.EXPECT().GetServer(gomock.Any()).Return(nil, errors.New("Test error when getting server")) instanceStatus, err := reconsiler.getOrCreate(logger, cluster, openStackCluster, machine, openStackMachine, computeService, "") Expect(err).To(HaveOccurred()) Expect(instanceStatus).To(BeNil()) @@ -163,4 +169,19 @@ var _ = Describe("When calling getOrCreate", func() { } } }) + + It("should retrieve instance by name if no ID is stored", func() { + cluster := &clusterv1.Cluster{} + openStackCluster := &infrav1.OpenStackCluster{} + machine := &clusterv1.Machine{} + openStackMachine := &infrav1.OpenStackMachine{} + servers := make([]clients.ServerExt, 1) + servers[0].ID = "machine-uuid" + + mockScopeFactory.ComputeClient.EXPECT().ListServers(gomock.Any()).Return(servers, nil) + instanceStatus, err := reconsiler.getOrCreate(logger, cluster, openStackCluster, machine, openStackMachine, computeService, "") + Expect(err).ToNot(HaveOccurred()) + Expect(instanceStatus).ToNot(BeNil()) + Expect(instanceStatus.ID()).To(Equal("machine-uuid")) + }) }) diff --git a/pkg/cloud/services/compute/instance.go b/pkg/cloud/services/compute/instance.go index 15e9756895..b5a91b16c2 100644 --- a/pkg/cloud/services/compute/instance.go +++ b/pkg/cloud/services/compute/instance.go @@ -333,27 +333,8 @@ func (s *Service) createInstanceImpl(eventObject runtime.Object, openStackCluste return nil, fmt.Errorf("error creating Openstack instance: %v", err) } - var 
createdInstance *InstanceStatus - err = wait.PollUntilContextTimeout(context.TODO(), retryInterval, instanceCreateTimeout, true, func(_ context.Context) (bool, error) { - createdInstance, err = s.GetInstanceStatus(server.ID) - if err != nil { - if capoerrors.IsRetryable(err) { - return false, nil - } - return false, err - } - if createdInstance.State() == infrav1.InstanceStateError { - return false, fmt.Errorf("error creating OpenStack instance %s, status changed to error", createdInstance.ID()) - } - return createdInstance.State() == infrav1.InstanceStateActive, nil - }) - if err != nil { - record.Warnf(eventObject, "FailedCreateServer", "Failed to create server %s: %v", instanceSpec.Name, err) - return nil, err - } - - record.Eventf(eventObject, "SuccessfulCreateServer", "Created server %s with id %s", createdInstance.Name(), createdInstance.ID()) - return createdInstance, nil + record.Eventf(eventObject, "SuccessfulCreateServer", "Created server %s with id %s", server.Name, server.ID) + return &InstanceStatus{server, s.scope.Logger()}, nil } func volumeName(instanceName string, nameSuffix string) string { diff --git a/pkg/cloud/services/compute/instance_test.go b/pkg/cloud/services/compute/instance_test.go index 814fd807a9..350a54f74e 100644 --- a/pkg/cloud/services/compute/instance_test.go +++ b/pkg/cloud/services/compute/instance_test.go @@ -406,17 +406,6 @@ func TestService_ReconcileInstance(t *testing.T) { }) } - // Expected calls when polling for server creation - expectServerPoll := func(computeRecorder *mock.MockComputeClientMockRecorder, states []string) { - for _, state := range states { - computeRecorder.GetServer(instanceUUID).Return(returnedServer(state), nil) - } - } - - expectServerPollSuccess := func(computeRecorder *mock.MockComputeClientMockRecorder) { - expectServerPoll(computeRecorder, []string{"ACTIVE"}) - } - returnedVolume := func(uuid string, status string) *volumes.Volume { return &volumes.Volume{ ID: uuid, @@ -460,7 +449,6 @@ func TestService_ReconcileInstance(t *testing.T) { expectDefaultImageAndFlavor(r.compute, r.image) expectCreateServer(r.compute, getDefaultServerMap(), false) - expectServerPollSuccess(r.compute) }, wantErr: false, }, @@ -503,32 +491,6 @@ func TestService_ReconcileInstance(t *testing.T) { }, wantErr: true, }, - { - name: "Poll until server is created", - getInstanceSpec: getDefaultInstanceSpec, - expect: func(r *recorders) { - expectUseExistingDefaultPort(r.network) - expectDefaultImageAndFlavor(r.compute, r.image) - - expectCreateServer(r.compute, getDefaultServerMap(), false) - expectServerPoll(r.compute, []string{"BUILDING", "ACTIVE"}) - }, - wantErr: false, - }, - { - name: "Server errors during creation", - getInstanceSpec: getDefaultInstanceSpec, - expect: func(r *recorders) { - expectUseExistingDefaultPort(r.network) - expectDefaultImageAndFlavor(r.compute, r.image) - - expectCreateServer(r.compute, getDefaultServerMap(), false) - expectServerPoll(r.compute, []string{"BUILDING", "ERROR"}) - - // Don't delete ports because the server is created: DeleteInstance will do it - }, - wantErr: true, - }, { name: "Boot from volume success", getInstanceSpec: func() *InstanceSpec { @@ -567,7 +529,6 @@ func TestService_ReconcileInstance(t *testing.T) { }, } expectCreateServer(r.compute, createMap, false) - expectServerPollSuccess(r.compute) // Don't delete ports because the server is created: DeleteInstance will do it }, @@ -614,7 +575,6 @@ func TestService_ReconcileInstance(t *testing.T) { }, } expectCreateServer(r.compute, createMap, false) - 
expectServerPollSuccess(r.compute) // Don't delete ports because the server is created: DeleteInstance will do it }, @@ -734,7 +694,6 @@ func TestService_ReconcileInstance(t *testing.T) { }, } expectCreateServer(r.compute, createMap, false) - expectServerPollSuccess(r.compute) // Don't delete ports because the server is created: DeleteInstance will do it }, @@ -809,7 +768,6 @@ func TestService_ReconcileInstance(t *testing.T) { }, } expectCreateServer(r.compute, createMap, false) - expectServerPollSuccess(r.compute) // Don't delete ports because the server is created: DeleteInstance will do it }, @@ -870,7 +828,6 @@ func TestService_ReconcileInstance(t *testing.T) { }, } expectCreateServer(r.compute, createMap, false) - expectServerPollSuccess(r.compute) // Don't delete ports because the server is created: DeleteInstance will do it }, diff --git a/pkg/cloud/services/compute/instance_types.go b/pkg/cloud/services/compute/instance_types.go index d7b5634605..edac01b89d 100644 --- a/pkg/cloud/services/compute/instance_types.go +++ b/pkg/cloud/services/compute/instance_types.go @@ -100,25 +100,25 @@ func (is *InstanceStatus) AvailabilityZone() string { return is.server.AvailabilityZone } -// BastionStatus returns an infrav1.BastionStatus for use in the cluster status. -func (is *InstanceStatus) BastionStatus(openStackCluster *infrav1.OpenStackCluster) (*infrav1.BastionStatus, error) { - i := infrav1.BastionStatus{ - ID: is.ID(), - Name: is.Name(), - SSHKeyName: is.SSHKeyName(), - State: is.State(), +// BastionStatus updates BastionStatus in openStackCluster. +func (is *InstanceStatus) UpdateBastionStatus(openStackCluster *infrav1.OpenStackCluster) { + if openStackCluster.Status.Bastion == nil { + openStackCluster.Status.Bastion = &infrav1.BastionStatus{} } + openStackCluster.Status.Bastion.ID = is.ID() + openStackCluster.Status.Bastion.Name = is.Name() + openStackCluster.Status.Bastion.SSHKeyName = is.SSHKeyName() + openStackCluster.Status.Bastion.State = is.State() + ns, err := is.NetworkStatus() if err != nil { - return nil, err + // Bastion IP won't be saved in status, error is not critical + return } clusterNetwork := openStackCluster.Status.Network.Name - i.IP = ns.IP(clusterNetwork) - i.FloatingIP = ns.FloatingIP(clusterNetwork) - - return &i, nil + openStackCluster.Status.Bastion.IP = ns.IP(clusterNetwork) } // InstanceIdentifier returns an InstanceIdentifier object for an InstanceStatus.