-
Notifications
You must be signed in to change notification settings - Fork 270
🐛 Fix random instance port deletion #1753
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -215,10 +215,24 @@ func deleteBastion(scope scope.Scope, cluster *clusterv1.Cluster, openStackClust | |
return err | ||
} | ||
|
||
instanceName := fmt.Sprintf("%s-bastion", cluster.Name) | ||
instanceStatus, err := computeService.GetInstanceStatusByName(openStackCluster, instanceName) | ||
if err != nil { | ||
return err | ||
if openStackCluster.Status.Bastion != nil && openStackCluster.Status.Bastion.FloatingIP != "" { | ||
if err = networkingService.DeleteFloatingIP(openStackCluster, openStackCluster.Status.Bastion.FloatingIP); err != nil { | ||
handleUpdateOSCError(openStackCluster, fmt.Errorf("failed to delete floating IP: %w", err)) | ||
mdbooth marked this conversation as resolved.
Show resolved
Hide resolved
|
||
return fmt.Errorf("failed to delete floating IP: %w", err) | ||
} | ||
} | ||
|
||
var instanceStatus *compute.InstanceStatus | ||
if openStackCluster.Status.Bastion != nil && openStackCluster.Status.Bastion.ID != "" { | ||
instanceStatus, err = computeService.GetInstanceStatus(openStackCluster.Status.Bastion.ID) | ||
if err != nil { | ||
return err | ||
} | ||
} else { | ||
instanceStatus, err = computeService.GetInstanceStatusByName(openStackCluster, bastionName(cluster.Name)) | ||
if err != nil { | ||
return err | ||
} | ||
} | ||
|
||
if instanceStatus != nil { | ||
|
@@ -230,6 +244,7 @@ func deleteBastion(scope scope.Scope, cluster *clusterv1.Cluster, openStackClust | |
|
||
for _, address := range addresses { | ||
if address.Type == corev1.NodeExternalIP { | ||
// Floating IP may not have properly saved in bastion status (thus not deleted above), delete any remaining floating IP | ||
if err = networkingService.DeleteFloatingIP(openStackCluster, address.Address); err != nil { | ||
handleUpdateOSCError(openStackCluster, fmt.Errorf("failed to delete floating IP: %w", err)) | ||
return fmt.Errorf("failed to delete floating IP: %w", err) | ||
|
@@ -277,8 +292,9 @@ func reconcileNormal(scope scope.Scope, cluster *clusterv1.Cluster, openStackClu | |
return reconcile.Result{}, err | ||
} | ||
|
||
if err = reconcileBastion(scope, cluster, openStackCluster); err != nil { | ||
return reconcile.Result{}, err | ||
result, err := reconcileBastion(scope, cluster, openStackCluster) | ||
if err != nil || !reflect.DeepEqual(result, reconcile.Result{}) { | ||
return result, err | ||
} | ||
|
||
availabilityZones, err := computeService.GetAvailabilityZones() | ||
|
@@ -308,88 +324,112 @@ func reconcileNormal(scope scope.Scope, cluster *clusterv1.Cluster, openStackClu | |
return reconcile.Result{}, nil | ||
} | ||
|
||
func reconcileBastion(scope scope.Scope, cluster *clusterv1.Cluster, openStackCluster *infrav1.OpenStackCluster) error { | ||
func reconcileBastion(scope scope.Scope, cluster *clusterv1.Cluster, openStackCluster *infrav1.OpenStackCluster) (ctrl.Result, error) { | ||
scope.Logger().Info("Reconciling Bastion") | ||
|
||
if openStackCluster.Spec.Bastion == nil || !openStackCluster.Spec.Bastion.Enabled { | ||
return deleteBastion(scope, cluster, openStackCluster) | ||
return reconcile.Result{}, deleteBastion(scope, cluster, openStackCluster) | ||
} | ||
|
||
computeService, err := compute.NewService(scope) | ||
if err != nil { | ||
return err | ||
return reconcile.Result{}, err | ||
} | ||
|
||
instanceSpec := bastionToInstanceSpec(openStackCluster, cluster.Name) | ||
bastionHash, err := compute.HashInstanceSpec(instanceSpec) | ||
if err != nil { | ||
return fmt.Errorf("failed computing bastion hash from instance spec: %w", err) | ||
return reconcile.Result{}, fmt.Errorf("failed computing bastion hash from instance spec: %w", err) | ||
} | ||
if bastionHashHasChanged(bastionHash, openStackCluster.ObjectMeta.Annotations) { | ||
if err := deleteBastion(scope, cluster, openStackCluster); err != nil { | ||
return ctrl.Result{}, err | ||
} | ||
} | ||
|
||
instanceStatus, err := computeService.GetInstanceStatusByName(openStackCluster, fmt.Sprintf("%s-bastion", cluster.Name)) | ||
if err != nil { | ||
return err | ||
var instanceStatus *compute.InstanceStatus | ||
if openStackCluster.Status.Bastion != nil && openStackCluster.Status.Bastion.ID != "" { | ||
if instanceStatus, err = computeService.GetInstanceStatus(openStackCluster.Status.Bastion.ID); err != nil { | ||
return reconcile.Result{}, err | ||
} | ||
} | ||
if instanceStatus != nil { | ||
if !bastionHashHasChanged(bastionHash, openStackCluster.ObjectMeta.Annotations) { | ||
bastion, err := instanceStatus.BastionStatus(openStackCluster) | ||
if err != nil { | ||
return err | ||
} | ||
// Add the current hash if no annotation is set. | ||
if _, ok := openStackCluster.ObjectMeta.Annotations[BastionInstanceHashAnnotation]; !ok { | ||
annotations.AddAnnotations(openStackCluster, map[string]string{BastionInstanceHashAnnotation: bastionHash}) | ||
} | ||
openStackCluster.Status.Bastion = bastion | ||
return nil | ||
if instanceStatus == nil { | ||
// Check if there is an existing instance with bastion name, in case where bastion ID would not have been properly stored in cluster status | ||
if instanceStatus, err = computeService.GetInstanceStatusByName(openStackCluster, instanceSpec.Name); err != nil { | ||
return reconcile.Result{}, err | ||
} | ||
|
||
if err := deleteBastion(scope, cluster, openStackCluster); err != nil { | ||
return err | ||
} | ||
if instanceStatus == nil { | ||
instanceStatus, err = computeService.CreateInstance(openStackCluster, openStackCluster, instanceSpec, cluster.Name) | ||
if err != nil { | ||
return reconcile.Result{}, fmt.Errorf("failed to create bastion: %w", err) | ||
} | ||
} | ||
|
||
instanceStatus, err = computeService.CreateInstance(openStackCluster, openStackCluster, instanceSpec, cluster.Name) | ||
if err != nil { | ||
return fmt.Errorf("failed to reconcile bastion: %w", err) | ||
// Save hash & status as soon as we know we have an instance | ||
instanceStatus.UpdateBastionStatus(openStackCluster) | ||
annotations.AddAnnotations(openStackCluster, map[string]string{BastionInstanceHashAnnotation: bastionHash}) | ||
|
||
// Make sure that bastion instance has a valid state | ||
switch instanceStatus.State() { | ||
case infrav1.InstanceStateError: | ||
return ctrl.Result{}, fmt.Errorf("failed to reconcile bastion, instance state is ERROR") | ||
case infrav1.InstanceStateBuild, infrav1.InstanceStateUndefined: | ||
scope.Logger().Info("Waiting for bastion instance to become ACTIVE", "id", instanceStatus.ID(), "status", instanceStatus.State()) | ||
return ctrl.Result{RequeueAfter: waitForBuildingInstanceToReconcile}, nil | ||
case infrav1.InstanceStateDeleted: | ||
// This should normally be handled by deleteBastion | ||
openStackCluster.Status.Bastion = nil | ||
return ctrl.Result{}, nil | ||
} | ||
|
||
networkingService, err := networking.NewService(scope) | ||
if err != nil { | ||
return err | ||
} | ||
clusterName := fmt.Sprintf("%s-%s", cluster.Namespace, cluster.Name) | ||
fp, err := networkingService.GetOrCreateFloatingIP(openStackCluster, openStackCluster, clusterName, openStackCluster.Spec.Bastion.Instance.FloatingIP) | ||
if err != nil { | ||
handleUpdateOSCError(openStackCluster, fmt.Errorf("failed to get or create floating IP for bastion: %w", err)) | ||
return fmt.Errorf("failed to get or create floating IP for bastion: %w", err) | ||
return ctrl.Result{}, err | ||
} | ||
port, err := computeService.GetManagementPort(openStackCluster, instanceStatus) | ||
if err != nil { | ||
err = fmt.Errorf("getting management port for bastion: %w", err) | ||
handleUpdateOSCError(openStackCluster, err) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We shouldn't use handleUpdateOSCError() here either, but this is existing code 🤦 Guessing you cribbed the other uses from here. Would you mind deleting this one, too? There are very few circumstances when we would ever set the cluster to failed. It would only really be before it has become initially ready, and even then only if the cause was due to an invalid spec. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Indeed, as I was essentially reorganizing existing code, I kept all the |
||
return err | ||
return ctrl.Result{}, err | ||
} | ||
err = networkingService.AssociateFloatingIP(openStackCluster, fp, port.ID) | ||
fp, err := networkingService.GetFloatingIPByPortID(port.ID) | ||
mdbooth marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if err != nil { | ||
handleUpdateOSCError(openStackCluster, fmt.Errorf("failed to associate floating IP with bastion: %w", err)) | ||
return fmt.Errorf("failed to associate floating IP with bastion: %w", err) | ||
handleUpdateOSCError(openStackCluster, fmt.Errorf("failed to get or create floating IP for bastion: %w", err)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Shouldn't use handleUpdateOSCError() here. |
||
return ctrl.Result{}, fmt.Errorf("failed to get floating IP for bastion port: %w", err) | ||
} | ||
if fp != nil { | ||
// Floating IP is already attached to bastion, no need to proceed | ||
openStackCluster.Status.Bastion.FloatingIP = fp.FloatingIP | ||
return ctrl.Result{}, nil | ||
} | ||
|
||
bastion, err := instanceStatus.BastionStatus(openStackCluster) | ||
clusterName := fmt.Sprintf("%s-%s", cluster.Namespace, cluster.Name) | ||
mdbooth marked this conversation as resolved.
Show resolved
Hide resolved
|
||
floatingIP := openStackCluster.Spec.Bastion.Instance.FloatingIP | ||
if openStackCluster.Status.Bastion.FloatingIP != "" { | ||
// Some floating IP has already been created for this bastion, make sure we re-use it | ||
floatingIP = openStackCluster.Status.Bastion.FloatingIP | ||
} | ||
// Check if there is an existing floating IP attached to bastion, in case where FloatingIP would not yet have been stored in cluster status | ||
fp, err = networkingService.GetOrCreateFloatingIP(openStackCluster, openStackCluster, clusterName, floatingIP) | ||
if err != nil { | ||
return err | ||
handleUpdateOSCError(openStackCluster, fmt.Errorf("failed to get or create floating IP for bastion: %w", err)) | ||
mdbooth marked this conversation as resolved.
Show resolved
Hide resolved
|
||
return ctrl.Result{}, fmt.Errorf("failed to get or create floating IP for bastion: %w", err) | ||
} | ||
bastion.FloatingIP = fp.FloatingIP | ||
openStackCluster.Status.Bastion = bastion | ||
annotations.AddAnnotations(openStackCluster, map[string]string{BastionInstanceHashAnnotation: bastionHash}) | ||
return nil | ||
openStackCluster.Status.Bastion.FloatingIP = fp.FloatingIP | ||
|
||
err = networkingService.AssociateFloatingIP(openStackCluster, fp, port.ID) | ||
if err != nil { | ||
handleUpdateOSCError(openStackCluster, fmt.Errorf("failed to associate floating IP with bastion: %w", err)) | ||
mdbooth marked this conversation as resolved.
Show resolved
Hide resolved
|
||
return ctrl.Result{}, fmt.Errorf("failed to associate floating IP with bastion: %w", err) | ||
} | ||
|
||
return ctrl.Result{}, nil | ||
} | ||
|
||
func bastionToInstanceSpec(openStackCluster *infrav1.OpenStackCluster, clusterName string) *compute.InstanceSpec { | ||
name := fmt.Sprintf("%s-bastion", clusterName) | ||
instanceSpec := &compute.InstanceSpec{ | ||
Name: name, | ||
Name: bastionName(clusterName), | ||
Flavor: openStackCluster.Spec.Bastion.Instance.Flavor, | ||
SSHKeyName: openStackCluster.Spec.Bastion.Instance.SSHKeyName, | ||
Image: openStackCluster.Spec.Bastion.Instance.Image, | ||
|
@@ -412,6 +452,10 @@ func bastionToInstanceSpec(openStackCluster *infrav1.OpenStackCluster, clusterNa | |
return instanceSpec | ||
} | ||
|
||
func bastionName(clusterName string) string { | ||
return fmt.Sprintf("%s-bastion", clusterName) | ||
} | ||
|
||
// bastionHashHasChanged returns a boolean whether if the latest bastion hash, built from the instance spec, has changed or not. | ||
func bastionHashHasChanged(computeHash string, clusterAnnotations map[string]string) bool { | ||
latestHash, ok := clusterAnnotations[BastionInstanceHashAnnotation] | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
While it is being created, the Nova server status is BUILD and not BUILDING. As this variable was not used elsewhere, I felt it was OK to change it; please let me know if you foresee any issue with doing that.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not sure whether there is a consistent naming here:
https://github.com/openstack/nova/blob/master/nova/objects/fields.py#L969
seems to indicate it's BUILDING,
but in the CLI it's BUILD,
which makes me a little confused — maybe something was updated in the Nova client?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
VM state seems to be derived from status, and the correspondence is not straightforward: for the BUILDING VM state, we have a BUILD status.