Skip to content

Commit 75ffe73

Browse files
committed
Use get instead of list to retrieve servers
Under certain circumstances, when they are non-responsive cells, nova may return a partial result instead of an error. (this is controlled by list_records_by_skipping_down_cells parameter that is true by default - no error will be thrown) When it faces this issue, capo controller will try to create a new server for the machine, resulting in deleting ports of existing one. In order avoid this issue, use GET instead of LIST with server uuid stored in machine spec when it is created, and store server ID in machine spec immediately. For that purpose server status polling that used to take place in CreateMachine has to be moved to main reconcile loop. ReconcileBastion also needed to be revisited to properly support requeuing.
1 parent 5440246 commit 75ffe73

File tree

11 files changed

+349
-151
lines changed

11 files changed

+349
-151
lines changed

api/v1alpha5/types.go

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -273,8 +273,8 @@ func (r SecurityGroupRule) Equal(x SecurityGroupRule) bool {
273273
type InstanceState string
274274

275275
var (
276-
// InstanceStateBuilding is the string representing an instance in a building state.
277-
InstanceStateBuilding = InstanceState("BUILDING")
276+
// InstanceStateBuild is the string representing an instance in a build state.
277+
InstanceStateBuild = InstanceState("BUILD")
278278

279279
// InstanceStateActive is the string representing an instance in an active state.
280280
InstanceStateActive = InstanceState("ACTIVE")
@@ -290,6 +290,9 @@ var (
290290

291291
// InstanceStateDeleted is the string representing an instance in a deleted state.
292292
InstanceStateDeleted = InstanceState("DELETED")
293+
294+
// InstanceStateUndefined is the string representing an undefined instance state.
295+
InstanceStateUndefined = InstanceState("")
293296
)
294297

295298
// Bastion represents basic information about the bastion node.

api/v1alpha6/types.go

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -285,8 +285,8 @@ func (r SecurityGroupRule) Equal(x SecurityGroupRule) bool {
285285
type InstanceState string
286286

287287
var (
288-
// InstanceStateBuilding is the string representing an instance in a building state.
289-
InstanceStateBuilding = InstanceState("BUILDING")
288+
// InstanceStateBuild is the string representing an instance in a build state.
289+
InstanceStateBuild = InstanceState("BUILD")
290290

291291
// InstanceStateActive is the string representing an instance in an active state.
292292
InstanceStateActive = InstanceState("ACTIVE")
@@ -302,6 +302,9 @@ var (
302302

303303
// InstanceStateDeleted is the string representing an instance in a deleted state.
304304
InstanceStateDeleted = InstanceState("DELETED")
305+
306+
// InstanceStateUndefined is the string representing an undefined instance state.
307+
InstanceStateUndefined = InstanceState("")
305308
)
306309

307310
// Bastion represents basic information about the bastion node.

api/v1alpha7/types.go

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -315,8 +315,8 @@ func (r SecurityGroupRule) Equal(x SecurityGroupRule) bool {
315315
type InstanceState string
316316

317317
var (
318-
// InstanceStateBuilding is the string representing an instance in a building state.
319-
InstanceStateBuilding = InstanceState("BUILDING")
318+
// InstanceStateBuild is the string representing an instance in a build state.
319+
InstanceStateBuild = InstanceState("BUILD")
320320

321321
// InstanceStateActive is the string representing an instance in an active state.
322322
InstanceStateActive = InstanceState("ACTIVE")
@@ -332,6 +332,9 @@ var (
332332

333333
// InstanceStateDeleted is the string representing an instance in a deleted state.
334334
InstanceStateDeleted = InstanceState("DELETED")
335+
336+
// InstanceStateUndefined is the string representing an undefined instance state.
337+
InstanceStateUndefined = InstanceState("")
335338
)
336339

337340
// Bastion represents basic information about the bastion node.

api/v1alpha8/types.go

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -321,8 +321,8 @@ func (r SecurityGroupRule) Equal(x SecurityGroupRule) bool {
321321
type InstanceState string
322322

323323
var (
324-
// InstanceStateBuilding is the string representing an instance in a building state.
325-
InstanceStateBuilding = InstanceState("BUILDING")
324+
// InstanceStateBuild is the string representing an instance in a build state.
325+
InstanceStateBuild = InstanceState("BUILD")
326326

327327
// InstanceStateActive is the string representing an instance in an active state.
328328
InstanceStateActive = InstanceState("ACTIVE")
@@ -338,6 +338,9 @@ var (
338338

339339
// InstanceStateDeleted is the string representing an instance in a deleted state.
340340
InstanceStateDeleted = InstanceState("DELETED")
341+
342+
// InstanceStateUndefined is the string representing an undefined instance state.
343+
InstanceStateUndefined = InstanceState("")
341344
)
342345

343346
// Bastion represents basic information about the bastion node.

controllers/openstackcluster_controller.go

Lines changed: 94 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -215,10 +215,24 @@ func deleteBastion(scope scope.Scope, cluster *clusterv1.Cluster, openStackClust
215215
return err
216216
}
217217

218-
instanceName := fmt.Sprintf("%s-bastion", cluster.Name)
219-
instanceStatus, err := computeService.GetInstanceStatusByName(openStackCluster, instanceName)
220-
if err != nil {
221-
return err
218+
if openStackCluster.Status.Bastion != nil && openStackCluster.Status.Bastion.FloatingIP != "" {
219+
if err = networkingService.DeleteFloatingIP(openStackCluster, openStackCluster.Status.Bastion.FloatingIP); err != nil {
220+
handleUpdateOSCError(openStackCluster, fmt.Errorf("failed to delete floating IP: %w", err))
221+
return fmt.Errorf("failed to delete floating IP: %w", err)
222+
}
223+
}
224+
225+
var instanceStatus *compute.InstanceStatus
226+
if openStackCluster.Status.Bastion != nil && openStackCluster.Status.Bastion.ID != "" {
227+
instanceStatus, err = computeService.GetInstanceStatus(openStackCluster.Status.Bastion.ID)
228+
if err != nil {
229+
return err
230+
}
231+
} else {
232+
instanceStatus, err = computeService.GetInstanceStatusByName(openStackCluster, bastionName(cluster.Name))
233+
if err != nil {
234+
return err
235+
}
222236
}
223237

224238
if instanceStatus != nil {
@@ -230,6 +244,7 @@ func deleteBastion(scope scope.Scope, cluster *clusterv1.Cluster, openStackClust
230244

231245
for _, address := range addresses {
232246
if address.Type == corev1.NodeExternalIP {
247+
// Floating IP may not have properly saved in bastion status (thus not deleted above), delete any remaining floating IP
233248
if err = networkingService.DeleteFloatingIP(openStackCluster, address.Address); err != nil {
234249
handleUpdateOSCError(openStackCluster, fmt.Errorf("failed to delete floating IP: %w", err))
235250
return fmt.Errorf("failed to delete floating IP: %w", err)
@@ -277,8 +292,9 @@ func reconcileNormal(scope scope.Scope, cluster *clusterv1.Cluster, openStackClu
277292
return reconcile.Result{}, err
278293
}
279294

280-
if err = reconcileBastion(scope, cluster, openStackCluster); err != nil {
281-
return reconcile.Result{}, err
295+
result, err := reconcileBastion(scope, cluster, openStackCluster)
296+
if err != nil || !reflect.DeepEqual(result, reconcile.Result{}) {
297+
return result, err
282298
}
283299

284300
availabilityZones, err := computeService.GetAvailabilityZones()
@@ -308,88 +324,112 @@ func reconcileNormal(scope scope.Scope, cluster *clusterv1.Cluster, openStackClu
308324
return reconcile.Result{}, nil
309325
}
310326

311-
func reconcileBastion(scope scope.Scope, cluster *clusterv1.Cluster, openStackCluster *infrav1.OpenStackCluster) error {
327+
func reconcileBastion(scope scope.Scope, cluster *clusterv1.Cluster, openStackCluster *infrav1.OpenStackCluster) (ctrl.Result, error) {
312328
scope.Logger().Info("Reconciling Bastion")
313329

314330
if openStackCluster.Spec.Bastion == nil || !openStackCluster.Spec.Bastion.Enabled {
315-
return deleteBastion(scope, cluster, openStackCluster)
331+
return reconcile.Result{}, deleteBastion(scope, cluster, openStackCluster)
316332
}
317333

318334
computeService, err := compute.NewService(scope)
319335
if err != nil {
320-
return err
336+
return reconcile.Result{}, err
321337
}
322338

323339
instanceSpec := bastionToInstanceSpec(openStackCluster, cluster.Name)
324340
bastionHash, err := compute.HashInstanceSpec(instanceSpec)
325341
if err != nil {
326-
return fmt.Errorf("failed computing bastion hash from instance spec: %w", err)
342+
return reconcile.Result{}, fmt.Errorf("failed computing bastion hash from instance spec: %w", err)
343+
}
344+
if bastionHashHasChanged(bastionHash, openStackCluster.ObjectMeta.Annotations) {
345+
if err := deleteBastion(scope, cluster, openStackCluster); err != nil {
346+
return ctrl.Result{}, err
347+
}
327348
}
328349

329-
instanceStatus, err := computeService.GetInstanceStatusByName(openStackCluster, fmt.Sprintf("%s-bastion", cluster.Name))
330-
if err != nil {
331-
return err
350+
var instanceStatus *compute.InstanceStatus
351+
if openStackCluster.Status.Bastion != nil && openStackCluster.Status.Bastion.ID != "" {
352+
if instanceStatus, err = computeService.GetInstanceStatus(openStackCluster.Status.Bastion.ID); err != nil {
353+
return reconcile.Result{}, err
354+
}
332355
}
333-
if instanceStatus != nil {
334-
if !bastionHashHasChanged(bastionHash, openStackCluster.ObjectMeta.Annotations) {
335-
bastion, err := instanceStatus.BastionStatus(openStackCluster)
336-
if err != nil {
337-
return err
338-
}
339-
// Add the current hash if no annotation is set.
340-
if _, ok := openStackCluster.ObjectMeta.Annotations[BastionInstanceHashAnnotation]; !ok {
341-
annotations.AddAnnotations(openStackCluster, map[string]string{BastionInstanceHashAnnotation: bastionHash})
342-
}
343-
openStackCluster.Status.Bastion = bastion
344-
return nil
356+
if instanceStatus == nil {
357+
// Check if there is an existing instance with bastion name, in case where bastion ID would not have been properly stored in cluster status
358+
if instanceStatus, err = computeService.GetInstanceStatusByName(openStackCluster, instanceSpec.Name); err != nil {
359+
return reconcile.Result{}, err
345360
}
346-
347-
if err := deleteBastion(scope, cluster, openStackCluster); err != nil {
348-
return err
361+
}
362+
if instanceStatus == nil {
363+
instanceStatus, err = computeService.CreateInstance(openStackCluster, openStackCluster, instanceSpec, cluster.Name)
364+
if err != nil {
365+
return reconcile.Result{}, fmt.Errorf("failed to create bastion: %w", err)
349366
}
350367
}
351368

352-
instanceStatus, err = computeService.CreateInstance(openStackCluster, openStackCluster, instanceSpec, cluster.Name)
353-
if err != nil {
354-
return fmt.Errorf("failed to reconcile bastion: %w", err)
369+
// Save hash & status as soon as we know we have an instance
370+
instanceStatus.UpdateBastionStatus(openStackCluster)
371+
annotations.AddAnnotations(openStackCluster, map[string]string{BastionInstanceHashAnnotation: bastionHash})
372+
373+
// Make sure that bastion instance has a valid state
374+
switch instanceStatus.State() {
375+
case infrav1.InstanceStateError:
376+
return ctrl.Result{}, fmt.Errorf("failed to reconcile bastion, instance state is ERROR")
377+
case infrav1.InstanceStateBuild, infrav1.InstanceStateUndefined:
378+
scope.Logger().Info("Waiting for bastion instance to become ACTIVE", "id", instanceStatus.ID(), "status", instanceStatus.State())
379+
return ctrl.Result{RequeueAfter: waitForBuildingInstanceToReconcile}, nil
380+
case infrav1.InstanceStateDeleted:
381+
// This should normally be handled by deleteBastion
382+
openStackCluster.Status.Bastion = nil
383+
return ctrl.Result{}, nil
355384
}
356385

357386
networkingService, err := networking.NewService(scope)
358387
if err != nil {
359-
return err
360-
}
361-
clusterName := fmt.Sprintf("%s-%s", cluster.Namespace, cluster.Name)
362-
fp, err := networkingService.GetOrCreateFloatingIP(openStackCluster, openStackCluster, clusterName, openStackCluster.Spec.Bastion.Instance.FloatingIP)
363-
if err != nil {
364-
handleUpdateOSCError(openStackCluster, fmt.Errorf("failed to get or create floating IP for bastion: %w", err))
365-
return fmt.Errorf("failed to get or create floating IP for bastion: %w", err)
388+
return ctrl.Result{}, err
366389
}
367390
port, err := computeService.GetManagementPort(openStackCluster, instanceStatus)
368391
if err != nil {
369392
err = fmt.Errorf("getting management port for bastion: %w", err)
370393
handleUpdateOSCError(openStackCluster, err)
371-
return err
394+
return ctrl.Result{}, err
372395
}
373-
err = networkingService.AssociateFloatingIP(openStackCluster, fp, port.ID)
396+
fp, err := networkingService.GetFloatingIPByPortID(port.ID)
374397
if err != nil {
375-
handleUpdateOSCError(openStackCluster, fmt.Errorf("failed to associate floating IP with bastion: %w", err))
376-
return fmt.Errorf("failed to associate floating IP with bastion: %w", err)
398+
handleUpdateOSCError(openStackCluster, fmt.Errorf("failed to get or create floating IP for bastion: %w", err))
399+
return ctrl.Result{}, fmt.Errorf("failed to get floating IP for bastion port: %w", err)
400+
}
401+
if fp != nil {
402+
// Floating IP is already attached to bastion, no need to proceed
403+
openStackCluster.Status.Bastion.FloatingIP = fp.FloatingIP
404+
return ctrl.Result{}, nil
377405
}
378406

379-
bastion, err := instanceStatus.BastionStatus(openStackCluster)
407+
clusterName := fmt.Sprintf("%s-%s", cluster.Namespace, cluster.Name)
408+
floatingIP := openStackCluster.Spec.Bastion.Instance.FloatingIP
409+
if openStackCluster.Status.Bastion.FloatingIP != "" {
410+
// Some floating IP has already been created for this bastion, make sure we re-use it
411+
floatingIP = openStackCluster.Status.Bastion.FloatingIP
412+
}
413+
// Check if there is an existing floating IP attached to bastion, in case where FloatingIP would not yet have been stored in cluster status
414+
fp, err = networkingService.GetOrCreateFloatingIP(openStackCluster, openStackCluster, clusterName, floatingIP)
380415
if err != nil {
381-
return err
416+
handleUpdateOSCError(openStackCluster, fmt.Errorf("failed to get or create floating IP for bastion: %w", err))
417+
return ctrl.Result{}, fmt.Errorf("failed to get or create floating IP for bastion: %w", err)
382418
}
383-
bastion.FloatingIP = fp.FloatingIP
384-
openStackCluster.Status.Bastion = bastion
385-
annotations.AddAnnotations(openStackCluster, map[string]string{BastionInstanceHashAnnotation: bastionHash})
386-
return nil
419+
openStackCluster.Status.Bastion.FloatingIP = fp.FloatingIP
420+
421+
err = networkingService.AssociateFloatingIP(openStackCluster, fp, port.ID)
422+
if err != nil {
423+
handleUpdateOSCError(openStackCluster, fmt.Errorf("failed to associate floating IP with bastion: %w", err))
424+
return ctrl.Result{}, fmt.Errorf("failed to associate floating IP with bastion: %w", err)
425+
}
426+
427+
return ctrl.Result{}, nil
387428
}
388429

389430
func bastionToInstanceSpec(openStackCluster *infrav1.OpenStackCluster, clusterName string) *compute.InstanceSpec {
390-
name := fmt.Sprintf("%s-bastion", clusterName)
391431
instanceSpec := &compute.InstanceSpec{
392-
Name: name,
432+
Name: bastionName(clusterName),
393433
Flavor: openStackCluster.Spec.Bastion.Instance.Flavor,
394434
SSHKeyName: openStackCluster.Spec.Bastion.Instance.SSHKeyName,
395435
Image: openStackCluster.Spec.Bastion.Instance.Image,
@@ -412,6 +452,10 @@ func bastionToInstanceSpec(openStackCluster *infrav1.OpenStackCluster, clusterNa
412452
return instanceSpec
413453
}
414454

455+
func bastionName(clusterName string) string {
456+
return fmt.Sprintf("%s-bastion", clusterName)
457+
}
458+
415459
// bastionHashHasChanged returns a boolean whether if the latest bastion hash, built from the instance spec, has changed or not.
416460
func bastionHashHasChanged(computeHash string, clusterAnnotations map[string]string) bool {
417461
latestHash, ok := clusterAnnotations[BastionInstanceHashAnnotation]

0 commit comments

Comments
 (0)