Skip to content

Commit 19a6cb5

Browse files
committed
Use get instead of list to retrieve servers
Under certain circumstances, when they are non-responsive cells, nova may return a partial result instead of an error. (this is controlled by list_records_by_skipping_down_cells parameter that is true by default - no error will be thrown) When it faces this issue, capo controller will try to create a new server for the machine, resulting in deleting ports of existing one. In order avoid this issue, use GET instead of LIST with server uuid stored in machine spec when it is created, and store server ID in machine spec immediately. For that purpose server status polling that used to take place in CreateMachine has to be moved to main reconcile loop. ReconcileBastion also needed to be revisited to properly support requeuing.
1 parent 141013b commit 19a6cb5

File tree

10 files changed

+175
-144
lines changed

10 files changed

+175
-144
lines changed

api/v1alpha5/types.go

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -273,8 +273,8 @@ func (r SecurityGroupRule) Equal(x SecurityGroupRule) bool {
273273
type InstanceState string
274274

275275
var (
276-
// InstanceStateBuilding is the string representing an instance in a building state.
277-
InstanceStateBuilding = InstanceState("BUILDING")
276+
// InstanceStateBuild is the string representing an instance in a build state.
277+
InstanceStateBuild = InstanceState("BUILD")
278278

279279
// InstanceStateActive is the string representing an instance in an active state.
280280
InstanceStateActive = InstanceState("ACTIVE")
@@ -290,6 +290,9 @@ var (
290290

291291
// InstanceStateDeleted is the string representing an instance in a deleted state.
292292
InstanceStateDeleted = InstanceState("DELETED")
293+
294+
// InstanceStateUndefined is the string representing an undefined instance state.
295+
InstanceStateUndefined = InstanceState("")
293296
)
294297

295298
// Bastion represents basic information about the bastion node.

api/v1alpha6/types.go

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -285,8 +285,8 @@ func (r SecurityGroupRule) Equal(x SecurityGroupRule) bool {
285285
type InstanceState string
286286

287287
var (
288-
// InstanceStateBuilding is the string representing an instance in a building state.
289-
InstanceStateBuilding = InstanceState("BUILDING")
288+
// InstanceStateBuild is the string representing an instance in a build state.
289+
InstanceStateBuild = InstanceState("BUILD")
290290

291291
// InstanceStateActive is the string representing an instance in an active state.
292292
InstanceStateActive = InstanceState("ACTIVE")
@@ -302,6 +302,9 @@ var (
302302

303303
// InstanceStateDeleted is the string representing an instance in a deleted state.
304304
InstanceStateDeleted = InstanceState("DELETED")
305+
306+
// InstanceStateUndefined is the string representing an undefined instance state.
307+
InstanceStateUndefined = InstanceState("")
305308
)
306309

307310
// Bastion represents basic information about the bastion node.

api/v1alpha7/types.go

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -315,8 +315,8 @@ func (r SecurityGroupRule) Equal(x SecurityGroupRule) bool {
315315
type InstanceState string
316316

317317
var (
318-
// InstanceStateBuilding is the string representing an instance in a building state.
319-
InstanceStateBuilding = InstanceState("BUILDING")
318+
// InstanceStateBuild is the string representing an instance in a build state.
319+
InstanceStateBuild = InstanceState("BUILD")
320320

321321
// InstanceStateActive is the string representing an instance in an active state.
322322
InstanceStateActive = InstanceState("ACTIVE")
@@ -332,6 +332,9 @@ var (
332332

333333
// InstanceStateDeleted is the string representing an instance in a deleted state.
334334
InstanceStateDeleted = InstanceState("DELETED")
335+
336+
// InstanceStateUndefined is the string representing an undefined instance state.
337+
InstanceStateUndefined = InstanceState("")
335338
)
336339

337340
// Bastion represents basic information about the bastion node.

controllers/openstackcluster_controller.go

Lines changed: 87 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -215,10 +215,25 @@ func deleteBastion(scope scope.Scope, cluster *clusterv1.Cluster, openStackClust
215215
return err
216216
}
217217

218-
instanceName := fmt.Sprintf("%s-bastion", cluster.Name)
219-
instanceStatus, err := computeService.GetInstanceStatusByName(openStackCluster, instanceName)
220-
if err != nil {
221-
return err
218+
if openStackCluster.Status.Bastion != nil && openStackCluster.Status.Bastion.FloatingIP != "" {
219+
if err = networkingService.DeleteFloatingIP(openStackCluster, openStackCluster.Status.Bastion.FloatingIP); err != nil {
220+
handleUpdateOSCError(openStackCluster, fmt.Errorf("failed to delete floating IP: %w", err))
221+
return fmt.Errorf("failed to delete floating IP: %w", err)
222+
}
223+
}
224+
225+
var instanceStatus *compute.InstanceStatus
226+
if openStackCluster.Status.Bastion != nil && openStackCluster.Status.Bastion.ID != "" {
227+
instanceStatus, err = computeService.GetInstanceStatus(openStackCluster.Status.Bastion.ID)
228+
if err != nil {
229+
return err
230+
}
231+
} else {
232+
instanceName := fmt.Sprintf("%s-bastion", cluster.Name)
233+
instanceStatus, err = computeService.GetInstanceStatusByName(openStackCluster, instanceName)
234+
if err != nil {
235+
return err
236+
}
222237
}
223238

224239
if instanceStatus != nil {
@@ -230,6 +245,7 @@ func deleteBastion(scope scope.Scope, cluster *clusterv1.Cluster, openStackClust
230245

231246
for _, address := range addresses {
232247
if address.Type == corev1.NodeExternalIP {
248+
// Floating IP may not have properly saved in bastion status (thus not delete above), delete any remaining floating IP
233249
if err = networkingService.DeleteFloatingIP(openStackCluster, address.Address); err != nil {
234250
handleUpdateOSCError(openStackCluster, fmt.Errorf("failed to delete floating IP: %w", err))
235251
return fmt.Errorf("failed to delete floating IP: %w", err)
@@ -277,8 +293,9 @@ func reconcileNormal(scope scope.Scope, cluster *clusterv1.Cluster, openStackClu
277293
return reconcile.Result{}, err
278294
}
279295

280-
if err = reconcileBastion(scope, cluster, openStackCluster); err != nil {
281-
return reconcile.Result{}, err
296+
result, err := reconcileBastion(scope, cluster, openStackCluster)
297+
if err != nil || !reflect.DeepEqual(result, reconcile.Result{}) {
298+
return result, err
282299
}
283300

284301
availabilityZones, err := computeService.GetAvailabilityZones()
@@ -308,82 +325,104 @@ func reconcileNormal(scope scope.Scope, cluster *clusterv1.Cluster, openStackClu
308325
return reconcile.Result{}, nil
309326
}
310327

311-
func reconcileBastion(scope scope.Scope, cluster *clusterv1.Cluster, openStackCluster *infrav1.OpenStackCluster) error {
328+
func reconcileBastion(scope scope.Scope, cluster *clusterv1.Cluster, openStackCluster *infrav1.OpenStackCluster) (ctrl.Result, error) {
312329
scope.Logger().Info("Reconciling Bastion")
313330

314331
if openStackCluster.Spec.Bastion == nil || !openStackCluster.Spec.Bastion.Enabled {
315-
return deleteBastion(scope, cluster, openStackCluster)
332+
return reconcile.Result{}, deleteBastion(scope, cluster, openStackCluster)
316333
}
317334

318335
computeService, err := compute.NewService(scope)
319336
if err != nil {
320-
return err
337+
return reconcile.Result{}, err
321338
}
322339

323340
instanceSpec := bastionToInstanceSpec(openStackCluster, cluster.Name)
324341
bastionHash, err := compute.HashInstanceSpec(instanceSpec)
325342
if err != nil {
326-
return fmt.Errorf("failed computing bastion hash from instance spec: %w", err)
343+
return reconcile.Result{}, fmt.Errorf("failed computing bastion hash from instance spec: %w", err)
344+
} else if bastionHashHasChanged(bastionHash, openStackCluster.ObjectMeta.Annotations) {
345+
if err := deleteBastion(scope, cluster, openStackCluster); err != nil {
346+
return ctrl.Result{}, err
347+
}
327348
}
328349

329-
instanceStatus, err := computeService.GetInstanceStatusByName(openStackCluster, fmt.Sprintf("%s-bastion", cluster.Name))
330-
if err != nil {
331-
return err
350+
var instanceStatus *compute.InstanceStatus
351+
if openStackCluster.Status.Bastion != nil && openStackCluster.Status.Bastion.ID != "" {
352+
if instanceStatus, err = computeService.GetInstanceStatus(openStackCluster.Status.Bastion.ID); err != nil {
353+
return reconcile.Result{}, err
354+
}
332355
}
333-
if instanceStatus != nil {
334-
if !bastionHashHasChanged(bastionHash, openStackCluster.ObjectMeta.Annotations) {
335-
bastion, err := instanceStatus.BastionStatus(openStackCluster)
336-
if err != nil {
337-
return err
338-
}
339-
// Add the current hash if no annotation is set.
340-
if _, ok := openStackCluster.ObjectMeta.Annotations[BastionInstanceHashAnnotation]; !ok {
341-
annotations.AddAnnotations(openStackCluster, map[string]string{BastionInstanceHashAnnotation: bastionHash})
342-
}
343-
openStackCluster.Status.Bastion = bastion
344-
return nil
356+
if instanceStatus == nil {
357+
// Check if there is an existing instance with bastion name, in case where bastion ID would not have been properly stored in cluster status
358+
if instanceStatus, err = computeService.GetInstanceStatusByName(openStackCluster, instanceSpec.Name); err != nil {
359+
return reconcile.Result{}, err
345360
}
346-
347-
if err := deleteBastion(scope, cluster, openStackCluster); err != nil {
348-
return err
361+
}
362+
if instanceStatus == nil {
363+
instanceStatus, err = computeService.CreateInstance(openStackCluster, openStackCluster, instanceSpec, cluster.Name)
364+
if err != nil {
365+
return reconcile.Result{}, fmt.Errorf("failed to create bastion: %w", err)
349366
}
350367
}
351368

352-
instanceStatus, err = computeService.CreateInstance(openStackCluster, openStackCluster, instanceSpec, cluster.Name)
353-
if err != nil {
354-
return fmt.Errorf("failed to reconcile bastion: %w", err)
369+
// Save hash & status as soon as we know we have an instance
370+
instanceStatus.UpdateBastionStatus(openStackCluster)
371+
annotations.AddAnnotations(openStackCluster, map[string]string{BastionInstanceHashAnnotation: bastionHash})
372+
373+
// Make sure that bastion instance has a valid state
374+
switch instanceStatus.State() {
375+
case infrav1.InstanceStateError:
376+
return ctrl.Result{}, fmt.Errorf("failed to reconcile bastion, instance state is ERROR")
377+
case infrav1.InstanceStateBuild, infrav1.InstanceStateUndefined:
378+
scope.Logger().Info("Waiting for bastion instance to become ACTIVE", "id", instanceStatus.ID(), "status", instanceStatus.State())
379+
return ctrl.Result{RequeueAfter: waitForBuildingInstanceToReconcile}, nil
380+
case infrav1.InstanceStateDeleted:
381+
// This should normally be handled by deleteBastion
382+
openStackCluster.Status.Bastion = nil
383+
return ctrl.Result{}, nil
355384
}
356385

357386
networkingService, err := networking.NewService(scope)
358387
if err != nil {
359-
return err
360-
}
361-
clusterName := fmt.Sprintf("%s-%s", cluster.Namespace, cluster.Name)
362-
fp, err := networkingService.GetOrCreateFloatingIP(openStackCluster, openStackCluster, clusterName, openStackCluster.Spec.Bastion.FloatingIP)
363-
if err != nil {
364-
handleUpdateOSCError(openStackCluster, fmt.Errorf("failed to get or create floating IP for bastion: %w", err))
365-
return fmt.Errorf("failed to get or create floating IP for bastion: %w", err)
388+
return ctrl.Result{}, err
366389
}
367390
port, err := computeService.GetManagementPort(openStackCluster, instanceStatus)
368391
if err != nil {
369392
err = fmt.Errorf("getting management port for bastion: %w", err)
370393
handleUpdateOSCError(openStackCluster, err)
371-
return err
394+
return ctrl.Result{}, err
372395
}
373-
err = networkingService.AssociateFloatingIP(openStackCluster, fp, port.ID)
396+
fp, err := networkingService.GetFloatingIPByPortID(port.ID)
374397
if err != nil {
375-
handleUpdateOSCError(openStackCluster, fmt.Errorf("failed to associate floating IP with bastion: %w", err))
376-
return fmt.Errorf("failed to associate floating IP with bastion: %w", err)
398+
handleUpdateOSCError(openStackCluster, fmt.Errorf("failed to get or create floating IP for bastion: %w", err))
399+
return ctrl.Result{}, fmt.Errorf("failed to get floating IP for bastion port: %w", err)
400+
} else if fp != nil {
401+
// Floating IP is already attached to bastion, no need to proceed
402+
openStackCluster.Status.Bastion.FloatingIP = fp.FloatingIP
403+
return ctrl.Result{}, nil
377404
}
378405

379-
bastion, err := instanceStatus.BastionStatus(openStackCluster)
406+
clusterName := fmt.Sprintf("%s-%s", cluster.Namespace, cluster.Name)
407+
floatingIP := openStackCluster.Spec.Bastion.FloatingIP
408+
if openStackCluster.Status.Bastion.FloatingIP != "" {
409+
// Some floating IP has already been created for this bastion, make sure we re-use it
410+
floatingIP = openStackCluster.Status.Bastion.FloatingIP
411+
}
412+
fp, err = networkingService.GetOrCreateFloatingIP(openStackCluster, openStackCluster, clusterName, floatingIP)
380413
if err != nil {
381-
return err
414+
handleUpdateOSCError(openStackCluster, fmt.Errorf("failed to get or create floating IP for bastion: %w", err))
415+
return ctrl.Result{}, fmt.Errorf("failed to get or create floating IP for bastion: %w", err)
382416
}
383-
bastion.FloatingIP = fp.FloatingIP
384-
openStackCluster.Status.Bastion = bastion
385-
annotations.AddAnnotations(openStackCluster, map[string]string{BastionInstanceHashAnnotation: bastionHash})
386-
return nil
417+
openStackCluster.Status.Bastion.FloatingIP = fp.FloatingIP
418+
419+
err = networkingService.AssociateFloatingIP(openStackCluster, fp, port.ID)
420+
if err != nil {
421+
handleUpdateOSCError(openStackCluster, fmt.Errorf("failed to associate floating IP with bastion: %w", err))
422+
return ctrl.Result{}, fmt.Errorf("failed to associate floating IP with bastion: %w", err)
423+
}
424+
425+
return ctrl.Result{}, nil
387426
}
388427

389428
func bastionToInstanceSpec(openStackCluster *infrav1.OpenStackCluster, clusterName string) *compute.InstanceSpec {

controllers/openstackcluster_controller_test.go

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ import (
2222

2323
"github.com/go-logr/logr"
2424
"github.com/golang/mock/gomock"
25-
"github.com/gophercloud/gophercloud/openstack/compute/v2/servers"
25+
"github.com/gophercloud/gophercloud"
2626
"github.com/gophercloud/gophercloud/openstack/networking/v2/extensions/security/groups"
2727
"github.com/gophercloud/gophercloud/openstack/networking/v2/networks"
2828
"github.com/gophercloud/gophercloud/openstack/networking/v2/subnets"
@@ -38,7 +38,6 @@ import (
3838
"sigs.k8s.io/controller-runtime/pkg/reconcile"
3939

4040
infrav1 "sigs.k8s.io/cluster-api-provider-openstack/api/v1alpha7"
41-
"sigs.k8s.io/cluster-api-provider-openstack/pkg/clients"
4241
"sigs.k8s.io/cluster-api-provider-openstack/pkg/scope"
4342
)
4443

@@ -195,18 +194,24 @@ var _ = Describe("OpenStackCluster controller", func() {
195194
Expect(err).To(BeNil())
196195
err = k8sClient.Create(ctx, capiCluster)
197196
Expect(err).To(BeNil())
197+
testCluster.Status = infrav1.OpenStackClusterStatus{
198+
Bastion: &infrav1.BastionStatus{
199+
ID: "bastion-uuid",
200+
},
201+
}
202+
err = k8sClient.Status().Update(ctx, testCluster)
203+
Expect(err).To(BeNil())
198204
scope, err := mockScopeFactory.NewClientScopeFromCluster(ctx, k8sClient, testCluster, nil, logr.Discard())
199205
Expect(err).To(BeNil())
200206

201207
computeClientRecorder := mockScopeFactory.ComputeClient.EXPECT()
202-
computeClientRecorder.ListServers(servers.ListOpts{
203-
Name: "^capi-cluster-bastion$",
204-
}).Return([]clients.ServerExt{}, nil)
208+
computeClientRecorder.GetServer("bastion-uuid").Return(nil, gophercloud.ErrResourceNotFound{})
205209

206210
networkClientRecorder := mockScopeFactory.NetworkClient.EXPECT()
207211
networkClientRecorder.ListSecGroup(gomock.Any()).Return([]groups.SecGroup{}, nil)
208212

209213
err = deleteBastion(scope, capiCluster, testCluster)
214+
Expect(testCluster.Status.Bastion).To(BeNil())
210215
Expect(err).To(BeNil())
211216
})
212217
It("should implicitly filter cluster subnets by cluster network", func() {

controllers/openstackmachine_controller.go

Lines changed: 26 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ type OpenStackMachineReconciler struct {
6767
const (
6868
waitForClusterInfrastructureReadyDuration = 15 * time.Second
6969
waitForInstanceBecomeActiveToReconcile = 60 * time.Second
70+
waitForBuildingInstanceToReconcile = 10 * time.Second
7071
)
7172

7273
// +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=openstackmachines,verbs=get;list;watch;create;update;patch;delete
@@ -246,8 +247,13 @@ func (r *OpenStackMachineReconciler) reconcileDelete(scope scope.Scope, cluster
246247
}
247248
}
248249

249-
instanceStatus, err := computeService.GetInstanceStatusByName(openStackMachine, openStackMachine.Name)
250-
if err != nil {
250+
var instanceStatus *compute.InstanceStatus
251+
if openStackMachine.Spec.InstanceID != nil {
252+
instanceStatus, err = computeService.GetInstanceStatus(*openStackMachine.Spec.InstanceID)
253+
if err != nil {
254+
return ctrl.Result{}, err
255+
}
256+
} else if instanceStatus, err = computeService.GetInstanceStatusByName(openStackMachine, openStackMachine.Name); err != nil {
251257
return ctrl.Result{}, err
252258
}
253259
if !openStackCluster.Spec.APIServerLoadBalancer.Enabled && util.IsControlPlaneMachine(machine) && openStackCluster.Spec.APIServerFloatingIP == "" {
@@ -379,6 +385,9 @@ func (r *OpenStackMachineReconciler) reconcileNormal(ctx context.Context, scope
379385
scope.Logger().Info("Machine instance state is DELETED, no actions")
380386
conditions.MarkFalse(openStackMachine, infrav1.InstanceReadyCondition, infrav1.InstanceDeletedReason, clusterv1.ConditionSeverityError, "")
381387
return ctrl.Result{}, nil
388+
case infrav1.InstanceStateBuild, infrav1.InstanceStateUndefined:
389+
scope.Logger().Info("Waiting for instance to become ACTIVE", "id", instanceStatus.ID(), "status", instanceStatus.State())
390+
return ctrl.Result{RequeueAfter: waitForBuildingInstanceToReconcile}, nil
382391
default:
383392
// The other state is normal (for example, migrating, shutoff) but we don't want to proceed until it's ACTIVE
384393
// due to potential conflict or unexpected actions
@@ -431,14 +440,24 @@ func (r *OpenStackMachineReconciler) reconcileNormal(ctx context.Context, scope
431440
}
432441

433442
func (r *OpenStackMachineReconciler) getOrCreate(logger logr.Logger, cluster *clusterv1.Cluster, openStackCluster *infrav1.OpenStackCluster, machine *clusterv1.Machine, openStackMachine *infrav1.OpenStackMachine, computeService *compute.Service, userData string) (*compute.InstanceStatus, error) {
434-
instanceStatus, err := computeService.GetInstanceStatusByName(openStackMachine, openStackMachine.Name)
435-
if err != nil {
436-
logger.Info("Unable to get OpenStack instance", "name", openStackMachine.Name)
437-
conditions.MarkFalse(openStackMachine, infrav1.InstanceReadyCondition, infrav1.OpenStackErrorReason, clusterv1.ConditionSeverityError, err.Error())
438-
return nil, err
443+
var instanceStatus *compute.InstanceStatus
444+
var err error
445+
if openStackMachine.Spec.InstanceID != nil {
446+
instanceStatus, err = computeService.GetInstanceStatus(*openStackMachine.Spec.InstanceID)
447+
if err != nil {
448+
logger.Info("Unable to get OpenStack instance", "name", openStackMachine.Name)
449+
conditions.MarkFalse(openStackMachine, infrav1.InstanceReadyCondition, infrav1.OpenStackErrorReason, clusterv1.ConditionSeverityError, err.Error())
450+
return nil, err
451+
}
439452
}
440453

441454
if instanceStatus == nil {
455+
// First check if there is an existing instance with machine name, in case where instance ID would not have been stored in machine status
456+
if instanceStatus, err = computeService.GetInstanceStatusByName(openStackMachine, openStackMachine.Name); err == nil {
457+
if instanceStatus != nil {
458+
return instanceStatus, nil
459+
}
460+
}
442461
instanceSpec := machineToInstanceSpec(openStackCluster, machine, openStackMachine, userData)
443462
logger.Info("Machine does not exist, creating Machine", "name", openStackMachine.Name)
444463
instanceStatus, err = computeService.CreateInstance(openStackMachine, openStackCluster, instanceSpec, cluster.Name)

0 commit comments

Comments
 (0)