Skip to content

Commit efb7b61

Browse files
committed
Use get instead of list to retrieve servers
Under certain circumstances, when they are non-responsive cells, nova may return a partial result instead of an error. (this is controlled by list_records_by_skipping_down_cells parameter that is true by default - no error will be thrown) When it faces this issue, capo controller will try to create a new server for the machine, resulting in deleting ports of existing one. In order avoid this issue, use GET instead of LIST with server uuid stored in machine spec when it is created, and store server ID in machine spec immediately. For that purpose serviver polling that used to take place in CreateMachine has to be moved to main reconcile loop.
1 parent 821a1a2 commit efb7b61

File tree

6 files changed

+80
-102
lines changed

6 files changed

+80
-102
lines changed

controllers/openstackcluster_controller.go

Lines changed: 44 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,9 @@ func contains(arr []string, target string) bool {
206206
}
207207

208208
func deleteBastion(scope scope.Scope, cluster *clusterv1.Cluster, openStackCluster *infrav1.OpenStackCluster) error {
209+
if openStackCluster.Status.Bastion == nil || openStackCluster.Status.Bastion.ID == "" {
210+
return nil
211+
}
209212
computeService, err := compute.NewService(scope)
210213
if err != nil {
211214
return err
@@ -215,8 +218,7 @@ func deleteBastion(scope scope.Scope, cluster *clusterv1.Cluster, openStackClust
215218
return err
216219
}
217220

218-
instanceName := fmt.Sprintf("%s-bastion", cluster.Name)
219-
instanceStatus, err := computeService.GetInstanceStatusByName(openStackCluster, instanceName)
221+
instanceStatus, err := computeService.GetInstanceStatus(openStackCluster.Status.Bastion.ID)
220222
if err != nil {
221223
return err
222224
}
@@ -277,8 +279,8 @@ func reconcileNormal(scope scope.Scope, cluster *clusterv1.Cluster, openStackClu
277279
return reconcile.Result{}, err
278280
}
279281

280-
if err = reconcileBastion(scope, cluster, openStackCluster); err != nil {
281-
return reconcile.Result{}, err
282+
if result, err := reconcileBastion(scope, cluster, openStackCluster); err != nil {
283+
return result, err
282284
}
283285

284286
availabilityZones, err := computeService.GetAvailabilityZones()
@@ -308,82 +310,99 @@ func reconcileNormal(scope scope.Scope, cluster *clusterv1.Cluster, openStackClu
308310
return reconcile.Result{}, nil
309311
}
310312

311-
func reconcileBastion(scope scope.Scope, cluster *clusterv1.Cluster, openStackCluster *infrav1.OpenStackCluster) error {
313+
func reconcileBastion(scope scope.Scope, cluster *clusterv1.Cluster, openStackCluster *infrav1.OpenStackCluster) (ctrl.Result, error) {
312314
scope.Logger().Info("Reconciling Bastion")
313315

314316
if openStackCluster.Spec.Bastion == nil || !openStackCluster.Spec.Bastion.Enabled {
315-
return deleteBastion(scope, cluster, openStackCluster)
317+
return reconcile.Result{}, deleteBastion(scope, cluster, openStackCluster)
316318
}
317319

318320
computeService, err := compute.NewService(scope)
319321
if err != nil {
320-
return err
322+
return reconcile.Result{}, err
321323
}
322324

323325
instanceSpec := bastionToInstanceSpec(openStackCluster, cluster.Name)
324326
bastionHash, err := compute.HashInstanceSpec(instanceSpec)
325327
if err != nil {
326-
return fmt.Errorf("failed computing bastion hash from instance spec: %w", err)
328+
return reconcile.Result{}, fmt.Errorf("failed computing bastion hash from instance spec: %w", err)
327329
}
328330

329-
instanceStatus, err := computeService.GetInstanceStatusByName(openStackCluster, fmt.Sprintf("%s-bastion", cluster.Name))
330-
if err != nil {
331-
return err
331+
var instanceStatus *compute.InstanceStatus
332+
if openStackCluster.Status.Bastion != nil && openStackCluster.Status.Bastion.ID != "" {
333+
if instanceStatus, err = computeService.GetInstanceStatus(openStackCluster.Status.Bastion.ID); err != nil {
334+
return reconcile.Result{}, err
335+
}
332336
}
333-
if instanceStatus != nil {
337+
if instanceStatus == nil {
338+
instanceStatus, err = computeService.CreateInstance(openStackCluster, openStackCluster, instanceSpec, cluster.Name)
339+
if err != nil {
340+
return reconcile.Result{}, fmt.Errorf("failed to create bastion: %w", err)
341+
}
342+
openStackCluster.Status.Bastion = &infrav1.BastionStatus{
343+
ID: instanceStatus.ID(),
344+
}
345+
} else {
334346
if !bastionHashHasChanged(bastionHash, openStackCluster.ObjectMeta.Annotations) {
335347
bastion, err := instanceStatus.BastionStatus(openStackCluster)
336348
if err != nil {
337-
return err
349+
return reconcile.Result{}, err
338350
}
339351
// Add the current hash if no annotation is set.
340352
if _, ok := openStackCluster.ObjectMeta.Annotations[BastionInstanceHashAnnotation]; !ok {
341353
annotations.AddAnnotations(openStackCluster, map[string]string{BastionInstanceHashAnnotation: bastionHash})
342354
}
343355
openStackCluster.Status.Bastion = bastion
344-
return nil
356+
return ctrl.Result{}, nil
345357
}
346358

347359
if err := deleteBastion(scope, cluster, openStackCluster); err != nil {
348-
return err
360+
return ctrl.Result{}, err
349361
}
350362
}
351-
352-
instanceStatus, err = computeService.CreateInstance(openStackCluster, openStackCluster, instanceSpec, cluster.Name)
353-
if err != nil {
354-
return fmt.Errorf("failed to reconcile bastion: %w", err)
363+
// Make sure that bastion instance has a valid state
364+
switch instanceStatus.State() {
365+
case infrav1.InstanceStateError:
366+
return ctrl.Result{}, fmt.Errorf("failed to reconcile bastion, instance state is ERROR")
367+
case infrav1.InstanceStateBuilding:
368+
// Requeue until bastion becomes active
369+
return ctrl.Result{RequeueAfter: waitForInstanceBecomeActiveToReconcile}, nil
370+
case infrav1.InstanceStateDeleted:
371+
// This should normally be handled by deleteBastion
372+
openStackCluster.Status.Bastion = nil
373+
return ctrl.Result{}, nil
355374
}
356375

357376
networkingService, err := networking.NewService(scope)
358377
if err != nil {
359-
return err
378+
return ctrl.Result{}, err
360379
}
361380
clusterName := fmt.Sprintf("%s-%s", cluster.Namespace, cluster.Name)
362381
fp, err := networkingService.GetOrCreateFloatingIP(openStackCluster, openStackCluster, clusterName, openStackCluster.Spec.Bastion.FloatingIP)
363382
if err != nil {
364383
handleUpdateOSCError(openStackCluster, fmt.Errorf("failed to get or create floating IP for bastion: %w", err))
365-
return fmt.Errorf("failed to get or create floating IP for bastion: %w", err)
384+
return ctrl.Result{}, fmt.Errorf("failed to get or create floating IP for bastion: %w", err)
366385
}
367386
port, err := computeService.GetManagementPort(openStackCluster, instanceStatus)
368387
if err != nil {
369388
err = fmt.Errorf("getting management port for bastion: %w", err)
370389
handleUpdateOSCError(openStackCluster, err)
371-
return err
390+
return ctrl.Result{}, err
372391
}
373392
err = networkingService.AssociateFloatingIP(openStackCluster, fp, port.ID)
374393
if err != nil {
375394
handleUpdateOSCError(openStackCluster, fmt.Errorf("failed to associate floating IP with bastion: %w", err))
376-
return fmt.Errorf("failed to associate floating IP with bastion: %w", err)
395+
return ctrl.Result{}, fmt.Errorf("failed to associate floating IP with bastion: %w", err)
377396
}
378397

379398
bastion, err := instanceStatus.BastionStatus(openStackCluster)
380399
if err != nil {
381-
return err
400+
return ctrl.Result{}, err
382401
}
383402
bastion.FloatingIP = fp.FloatingIP
384403
openStackCluster.Status.Bastion = bastion
385404
annotations.AddAnnotations(openStackCluster, map[string]string{BastionInstanceHashAnnotation: bastionHash})
386-
return nil
405+
return ctrl.Result{}, nil
387406
}
388407

389408
func bastionToInstanceSpec(openStackCluster *infrav1.OpenStackCluster, clusterName string) *compute.InstanceSpec {

controllers/openstackcluster_controller_test.go

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ import (
2222

2323
"github.com/go-logr/logr"
2424
"github.com/golang/mock/gomock"
25-
"github.com/gophercloud/gophercloud/openstack/compute/v2/servers"
25+
"github.com/gophercloud/gophercloud"
2626
"github.com/gophercloud/gophercloud/openstack/networking/v2/extensions/security/groups"
2727
"github.com/gophercloud/gophercloud/openstack/networking/v2/networks"
2828
"github.com/gophercloud/gophercloud/openstack/networking/v2/subnets"
@@ -38,7 +38,6 @@ import (
3838
"sigs.k8s.io/controller-runtime/pkg/reconcile"
3939

4040
infrav1 "sigs.k8s.io/cluster-api-provider-openstack/api/v1alpha7"
41-
"sigs.k8s.io/cluster-api-provider-openstack/pkg/clients"
4241
"sigs.k8s.io/cluster-api-provider-openstack/pkg/scope"
4342
)
4443

@@ -195,18 +194,24 @@ var _ = Describe("OpenStackCluster controller", func() {
195194
Expect(err).To(BeNil())
196195
err = k8sClient.Create(ctx, capiCluster)
197196
Expect(err).To(BeNil())
197+
testCluster.Status = infrav1.OpenStackClusterStatus{
198+
Bastion: &infrav1.BastionStatus{
199+
ID: "bastion-uuid",
200+
},
201+
}
202+
err = k8sClient.Status().Update(ctx, testCluster)
203+
Expect(err).To(BeNil())
198204
scope, err := mockScopeFactory.NewClientScopeFromCluster(ctx, k8sClient, testCluster, nil, logr.Discard())
199205
Expect(err).To(BeNil())
200206

201207
computeClientRecorder := mockScopeFactory.ComputeClient.EXPECT()
202-
computeClientRecorder.ListServers(servers.ListOpts{
203-
Name: "^capi-cluster-bastion$",
204-
}).Return([]clients.ServerExt{}, nil)
208+
computeClientRecorder.GetServer("bastion-uuid").Return(nil, gophercloud.ErrResourceNotFound{})
205209

206210
networkClientRecorder := mockScopeFactory.NetworkClient.EXPECT()
207211
networkClientRecorder.ListSecGroup(gomock.Any()).Return([]groups.SecGroup{}, nil)
208212

209213
err = deleteBastion(scope, capiCluster, testCluster)
214+
Expect(testCluster.Status.Bastion).To(BeNil())
210215
Expect(err).To(BeNil())
211216
})
212217
It("should implicitly filter cluster subnets by cluster network", func() {

controllers/openstackmachine_controller.go

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -246,7 +246,10 @@ func (r *OpenStackMachineReconciler) reconcileDelete(scope scope.Scope, cluster
246246
}
247247
}
248248

249-
instanceStatus, err := computeService.GetInstanceStatusByName(openStackMachine, openStackMachine.Name)
249+
var instanceStatus *compute.InstanceStatus
250+
if openStackMachine.Spec.InstanceID != nil {
251+
instanceStatus, err = computeService.GetInstanceStatusByName(openStackMachine, openStackMachine.Name)
252+
}
250253
if err != nil {
251254
return ctrl.Result{}, err
252255
}
@@ -431,18 +434,26 @@ func (r *OpenStackMachineReconciler) reconcileNormal(ctx context.Context, scope
431434
}
432435

433436
func (r *OpenStackMachineReconciler) getOrCreate(logger logr.Logger, cluster *clusterv1.Cluster, openStackCluster *infrav1.OpenStackCluster, machine *clusterv1.Machine, openStackMachine *infrav1.OpenStackMachine, computeService *compute.Service, userData string) (*compute.InstanceStatus, error) {
434-
instanceStatus, err := computeService.GetInstanceStatusByName(openStackMachine, openStackMachine.Name)
435-
if err != nil {
436-
logger.Info("Unable to get OpenStack instance", "name", openStackMachine.Name)
437-
conditions.MarkFalse(openStackMachine, infrav1.InstanceReadyCondition, infrav1.OpenStackErrorReason, clusterv1.ConditionSeverityError, err.Error())
438-
return nil, err
437+
var instanceStatus *compute.InstanceStatus
438+
var err error
439+
if openStackMachine.Spec.InstanceID != nil {
440+
instanceStatus, err = computeService.GetInstanceStatus(*openStackMachine.Spec.InstanceID)
441+
if err != nil {
442+
logger.Info("Unable to get OpenStack instance", "name", openStackMachine.Name)
443+
conditions.MarkFalse(openStackMachine, infrav1.InstanceReadyCondition, infrav1.OpenStackErrorReason, clusterv1.ConditionSeverityError, err.Error())
444+
return nil, err
445+
}
439446
}
440447

441448
if instanceStatus == nil {
442449
instanceSpec := machineToInstanceSpec(openStackCluster, machine, openStackMachine, userData)
443450
logger.Info("Machine does not exist, creating Machine", "name", openStackMachine.Name)
444451
instanceStatus, err = computeService.CreateInstance(openStackMachine, openStackCluster, instanceSpec, cluster.Name)
445452
if err != nil {
453+
if instanceStatus != nil {
454+
// Instance was created but in error, store instanceID for subsequent calls to this function
455+
openStackMachine.Spec.InstanceID = pointer.String(instanceStatus.ID())
456+
}
446457
conditions.MarkFalse(openStackMachine, infrav1.InstanceReadyCondition, infrav1.InstanceCreateFailedReason, clusterv1.ConditionSeverityError, err.Error())
447458
return nil, fmt.Errorf("create OpenStack instance: %w", err)
448459
}

controllers/suite_test.go

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ import (
3232
"k8s.io/apimachinery/pkg/types"
3333
"k8s.io/client-go/kubernetes/scheme"
3434
"k8s.io/client-go/rest"
35+
"k8s.io/utils/pointer"
3536
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
3637
"sigs.k8s.io/cluster-api/test/framework"
3738
"sigs.k8s.io/controller-runtime/pkg/client"
@@ -148,9 +149,13 @@ var _ = Describe("When calling getOrCreate", func() {
148149
cluster := &clusterv1.Cluster{}
149150
openStackCluster := &infrav1.OpenStackCluster{}
150151
machine := &clusterv1.Machine{}
151-
openStackMachine := &infrav1.OpenStackMachine{}
152+
openStackMachine := &infrav1.OpenStackMachine{
153+
Spec: infrav1.OpenStackMachineSpec{
154+
InstanceID: pointer.String("machine-uuid"),
155+
},
156+
}
152157

153-
mockScopeFactory.ComputeClient.EXPECT().ListServers(gomock.Any()).Return(nil, errors.New("Test error when listing servers"))
158+
mockScopeFactory.ComputeClient.EXPECT().GetServer(gomock.Any()).Return(nil, errors.New("Test error when getting server"))
154159
instanceStatus, err := reconsiler.getOrCreate(logger, cluster, openStackCluster, machine, openStackMachine, computeService, "")
155160
Expect(err).To(HaveOccurred())
156161
Expect(instanceStatus).To(BeNil())

pkg/cloud/services/compute/instance.go

Lines changed: 2 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -333,27 +333,8 @@ func (s *Service) createInstanceImpl(eventObject runtime.Object, openStackCluste
333333
return nil, fmt.Errorf("error creating Openstack instance: %v", err)
334334
}
335335

336-
var createdInstance *InstanceStatus
337-
err = wait.PollUntilContextTimeout(context.TODO(), retryInterval, instanceCreateTimeout, true, func(_ context.Context) (bool, error) {
338-
createdInstance, err = s.GetInstanceStatus(server.ID)
339-
if err != nil {
340-
if capoerrors.IsRetryable(err) {
341-
return false, nil
342-
}
343-
return false, err
344-
}
345-
if createdInstance.State() == infrav1.InstanceStateError {
346-
return false, fmt.Errorf("error creating OpenStack instance %s, status changed to error", createdInstance.ID())
347-
}
348-
return createdInstance.State() == infrav1.InstanceStateActive, nil
349-
})
350-
if err != nil {
351-
record.Warnf(eventObject, "FailedCreateServer", "Failed to create server %s: %v", createdInstance.Name(), err)
352-
return nil, err
353-
}
354-
355-
record.Eventf(eventObject, "SuccessfulCreateServer", "Created server %s with id %s", createdInstance.Name(), createdInstance.ID())
356-
return createdInstance, nil
336+
record.Eventf(eventObject, "SuccessfulCreateServer", "Created server %s with id %s", server.Name, server.ID)
337+
return &InstanceStatus{server, s.scope.Logger()}, nil
357338
}
358339

359340
func volumeName(instanceName string, nameSuffix string) string {

pkg/cloud/services/compute/instance_test.go

Lines changed: 0 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -406,17 +406,6 @@ func TestService_ReconcileInstance(t *testing.T) {
406406
})
407407
}
408408

409-
// Expected calls when polling for server creation
410-
expectServerPoll := func(computeRecorder *mock.MockComputeClientMockRecorder, states []string) {
411-
for _, state := range states {
412-
computeRecorder.GetServer(instanceUUID).Return(returnedServer(state), nil)
413-
}
414-
}
415-
416-
expectServerPollSuccess := func(computeRecorder *mock.MockComputeClientMockRecorder) {
417-
expectServerPoll(computeRecorder, []string{"ACTIVE"})
418-
}
419-
420409
returnedVolume := func(uuid string, status string) *volumes.Volume {
421410
return &volumes.Volume{
422411
ID: uuid,
@@ -460,7 +449,6 @@ func TestService_ReconcileInstance(t *testing.T) {
460449
expectDefaultImageAndFlavor(r.compute, r.image)
461450

462451
expectCreateServer(r.compute, getDefaultServerMap(), false)
463-
expectServerPollSuccess(r.compute)
464452
},
465453
wantErr: false,
466454
},
@@ -503,32 +491,6 @@ func TestService_ReconcileInstance(t *testing.T) {
503491
},
504492
wantErr: true,
505493
},
506-
{
507-
name: "Poll until server is created",
508-
getInstanceSpec: getDefaultInstanceSpec,
509-
expect: func(r *recorders) {
510-
expectUseExistingDefaultPort(r.network)
511-
expectDefaultImageAndFlavor(r.compute, r.image)
512-
513-
expectCreateServer(r.compute, getDefaultServerMap(), false)
514-
expectServerPoll(r.compute, []string{"BUILDING", "ACTIVE"})
515-
},
516-
wantErr: false,
517-
},
518-
{
519-
name: "Server errors during creation",
520-
getInstanceSpec: getDefaultInstanceSpec,
521-
expect: func(r *recorders) {
522-
expectUseExistingDefaultPort(r.network)
523-
expectDefaultImageAndFlavor(r.compute, r.image)
524-
525-
expectCreateServer(r.compute, getDefaultServerMap(), false)
526-
expectServerPoll(r.compute, []string{"BUILDING", "ERROR"})
527-
528-
// Don't delete ports because the server is created: DeleteInstance will do it
529-
},
530-
wantErr: true,
531-
},
532494
{
533495
name: "Boot from volume success",
534496
getInstanceSpec: func() *InstanceSpec {
@@ -567,7 +529,6 @@ func TestService_ReconcileInstance(t *testing.T) {
567529
},
568530
}
569531
expectCreateServer(r.compute, createMap, false)
570-
expectServerPollSuccess(r.compute)
571532

572533
// Don't delete ports because the server is created: DeleteInstance will do it
573534
},
@@ -614,7 +575,6 @@ func TestService_ReconcileInstance(t *testing.T) {
614575
},
615576
}
616577
expectCreateServer(r.compute, createMap, false)
617-
expectServerPollSuccess(r.compute)
618578

619579
// Don't delete ports because the server is created: DeleteInstance will do it
620580
},
@@ -734,7 +694,6 @@ func TestService_ReconcileInstance(t *testing.T) {
734694
},
735695
}
736696
expectCreateServer(r.compute, createMap, false)
737-
expectServerPollSuccess(r.compute)
738697

739698
// Don't delete ports because the server is created: DeleteInstance will do it
740699
},
@@ -809,7 +768,6 @@ func TestService_ReconcileInstance(t *testing.T) {
809768
},
810769
}
811770
expectCreateServer(r.compute, createMap, false)
812-
expectServerPollSuccess(r.compute)
813771

814772
// Don't delete ports because the server is created: DeleteInstance will do it
815773
},
@@ -870,7 +828,6 @@ func TestService_ReconcileInstance(t *testing.T) {
870828
},
871829
}
872830
expectCreateServer(r.compute, createMap, false)
873-
expectServerPollSuccess(r.compute)
874831

875832
// Don't delete ports because the server is created: DeleteInstance will do it
876833
},

0 commit comments

Comments
 (0)