
Commit c940ccd

Use GET instead of LIST to retrieve servers
Under certain circumstances, when there are non-responsive cells, Nova may return a partial result instead of an error (this is controlled by the list_records_by_skipping_down_cells parameter, which is true by default, so no error is thrown). When it runs into this, the CAPO controller tries to create a new server for the machine, which results in deleting the ports of the existing one. To avoid this, use GET with the server UUID stored in the machine spec instead of LIST, and store the server ID in the machine spec as soon as the server is created. For that purpose, the server polling that used to take place in CreateMachine has to be moved to the main reconcile loop.
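
The shape of the change, condensed from the reconcileBastion diff below into a short Go fragment (identifiers come from the diff itself; error handling and surrounding plumbing are trimmed, so treat it as an illustration of the flow rather than the exact implementation):

	// Look the server up by the ID recorded in status instead of listing by name,
	// so a partial LIST result caused by a down cell can no longer hide an existing server.
	var instanceStatus *compute.InstanceStatus
	if openStackCluster.Status.Bastion != nil && openStackCluster.Status.Bastion.ID != "" {
		if instanceStatus, err = computeService.GetInstanceStatus(openStackCluster.Status.Bastion.ID); err != nil {
			return reconcile.Result{}, err
		}
	}
	if instanceStatus == nil {
		// Nothing recorded yet: create the server and store its ID immediately.
		instanceStatus, err = computeService.CreateInstance(openStackCluster, openStackCluster, instanceSpec, cluster.Name)
		if err != nil {
			return reconcile.Result{}, fmt.Errorf("failed to create bastion: %w", err)
		}
		openStackCluster.Status.Bastion = &infrav1.BastionStatus{ID: instanceStatus.ID()}
	}
	if instanceStatus.State() == infrav1.InstanceStateBuilding {
		// The reconcile loop now owns the wait that instance creation used to poll for.
		return ctrl.Result{RequeueAfter: waitForBuildingInstanceToReconcile}, nil
	}

The machine controller applies the same pattern through openStackMachine.Spec.InstanceID in getOrCreate.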
1 parent 821a1a2 commit c940ccd

File tree: 6 files changed (+80 / -102 lines)


controllers/openstackcluster_controller.go

Lines changed: 44 additions & 25 deletions
@@ -206,6 +206,9 @@ func contains(arr []string, target string) bool {
 }
 
 func deleteBastion(scope scope.Scope, cluster *clusterv1.Cluster, openStackCluster *infrav1.OpenStackCluster) error {
+	if openStackCluster.Status.Bastion == nil || openStackCluster.Status.Bastion.ID == "" {
+		return nil
+	}
 	computeService, err := compute.NewService(scope)
 	if err != nil {
 		return err
@@ -215,8 +218,7 @@ func deleteBastion(scope scope.Scope, cluster *clusterv1.Cluster, openStackClust
 		return err
 	}
 
-	instanceName := fmt.Sprintf("%s-bastion", cluster.Name)
-	instanceStatus, err := computeService.GetInstanceStatusByName(openStackCluster, instanceName)
+	instanceStatus, err := computeService.GetInstanceStatus(openStackCluster.Status.Bastion.ID)
 	if err != nil {
 		return err
 	}
@@ -277,8 +279,8 @@ func reconcileNormal(scope scope.Scope, cluster *clusterv1.Cluster, openStackClu
 		return reconcile.Result{}, err
 	}
 
-	if err = reconcileBastion(scope, cluster, openStackCluster); err != nil {
-		return reconcile.Result{}, err
+	if result, err := reconcileBastion(scope, cluster, openStackCluster); err != nil {
+		return result, err
 	}
 
 	availabilityZones, err := computeService.GetAvailabilityZones()
@@ -308,82 +310,99 @@ func reconcileNormal(scope scope.Scope, cluster *clusterv1.Cluster, openStackClu
 		return reconcile.Result{}, nil
 	}
 
-func reconcileBastion(scope scope.Scope, cluster *clusterv1.Cluster, openStackCluster *infrav1.OpenStackCluster) error {
+func reconcileBastion(scope scope.Scope, cluster *clusterv1.Cluster, openStackCluster *infrav1.OpenStackCluster) (ctrl.Result, error) {
 	scope.Logger().Info("Reconciling Bastion")
 
 	if openStackCluster.Spec.Bastion == nil || !openStackCluster.Spec.Bastion.Enabled {
-		return deleteBastion(scope, cluster, openStackCluster)
+		return reconcile.Result{}, deleteBastion(scope, cluster, openStackCluster)
 	}
 
 	computeService, err := compute.NewService(scope)
 	if err != nil {
-		return err
+		return reconcile.Result{}, err
 	}
 
 	instanceSpec := bastionToInstanceSpec(openStackCluster, cluster.Name)
 	bastionHash, err := compute.HashInstanceSpec(instanceSpec)
 	if err != nil {
-		return fmt.Errorf("failed computing bastion hash from instance spec: %w", err)
+		return reconcile.Result{}, fmt.Errorf("failed computing bastion hash from instance spec: %w", err)
 	}
 
-	instanceStatus, err := computeService.GetInstanceStatusByName(openStackCluster, fmt.Sprintf("%s-bastion", cluster.Name))
-	if err != nil {
-		return err
+	var instanceStatus *compute.InstanceStatus
+	if openStackCluster.Status.Bastion != nil && openStackCluster.Status.Bastion.ID != "" {
+		if instanceStatus, err = computeService.GetInstanceStatus(openStackCluster.Status.Bastion.ID); err != nil {
+			return reconcile.Result{}, err
+		}
 	}
-	if instanceStatus != nil {
+	if instanceStatus == nil {
+		instanceStatus, err = computeService.CreateInstance(openStackCluster, openStackCluster, instanceSpec, cluster.Name)
+		if err != nil {
+			return reconcile.Result{}, fmt.Errorf("failed to create bastion: %w", err)
+		}
+		openStackCluster.Status.Bastion = &infrav1.BastionStatus{
+			ID: instanceStatus.ID(),
+		}
+	} else {
 		if !bastionHashHasChanged(bastionHash, openStackCluster.ObjectMeta.Annotations) {
 			bastion, err := instanceStatus.BastionStatus(openStackCluster)
 			if err != nil {
-				return err
+				return reconcile.Result{}, err
 			}
 			// Add the current hash if no annotation is set.
 			if _, ok := openStackCluster.ObjectMeta.Annotations[BastionInstanceHashAnnotation]; !ok {
 				annotations.AddAnnotations(openStackCluster, map[string]string{BastionInstanceHashAnnotation: bastionHash})
 			}
 			openStackCluster.Status.Bastion = bastion
-			return nil
+			return ctrl.Result{}, nil
 		}
 
 		if err := deleteBastion(scope, cluster, openStackCluster); err != nil {
-			return err
+			return ctrl.Result{}, err
 		}
 	}
-
-	instanceStatus, err = computeService.CreateInstance(openStackCluster, openStackCluster, instanceSpec, cluster.Name)
-	if err != nil {
-		return fmt.Errorf("failed to reconcile bastion: %w", err)
+	// Make sure that bastion instance has a valid state
+	switch instanceStatus.State() {
+	case infrav1.InstanceStateError:
+		return ctrl.Result{}, fmt.Errorf("failed to reconcile bastion, instance state is ERROR")
+	case infrav1.InstanceStateBuilding:
+		// Requeue until bastion becomes active
+		return ctrl.Result{RequeueAfter: waitForBuildingInstanceToReconcile}, nil
+	case infrav1.InstanceStateDeleted:
+		// This should normally be handled by deleteBastion
+		openStackCluster.Status.Bastion = nil
+		return ctrl.Result{}, nil
 	}
 
 	networkingService, err := networking.NewService(scope)
 	if err != nil {
-		return err
+		return ctrl.Result{}, err
 	}
 	clusterName := fmt.Sprintf("%s-%s", cluster.Namespace, cluster.Name)
 	fp, err := networkingService.GetOrCreateFloatingIP(openStackCluster, openStackCluster, clusterName, openStackCluster.Spec.Bastion.FloatingIP)
 	if err != nil {
 		handleUpdateOSCError(openStackCluster, fmt.Errorf("failed to get or create floating IP for bastion: %w", err))
-		return fmt.Errorf("failed to get or create floating IP for bastion: %w", err)
+		return ctrl.Result{}, fmt.Errorf("failed to get or create floating IP for bastion: %w", err)
 	}
 	port, err := computeService.GetManagementPort(openStackCluster, instanceStatus)
 	if err != nil {
 		err = fmt.Errorf("getting management port for bastion: %w", err)
 		handleUpdateOSCError(openStackCluster, err)
-		return err
+		return ctrl.Result{}, err
 	}
 	err = networkingService.AssociateFloatingIP(openStackCluster, fp, port.ID)
 	if err != nil {
 		handleUpdateOSCError(openStackCluster, fmt.Errorf("failed to associate floating IP with bastion: %w", err))
-		return fmt.Errorf("failed to associate floating IP with bastion: %w", err)
+		return ctrl.Result{}, fmt.Errorf("failed to associate floating IP with bastion: %w", err)
 	}
 
 	bastion, err := instanceStatus.BastionStatus(openStackCluster)
 	if err != nil {
-		return err
+		return ctrl.Result{}, err
 	}
 	bastion.FloatingIP = fp.FloatingIP
 	openStackCluster.Status.Bastion = bastion
 	annotations.AddAnnotations(openStackCluster, map[string]string{BastionInstanceHashAnnotation: bastionHash})
-	return nil
+	return ctrl.Result{}, nil
 }
 
 func bastionToInstanceSpec(openStackCluster *infrav1.OpenStackCluster, clusterName string) *compute.InstanceSpec {

controllers/openstackcluster_controller_test.go

Lines changed: 10 additions & 5 deletions
@@ -22,7 +22,7 @@ import (
 
 	"github.com/go-logr/logr"
 	"github.com/golang/mock/gomock"
-	"github.com/gophercloud/gophercloud/openstack/compute/v2/servers"
+	"github.com/gophercloud/gophercloud"
 	"github.com/gophercloud/gophercloud/openstack/networking/v2/extensions/security/groups"
 	"github.com/gophercloud/gophercloud/openstack/networking/v2/networks"
 	"github.com/gophercloud/gophercloud/openstack/networking/v2/subnets"
@@ -38,7 +38,6 @@ import (
 	"sigs.k8s.io/controller-runtime/pkg/reconcile"
 
 	infrav1 "sigs.k8s.io/cluster-api-provider-openstack/api/v1alpha7"
-	"sigs.k8s.io/cluster-api-provider-openstack/pkg/clients"
 	"sigs.k8s.io/cluster-api-provider-openstack/pkg/scope"
 )
 
@@ -195,18 +194,24 @@ var _ = Describe("OpenStackCluster controller", func() {
 		Expect(err).To(BeNil())
 		err = k8sClient.Create(ctx, capiCluster)
 		Expect(err).To(BeNil())
+		testCluster.Status = infrav1.OpenStackClusterStatus{
+			Bastion: &infrav1.BastionStatus{
+				ID: "bastion-uuid",
+			},
+		}
+		err = k8sClient.Status().Update(ctx, testCluster)
+		Expect(err).To(BeNil())
 		scope, err := mockScopeFactory.NewClientScopeFromCluster(ctx, k8sClient, testCluster, nil, logr.Discard())
 		Expect(err).To(BeNil())
 
 		computeClientRecorder := mockScopeFactory.ComputeClient.EXPECT()
-		computeClientRecorder.ListServers(servers.ListOpts{
-			Name: "^capi-cluster-bastion$",
-		}).Return([]clients.ServerExt{}, nil)
+		computeClientRecorder.GetServer("bastion-uuid").Return(nil, gophercloud.ErrResourceNotFound{})
 
 		networkClientRecorder := mockScopeFactory.NetworkClient.EXPECT()
 		networkClientRecorder.ListSecGroup(gomock.Any()).Return([]groups.SecGroup{}, nil)
 
 		err = deleteBastion(scope, capiCluster, testCluster)
+		Expect(testCluster.Status.Bastion).To(BeNil())
 		Expect(err).To(BeNil())
 	})
 	It("should implicitly filter cluster subnets by cluster network", func() {

controllers/openstackmachine_controller.go

Lines changed: 17 additions & 6 deletions
@@ -67,6 +67,7 @@ type OpenStackMachineReconciler struct {
 const (
 	waitForClusterInfrastructureReadyDuration = 15 * time.Second
 	waitForInstanceBecomeActiveToReconcile    = 60 * time.Second
+	waitForBuildingInstanceToReconcile        = 10 * time.Second
 )
 
 // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=openstackmachines,verbs=get;list;watch;create;update;patch;delete
@@ -246,7 +247,10 @@ func (r *OpenStackMachineReconciler) reconcileDelete(scope scope.Scope, cluster
 		}
 	}
 
-	instanceStatus, err := computeService.GetInstanceStatusByName(openStackMachine, openStackMachine.Name)
+	var instanceStatus *compute.InstanceStatus
+	if openStackMachine.Spec.InstanceID != nil {
+		instanceStatus, err = computeService.GetInstanceStatusByName(openStackMachine, openStackMachine.Name)
+	}
 	if err != nil {
 		return ctrl.Result{}, err
 	}
@@ -379,6 +383,9 @@ func (r *OpenStackMachineReconciler) reconcileNormal(ctx context.Context, scope
 		scope.Logger().Info("Machine instance state is DELETED, no actions")
 		conditions.MarkFalse(openStackMachine, infrav1.InstanceReadyCondition, infrav1.InstanceDeletedReason, clusterv1.ConditionSeverityError, "")
 		return ctrl.Result{}, nil
+	case infrav1.InstanceStateBuilding:
+		scope.Logger().Info("Waiting for instance to become ACTIVE", "id", instanceStatus.ID(), "status", instanceStatus.State())
+		return ctrl.Result{RequeueAfter: waitForBuildingInstanceToReconcile}, nil
 	default:
 		// The other state is normal (for example, migrating, shutoff) but we don't want to proceed until it's ACTIVE
 		// due to potential conflict or unexpected actions
@@ -431,11 +438,15 @@ func (r *OpenStackMachineReconciler) reconcileNormal(ctx context.Context, scope
 	}
 
 func (r *OpenStackMachineReconciler) getOrCreate(logger logr.Logger, cluster *clusterv1.Cluster, openStackCluster *infrav1.OpenStackCluster, machine *clusterv1.Machine, openStackMachine *infrav1.OpenStackMachine, computeService *compute.Service, userData string) (*compute.InstanceStatus, error) {
-	instanceStatus, err := computeService.GetInstanceStatusByName(openStackMachine, openStackMachine.Name)
-	if err != nil {
-		logger.Info("Unable to get OpenStack instance", "name", openStackMachine.Name)
-		conditions.MarkFalse(openStackMachine, infrav1.InstanceReadyCondition, infrav1.OpenStackErrorReason, clusterv1.ConditionSeverityError, err.Error())
-		return nil, err
+	var instanceStatus *compute.InstanceStatus
+	var err error
+	if openStackMachine.Spec.InstanceID != nil {
+		instanceStatus, err = computeService.GetInstanceStatus(*openStackMachine.Spec.InstanceID)
+		if err != nil {
+			logger.Info("Unable to get OpenStack instance", "name", openStackMachine.Name)
+			conditions.MarkFalse(openStackMachine, infrav1.InstanceReadyCondition, infrav1.OpenStackErrorReason, clusterv1.ConditionSeverityError, err.Error())
+			return nil, err
+		}
 	}
 
 	if instanceStatus == nil {

controllers/suite_test.go

Lines changed: 7 additions & 2 deletions
@@ -32,6 +32,7 @@ import (
 	"k8s.io/apimachinery/pkg/types"
 	"k8s.io/client-go/kubernetes/scheme"
 	"k8s.io/client-go/rest"
+	"k8s.io/utils/pointer"
 	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
 	"sigs.k8s.io/cluster-api/test/framework"
 	"sigs.k8s.io/controller-runtime/pkg/client"
@@ -148,9 +149,13 @@ var _ = Describe("When calling getOrCreate", func() {
 		cluster := &clusterv1.Cluster{}
 		openStackCluster := &infrav1.OpenStackCluster{}
 		machine := &clusterv1.Machine{}
-		openStackMachine := &infrav1.OpenStackMachine{}
+		openStackMachine := &infrav1.OpenStackMachine{
+			Spec: infrav1.OpenStackMachineSpec{
+				InstanceID: pointer.String("machine-uuid"),
+			},
+		}
 
-		mockScopeFactory.ComputeClient.EXPECT().ListServers(gomock.Any()).Return(nil, errors.New("Test error when listing servers"))
+		mockScopeFactory.ComputeClient.EXPECT().GetServer(gomock.Any()).Return(nil, errors.New("Test error when getting server"))
 		instanceStatus, err := reconsiler.getOrCreate(logger, cluster, openStackCluster, machine, openStackMachine, computeService, "")
 		Expect(err).To(HaveOccurred())
 		Expect(instanceStatus).To(BeNil())

pkg/cloud/services/compute/instance.go

Lines changed: 2 additions & 21 deletions
@@ -333,27 +333,8 @@ func (s *Service) createInstanceImpl(eventObject runtime.Object, openStackCluste
 		return nil, fmt.Errorf("error creating Openstack instance: %v", err)
 	}
 
-	var createdInstance *InstanceStatus
-	err = wait.PollUntilContextTimeout(context.TODO(), retryInterval, instanceCreateTimeout, true, func(_ context.Context) (bool, error) {
-		createdInstance, err = s.GetInstanceStatus(server.ID)
-		if err != nil {
-			if capoerrors.IsRetryable(err) {
-				return false, nil
-			}
-			return false, err
-		}
-		if createdInstance.State() == infrav1.InstanceStateError {
-			return false, fmt.Errorf("error creating OpenStack instance %s, status changed to error", createdInstance.ID())
-		}
-		return createdInstance.State() == infrav1.InstanceStateActive, nil
-	})
-	if err != nil {
-		record.Warnf(eventObject, "FailedCreateServer", "Failed to create server %s: %v", createdInstance.Name(), err)
-		return nil, err
-	}
-
-	record.Eventf(eventObject, "SuccessfulCreateServer", "Created server %s with id %s", createdInstance.Name(), createdInstance.ID())
-	return createdInstance, nil
+	record.Eventf(eventObject, "SuccessfulCreateServer", "Created server %s with id %s", server.Name, server.ID)
+	return &InstanceStatus{server, s.scope.Logger()}, nil
 }
 
 func volumeName(instanceName string, nameSuffix string) string {
