Skip to content

Commit 6376101

Browse files
committed
Use get instead of list to retrieve servers
Under certain circumstances, when they are non-responsive cells, nova may return a partial result instead of an error. (this is controlled by list_records_by_skipping_down_cells parameter that is true by default - no error will be thrown) When it faces this issue, capo controller will try to create a new server for the machine, resulting in deleting ports of existing one. In order ovoid this issue, use GET instead of LIST with server uuid stored in machine spec when it is created, and store server ID in machine spec immediately
1 parent 821a1a2 commit 6376101

File tree

3 files changed

+26
-9
lines changed

3 files changed

+26
-9
lines changed

controllers/openstackmachine_controller.go

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -246,7 +246,10 @@ func (r *OpenStackMachineReconciler) reconcileDelete(scope scope.Scope, cluster
246246
}
247247
}
248248

249-
instanceStatus, err := computeService.GetInstanceStatusByName(openStackMachine, openStackMachine.Name)
249+
var instanceStatus *compute.InstanceStatus
250+
if openStackMachine.Spec.InstanceID != nil {
251+
instanceStatus, err = computeService.GetInstanceStatusByName(openStackMachine, openStackMachine.Name)
252+
}
250253
if err != nil {
251254
return ctrl.Result{}, err
252255
}
@@ -431,18 +434,27 @@ func (r *OpenStackMachineReconciler) reconcileNormal(ctx context.Context, scope
431434
}
432435

433436
func (r *OpenStackMachineReconciler) getOrCreate(logger logr.Logger, cluster *clusterv1.Cluster, openStackCluster *infrav1.OpenStackCluster, machine *clusterv1.Machine, openStackMachine *infrav1.OpenStackMachine, computeService *compute.Service, userData string) (*compute.InstanceStatus, error) {
434-
instanceStatus, err := computeService.GetInstanceStatusByName(openStackMachine, openStackMachine.Name)
435-
if err != nil {
436-
logger.Info("Unable to get OpenStack instance", "name", openStackMachine.Name)
437-
conditions.MarkFalse(openStackMachine, infrav1.InstanceReadyCondition, infrav1.OpenStackErrorReason, clusterv1.ConditionSeverityError, err.Error())
438-
return nil, err
437+
438+
var instanceStatus *compute.InstanceStatus
439+
var err error
440+
if openStackMachine.Spec.InstanceID != nil {
441+
instanceStatus, err = computeService.GetInstanceStatus(*openStackMachine.Spec.InstanceID)
442+
if err != nil {
443+
logger.Info("Unable to get OpenStack instance", "name", openStackMachine.Name)
444+
conditions.MarkFalse(openStackMachine, infrav1.InstanceReadyCondition, infrav1.OpenStackErrorReason, clusterv1.ConditionSeverityError, err.Error())
445+
return nil, err
446+
}
439447
}
440448

441449
if instanceStatus == nil {
442450
instanceSpec := machineToInstanceSpec(openStackCluster, machine, openStackMachine, userData)
443451
logger.Info("Machine does not exist, creating Machine", "name", openStackMachine.Name)
444452
instanceStatus, err = computeService.CreateInstance(openStackMachine, openStackCluster, instanceSpec, cluster.Name)
445453
if err != nil {
454+
if instanceStatus != nil {
455+
// Instance was created but in error, store instanceID for subsequent calls to this function
456+
openStackMachine.Spec.InstanceID = pointer.String(instanceStatus.ID())
457+
}
446458
conditions.MarkFalse(openStackMachine, infrav1.InstanceReadyCondition, infrav1.InstanceCreateFailedReason, clusterv1.ConditionSeverityError, err.Error())
447459
return nil, fmt.Errorf("create OpenStack instance: %w", err)
448460
}

controllers/suite_test.go

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ import (
3232
"k8s.io/apimachinery/pkg/types"
3333
"k8s.io/client-go/kubernetes/scheme"
3434
"k8s.io/client-go/rest"
35+
"k8s.io/utils/pointer"
3536
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
3637
"sigs.k8s.io/cluster-api/test/framework"
3738
"sigs.k8s.io/controller-runtime/pkg/client"
@@ -148,9 +149,13 @@ var _ = Describe("When calling getOrCreate", func() {
148149
cluster := &clusterv1.Cluster{}
149150
openStackCluster := &infrav1.OpenStackCluster{}
150151
machine := &clusterv1.Machine{}
151-
openStackMachine := &infrav1.OpenStackMachine{}
152+
openStackMachine := &infrav1.OpenStackMachine{
153+
Spec: infrav1.OpenStackMachineSpec{
154+
InstanceID: pointer.String("machine-uuid"),
155+
},
156+
}
152157

153-
mockScopeFactory.ComputeClient.EXPECT().ListServers(gomock.Any()).Return(nil, errors.New("Test error when listing servers"))
158+
mockScopeFactory.ComputeClient.EXPECT().GetServer(gomock.Any()).Return(nil, errors.New("Test error when getting server"))
154159
instanceStatus, err := reconsiler.getOrCreate(logger, cluster, openStackCluster, machine, openStackMachine, computeService, "")
155160
Expect(err).To(HaveOccurred())
156161
Expect(instanceStatus).To(BeNil())

pkg/cloud/services/compute/instance.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -349,7 +349,7 @@ func (s *Service) createInstanceImpl(eventObject runtime.Object, openStackCluste
349349
})
350350
if err != nil {
351351
record.Warnf(eventObject, "FailedCreateServer", "Failed to create server %s: %v", createdInstance.Name(), err)
352-
return nil, err
352+
return createdInstance, err
353353
}
354354

355355
record.Eventf(eventObject, "SuccessfulCreateServer", "Created server %s with id %s", createdInstance.Name(), createdInstance.ID())

0 commit comments

Comments
 (0)