Skip to content

Commit 1347c00

Browse files
committed
Set failure only on instance error when no nodeRef
Setting a failure message is permanent and requires user action to get rid of. We cannot know if the error is permanent though. The idea with this commit is that if the machine has a nodeRef, we know that the instance was working at some point, so the error could well be temporary. Therefore we do not set failure in this case. On the other hand, if the machine does not have a nodeRef, we set failure since it is more likely that there is some configuration error that will not go away. Remove dead code and improve logging slightly in similar cases.
1 parent 01b5263 commit 1347c00

File tree

2 files changed

+11
-11
lines changed

2 files changed

+11
-11
lines changed

api/v1alpha7/conditions_consts.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,8 @@ const (
4040
InstanceNotReadyReason = "InstanceNotReady"
4141
// InstanceDeleteFailedReason used when deleting the instance failed.
4242
InstanceDeleteFailedReason = "InstanceDeleteFailed"
43+
// OpenstackErrorReason used when there is an error communicating with OpenStack.
44+
OpenStackErrorReason = "OpenStackError"
4345
)
4446

4547
const (

controllers/openstackmachine_controller.go

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -334,14 +334,6 @@ func (r *OpenStackMachineReconciler) reconcileNormal(ctx context.Context, scope
334334
return ctrl.Result{}, err
335335
}
336336

337-
// Set an error message if we couldn't find the instance.
338-
if instanceStatus == nil {
339-
err = errors.New("OpenStack instance not found")
340-
openStackMachine.SetFailure(capierrors.UpdateMachineError, err)
341-
conditions.MarkFalse(openStackMachine, infrav1.InstanceReadyCondition, infrav1.InstanceNotFoundReason, clusterv1.ConditionSeverityError, "")
342-
return ctrl.Result{}, nil
343-
}
344-
345337
// TODO(sbueringer) From CAPA: TODO(ncdc): move this validation logic into a validating webhook (for us: create validation logic in webhook)
346338

347339
openStackMachine.Spec.ProviderID = pointer.String(fmt.Sprintf("openstack:///%s", instanceStatus.ID()))
@@ -364,10 +356,14 @@ func (r *OpenStackMachineReconciler) reconcileNormal(ctx context.Context, scope
364356
conditions.MarkTrue(openStackMachine, infrav1.InstanceReadyCondition)
365357
openStackMachine.Status.Ready = true
366358
case infrav1.InstanceStateError:
367-
// Error is unexpected, thus we report error and never retry
359+
// If the machine has a NodeRef then it must have been working at some point,
360+
// so the error could be something temporary.
361+
// If not, it is more likely a configuration error so we set failure and never retry.
368362
scope.Logger().Info("Machine instance state is ERROR", "id", instanceStatus.ID())
369-
err = fmt.Errorf("instance state %q is unexpected", instanceStatus.State())
370-
openStackMachine.SetFailure(capierrors.UpdateMachineError, err)
363+
if machine.Status.NodeRef == nil {
364+
err = fmt.Errorf("instance state %q is unexpected", instanceStatus.State())
365+
openStackMachine.SetFailure(capierrors.UpdateMachineError, err)
366+
}
371367
conditions.MarkFalse(openStackMachine, infrav1.InstanceReadyCondition, infrav1.InstanceStateErrorReason, clusterv1.ConditionSeverityError, "")
372368
return ctrl.Result{}, nil
373369
case infrav1.InstanceStateDeleted:
@@ -429,6 +425,8 @@ func (r *OpenStackMachineReconciler) reconcileNormal(ctx context.Context, scope
429425
func (r *OpenStackMachineReconciler) getOrCreate(logger logr.Logger, cluster *clusterv1.Cluster, openStackCluster *infrav1.OpenStackCluster, machine *clusterv1.Machine, openStackMachine *infrav1.OpenStackMachine, computeService *compute.Service, userData string) (*compute.InstanceStatus, error) {
430426
instanceStatus, err := computeService.GetInstanceStatusByName(openStackMachine, openStackMachine.Name)
431427
if err != nil {
428+
logger.Info("Unable to get OpenStack instance", "name", openStackMachine.Name)
429+
conditions.MarkFalse(openStackMachine, infrav1.InstanceReadyCondition, infrav1.OpenStackErrorReason, clusterv1.ConditionSeverityError, err.Error())
432430
return nil, err
433431
}
434432

0 commit comments

Comments
 (0)